datamarket 0.9.50__py3-none-any.whl → 0.9.51__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -137,8 +137,13 @@ def normalize(
137
137
  # Parameter mapping
138
138
  if isinstance(mode, str):
139
139
  mode = NormalizationMode[mode.upper()]
140
+ if not isinstance(mode, NormalizationMode):
141
+ raise TypeError("mode must be NormalizationMode or str")
142
+
140
143
  if isinstance(naming, str):
141
144
  naming = NamingConvention[naming.upper()]
145
+ if not isinstance(naming, NamingConvention):
146
+ raise TypeError("naming must be NamingConvention or str")
142
147
 
143
148
  _allowed_symbols_set: Set[str] = set(allowed_symbols) if allowed_symbols else set()
144
149
 
@@ -148,7 +153,11 @@ def normalize(
148
153
  elif not isinstance(s, str):
149
154
  return str(s)
150
155
  else:
151
- text = prettify(strip_html(str(s), True))
156
+ raw_text = str(s)
157
+ if naming is NamingConvention.NONE:
158
+ text = raw_text
159
+ else:
160
+ text = prettify(strip_html(raw_text, True))
152
161
 
153
162
  if mode is NormalizationMode.NONE:
154
163
  normalized = text
@@ -170,9 +179,7 @@ def normalize(
170
179
 
171
180
  for c in intermediate_text:
172
181
  cat = unicodedata.category(c)
173
- if c in _allowed_symbols_set: # Allowed symbols are part of tokens
174
- current_token_chars.append(c)
175
- elif c.isalnum():
182
+ if c in _allowed_symbols_set or c.isalnum(): # Allowed symbols are part of tokens
176
183
  current_token_chars.append(c)
177
184
  elif mode is NormalizationMode.FULL and cat.startswith("S"):
178
185
  # Transliterate S* category symbols not in allowed_symbols
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.50
3
+ Version: 0.9.51
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -22,11 +22,11 @@ datamarket/utils/playwright/sync_api.py,sha256=Tw_-KLB3vipFuEQwcX8iCbj7giCzcwXB-
22
22
  datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
23
23
  datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
24
24
  datamarket/utils/strings/__init__.py,sha256=b6TYOT9v7y9ID-lDyZk4E8BH2uIPbsF2ZSLGjCQ1MCQ,43
25
- datamarket/utils/strings/normalization.py,sha256=QLZ-THzjGOK9eWPPR1PrsffwQkSOx_Mgha4IYaJPrR0,8713
25
+ datamarket/utils/strings/normalization.py,sha256=rj0wfJSjqcCRp-ruHqc5pylO3_TOmY5_V1lKzkyWoAA,8991
26
26
  datamarket/utils/strings/obfuscation.py,sha256=8gMepfjPq0N4_IpKR6i2dy_9VJugQ3qJiRiRvKavB3s,5246
27
27
  datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
28
28
  datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
29
- datamarket-0.9.50.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
30
- datamarket-0.9.50.dist-info/METADATA,sha256=1wfoqKSzeRQXyzLGEwoG2eqAcSi5UqOJa9h7S-Bj-Ks,7326
31
- datamarket-0.9.50.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
- datamarket-0.9.50.dist-info/RECORD,,
29
+ datamarket-0.9.51.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
30
+ datamarket-0.9.51.dist-info/METADATA,sha256=UPV2cxDlddvKHiKY5tSt-dDkA7reLhyIX1KAIfcjxag,7326
31
+ datamarket-0.9.51.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
32
+ datamarket-0.9.51.dist-info/RECORD,,