datamarket 0.7.89__py3-none-any.whl → 0.7.125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ ########################################################################################################################
2
+ # IMPORTS
3
+
4
+ import re
5
+ from typing import Literal
6
+
7
+ from ...params.nominatim import COUNTRY_PARSING_RULES
8
+
9
+ ########################################################################################################################
10
+ # FUNCTIONS
11
+
12
+
13
+ def parse_phone_number(number: str, country_code: Literal["es", "pt"]) -> str | None:
14
+ """Clean and standardize phone number from a certain country_code
15
+
16
+ Args:
17
+ number (str): phone number
18
+ country_code (Literal["es", "pt"]): country code of the phone number to parse
19
+
20
+ Raises:
21
+ ValueError: when parsing is not supported for a certain country
22
+
23
+ Returns:
24
+ str | None: standardized phone number
25
+ """
26
+ clean_number = re.sub(r"\D", "", number)
27
+ if country_code in {"es", "pt"}:
28
+ # Get the validation regex from params
29
+ pattern = COUNTRY_PARSING_RULES[country_code]["phone_validate_pattern"]
30
+
31
+ # Validate and extract in one step
32
+ if len(clean_number) >= 9: # Check if the cleaned number has at least 9 digits
33
+ match = pattern.match(clean_number)
34
+
35
+ # Return the captured group (the 9-digit number)
36
+ return match.group(0)[-9:] if match else None
37
+ else:
38
+ return None # Or handle the case where the number is too short
39
+ else:
40
+ raise ValueError(f"Country code ({country_code}) is not currently supported")
datamarket/utils/typer.py CHANGED
@@ -9,6 +9,7 @@ from typing_extensions import Annotated
9
9
  ########################################################################################################################
10
10
  # TYPES
11
11
 
12
+
12
13
  class Dict(dict):
13
14
  def __init__(self, value: str):
14
15
  super().__init__(json.loads(value))
@@ -25,4 +26,4 @@ def parse_json_dict(value: str) -> Dict:
25
26
 
26
27
 
27
28
  DictArg = Annotated[Dict, typer.Argument(parser=parse_json_dict)]
28
- DictOpt = Annotated[Dict, typer.Option(parser=parse_json_dict)]
29
+ DictOpt = Annotated[Dict, typer.Option(parser=parse_json_dict)]
@@ -1,17 +1,17 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.7.89
3
+ Version: 0.7.125
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
- License-File: LICENSE
7
6
  Author: DataMarket
8
7
  Author-email: techsupport@datamarket.es
9
- Requires-Python: >=3.12,<3.13
8
+ Requires-Python: >=3.12,<4.0
10
9
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
11
10
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
12
11
  Classifier: Operating System :: OS Independent
13
12
  Classifier: Programming Language :: Python :: 3
14
13
  Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
15
  Provides-Extra: aws
16
16
  Provides-Extra: azure-storage-blob
17
17
  Provides-Extra: boto3
@@ -20,10 +20,10 @@ Provides-Extra: chompjs
20
20
  Provides-Extra: click
21
21
  Provides-Extra: clickhouse-driver
22
22
  Provides-Extra: datetime
23
+ Provides-Extra: ddgs
23
24
  Provides-Extra: demjson3
24
25
  Provides-Extra: dnspython
25
26
  Provides-Extra: drive
26
- Provides-Extra: duckduckgo-search
27
27
  Provides-Extra: fake-useragent
28
28
  Provides-Extra: geoalchemy2
29
29
  Provides-Extra: geopandas
@@ -45,6 +45,7 @@ Provides-Extra: pii
45
45
  Provides-Extra: pillow
46
46
  Provides-Extra: playwright
47
47
  Provides-Extra: playwright-stealth
48
+ Provides-Extra: plotly
48
49
  Provides-Extra: pyarrow
49
50
  Provides-Extra: pydrive2
50
51
  Provides-Extra: pymupdf
@@ -53,9 +54,7 @@ Provides-Extra: pyrate-limiter
53
54
  Provides-Extra: pysocks
54
55
  Provides-Extra: pyspark
55
56
  Provides-Extra: pytest
56
- Provides-Extra: rapidfuzz
57
57
  Provides-Extra: retry
58
- Provides-Extra: rnet
59
58
  Provides-Extra: shapely
60
59
  Provides-Extra: soda-core-mysql
61
60
  Provides-Extra: soda-core-postgres
@@ -65,6 +64,7 @@ Provides-Extra: undetected-chromedriver
65
64
  Provides-Extra: xmltodict
66
65
  Requires-Dist: SQLAlchemy (>=2.0.0,<3.0.0)
67
66
  Requires-Dist: azure-storage-blob (>=12.0.0,<13.0.0) ; extra == "azure-storage-blob"
67
+ Requires-Dist: babel (>=2.0.0,<3.0.0)
68
68
  Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
69
69
  Requires-Dist: boto3 (>=1.35.0,<1.36.0) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
70
70
  Requires-Dist: browserforge (>=1.2.0,<2.0.0) ; extra == "camoufox"
@@ -74,9 +74,9 @@ Requires-Dist: click (>=8.0.0,<9.0.0) ; extra == "click"
74
74
  Requires-Dist: clickhouse-driver (>=0.2.0,<0.3.0) ; extra == "clickhouse-driver" or extra == "peerdb"
75
75
  Requires-Dist: croniter (>=3.0.0,<4.0.0)
76
76
  Requires-Dist: datetime (>=5.0,<6.0) ; extra == "datetime"
77
+ Requires-Dist: ddgs (>=9.0.0,<10.0.0) ; extra == "ddgs"
77
78
  Requires-Dist: demjson3 (>=3.0.0,<4.0.0) ; extra == "demjson3"
78
79
  Requires-Dist: dnspython (>=2.0.0,<3.0.0) ; extra == "dnspython"
79
- Requires-Dist: duckduckgo-search (>=7.0.0,<8.0.0) ; extra == "duckduckgo-search"
80
80
  Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
81
81
  Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
82
82
  Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
@@ -102,6 +102,7 @@ Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
102
102
  Requires-Dist: pendulum (>=3.0.0,<4.0.0)
103
103
  Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
104
104
  Requires-Dist: playwright (==1.47.0) ; extra == "playwright" or extra == "camoufox"
105
+ Requires-Dist: plotly (>=6.0.0,<7.0.0) ; extra == "plotly"
105
106
  Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
106
107
  Requires-Dist: presidio-analyzer[phonenumbers] (>=2.0.0,<3.0.0) ; extra == "pii"
107
108
  Requires-Dist: presidio-anonymizer (>=2.0.0,<3.0.0) ; extra == "pii"
@@ -116,10 +117,10 @@ Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
116
117
  Requires-Dist: pyspark (>=3.0.0,<4.0.0) ; extra == "pyspark"
117
118
  Requires-Dist: pytest (>=8.0.0,<9.0.0) ; extra == "pytest"
118
119
  Requires-Dist: python-string-utils (>=1.0.0,<2.0.0)
119
- Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
120
+ Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0)
120
121
  Requires-Dist: requests (>=2.0.0,<3.0.0)
121
122
  Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
122
- Requires-Dist: rnet (>=2.0.0,<3.0.0) ; extra == "rnet"
123
+ Requires-Dist: rnet (>=3.0.0rc10,<4.0.0)
123
124
  Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
124
125
  Requires-Dist: soda-core-mysql-utf8-hotfix (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
125
126
  Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
@@ -0,0 +1,36 @@
1
+ datamarket/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ datamarket/exceptions/__init__.py,sha256=FHLh-Qp9XpM4LkAocppCf_llW2CWVVghGorkqxqt1wk,34
3
+ datamarket/exceptions/main.py,sha256=S5EksLt_pmmX5OY-_keB12K3r5R-lTBnqdJ9VBPy8D8,3674
4
+ datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ datamarket/interfaces/alchemy.py,sha256=2EZp7fn7-o8yL918dnqIYZ-gO7FUXGH8d8TzQFa7XRI,15769
6
+ datamarket/interfaces/aws.py,sha256=4HEN_VfQuEEvDnksRYlcMBUdKbgJXWBkLnymKpyRtrs,4781
7
+ datamarket/interfaces/azure.py,sha256=0pqd6LmQzRGjOUu85YKlPeQnlwsq0q5laNUw_iI3XPw,5180
8
+ datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjHk,4840
9
+ datamarket/interfaces/ftp.py,sha256=LH3Oz19k_xUNhzDXcrq5Ofb4c3uiph5pWUqpgiaDvHI,2671
10
+ datamarket/interfaces/nominatim.py,sha256=57hlW0w6XHBWEmyLyMn6eq1o_T5caYcLNBSNI1qLWCQ,16145
11
+ datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
12
+ datamarket/interfaces/proxy.py,sha256=fke9THv2h1jpr5cxJ25w0bYchuboErokQlSiq50FWVE,14632
13
+ datamarket/interfaces/tinybird.py,sha256=cNG-kAPTdQn2inlNX9LPf-VVdtnLud947ApLVO40Now,2594
14
+ datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ datamarket/params/nominatim.py,sha256=yWOBZ0CQ7YfsBbhpbOZZEgFQTTc6vATPOPDQ7EmWGBk,14648
16
+ datamarket/utils/__init__.py,sha256=FHLh-Qp9XpM4LkAocppCf_llW2CWVVghGorkqxqt1wk,34
17
+ datamarket/utils/airflow.py,sha256=Tc8vFB85NGJn0vgEkvT_yGMbn_NmW0OAJa9fy1qKocQ,804
18
+ datamarket/utils/alchemy.py,sha256=B-6cdMiEStzD4JKhi7Xpk7pVs7eUcdT_fHqpfm2ToNc,637
19
+ datamarket/utils/main.py,sha256=MDCR-EWKgWMXo2XmLR_K7YEp26vSTcuwuijzNcMt5EQ,7271
20
+ datamarket/utils/nominatim.py,sha256=HUJfR86lw68PzaLfhZOCIT5YlopDvRSbwEY2JCf0WyI,5704
21
+ datamarket/utils/playwright/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ datamarket/utils/playwright/async_api.py,sha256=Wl2LFHiOTZDg4Jic5nJDPSk1g_AWsY04XKvs3m_ZTrQ,10838
23
+ datamarket/utils/playwright/sync_api.py,sha256=JrVZkphQfCxOtl0oxQZd3W0LALdT7qVV6kwph7FfA94,10729
24
+ datamarket/utils/requests.py,sha256=-FErEhB5f4oQEVeSrQjquU7ulDwxiwx93lTnnKy3Ft0,24274
25
+ datamarket/utils/selenium.py,sha256=Fc2BJzTH7_xIqjBP9LbZODF69RSH4fF8LhD5WuGdlZ0,2457
26
+ datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
27
+ datamarket/utils/strings/__init__.py,sha256=b6TYOT9v7y9ID-lDyZk4E8BH2uIPbsF2ZSLGjCQ1MCQ,43
28
+ datamarket/utils/strings/normalization.py,sha256=tlZHq8h9AtcANkaJ2AOrR6UD5yKShn1cLldfFfFQgTA,8990
29
+ datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnmH3FPlgUUjM,5246
30
+ datamarket/utils/strings/standardization.py,sha256=j_NbT-O1XnxDvDhct8panfkrfAC8R5OX6XM5fYBZ4RU,1496
31
+ datamarket/utils/typer.py,sha256=geWuwMwGQjBQhxo27hX0vEAeRl1j1TS0u2oFVfpAs5I,816
32
+ datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
33
+ datamarket-0.7.125.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
34
+ datamarket-0.7.125.dist-info/METADATA,sha256=Ln7uyk5CVKifcSeXSFENsSfi8gMR03h8jpiRRK0aGA8,7397
35
+ datamarket-0.7.125.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
36
+ datamarket-0.7.125.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.2.1
2
+ Generator: poetry-core 2.1.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,33 +0,0 @@
1
- datamarket/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- datamarket/exceptions/__init__.py,sha256=-Vu-RZNKjW6fYCLqbUJTkKNuHeA8Yi_gyR50oZNaA_8,33
3
- datamarket/exceptions/main.py,sha256=MP5ql6M7DoMbBf-Dg_2ohcUFdWXgzv-dXHntPPit31s,453
4
- datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- datamarket/interfaces/alchemy.py,sha256=mQwjDqBpz1QHRV2JTCALvn5iK_ky69oE2Gw-EtRXsqQ,14664
6
- datamarket/interfaces/aws.py,sha256=KojyDcCR2r5HdMj2xUJanELBuElWlTKe0jjHPRxeryo,3983
7
- datamarket/interfaces/azure.py,sha256=PnPlo95skYiq63qYa4QDvEnVYi2JblPmMSfbTsmXhFs,4937
8
- datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjHk,4840
9
- datamarket/interfaces/ftp.py,sha256=K219-PP21EhQo1A1LkvRLahlrw2-pf4svBN0LogZaJE,2813
10
- datamarket/interfaces/nominatim.py,sha256=HLk0FcdfbOVCF_i71l-Hlb17swL0W1a3Gg2n5OLD0tM,15507
11
- datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
12
- datamarket/interfaces/proxy.py,sha256=Uu-dHvpQOLNBZPGHAanLXnKT1789ArcHfOw8exECt34,5398
13
- datamarket/interfaces/tinybird.py,sha256=AYrcRGNOCoCt7ojilkWa27POROee9sTCwZ61GGHEPeM,2698
14
- datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- datamarket/params/nominatim.py,sha256=fELSriTLSpotoT_k6Ft98fnD8EKHA8WopxPYPLSRfgs,1531
16
- datamarket/utils/__init__.py,sha256=FHLh-Qp9XpM4LkAocppCf_llW2CWVVghGorkqxqt1wk,34
17
- datamarket/utils/airflow.py,sha256=al0vc0YUikNu3Oy51VSn52I7pMU40akFBOl_UlHa2E4,795
18
- datamarket/utils/alchemy.py,sha256=SRq6kgh1aANXVShBPgAuglmNhZssPWwWEY503gKSia8,635
19
- datamarket/utils/main.py,sha256=WweHHt3Ti-tVXdmLnpNYGsYpyTaCx_o1mvnL7_NomVY,5450
20
- datamarket/utils/playwright/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- datamarket/utils/playwright/async_api.py,sha256=UbA2D4ScBtYeMfrRjly4RO-s8wXIub9c05J1eoOCpsQ,5782
22
- datamarket/utils/playwright/sync_api.py,sha256=Tw_-KLB3vipFuEQwcX8iCbj7giCzcwXB-bhl_ncR-2Q,5542
23
- datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
24
- datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
25
- datamarket/utils/strings/__init__.py,sha256=b6TYOT9v7y9ID-lDyZk4E8BH2uIPbsF2ZSLGjCQ1MCQ,43
26
- datamarket/utils/strings/normalization.py,sha256=rj0wfJSjqcCRp-ruHqc5pylO3_TOmY5_V1lKzkyWoAA,8991
27
- datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnmH3FPlgUUjM,5246
28
- datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
29
- datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
30
- datamarket-0.7.89.dist-info/METADATA,sha256=FvRyHNG0GSXDgIM0n-pI68O1CG6o9TTFuD-PUuq_4Xc,7370
31
- datamarket-0.7.89.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
32
- datamarket-0.7.89.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
- datamarket-0.7.89.dist-info/RECORD,,