datamarket 0.7.98__tar.gz → 0.7.99__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- {datamarket-0.7.98 → datamarket-0.7.99}/PKG-INFO +1 -1
- {datamarket-0.7.98 → datamarket-0.7.99}/pyproject.toml +1 -1
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/params/nominatim.py +1 -1
- datamarket-0.7.99/src/datamarket/utils/strings/standardization.py +38 -0
- datamarket-0.7.98/src/datamarket/utils/strings/standardization.py +0 -69
- {datamarket-0.7.98 → datamarket-0.7.99}/LICENSE +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/README.md +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/__init__.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/exceptions/__init__.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/exceptions/main.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/__init__.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/alchemy.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/aws.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/azure.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/drive.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/ftp.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/nominatim.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/peerdb.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/proxy.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/tinybird.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/params/__init__.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/__init__.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/airflow.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/alchemy.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/main.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/nominatim.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/playwright/__init__.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/playwright/async_api.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/playwright/sync_api.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/selenium.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/soda.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/strings/__init__.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/strings/normalization.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/strings/obfuscation.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/typer.py +0 -0
- {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/types.py +0 -0
|
@@ -412,7 +412,7 @@ COUNTRY_PARSING_RULES = {
|
|
|
412
412
|
|
|
413
413
|
"zip_search_pattern": re.compile(r"\b\d{5}\b"),
|
|
414
414
|
|
|
415
|
-
"phone_validate_pattern": re.compile(r"^(\+?34)?[
|
|
415
|
+
"phone_validate_pattern": re.compile(r"^(\+?34)?[67]\d{8}$")
|
|
416
416
|
},
|
|
417
417
|
"pt": {
|
|
418
418
|
"zip_validate_pattern": re.compile(r"^\d{4}[- ]{0,1}\d{3}$|^\d{4}$"),
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
########################################################################################################################
|
|
2
|
+
# IMPORTS
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from typing import Literal
|
|
6
|
+
from ...params.nominatim import COUNTRY_PARSING_RULES
|
|
7
|
+
|
|
8
|
+
########################################################################################################################
|
|
9
|
+
# FUNCTIONS
|
|
10
|
+
|
|
11
|
+
def parse_phone_number(number: str, country_code: Literal["es", "pt"]) -> str | None:
    """Reduce a raw phone number to its last nine validated digits.

    Every non-digit character is removed from ``number``. The remaining digits
    are checked against the ``phone_validate_pattern`` registered for
    ``country_code`` in ``COUNTRY_PARSING_RULES``.

    Args:
        number (str): phone number, possibly containing spaces, "+" or other separators
        country_code (Literal["es", "pt"]): country whose validation pattern is applied

    Raises:
        ValueError: when ``country_code`` is neither "es" nor "pt"

    Returns:
        str | None: the last nine digits of the matched text, or None when fewer
            than nine digits remain after cleaning or the pattern does not match
    """
    digits_only = re.sub(r"\D", "", number)

    # Unsupported countries are rejected before any rule lookup takes place.
    if country_code not in {"es", "pt"}:
        raise ValueError(f"Country code ({country_code}) is not currently supported")

    validator = COUNTRY_PARSING_RULES[country_code]["phone_validate_pattern"]

    # Fewer than nine digits can never yield a nine-digit result.
    if len(digits_only) < 9:
        return None

    found = validator.match(digits_only)
    if not found:
        return None

    # Keep only the trailing nine digits of the whole match, which drops any
    # leading digits (such as a country prefix) that the pattern accepted.
    return found.group(0)[-9:]
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
########################################################################################################################
|
|
2
|
-
# IMPORTS
|
|
3
|
-
|
|
4
|
-
import re
|
|
5
|
-
from typing import Literal
|
|
6
|
-
from ...params.nominatim import COUNTRY_PARSING_RULES
|
|
7
|
-
|
|
8
|
-
########################################################################################################################
|
|
9
|
-
# FUNCTIONS
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def _standardize_es_phone_number(number: str) -> str | None:
    """Validate a Spanish phone number and extract its first captured group.

    Args:
        number (str): phone number to validate; callers are expected to pass
            a cleaned, digits-only string

    Returns:
        str | None: group 1 of the "es" ``phone_validate_pattern`` match, or
            None when the pattern does not match
    """
    es_rules = COUNTRY_PARSING_RULES["es"]
    hit = es_rules["phone_validate_pattern"].match(number)
    if not hit:
        return None
    # NOTE(review): group 1 is presumably the 9-digit national number; confirm
    # against the pattern definition in params.nominatim.
    return hit.group(1)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def _standardize_pt_phone_number(number: str) -> str | None:
    """Validate a Portuguese phone number and extract its first captured group.

    Args:
        number (str): phone number to validate; callers are expected to pass
            a cleaned, digits-only string

    Returns:
        str | None: group 1 of the "pt" ``phone_validate_pattern`` match, or
            None when the pattern does not match
    """
    pt_rules = COUNTRY_PARSING_RULES["pt"]
    hit = pt_rules["phone_validate_pattern"].match(number)
    if not hit:
        return None
    # NOTE(review): group 1 is presumably the 9-digit national number; confirm
    # against the pattern definition in params.nominatim.
    return hit.group(1)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def parse_phone_number(number: str, country_code: Literal["es", "pt"]) -> str | None:
    """Strip a phone number down to digits and standardize it for its country.

    Args:
        number (str): phone number, possibly containing non-digit characters
        country_code (Literal["es", "pt"]): country whose standardizer is applied

    Raises:
        ValueError: when ``country_code`` is neither "es" nor "pt"

    Returns:
        str | None: whatever the country-specific standardizer returns for the
            digits-only number
    """
    digits_only = re.sub(r"\D", "", number)

    # Dispatch to the per-country helper; anything else is unsupported.
    if country_code == "es":
        return _standardize_es_phone_number(digits_only)
    if country_code == "pt":
        return _standardize_pt_phone_number(digits_only)
    raise ValueError(f"Country code ({country_code}) is not currently supported")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|