datamarket 0.7.98__tar.gz → 0.7.99__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (36)
  1. {datamarket-0.7.98 → datamarket-0.7.99}/PKG-INFO +1 -1
  2. {datamarket-0.7.98 → datamarket-0.7.99}/pyproject.toml +1 -1
  3. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/params/nominatim.py +1 -1
  4. datamarket-0.7.99/src/datamarket/utils/strings/standardization.py +38 -0
  5. datamarket-0.7.98/src/datamarket/utils/strings/standardization.py +0 -69
  6. {datamarket-0.7.98 → datamarket-0.7.99}/LICENSE +0 -0
  7. {datamarket-0.7.98 → datamarket-0.7.99}/README.md +0 -0
  8. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/__init__.py +0 -0
  9. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/exceptions/__init__.py +0 -0
  10. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/exceptions/main.py +0 -0
  11. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/__init__.py +0 -0
  12. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/alchemy.py +0 -0
  13. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/aws.py +0 -0
  14. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/azure.py +0 -0
  15. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/drive.py +0 -0
  16. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/ftp.py +0 -0
  17. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/nominatim.py +0 -0
  18. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/peerdb.py +0 -0
  19. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/proxy.py +0 -0
  20. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/interfaces/tinybird.py +0 -0
  21. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/params/__init__.py +0 -0
  22. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/__init__.py +0 -0
  23. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/airflow.py +0 -0
  24. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/alchemy.py +0 -0
  25. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/main.py +0 -0
  26. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/nominatim.py +0 -0
  27. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/playwright/__init__.py +0 -0
  28. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/playwright/async_api.py +0 -0
  29. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/playwright/sync_api.py +0 -0
  30. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/selenium.py +0 -0
  31. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/soda.py +0 -0
  32. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/strings/__init__.py +0 -0
  33. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/strings/normalization.py +0 -0
  34. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/strings/obfuscation.py +0 -0
  35. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/typer.py +0 -0
  36. {datamarket-0.7.98 → datamarket-0.7.99}/src/datamarket/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.7.98
3
+ Version: 0.7.99
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.7.98"
3
+ version = "0.7.99"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -412,7 +412,7 @@ COUNTRY_PARSING_RULES = {
412
412
 
413
413
  "zip_search_pattern": re.compile(r"\b\d{5}\b"),
414
414
 
415
- "phone_validate_pattern": re.compile(r"^(\+?34)?[6|7]\d{8}$")
415
+ "phone_validate_pattern": re.compile(r"^(\+?34)?[67]\d{8}$")
416
416
  },
417
417
  "pt": {
418
418
  "zip_validate_pattern": re.compile(r"^\d{4}[- ]{0,1}\d{3}$|^\d{4}$"),
@@ -0,0 +1,38 @@
1
+ ########################################################################################################################
2
+ # IMPORTS
3
+
4
+ import re
5
+ from typing import Literal
6
+ from ...params.nominatim import COUNTRY_PARSING_RULES
7
+
8
+ ########################################################################################################################
9
+ # FUNCTIONS
10
+
11
+ def parse_phone_number(number: str, country_code: Literal["es", "pt"]) -> str | None:
12
+ """Clean and standardize phone number from a certain country_code
13
+
14
+ Args:
15
+ number (str): phone number
16
+ country_code (Literal["es", "pt"]): country code of the phone number to parse
17
+
18
+ Raises:
19
+ ValueError: when parsing is not supported for a certain country
20
+
21
+ Returns:
22
+ str | None: standardized phone number
23
+ """
24
+ clean_number = re.sub(r"\D", "", number)
25
+ if country_code in {"es", "pt"}:
26
+ # Get the validation regex from params
27
+ pattern = COUNTRY_PARSING_RULES[country_code]["phone_validate_pattern"]
28
+
29
+ # Validate and extract in one step
30
+ if len(clean_number) >= 9: # Check if the cleaned number has at least 9 digits
31
+ match = pattern.match(clean_number)
32
+
33
+ # Keep only the last 9 digits of the matched text; None when the pattern does not match
34
+ return match.group(0)[-9:] if match else None
35
+ else:
36
+ return None # Or handle the case where the number is too short
37
+ else:
38
+ raise ValueError(f"Country code ({country_code}) is not currently supported")
@@ -1,69 +0,0 @@
1
- ########################################################################################################################
2
- # IMPORTS
3
-
4
- import re
5
- from typing import Literal
6
- from ...params.nominatim import COUNTRY_PARSING_RULES
7
-
8
- ########################################################################################################################
9
- # FUNCTIONS
10
-
11
-
12
- def _standardize_es_phone_number(number: str) -> str | None:
13
- """Standardize phone numbers from Spain using regex validation.
14
-
15
- Args:
16
- number (str): cleaned, digits-only phone number
17
-
18
- Returns:
19
- str | None: standardized 9-digit phone number
20
- """
21
- # Get the validation regex from params
22
- pattern = COUNTRY_PARSING_RULES["es"]["phone_validate_pattern"]
23
-
24
- # Validate and extract in one step
25
- match = pattern.match(number)
26
-
27
- # Return the captured group (the 9-digit number)
28
- return match.group(1) if match else None
29
-
30
-
31
- def _standardize_pt_phone_number(number: str) -> str | None:
32
- """Standardize phone numbers from Portugal using regex validation.
33
-
34
- Args:
35
- number (str): cleaned, digits-only phone number
36
-
37
- Returns:
38
- str | None: standardized 9-digit phone number
39
- """
40
- # Get the validation regex from params
41
- pattern = COUNTRY_PARSING_RULES["pt"]["phone_validate_pattern"]
42
-
43
- # Validate and extract in one step
44
- match = pattern.match(number)
45
-
46
- # Return the captured group (the 9-digit number)
47
- return match.group(1) if match else None
48
-
49
-
50
- def parse_phone_number(number: str, country_code: Literal["es", "pt"]) -> str | None:
51
- """Clean and standardize phone number from a certain country_code
52
-
53
- Args:
54
- number (str): phone number
55
- country_code (Literal["es", "pt"]): country code of the phone number to parse
56
-
57
- Raises:
58
- ValueError: when parsing is not supported for a certain country
59
-
60
- Returns:
61
- str | None: standardized phone number
62
- """
63
- clean_number = re.sub(r"\D", "", number)
64
- if country_code == "es":
65
- return _standardize_es_phone_number(clean_number)
66
- elif country_code == "pt":
67
- return _standardize_pt_phone_number(clean_number)
68
- else:
69
- raise ValueError(f"Country code ({country_code}) is not currently supported")
File without changes
File without changes