sibi-dst 2025.1.12__py3-none-any.whl → 2025.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. sibi_dst/__init__.py +7 -1
  2. sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +235 -342
  3. sibi_dst/df_helper/_df_helper.py +417 -117
  4. sibi_dst/df_helper/_parquet_artifact.py +255 -283
  5. sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
  6. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
  7. sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
  8. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
  9. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
  10. sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
  11. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
  12. sibi_dst/osmnx_helper/__init__.py +1 -0
  13. sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +203 -0
  14. sibi_dst/osmnx_helper/route_path_builder.py +97 -0
  15. sibi_dst/osmnx_helper/utils.py +2 -0
  16. sibi_dst/utils/base.py +302 -96
  17. sibi_dst/utils/clickhouse_writer.py +472 -206
  18. sibi_dst/utils/data_utils.py +139 -186
  19. sibi_dst/utils/data_wrapper.py +317 -73
  20. sibi_dst/utils/date_utils.py +1 -0
  21. sibi_dst/utils/df_utils.py +193 -213
  22. sibi_dst/utils/file_utils.py +3 -2
  23. sibi_dst/utils/filepath_generator.py +314 -152
  24. sibi_dst/utils/log_utils.py +581 -242
  25. sibi_dst/utils/manifest_manager.py +60 -76
  26. sibi_dst/utils/parquet_saver.py +33 -27
  27. sibi_dst/utils/phone_formatter.py +88 -95
  28. sibi_dst/utils/update_planner.py +180 -178
  29. sibi_dst/utils/webdav_client.py +116 -166
  30. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/METADATA +1 -1
  31. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/RECORD +32 -28
  32. {sibi_dst-2025.1.12.dist-info → sibi_dst-2025.8.1.dist-info}/WHEEL +0 -0
@@ -3,50 +3,45 @@ import fsspec
3
3
  import threading
4
4
  import uuid
5
5
  from typing import List, Optional, Set, Dict, Any
6
- import json, base64, hashlib
6
+
7
7
  from sibi_dst.utils import Logger
8
8
 
9
9
 
10
10
  class MissingManifestManager:
11
11
  """
12
- A thread-safe manager for a Parquet file manifest.
13
-
14
- This class handles creating, reading, and appending to a Parquet manifest file
15
- that tracks a list of paths. It is designed to be resilient, using atomic
16
- file operations to prevent data corruption during writes, and can clean up
17
- orphaned temporary files from previous runs.
18
-
19
- Attributes:
20
- fs (fsspec.AbstractFileSystem): The filesystem object to interact with.
21
- manifest_path (str): The full path to the manifest file.
22
- clear_existing (bool): If True, any existing manifest will be overwritten
23
- on the first save operation of this instance's lifecycle.
24
- logger (Logger): A logger instance for logging messages.
12
+ Thread-safe manager for a Parquet file manifest of missing partitions.
13
+
14
+ - Atomic writes via a temp file: rename into place, falling back to copy then remove
15
+ - Cleans up orphan temp files (best-effort)
16
+ - Stores a simple table with a single column: 'path'
25
17
  """
26
18
 
27
19
  def __init__(
28
- self,
29
- fs: fsspec.AbstractFileSystem,
30
- manifest_path: str,
31
- clear_existing: bool = False,
32
- **kwargs: Any,
20
+ self,
21
+ fs: fsspec.AbstractFileSystem,
22
+ manifest_path: str,
23
+ clear_existing: bool = False,
24
+ **kwargs: Any,
33
25
  ):
34
26
  self.fs: fsspec.AbstractFileSystem = fs
35
27
  self.manifest_path: str = manifest_path.rstrip("/")
36
28
  self.clear_existing: bool = clear_existing
37
-
29
+ self.clear_existing: bool = clear_existing
30
+ self.ignore_missing: bool = kwargs.get("ignore_missing", False)
31
+ if self.clear_existing:
32
+ self.ignore_missing = False
38
33
  self.debug: bool = kwargs.get("debug", False)
39
34
  self.logger: Logger = kwargs.get(
40
35
  "logger",
41
- Logger.default_logger(logger_name="missing_manifest_manager")
36
+ Logger.default_logger(logger_name="missing_manifest_manager"),
42
37
  )
43
38
  self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
44
39
 
45
40
  self._new_records: List[Dict[str, str]] = []
46
41
  self._loaded_paths: Optional[Set[str]] = None
47
- self._lock = threading.Lock() # A standard Lock is sufficient
42
+ self._lock = threading.Lock()
48
43
 
49
- # Clean up any orphaned temp files from previous failed runs
44
+ # Clean up any orphaned temp files from previous failed runs (best-effort)
50
45
  self._cleanup_orphaned_files()
51
46
 
52
47
  def _safe_exists(self, path: str) -> bool:
@@ -59,13 +54,8 @@ class MissingManifestManager:
59
54
 
60
55
  def load_existing(self) -> Set[str]:
61
56
  """
62
- Loads the set of paths from the existing manifest file.
63
-
64
- The result is cached in memory. If the manifest does not exist or fails
65
- to load, an empty set is returned. This operation is thread-safe.
66
-
67
- Returns:
68
- A set of strings, where each string is a path from the manifest.
57
+ Loads the set of paths from the existing manifest file into memory.
58
+ Returns an empty set if not found or unreadable.
69
59
  """
70
60
  with self._lock:
71
61
  if self._loaded_paths is not None:
@@ -77,7 +67,6 @@ class MissingManifestManager:
77
67
 
78
68
  try:
79
69
  df = pd.read_parquet(self.manifest_path, filesystem=self.fs)
80
- # Robustly extract non-empty, non-null paths
81
70
  paths = (
82
71
  df.get("path", pd.Series(dtype=str))
83
72
  .dropna().astype(str)
@@ -96,9 +85,6 @@ class MissingManifestManager:
96
85
  def record(self, full_path: str) -> None:
97
86
  """
98
87
  Records a new path to be added to the manifest upon the next save.
99
-
100
- Args:
101
- full_path: The path to record.
102
88
  """
103
89
  if not full_path or not isinstance(full_path, str):
104
90
  return
@@ -107,12 +93,7 @@ class MissingManifestManager:
107
93
 
108
94
  def save(self) -> None:
109
95
  """
110
- Saves all new records to the manifest file.
111
-
112
- This method merges new records with existing ones (unless `clear_existing`
113
- is True), removes duplicates, and writes the result back to the manifest.
114
- The write operation is performed atomically by writing to a temporary file
115
- first, then renaming or copying it to the final destination.
96
+ Saves all new records to the manifest file atomically.
116
97
  """
117
98
  with self._lock:
118
99
  if not self._new_records and not self.clear_existing:
@@ -143,65 +124,68 @@ class MissingManifestManager:
143
124
 
144
125
  # Ensure parent directory exists
145
126
  parent = self.manifest_path.rsplit("/", 1)[0]
146
- self.fs.makedirs(parent, exist_ok=True)
127
+ try:
128
+ self.fs.makedirs(parent, exist_ok=True)
129
+ except TypeError:
130
+ try:
131
+ self.fs.makedirs(parent)
132
+ except FileExistsError:
133
+ pass
147
134
 
148
135
  # Perform an atomic write using a temporary file
149
136
  temp_path = f"{self.manifest_path}.tmp-{uuid.uuid4().hex}"
150
137
  try:
151
138
  out_df.to_parquet(temp_path, filesystem=self.fs, index=False)
152
- self.fs.copy(temp_path, self.manifest_path)
153
- self.fs.rm_file(temp_path)
154
- self.logger.info(f"Copied manifest to {self.manifest_path} (temp: {temp_path})")
139
+ # prefer rename; some filesystems lack it (or it fails), so fall back to copy then remove
140
+ if hasattr(self.fs, "rename"):
141
+ try:
142
+ self.fs.rename(temp_path, self.manifest_path)
143
+ except Exception:
144
+ self.fs.copy(temp_path, self.manifest_path)
145
+ self.fs.rm_file(temp_path)
146
+ else:
147
+ self.fs.copy(temp_path, self.manifest_path)
148
+ self.fs.rm_file(temp_path)
149
+ self.logger.info(f"Wrote manifest to {self.manifest_path}")
155
150
  except Exception as e:
156
151
  self.logger.error(f"Failed to write or move manifest: {e}")
157
- # Re-raise so the caller knows the save operation failed
158
- #raise
152
+ # not re-raising to avoid breaking the ETL run
159
153
  finally:
160
- # CRITICAL: Always clean up the temporary file
161
- if self._safe_exists(temp_path):
162
- try:
163
- self._cleanup_orphaned_files()
164
- except Exception as e:
165
- self.logger.error(f"Failed to remove temporary file '{temp_path}': {e}")
154
+ # Always try to clean temp leftovers
155
+ try:
156
+ if self._safe_exists(temp_path):
157
+ if hasattr(self.fs, "rm_file"):
158
+ self.fs.rm_file(temp_path)
159
+ else:
160
+ self.fs.rm(temp_path, recursive=False)
161
+ except Exception:
162
+ pass
166
163
 
167
164
  # Reset internal state
168
165
  self._new_records.clear()
169
- self._loaded_paths = set(out_df["path"].tolist())
170
- # After the first successful save, disable clear_existing behavior
166
+ try:
167
+ self._loaded_paths = set(out_df["path"].tolist())
168
+ except Exception:
169
+ self._loaded_paths = None
171
170
  self.clear_existing = False
172
171
 
173
172
  def _cleanup_orphaned_files(self) -> None:
174
- """Finds and removes any orphaned temporary manifest files from prior runs."""
175
- self.logger.debug("Checking for orphaned temporary files...")
176
- if not hasattr(self.fs, "s3"):
177
- self.logger.info("Filesystem is not s3fs; skipping temp cleanup.")
178
- return
173
+ """Best-effort removal of leftover temporary manifest files."""
179
174
  try:
180
-
181
- # Use glob to find all files matching the temp pattern in a filesystem-agnostic way
182
175
  temp_file_pattern = f"{self.manifest_path}.tmp-*"
183
176
  orphaned_files = self.fs.glob(temp_file_pattern)
184
-
185
177
  if not orphaned_files:
186
- self.logger.debug("No orphaned files found.")
187
178
  return
188
179
 
189
- self.logger.info(f"Found {orphaned_files} orphaned temp manifest(s). Cleaning up...")
190
180
  for f_path in orphaned_files:
191
181
  try:
192
- self.fs.rm_file(f_path)
182
+ if hasattr(self.fs, "rm_file"):
183
+ self.fs.rm_file(f_path)
184
+ else:
185
+ self.fs.rm(f_path, recursive=False)
193
186
  self.logger.info(f"Deleted orphaned file: {f_path}")
194
187
  except Exception as e:
195
188
  self.logger.warning(f"Failed to delete orphaned temp file '{f_path}': {e}")
196
189
  except Exception as e:
197
- # This is a non-critical operation, so we just log the error
198
- self.logger.error(f"An unexpected error occurred during temp file cleanup: {e}")
199
-
200
- @staticmethod
201
- def _parse_s3_path(s3_path: str):
202
- if not s3_path.startswith("s3://"):
203
- raise ValueError("Invalid S3 path. Must start with 's3://'.")
204
- path_parts = s3_path[5:].split("/", 1)
205
- bucket_name = path_parts[0]
206
- prefix = path_parts[1] if len(path_parts) > 1 else ""
207
- return bucket_name, prefix
190
+ # Non-critical
191
+ self.logger.debug(f"Temp cleanup skipped: {e}")
@@ -1,10 +1,7 @@
1
- import logging
2
1
  import warnings
3
- from typing import Optional
4
2
 
5
3
  import dask.dataframe as dd
6
4
  import pyarrow as pa
7
- from fsspec import AbstractFileSystem
8
5
 
9
6
  from . import ManagedResource
10
7
 
@@ -14,19 +11,20 @@ warnings.filterwarnings("ignore", message="Passing 'overwrite=True' to to_parque
14
11
  class ParquetSaver(ManagedResource):
15
12
  """
16
13
  Saves Dask DataFrames to Parquet, with a workaround for S3-compatible
17
- storage that fails on batch delete operations.
14
+ storage providers that misbehave on batch delete operations.
15
+
16
+ Assumes `df_result` is a Dask DataFrame.
18
17
  """
19
18
 
20
19
  def __init__(
21
- self,
22
- df_result: dd.DataFrame,
23
- parquet_storage_path: str,
24
- **kwargs,
20
+ self,
21
+ df_result: dd.DataFrame,
22
+ parquet_storage_path: str,
23
+ **kwargs,
25
24
  ):
26
25
  super().__init__(**kwargs)
27
26
  self.df_result = df_result
28
27
  self.parquet_storage_path = parquet_storage_path.rstrip("/")
29
- # Determine protocol for special handling (e.g., 's3')
30
28
  if not self.fs:
31
29
  raise ValueError("File system (fs) must be provided to ParquetSaver.")
32
30
 
@@ -36,7 +34,7 @@ class ParquetSaver(ManagedResource):
36
34
 
37
35
  def save_to_parquet(self, output_directory_name: str = "default_output", overwrite: bool = True):
38
36
  """
39
- Saves the DataFrame to a Parquet dataset.
37
+ Saves the Dask DataFrame to a Parquet dataset.
40
38
 
41
39
  If overwrite is True, it manually clears the destination directory before
42
40
  writing to avoid issues with certain S3-compatible storage providers.
@@ -52,18 +50,18 @@ class ParquetSaver(ManagedResource):
52
50
 
53
51
  schema = self._define_schema()
54
52
  self.logger.info(f"Saving DataFrame to Parquet dataset at: {full_path}")
55
- self.df_result = self.df_result.persist()
53
+
54
+ # persist then write (lets the graph be shared if the caller reuses it)
55
+ ddf = self.df_result.persist()
56
+
56
57
  try:
57
- # We call to_parquet with overwrite=False because we have already
58
- # handled the directory clearing manually.
59
- self.df_result.to_parquet(
58
+ ddf.to_parquet(
60
59
  path=full_path,
61
60
  engine="pyarrow",
62
61
  schema=schema,
63
- overwrite=False,
62
+ overwrite=False, # we've handled deletion already
64
63
  filesystem=self.fs,
65
64
  write_index=False,
66
- compute=True, # Use compute=True over persisted ddf for immediate execution.
67
65
  )
68
66
  self.logger.info(f"Successfully saved Parquet dataset to: {full_path}")
69
67
  except Exception as e:
@@ -73,8 +71,8 @@ class ParquetSaver(ManagedResource):
73
71
  def _clear_directory_safely(self, directory: str):
74
72
  """
75
73
  Clears the contents of a directory robustly.
76
- - For S3, it deletes files one-by-one to bypass the 'MissingContentMD5' error.
77
- - For other filesystems, it uses the standard recursive remove.
74
+ - For S3, deletes files one-by-one to bypass brittle multi-delete.
75
+ - For other filesystems, uses the standard recursive remove.
78
76
  """
79
77
  if self.protocol == "s3":
80
78
  self.logger.warning(
@@ -82,15 +80,23 @@ class ParquetSaver(ManagedResource):
82
80
  "This may be slow for directories with many files."
83
81
  )
84
82
  # Glob all contents (files and subdirs) and delete them individually.
85
- # Calling fs.rm() on a single file path should trigger a single
86
- # DeleteObject call, avoiding the faulty batch operation.
87
- # We sort by length descending to delete contents of subdirectories first.
88
83
  all_paths = self.fs.glob(f"{directory}/**")
89
- paths_to_delete = sorted([p for p in all_paths if p != directory], key=len, reverse=True)
90
-
91
- for path in paths_to_delete:
84
+ # delete contents (deepest first)
85
+ for path in sorted([p for p in all_paths if p != directory], key=len, reverse=True):
92
86
  self.logger.debug(f"Deleting: {path}")
93
- self.fs.rm_file(path)
87
+ try:
88
+ # prefer rm_file if available (minio, s3fs expose it)
89
+ if hasattr(self.fs, "rm_file"):
90
+ self.fs.rm_file(path)
91
+ else:
92
+ self.fs.rm(path, recursive=False)
93
+ except Exception as e:
94
+ self.logger.warning(f"Failed to delete '{path}': {e}")
95
+ # remove the (now empty) directory if present
96
+ try:
97
+ self.fs.rm(directory, recursive=False)
98
+ except Exception:
99
+ pass
94
100
  else:
95
101
  # Standard, fast deletion for other filesystems (local, etc.)
96
102
  self.fs.rm(directory, recursive=True)
@@ -98,6 +104,7 @@ class ParquetSaver(ManagedResource):
98
104
  def _define_schema(self) -> pa.Schema:
99
105
  """
100
106
  Defines a PyArrow schema dynamically based on DataFrame's column types.
107
+ Works for Dask by using known dtypes on the collection.
101
108
  """
102
109
  pandas_dtype_to_pa = {
103
110
  "object": pa.string(), "string": pa.string(),
@@ -113,5 +120,4 @@ class ParquetSaver(ManagedResource):
113
120
  pa.field(c, pandas_dtype_to_pa.get(str(d), pa.string()))
114
121
  for c, d in self.df_result.dtypes.items()
115
122
  ]
116
- return pa.schema(fields)
117
-
123
+ return pa.schema(fields)
@@ -1,127 +1,120 @@
1
1
  import re
2
2
  from enum import Enum
3
- from typing import Optional, Union, Callable
3
+ from typing import Optional, Union, Callable, Tuple, Iterable
4
4
 
5
- class CountryCode(Enum):
6
- """Enum for supported country codes, including phone number length and formatting rules."""
7
5
 
8
- USA = ("1", 10, lambda number: f"({number[:3]}) {number[3:6]}-{number[6:]}")
9
- UK = ("44", 10, lambda number: f"{number[:2]} {number[2:6]} {number[6:]}")
10
- FRANCE = ("33", 9, lambda number: f"{number[:1]} {number[1:3]} {number[3:5]} {number[5:]}")
11
- SPAIN = ("34", 9, lambda number: f"{number[:2]} {number[2:5]} {number[5:]}")
12
- DEFAULT = ("506", 8, lambda number: f"{number[:4]}-{number[4:]}")
6
+ def _only_digits(s: str) -> str:
7
+ return re.sub(r"\D", "", s)
13
8
 
14
- def __init__(self, code: str, length: int, formatter: Callable[[str], str]):
15
- """
16
- Initialize a CountryCode enum member.
17
-
18
- :param code: The country code.
19
- :type code: str
20
- :param length: The expected length of the phone number (excluding the country code).
21
- :type length: int
22
- :param formatter: A function to format the phone number.
23
- :type formatter: Callable[[str], str]
24
- """
25
- self.code = code
26
- self.length = length
27
- self.formatter = formatter
28
9
 
29
- @property
30
- def value(self) -> str:
31
- """
32
- Get the country code value.
10
+ def _normalize_raw_input(phone: Union[str, int, float]) -> str:
11
+ """
12
+ Normalize raw input to just digits, preserving leading zeros for strings.
13
+ Reject floats because they lose leading zeros and can be formatted (e.g., 1e10).
14
+ """
15
+ if isinstance(phone, float):
16
+ # Floats are unsafe for phone numbers; caller should pass string or int
17
+ raise ValueError("Phone numbers as float are ambiguous; pass a string or int.")
18
+ if isinstance(phone, int):
19
+ # int loses leading zeros by definition, but this matches your original behavior
20
+ return str(phone)
21
+ if not isinstance(phone, str):
22
+ raise TypeError("phone_number must be str|int")
23
+
24
+ phone = phone.strip()
25
+ # Allow leading '+' or '00' international format; we'll strip them before digit normalization
26
+ if phone.startswith("+"):
27
+ phone = phone[1:]
28
+ elif phone.startswith("00"):
29
+ phone = phone[2:]
30
+ return _only_digits(phone)
33
31
 
34
- :return: The country code.
35
- :rtype: str
36
- """
37
- return self.code
38
32
 
39
- def validate_length(self, number: str) -> bool:
40
- """
41
- Validate the length of the phone number for this country.
33
+ class CountryCode(Enum):
34
+ """
35
+ Supported countries with:
36
+ - dial_code: country calling code
37
+ - nsn_length: expected National Significant Number length (no country code)
38
+ - formatter: formats the national number
39
+ - trunk_prefix: '0' for countries that commonly include a trunk code domestically (strip if present)
40
+ """
42
41
 
43
- :param number: The phone number part to validate.
44
- :type number: str
45
- :return: True if the number length is valid, False otherwise.
46
- :rtype: bool
47
- """
48
- return len(number) == self.length
42
+ USA = ("1", 10, lambda n: f"({n[:3]}) {n[3:6]}-{n[6:]}", "")
43
+ UK = ("44", 10, lambda n: f"{n[:2]} {n[2:6]} {n[6:]}", "0")
44
+ FRANCE= ("33", 9, lambda n: f"{n[:1]} {n[1:3]} {n[3:5]} {n[5:]}", "0")
45
+ SPAIN = ("34", 9, lambda n: f"{n[:2]} {n[2:5]} {n[5:]}", "")
46
+ # Default to Costa Rica in your original code
47
+ DEFAULT = ("506", 8, lambda n: f"{n[:4]}-{n[4:]}", "")
49
48
 
50
- def format_number(self, number: str) -> str:
51
- """
52
- Format the phone number according to this country's rules.
49
+ def __init__(self, dial_code: str, nsn_length: int, formatter: Callable[[str], str], trunk_prefix: str):
50
+ self.dial_code = dial_code
51
+ self.nsn_length = nsn_length
52
+ self.formatter = formatter
53
+ self.trunk_prefix = trunk_prefix
54
+
55
+ def validate_length(self, nsn: str) -> bool:
56
+ return len(nsn) == self.nsn_length
57
+
58
+ def strip_trunk(self, nsn: str) -> str:
59
+ if self.trunk_prefix and nsn.startswith(self.trunk_prefix) and len(nsn) > self.nsn_length:
60
+ # If someone passed trunk + nsn (e.g., '0' + 10 digits for UK),
61
+ # remove only a single leading trunk.
62
+ return nsn[1:]
63
+ return nsn
64
+
65
+ def format_number(self, nsn: str) -> str:
66
+ return self.formatter(nsn)
53
67
 
54
- :param number: The phone number part to format.
55
- :type number: str
56
- :return: The formatted number.
57
- :rtype: str
58
- """
59
- return self.formatter(number)
60
68
 
61
69
  class PhoneNumberFormatter:
62
70
  """
63
- A utility class for validating and formatting phone numbers based on country-specific rules.
64
-
65
- The class supports phone numbers for the UK, USA, France, and Spain. It detects the country code
66
- from the input or uses a default country code if missing. Phone numbers are formatted according
67
- to country-specific rules.
71
+ Validate and format a phone number into E.164-like string with country-specific formatting of the NSN.
72
+ Keeps backward compatibility with your previous API.
68
73
  """
69
74
 
70
75
  def __init__(self, default_country_code: CountryCode = CountryCode.DEFAULT):
71
- """
72
- Initialize the PhoneNumberFormatter with a default country code.
73
-
74
- :param default_country_code: The default country code to use if missing.
75
- :type default_country_code: CountryCode
76
- """
77
76
  self.default_country_code = default_country_code
78
77
 
79
78
  def format_phone_number(self, phone_number: Union[str, int, float]) -> Optional[str]:
80
79
  """
81
- Validate and format a phone number according to country-specific rules.
82
-
83
- If the input is numeric (e.g., an integer or float), it will be converted to a string.
84
- If the country code is missing, the default country code will be used. The phone number
85
- will be formatted according to the detected country's rules.
86
-
87
- :param phone_number: The phone number to validate and format. Can be a string, integer, or float.
88
- :type phone_number: Union[str, int, float]
89
- :return: The formatted phone number, or None if the input is invalid.
90
- :rtype: Optional[str]
80
+ Returns: "+<country_code> <pretty national format>" or None if invalid.
91
81
  """
92
- # Convert numeric input to string
93
- if isinstance(phone_number, (int, float)):
94
- phone_number = str(int(phone_number)) # Convert to integer first to remove decimal points
95
-
96
- # Remove all non-digit characters
97
- digits = re.sub(r"\D", "", phone_number)
82
+ try:
83
+ digits = _normalize_raw_input(phone_number)
84
+ except (TypeError, ValueError):
85
+ return None
98
86
 
99
- # Validate the length of the phone number
100
- if not digits or len(digits) < 7: # Minimum length for a valid phone number
87
+ if not digits or len(digits) < 7: # minimal sanity check
101
88
  return None
102
89
 
103
- # Detect the country code
104
- country_code, number = self._detect_country_code(digits)
90
+ country, nsn = self._detect_country_code(digits)
105
91
 
106
- # Validate the number length for the detected country
107
- if not country_code.validate_length(number):
108
- return None
92
+ # Strip a single trunk prefix if present (e.g., UK/FR leading '0' before the NSN)
93
+ nsn = country.strip_trunk(nsn)
109
94
 
110
- # Format the phone number based on the country code
111
- formatted_number = country_code.format_number(number)
95
+ if not country.validate_length(nsn):
96
+ return None
112
97
 
113
- return f"+{country_code.value} {formatted_number}"
98
+ pretty = country.format_number(nsn)
99
+ return f"+{country.dial_code} {pretty}"
114
100
 
115
- def _detect_country_code(self, digits: str) -> tuple[CountryCode, str]:
101
+ def _detect_country_code(self, digits: str) -> Tuple[CountryCode, str]:
116
102
  """
117
- Detect the country code from the input digits.
118
-
119
- :param digits: The phone number digits (without non-digit characters).
120
- :type digits: str
121
- :return: A tuple containing the detected country code and the remaining number.
122
- :rtype: tuple[CountryCode, str]
103
+ Detect the country by trying the longest dial codes first to avoid prefix collisions.
104
+ Falls back to default if none matches.
123
105
  """
124
- for country_code in CountryCode:
125
- if digits.startswith(country_code.value):
126
- return country_code, digits[len(country_code.value):]
106
+ # Iterate members excluding DEFAULT for detection, sorted by dial_code length desc
107
+ candidates: Iterable[CountryCode] = (
108
+ c for c in sorted(
109
+ (m for m in CountryCode if m is not CountryCode.DEFAULT),
110
+ key=lambda m: len(m.dial_code),
111
+ reverse=True,
112
+ )
113
+ )
114
+
115
+ for country in candidates:
116
+ if digits.startswith(country.dial_code):
117
+ return country, digits[len(country.dial_code):]
118
+
119
+ # No match → assume default country; entire string is NSN
127
120
  return self.default_country_code, digits