sibi-dst 0.3.40__py3-none-any.whl → 0.3.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. sibi_dst/df_helper/__init__.py +2 -0
  2. sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +262 -0
  3. sibi_dst/df_helper/_df_helper.py +5 -2
  4. sibi_dst/df_helper/_parquet_artifact.py +8 -2
  5. sibi_dst/df_helper/_parquet_reader.py +5 -1
  6. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +1 -0
  7. sibi_dst/osmnx_helper/__init__.py +2 -2
  8. sibi_dst/osmnx_helper/v1/basemaps/__init__.py +0 -0
  9. sibi_dst/osmnx_helper/{basemaps → v1/basemaps}/router_plotter.py +85 -30
  10. sibi_dst/osmnx_helper/v2/__init__.py +0 -0
  11. sibi_dst/osmnx_helper/v2/base_osm_map.py +153 -0
  12. sibi_dst/osmnx_helper/v2/basemaps/__init__.py +0 -0
  13. sibi_dst/osmnx_helper/v2/basemaps/utils.py +0 -0
  14. sibi_dst/utils/__init__.py +3 -0
  15. sibi_dst/utils/data_utils.py +66 -25
  16. sibi_dst/utils/data_wrapper.py +222 -285
  17. sibi_dst/utils/date_utils.py +118 -113
  18. sibi_dst/utils/df_utils.py +7 -0
  19. sibi_dst/utils/log_utils.py +57 -18
  20. sibi_dst/utils/parquet_saver.py +4 -2
  21. sibi_dst/utils/phone_formatter.py +127 -0
  22. sibi_dst/utils/storage_manager.py +14 -7
  23. sibi_dst-0.3.43.dist-info/METADATA +194 -0
  24. {sibi_dst-0.3.40.dist-info → sibi_dst-0.3.43.dist-info}/RECORD +29 -22
  25. sibi_dst-0.3.40.dist-info/METADATA +0 -62
  26. /sibi_dst/osmnx_helper/{basemaps → v1}/__init__.py +0 -0
  27. /sibi_dst/osmnx_helper/{base_osm_map.py → v1/base_osm_map.py} +0 -0
  28. /sibi_dst/osmnx_helper/{basemaps → v1/basemaps}/calendar_html.py +0 -0
  29. /sibi_dst/osmnx_helper/{utils.py → v1/utils.py} +0 -0
  30. {sibi_dst-0.3.40.dist-info → sibi_dst-0.3.43.dist-info}/WHEEL +0 -0
@@ -140,21 +140,32 @@ class DateUtils:
140
140
  'last_month': lambda: cls.get_month_range(n=-1),
141
141
  'current_year': lambda: cls.get_year_timerange(today().year),
142
142
  'current_quarter': lambda: (
143
- cls.get_first_day_of_the_quarter(today()), cls.get_last_day_of_the_quarter(today())),
143
+ cls.get_first_day_of_the_quarter(today()), cls.get_last_day_of_the_quarter(today())),
144
144
  'ytd': lambda: (datetime.date(today().year, 1, 1), today()),
145
145
  }
146
146
 
147
- def is_file_older_than(self, file_path: str, max_age_minutes: int, fs: Optional[fsspec.AbstractFileSystem] = None,
148
- ignore_missing: bool = False, verbose: bool = False) -> bool:
147
+
148
+ class FileAgeChecker:
149
+ def __init__(self, logger=None):
150
+ self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
151
+
152
+ def is_file_older_than(
153
+ self,
154
+ file_path: str,
155
+ max_age_minutes: int,
156
+ fs: Optional[fsspec.AbstractFileSystem] = None,
157
+ ignore_missing: bool = False,
158
+ verbose: bool = False,
159
+ ) -> bool:
149
160
  """
150
- Check if a file or a partitioned Parquet dataset is older than the specified max_age_minutes.
161
+ Check if a file or directory is older than the specified max_age_minutes.
151
162
 
152
- :param file_path: Path to the file or dataset.
163
+ :param file_path: Path to the file or directory.
153
164
  :param max_age_minutes: Maximum allowed age in minutes.
154
- :param fs: Filesystem object (e.g., S3, local). If not provided, defaults to the local filesystem.
155
- :param ignore_missing: If True, treat missing files as not old. Defaults to False.
156
- :param verbose: If True, log detailed information. Defaults to False.
157
- :return: True if the file or dataset is older than max_age_minutes, False otherwise.
165
+ :param fs: Filesystem object. Defaults to local filesystem.
166
+ :param ignore_missing: Treat missing paths as not old if True.
167
+ :param verbose: Enable detailed logging.
168
+ :return: True if older than max_age_minutes, False otherwise.
158
169
  """
159
170
  fs = fs or fsspec.filesystem("file")
160
171
  self.logger.info(f"Checking age for {file_path}...")
@@ -165,136 +176,129 @@ class DateUtils:
165
176
  return not ignore_missing
166
177
 
167
178
  if fs.isdir(file_path):
168
- self.logger.info(f"Found that {file_path} is a directory...")
169
- return self._is_directory_older_than(file_path, max_age_minutes, fs, verbose)
170
-
179
+ self.logger.info(f"Found directory: {file_path}")
180
+ age = self._get_directory_age_minutes(file_path, fs, verbose)
171
181
  elif fs.isfile(file_path):
172
- return self._is_file_older_than(file_path, max_age_minutes, fs, verbose)
173
-
182
+ age = self._get_file_age_minutes(file_path, fs, verbose)
174
183
  else:
175
- self.logger.warning(f"Path {file_path} is neither a file nor a directory.")
184
+ self.logger.warning(f"Path {file_path} is neither file nor directory.")
176
185
  return True
177
186
 
187
+ return age > max_age_minutes
188
+
178
189
  except Exception as e:
179
- self.logger.warning(f"Error checking age for {file_path}: {str(e)}")
190
+ self.logger.warning(f"Error checking {file_path}: {str(e)}")
180
191
  return True
181
192
 
182
- def _is_directory_older_than(self, dir_path: str, max_age_minutes: int, fs: fsspec.AbstractFileSystem,
183
- verbose: bool) -> bool:
193
def get_file_or_dir_age_minutes(
        self,
        file_path: str,
        fs: Optional[fsspec.AbstractFileSystem] = None,
) -> float:
    """
    Report how old a file or directory is, in minutes.

    Directories are delegated to ``_get_directory_age_minutes`` and regular
    files to ``_get_file_age_minutes``, both with verbose logging disabled.

    :param file_path: Path to check.
    :param fs: Filesystem object. Defaults to the local filesystem.
    :return: Age in minutes, or infinity when the path is missing, is neither
        a file nor a directory, or any filesystem call raises.
    """
    unavailable = float("inf")
    filesystem = fs or fsspec.filesystem("file")
    try:
        if not filesystem.exists(file_path):
            self.logger.info(f"Path not found: {file_path}")
        elif filesystem.isdir(file_path):
            return self._get_directory_age_minutes(file_path, filesystem, verbose=False)
        elif filesystem.isfile(file_path):
            return self._get_file_age_minutes(file_path, filesystem, verbose=False)
        else:
            self.logger.warning(f"Invalid path type: {file_path}")
    except Exception as exc:
        # Best effort: any failure is reported as "age unknown" rather than raised.
        self.logger.warning(f"Error getting age for {file_path}: {exc}")
    return unavailable
222
+
223
+ def _get_directory_age_minutes(
224
+ self,
225
+ dir_path: str,
226
+ fs: fsspec.AbstractFileSystem,
227
+ verbose: bool,
228
+ ) -> float:
229
+ """Calculate age of oldest file in directory."""
230
+ try:
231
+ all_files = fs.ls(dir_path)
232
+ except Exception as e:
233
+ self.logger.warning(f"Error listing {dir_path}: {str(e)}")
234
+ return float("inf")
217
235
 
218
- return dataset_age_minutes > max_age_minutes
236
+ if not all_files:
237
+ self.logger.info(f"Empty directory: {dir_path}")
238
+ return float("inf")
219
239
 
220
- def _is_file_older_than(self, file_path: str, max_age_minutes: int, fs: fsspec.AbstractFileSystem,
221
- verbose: bool) -> bool:
222
- """
223
- Check if a single file is older than the specified max_age_minutes.
240
+ modification_times = []
241
+ for file in all_files:
242
+ try:
243
+ info = fs.info(file)
244
+ mod_time = self._get_modification_time(info, file)
245
+ modification_times.append(mod_time)
246
+ except Exception as e:
247
+ self.logger.warning(f"Skipping {file}: {str(e)}")
224
248
 
225
- :param file_path: Path to the file.
226
- :param max_age_minutes: Maximum allowed age in minutes.
227
- :param fs: Filesystem object.
228
- :param verbose: If True, log detailed information.
229
- :return: True if the file is older than max_age_minutes, False otherwise.
230
- """
231
- info = fs.info(file_path)
232
- if verbose:
233
- self.logger.debug(f"File info for {file_path}: {info}")
234
-
235
- file_modification_datetime = self._get_modification_time(info, file_path)
236
- file_age_minutes = (datetime.datetime.now(
237
- datetime.timezone.utc) - file_modification_datetime).total_seconds() / 60
238
-
239
- if verbose:
240
- self.logger.debug(
241
- f"File {file_path} is {round(file_age_minutes, 2)} minutes old "
242
- f"(threshold: {max_age_minutes} minutes)"
243
- )
249
+ if not modification_times:
250
+ self.logger.warning(f"No valid files in {dir_path}")
251
+ return float("inf")
252
+
253
+ oldest = min(modification_times)
254
+ age = (datetime.datetime.now(datetime.timezone.utc) - oldest).total_seconds() / 60
255
+ self.logger.info(f"Oldest in {dir_path}: {age:.2f} minutes")
256
+
257
+ return age
258
+
259
+ def _get_file_age_minutes(
260
+ self,
261
+ file_path: str,
262
+ fs: fsspec.AbstractFileSystem,
263
+ verbose: bool,
264
+ ) -> float:
265
+ """Calculate file age in minutes."""
266
+ try:
267
+ info = fs.info(file_path)
268
+ mod_time = self._get_modification_time(info, file_path)
269
+ age = (datetime.datetime.now(datetime.timezone.utc) - mod_time).total_seconds() / 60
244
270
 
245
- return file_age_minutes > max_age_minutes
271
+ if verbose:
272
+ self.logger.debug(f"{file_path} info: {info}")
273
+ self.logger.debug(f"File age: {age:.2f} minutes")
246
274
 
247
- def _is_valid_file(self, file_path: str, fs: fsspec.AbstractFileSystem) -> bool:
248
- """
249
- Check if a file is valid (exists and has a valid modification time).
275
+ return age
250
276
 
251
- :param file_path: Path to the file.
252
- :param fs: Filesystem object.
253
- :return: True if the file is valid, False otherwise.
254
- """
255
- try:
256
- fs.info(file_path)
257
- return True
258
277
  except Exception as e:
259
- self.logger.warning(f"Error checking file age for {file_path}: {str(e)}")
260
- return False
278
+ self.logger.warning(f"Error processing {file_path}: {str(e)}")
279
+ return float("inf")
261
280
 
262
281
  def _get_modification_time(self, info: Dict, file_path: str) -> datetime.datetime:
263
- """
264
- Extract the modification time from file info.
282
+ """Extract modification time from filesystem info with timezone awareness."""
283
+ try:
284
+ if "LastModified" in info: # S3-like
285
+ lm = info["LastModified"]
286
+ return lm if isinstance(lm, datetime.datetime) else datetime.datetime.fromisoformat(
287
+ lm[:-1]).astimezone()
265
288
 
266
- :param info: File info dictionary.
267
- :param file_path: Path to the file (for logging purposes).
268
- :return: Modification time as a timezone-aware datetime object.
269
- """
270
- if "LastModified" in info: # S3-compatible filesystem
271
- last_modified = info["LastModified"]
272
- if isinstance(last_modified, datetime.datetime):
273
- return last_modified
274
- else:
275
- return datetime.datetime.strptime(last_modified, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
276
- tzinfo=datetime.timezone.utc)
289
+ if "mtime" in info: # Local filesystem
290
+ return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)
277
291
 
278
- elif "mtime" in info: # Local filesystem
279
- return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)
292
+ if "modified" in info: # FTP/SSH
293
+ return datetime.datetime.strptime(
294
+ info["modified"], "%Y-%m-%d %H:%M:%S"
295
+ ).replace(tzinfo=datetime.timezone.utc)
280
296
 
281
- elif "modified" in info: # FTP or SSH filesystem
282
- modified_str = info["modified"]
283
- try:
284
- return datetime.datetime.strptime(modified_str, "%Y-%m-%d %H:%M:%S").replace(
285
- tzinfo=datetime.timezone.utc)
286
- except ValueError:
287
- try:
288
- return datetime.datetime.strptime(modified_str, "%b %d %H:%M").replace(
289
- year=datetime.datetime.now().year, tzinfo=datetime.timezone.utc
290
- )
291
- except ValueError:
292
- self.logger.warning(f"Unsupported modification time format for {file_path}: {modified_str}")
293
- raise ValueError("Unsupported modification time format")
297
+ raise KeyError("No valid modification time key found")
294
298
 
295
- else: # Fallback for unsupported filesystems
296
- self.logger.warning(f"Modification time not available for {file_path}.")
297
- raise ValueError("Modification time not available")
299
+ except (KeyError, ValueError) as e:
300
+ self.logger.warning(f"Invalid mod time for {file_path}: {str(e)}")
301
+ raise ValueError(f"Unsupported modification time format for {file_path}") from e
298
302
 
299
303
 
300
304
  class BusinessDays:
@@ -315,6 +319,7 @@ class BusinessDays:
315
319
  :ivar week_mask: Boolean array indicating working days within a week.
316
320
  :type week_mask: numpy.ndarray
317
321
  """
322
+
318
323
  def __init__(self, holiday_list, logger):
319
324
  """
320
325
  Initialize a BusinessDays object with a given holiday list.
@@ -14,6 +14,13 @@ class DfUtils:
14
14
  """
15
15
  self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
16
16
 
17
@classmethod
def compute_to_list(cls, series):
    """
    Convert ``series`` to a plain Python list.

    Objects exposing a ``compute`` attribute (presumably lazy, Dask-style
    collections - confirm against callers) are computed first; anything else
    is converted directly.

    :param series: Object providing ``tolist()``, optionally via ``compute()``.
    :return: The result of ``tolist()`` on the (computed) object.
    """
    materialized = series.compute() if hasattr(series, "compute") else series
    return materialized.tolist()
20
+
21
def extract_unique_values(self, df, *columns):
    """
    Collect the distinct non-null values of each requested column.

    :param df: DataFrame-like object indexable by column name.
    :param columns: Names of the columns to inspect.
    :return: Dict mapping each column name to a list of its unique non-null values.
    """
    uniques = {}
    for name in columns:
        distinct = df[name].dropna().unique()
        uniques[name] = self.compute_to_list(distinct)
    return uniques
23
+
17
24
  def align_and_merge_by_type(self, df_left, df_right, type_mapping, how='left'):
18
25
  """
19
26
  Align column data types in two DataFrames based on a type mapping dictionary and perform the merge.
@@ -1,6 +1,8 @@
1
1
  import logging
2
2
  import os
3
3
  import sys
4
+ import time
5
+ from typing import Optional
4
6
 
5
7
 
6
8
  class Logger:
@@ -23,15 +25,26 @@ class Logger:
23
25
  :ivar logger: The initialized logger instance used for logging messages.
24
26
  :type logger: logging.Logger
25
27
  """
26
- def __init__(self, log_dir, logger_name, log_file):
28
+
29
+ def __init__(self, log_dir: str, logger_name: str, log_file: str, log_level: int = logging.DEBUG):
30
+ """
31
+ Initialize the Logger instance.
32
+
33
+ :param log_dir: Directory where logs are stored.
34
+ :param logger_name: Name of the logger instance.
35
+ :param log_file: Base name of the log file.
36
+ :param log_level: Logging level (defaults to DEBUG).
37
+ """
27
38
  self.log_dir = log_dir
28
39
  self.logger_name = logger_name
29
40
  self.log_file = log_file
41
+ self.log_level = log_level
30
42
  self.logger = None
31
43
 
32
44
  self._setup()
33
45
 
34
46
  def _setup(self):
47
+ """Set up the logger with file and console handlers."""
35
48
  # Ensure the log directory exists
36
49
  os.makedirs(self.log_dir, exist_ok=True)
37
50
 
@@ -47,45 +60,71 @@ class Logger:
47
60
 
48
61
  # Create a logger
49
62
  self.logger = logging.getLogger(self.logger_name)
50
- self.logger.setLevel(logging.DEBUG) # Log all levels DEBUG and above
63
+ self.logger.setLevel(self.log_level)
51
64
 
52
- # Create a file handler
53
- handler = logging.FileHandler(log_file_path)
65
+ # Create a formatter
66
+ formatter = logging.Formatter(
67
+ '[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s',
68
+ datefmt='%Y-%m-%d %H:%M:%S'
69
+ )
54
70
 
55
- # Create a formatter and add it to the handler
56
- formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
57
- handler.setFormatter(formatter)
71
+ formatter.converter = time.localtime # << Set local time explicitly
58
72
 
59
- # Add the handler to the logger
60
- self.logger.addHandler(handler)
73
+ # Create a file handler
74
+ file_handler = logging.FileHandler(log_file_path)
75
+ file_handler.setFormatter(formatter)
76
+ self.logger.addHandler(file_handler)
77
+
78
+ # Create a console handler so every record is also echoed to the console (stderr)
79
+ console_handler = logging.StreamHandler()
80
+ console_handler.setFormatter(formatter)
81
+ self.logger.addHandler(console_handler)
61
82
 
62
83
  @classmethod
63
- def default_logger(cls, log_dir='./logs/', logger_name=None, log_file=None):
84
+ def default_logger(
85
+ cls,
86
+ log_dir: str = './logs/',
87
+ logger_name: Optional[str] = None,
88
+ log_file: Optional[str] = None,
89
+ log_level: int = logging.INFO
90
+ ) -> 'Logger':
64
91
  """
65
92
  Class-level method to create a default logger with generic parameters.
66
- :param log_dir: Directory where logs are stored.
93
+
94
+ :param log_dir: Directory where logs are stored (defaults to './logs/').
67
95
  :param logger_name: Name of the logger (defaults to __name__).
68
96
  :param log_file: Name of the log file (defaults to logger_name).
97
+ :param log_level: Logging level (defaults to INFO).
69
98
  :return: Instance of Logger.
70
99
  """
71
100
  logger_name = logger_name or __name__
72
101
  log_file = log_file or logger_name
73
- return cls(log_dir=log_dir, logger_name=logger_name, log_file=log_file)
102
+ return cls(log_dir=log_dir, logger_name=logger_name, log_file=log_file, log_level=log_level)
74
103
 
75
- def set_level(self, level):
104
+ def set_level(self, level: int):
105
+ """
106
+ Set the logging level for the logger.
107
+
108
+ :param level: Logging level (e.g., logging.DEBUG, logging.INFO).
109
+ """
76
110
  self.logger.setLevel(level)
77
111
 
78
- def debug(self, msg):
112
+ def debug(self, msg: str):
113
+ """Log a debug message."""
79
114
  self.logger.debug(msg)
80
115
 
81
- def info(self, msg):
116
+ def info(self, msg: str):
117
+ """Log an info message."""
82
118
  self.logger.info(msg)
83
119
 
84
- def warning(self, msg):
120
+ def warning(self, msg: str):
121
+ """Log a warning message."""
85
122
  self.logger.warning(msg)
86
123
 
87
- def error(self, msg):
124
+ def error(self, msg: str):
125
+ """Log an error message."""
88
126
  self.logger.error(msg)
89
127
 
90
- def critical(self, msg):
128
+ def critical(self, msg: str):
129
+ """Log a critical message."""
91
130
  self.logger.critical(msg)
@@ -1,4 +1,5 @@
1
1
  import base64
2
+ import logging
2
3
  from pathlib import Path
3
4
  from typing import Optional
4
5
 
@@ -16,7 +17,7 @@ from sibi_dst.utils import Logger
16
17
 
17
18
 
18
19
  class ParquetSaver:
19
- def __init__(self, df_result, parquet_storage_path, logger=None, fs=None):
20
+ def __init__(self, df_result, parquet_storage_path, logger=None, fs=None, debug=False):
20
21
  """
21
22
  Initialize ParquetSaver.
22
23
  :param df_result: Dask DataFrame to save.
@@ -26,8 +27,9 @@ class ParquetSaver:
26
27
  """
27
28
  self.df_result = df_result
28
29
  self.parquet_storage_path = parquet_storage_path.rstrip("/")
30
+ self.debug = debug
29
31
  self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
30
-
32
+ self.logger.set_level(logging.DEBUG if self.debug else logging.INFO)
31
33
  self.fs = fs
32
34
  self.protocol = self.parquet_storage_path.split(":")[0]
33
35
 
@@ -0,0 +1,127 @@
1
+ import re
2
+ from enum import Enum
3
+ from typing import Optional, Union, Callable
4
+
5
class CountryCode(Enum):
    """
    Supported dialling codes with their national-number length and display format.

    Each member is declared as a ``(code, length, formatter)`` tuple, which
    ``__init__`` unpacks into attributes of the same names. ``DEFAULT`` is
    the code assumed when no other applies (506).
    """

    USA = ("1", 10, lambda number: f"({number[:3]}) {number[3:6]}-{number[6:]}")
    UK = ("44", 10, lambda number: f"{number[:2]} {number[2:6]} {number[6:]}")
    FRANCE = ("33", 9, lambda number: f"{number[:1]} {number[1:3]} {number[3:5]} {number[5:]}")
    SPAIN = ("34", 9, lambda number: f"{number[:2]} {number[2:5]} {number[5:]}")
    DEFAULT = ("506", 8, lambda number: f"{number[:4]}-{number[4:]}")

    def __init__(self, code: str, length: int, formatter: Callable[[str], str]):
        """
        Initialize a CountryCode enum member.

        :param code: The country dialling code, digits only.
        :type code: str
        :param length: The expected length of the phone number (excluding the country code).
        :type length: int
        :param formatter: A function that lays out the national number for display.
        :type formatter: Callable[[str], str]
        """
        self.code = code
        self.length = length
        self.formatter = formatter

    @property
    def value(self) -> str:
        """
        Get the country code value.

        This shadows ``Enum.value``: it returns only the dialling code, not
        the full declaration tuple.

        :return: The country code.
        :rtype: str
        """
        return self.code

    def validate_length(self, number: str) -> bool:
        """
        Validate the length of the phone number for this country.

        :param number: The phone number part to validate (without the country code).
        :type number: str
        :return: True if the number length is valid, False otherwise.
        :rtype: bool
        """
        return len(number) == self.length

    def format_number(self, number: str) -> str:
        """
        Format the phone number according to this country's rules.

        :param number: The phone number part to format (without the country code).
        :type number: str
        :return: The formatted number.
        :rtype: str
        """
        return self.formatter(number)


class PhoneNumberFormatter:
    """
    A utility class for validating and formatting phone numbers based on country-specific rules.

    Supported dialling codes are those declared in ``CountryCode``: USA (1),
    UK (44), France (33), Spain (34) and the ``DEFAULT`` entry (506). The
    country is detected from the leading digits when they are followed by a
    national number of the right length; otherwise the configured default
    country code is assumed.
    """

    def __init__(self, default_country_code: CountryCode = CountryCode.DEFAULT):
        """
        Initialize the PhoneNumberFormatter with a default country code.

        :param default_country_code: The country code to assume when none is detected.
        :type default_country_code: CountryCode
        """
        self.default_country_code = default_country_code

    def format_phone_number(self, phone_number: Union[str, int, float]) -> Optional[str]:
        """
        Validate and format a phone number according to country-specific rules.

        Numeric input is truncated to an integer and converted to a string.
        All non-digit characters are then dropped, the country is detected,
        and the national number is laid out with that country's formatter.

        :param phone_number: The phone number to validate and format. Can be a string, integer, or float.
        :type phone_number: Union[str, int, float]
        :return: ``"+<code> <formatted number>"``, or None if the input is
            missing, not a finite number or string, shorter than 7 digits,
            or of the wrong length for the detected country.
        :rtype: Optional[str]
        """
        if isinstance(phone_number, (int, float)):
            try:
                # Going through int() removes a trailing ".0" from floats.
                phone_number = str(int(phone_number))
            except (ValueError, OverflowError):
                # NaN and infinities cannot be converted; treat them as invalid.
                return None
        elif not isinstance(phone_number, str):
            return None

        # Remove all non-digit characters
        digits = re.sub(r"\D", "", phone_number)

        # Minimum length for a valid phone number
        if len(digits) < 7:
            return None

        country_code, number = self._detect_country_code(digits)

        # Only the default-country fallback can still have the wrong length here.
        if not country_code.validate_length(number):
            return None

        return f"+{country_code.code} {country_code.format_number(number)}"

    def _detect_country_code(self, digits: str) -> tuple[CountryCode, str]:
        """
        Detect the country code from the input digits.

        A dialling code is accepted only when the digits after it have
        exactly the length that country expects. Without this check a local
        number that merely begins with "1", "33", "34" or "44" would be
        taken for a foreign number and then rejected for its length.

        :param digits: The phone number digits (without non-digit characters).
        :type digits: str
        :return: The detected country and the national number, or the default
            country with ``digits`` unchanged when no code fits.
        :rtype: tuple[CountryCode, str]
        """
        for country_code in CountryCode:
            prefix = country_code.code
            if not digits.startswith(prefix):
                continue
            remainder = digits[len(prefix):]
            if country_code.validate_length(remainder):
                return country_code, remainder
        return self.default_country_code, digits
@@ -4,13 +4,14 @@ import fsspec
4
4
 
5
5
 
6
6
  class StorageManager:
7
- def __init__(self, storage_path, fs_type="file", fs_options=None):
7
+ def __init__(self, storage_path, fs_type="file", fs_options=None, debug=False):
8
8
  """
9
9
  Initializes the StorageManager with the base storage path and file system settings.
10
10
  :param storage_path: Base path for the storage (e.g., "s3://my-bucket").
11
11
  :param fs_type: File system type (e.g., "file", "s3").
12
12
  :param fs_options: Dictionary of options for fsspec file system (e.g., credentials).
13
13
  """
14
+ self.debug = debug
14
15
  # Strip any trailing slash from storage_path for consistency
15
16
  self.storage_path = storage_path.rstrip("/")
16
17
  self.fs_type = fs_type
@@ -35,9 +36,11 @@ class StorageManager:
35
36
  :param dirs_to_create: List of subdirectories to create.
36
37
  :param clear_existing: Whether to clear existing directories.
37
38
  """
38
- print(f"Setting up directories under: {base_path}")
39
+ if self.debug:
40
+ print(f"Setting up directories under: {base_path}")
39
41
  if clear_existing:
40
- print(f"Warning: All existing contents in {base_path} will be removed.")
42
+ if self.debug:
43
+ print(f"Warning: All existing contents in {base_path} will be removed.")
41
44
  if self.fs.exists(base_path):
42
45
  self.fs.rm(base_path, recursive=True)
43
46
 
@@ -47,7 +50,8 @@ class StorageManager:
47
50
  # Create subdirectories
48
51
  for sub_directory in dirs_to_create:
49
52
  sub_path = self.join_paths(base_path, sub_directory)
50
- print(f"Creating directory: {sub_path}")
53
+ if self.debug:
54
+ print(f"Creating directory: {sub_path}")
51
55
  if clear_existing and self.fs.exists(sub_path):
52
56
  self.fs.rm(sub_path, recursive=True)
53
57
  self.fs.mkdirs(sub_path, exist_ok=True)
@@ -63,7 +67,8 @@ class StorageManager:
63
67
  # Ensure directories exist (optionally clear existing ones)
64
68
  for depot, sub_directories in depots.items():
65
69
  depot_path = self.join_paths(self.storage_path, depot)
66
- print(f"Rebuilding depot at: {depot_path}")
70
+ if self.debug:
71
+ print(f"Rebuilding depot at: {depot_path}")
67
72
  self.setup_directories(depot_path, sub_directories, clear_existing=clear_existing)
68
73
 
69
74
  # Generate depot_paths dictionary
@@ -89,9 +94,11 @@ class StorageManager:
89
94
  :param depots: Dictionary where keys are depot names and values are subdirectory lists.
90
95
  :param clear_existing: Whether to clear existing directories.
91
96
  """
92
- print("Rebuilding depot structure...")
97
+ if self.debug:
98
+ print("Rebuilding depot structure...")
93
99
  self.rebuild_depot_paths(depots, clear_existing=clear_existing)
94
- print("Rebuild complete.")
100
+ if self.debug:
101
+ print("Rebuild complete.")
95
102
 
96
103
  def get_fs_instance(self):
97
104
  return fsspec.filesystem(self.fs_type, **self.fs_options)