sibi-dst 0.3.31__py3-none-any.whl → 0.3.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,37 @@ from geopy.distance import geodesic


  class PBFHandler:
+ """
+ Handles the creation, management, and visualization of graph data derived
+ from .pbf (Protocolbuffer Binary Format) files. This class enables the
+ loading, processing, saving, and reutilization of graph, node, and edge
+ data for geographical regions, supporting verbose mode for detailed outputs.
+
+ :ivar graph: The generated graph object representing the spatial network; can be None if not yet loaded or processed.
+ :type graph: Optional[NetworkX.Graph]
+ :ivar nodes: GeoDataFrame representing the nodes of the graph; can be None if not yet loaded or processed.
+ :type nodes: Optional[geopandas.GeoDataFrame]
+ :ivar edges: GeoDataFrame representing the edges of the graph; can be None if not yet loaded or processed.
+ :type edges: Optional[geopandas.GeoDataFrame]
+ :ivar rebuild: Indicates whether to rebuild the graph data, ignoring any existing cached files. Default is ``False``.
+ :type rebuild: bool
+ :ivar verbose: Enables verbose mode to provide detailed status messages during operations. Default is ``False``.
+ :type verbose: bool
+ :ivar place: The name of the geographical region to process with OpenStreetMap. Default is ``Costa Rica``.
+ :type place: str
+ :ivar filepath: The path to the directory where the graph, nodes, and edges pickle files are saved. Default is ``gis_data/``.
+ :type filepath: str
+ :ivar file_prefix: The prefix for the filenames of the saved graph, node, and edge pickle files. Default is ``costa-rica-``.
+ :type file_prefix: str
+ :ivar network_type: The type of network to extract from OpenStreetMap, such as "all" or other specific network types. Default is ``all``.
+ :type network_type: str
+ :ivar graph_file: Full path of the file to save or load the graph data as a pickle file.
+ :type graph_file: str
+ :ivar node_file: Full path of the file to save or load the graph's node data as a pickle file.
+ :type node_file: str
+ :ivar edge_file: Full path of the file to save or load the graph's edge data as a pickle file.
+ :type edge_file: str
+ """
  def __init__(self, **kwargs):
  self.graph = None
  self.nodes = None
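
A minimal usage sketch of the class documented above. The module import path is not part of this diff, and the keyword names are assumed to match the documented instance attributes:

    handler = PBFHandler(
        place="Costa Rica",         # region pulled from OpenStreetMap
        filepath="gis_data/",       # directory for the pickle cache
        file_prefix="costa-rica-",  # prefix for graph/nodes/edges pickles
        network_type="all",
        rebuild=False,              # reuse cached pickles when present
        verbose=True,
    )
    handler.load()
    G, nodes, edges = handler.graph, handler.nodes, handler.edges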
@@ -38,6 +69,23 @@ class PBFHandler:
  self.edge_file = f"{self.filepath}{self.file_prefix}edges.pkl"

  def load(self):
+ """
+ Loads the required data files for processing. If the files do not exist or
+ if the `rebuild` flag is set to True, it will process and recreate the
+ necessary data from the source. Otherwise, it will load the data from
+ existing pickle files. This function ensures the target directory exists,
+ and processes files conditionally based on their presence.
+
+ :param verbose: Flag to control the verbosity of the function's output.
+ :param rebuild: Indicates whether the data should be rebuilt from the raw
+ source files.
+ :param graph_file: Path to the graph file to be loaded or rebuilt.
+ :param node_file: Path to the node file to be loaded or rebuilt.
+ :param edge_file: Path to the edge file to be loaded or rebuilt.
+ :param filepath: Path to the directory where files are processed and saved.
+
+ :return: None
+ """
  if self.verbose:
  print("Loading data...")

@@ -62,7 +110,31 @@ class PBFHandler:

  def process_pbf(self):
  """
- Load a PBF file and create a graph.
+ Processes the Protocolbuffer Binary Format (PBF) data specified for a given place by
+ utilizing the OSMnx library to create a graph representation and extracts nodes and
+ edges into GeoDataFrames. The function provides verbose output if enabled.
+
+ :param self: Refers to the current instance of the class containing this method.
+
+ :param self.verbose: bool
+ A flag to control verbose output. If True, detailed processing status messages are
+ logged to the console.
+
+ :param self.place: str
+ The name or description of the geographic place for which PBF data is processed. It
+ is used to construct a graph representation of the place.
+
+ :param self.network_type: str
+ The type of network graph to be created, typically one of 'all', 'walk', 'drive',
+ etc., reflecting the type of paths or streets included in the graph.
+
+ :return: None
+ This function does not return a value, but updates class attributes ``graph``,
+ ``nodes``, and ``edges``.
+
+ :raises Exception:
+ Raises a general exception when there is an error in processing the PBF data. Error
+ details are printed when verbose output is enabled.
  """
  try:
  if self.verbose:
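
The docstring above describes building the graph with OSMnx and splitting it into GeoDataFrames. A rough sketch of that underlying call sequence (not the method's verbatim body, which this diff does not include):

    import osmnx as ox

    G = ox.graph_from_place("Costa Rica", network_type="all")
    nodes, edges = ox.graph_to_gdfs(G)   # node and edge GeoDataFrames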
@@ -79,7 +151,20 @@ class PBFHandler:

  def save_to_pickle(self):
  """
- Save the graph, nodes, and edges to pickle files.
+ Saves data, including graph, nodes, and edges, to pickle files. Each data object is
+ saved to its corresponding file if available. If verbose mode is enabled, prints
+ messages indicating the saving progress and success.
+
+ :param self:
+ Represents the instance of the class that contains attributes `graph_file`,
+ `graph`, `node_file`, `nodes`, `edge_file`, `edges`, and `verbose`. These
+ attributes determine the files to save to and the data to save.
+
+ :raises Exception:
+ Raises an exception if an error occurs during the saving process.
+
+ :return:
+ None
  """
  try:
  if self.verbose:
@@ -104,7 +189,13 @@ class PBFHandler:

  def load_from_pickle(self):
  """
- Load the graph, nodes, and edges from pickle files.
+ Loads data from pickle files specified by the attributes `graph_file`, `node_file`,
+ and `edge_file` and assigns them to the corresponding attributes `graph`,
+ `nodes`, and `edges`, respectively. Displays verbose messages during the load
+ process if the `verbose` attribute is set to True.
+
+ :raises Exception: If an error occurs during reading or deserialization of the
+ pickle files.
  """
  try:
  if self.verbose:
@@ -128,7 +219,13 @@ class PBFHandler:

  def plot_graph(self):
  """
- Plot the graph.
+ Plots the loaded graph using the OSMnx library.
+
+ This method checks if a graph is loaded and, if available, plots it. Outputs
+ verbose messages during the process if verbosity is enabled.
+
+ :raises Exception: Raises if an error occurs during the plotting process.
+ :return: None
  """
  try:
  if self.graph is not None:
@@ -145,6 +242,23 @@ class PBFHandler:


  def get_bounding_box_from_points(gps_points, margin=0.001):
+ """
+ Calculates a bounding box from a list of GPS points, with an optional margin added
+ to expand the bounding box in all directions. The function iterates over the GPS
+ points to determine the maximum and minimum latitude and longitude values, then
+ applies the specified margin to calculate the bounding box's boundaries.
+
+ :param gps_points: A list of GPS points, where each point is represented as a tuple
+ containing a latitude and a longitude (latitude, longitude).
+ :type gps_points: list[tuple[float, float]]
+ :param margin: An optional margin value to expand the bounding box in all directions.
+ Default value is 0.001.
+ :type margin: float
+ :return: A tuple containing the bounding box boundaries in the following order:
+ north (maximum latitude), south (minimum latitude), east (maximum longitude),
+ and west (minimum longitude), each adjusted with the margin.
+ :rtype: tuple[float, float, float, float]
+ """
  latitudes = [point[0] for point in gps_points]
  longitudes = [point[1] for point in gps_points]

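
A short worked example of the documented behaviour, using illustrative coordinates:

    points = [(9.93, -84.08), (9.97, -84.05)]   # (latitude, longitude) pairs
    north, south, east, west = get_bounding_box_from_points(points, margin=0.001)
    # north = 9.97 + 0.001, south = 9.93 - 0.001
    # east  = -84.05 + 0.001, west = -84.08 - 0.001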
@@ -157,6 +271,28 @@ def get_bounding_box_from_points(gps_points, margin=0.001):


  def add_arrows(map_object, locations, color, n_arrows):
+ """
+ Adds directional arrows to a map object to indicate paths or flows along a polyline
+ defined by the given locations.
+
+ The function computes directional arrows based on the locations list, places them
+ along the defined path at intervals determined by the number of arrows, and adds
+ these arrows to the specified `map_object`.
+
+ .. note::
+ The function works optimally when the number of locations is greater than two.
+
+ :param map_object: The folium map object to which the directional arrows will be added.
+ :param locations: A list containing tuples of latitude and longitude values that define
+ the polyline. Each tuple represents a geographic point.
+ :type locations: list[tuple[float, float]]
+ :param color: The color to be used for the directional arrows.
+ :type color: str
+ :param n_arrows: The number of arrows to be drawn along the path.
+ :type n_arrows: int
+ :return: The modified folium map object containing the added arrows.
+ :rtype: folium.Map
+ """
  # Get the number of locations
  n = len(locations)

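
A usage sketch with folium, assuming a polyline has already been drawn for the same route (coordinates are illustrative):

    import folium

    route = [(9.93, -84.08), (9.94, -84.07), (9.95, -84.06)]
    m = folium.Map(location=route[0], zoom_start=14)
    folium.PolyLine(route, color="blue").add_to(m)
    m = add_arrows(m, route, color="blue", n_arrows=3)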
@@ -179,6 +315,26 @@ def add_arrows(map_object, locations, color, n_arrows):


  def extract_subgraph(G, north, south, east, west):
+ """
+ Extracts a subgraph from the input graph `G` within a specified bounding box. The bounding
+ box is defined by its north, south, east, and west coordinates. The function identifies
+ nodes from the graph that lie within this bounding box and creates a subgraph containing
+ only these nodes and their corresponding edges.
+
+ :param G: The input graph representing the original main graph.
+ :type G: networkx.Graph
+ :param north: The northern latitude that defines the upper boundary of the bounding box.
+ :type north: float
+ :param south: The southern latitude that defines the lower boundary of the bounding box.
+ :type south: float
+ :param east: The eastern longitude that defines the right boundary of the bounding box.
+ :type east: float
+ :param west: The western longitude that defines the left boundary of the bounding box.
+ :type west: float
+ :return: A subgraph extracted from the input graph `G` containing nodes and edges within
+ the specified bounding box.
+ :rtype: networkx.Graph
+ """
  # Create a bounding box polygon
  # from osmnx v2 this is how it is done
  if ox.__version__ >= '2.0':
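
Combined with the bounding-box helper above, a typical call looks like this (G and route reuse the earlier sketches):

    north, south, east, west = get_bounding_box_from_points(route, margin=0.005)
    sub_G = extract_subgraph(G, north, south, east, west)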
@@ -199,6 +355,26 @@ def extract_subgraph(G, north, south, east, west):


  def get_distance_between_points(point_a, point_b, unit='km'):
+ """
+ Calculate the geographical distance between two points on Earth.
+
+ This function computes the distance between two points on the Earth's surface
+ specified in their geographical coordinates (latitude, longitude). The calculation
+ employs the geodesic distance, which represents the shortest distance between
+ two points on the Earth's surface. The distance can be returned in different units of
+ measurement depending on the provided parameter.
+
+ :param point_a: A tuple representing the latitude and longitude of the first
+ point in decimal degrees (e.g., (latitude, longitude)). Must be a tuple of
+ two float values.
+ :param point_b: A tuple representing the latitude and longitude of the second
+ point in decimal degrees (e.g., (latitude, longitude)). Must be a tuple of
+ two float values.
+ :param unit: A string value representing the unit of the calculated distance. Can be
+ 'km' for kilometers (default), 'm' for meters, or 'mi' for miles.
+ :return: A float value of the distance between the two points in the specified unit.
+ Returns 0 if the input validation fails or the specified unit is invalid.
+ """
  if not isinstance(point_a, tuple) or len(point_a) != 2:
  return 0
  if not all(isinstance(x, float) and not math.isnan(x) for x in point_a):
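
Example call; note that the validation shown in the context lines above requires float coordinates (coordinate values are illustrative):

    san_jose = (9.9281, -84.0907)
    puerto_limon = (9.9913, -83.0415)
    km = get_distance_between_points(san_jose, puerto_limon, unit='km')
    mi = get_distance_between_points(san_jose, puerto_limon, unit='mi')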
@@ -226,6 +402,20 @@ tile_options = {


  def attach_supported_tiles(map_object, default_tile="OpenStreetMap"):
+ """
+ Attaches supported tile layers to a given folium map object, excluding the
+ default tile layer, to provide layer selection functionality in the map.
+
+ This function allows dynamic addition of multiple tile layers to the map
+ object while avoiding duplication of the default tile. By filtering out the
+ default tile, it prevents redundancy and ensures a cleaner map interface.
+
+ :param map_object: The folium map object to which the tile layers will be added.
+ It must be an instance of Folium's Map class or a compatible map object.
+ :param default_tile: The name of the default tile layer to exclude from the
+ list of tiles added to the map. If not specified, defaults to 'OpenStreetMap'.
+ :return: None. The function modifies the provided map object in place.
+ """
  # Normalize the default tile name to lowercase for comparison
  normalized_default_tile = default_tile.lower()

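
A usage sketch, pairing the helper with folium's layer control so the attached tiles become selectable:

    m = folium.Map(location=(9.93, -84.08), tiles="OpenStreetMap", zoom_start=12)
    attach_supported_tiles(m, default_tile="OpenStreetMap")
    folium.LayerControl().add_to(m)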
@@ -237,12 +427,44 @@ def attach_supported_tiles(map_object, default_tile="OpenStreetMap"):


  def get_graph(**options):
+ """
+ Generates and returns a graph along with its nodes and edges based on the
+ provided options. The function initializes a PBFHandler instance with the
+ given options, processes any data required, and retrieves the resulting
+ graph structure.
+
+ :param options: Variable-length keyword arguments passed to initialize the
+ PBFHandler instance. These parameters play a role in
+ determining how the graph data is processed and structured.
+ :return: Returns a tuple containing three elements:
+ - The generated graph object
+ - The list or collection of nodes within the graph
+ - The list or collection of edges that describe relationships
+ between nodes in the graph
+ """
  handler = PBFHandler(**options)
  handler.load()
  return handler.graph, handler.nodes, handler.edges


  def add_query_params(url, params):
+ """
+ Update the query parameters of a given URL with new parameters.
+
+ This function takes a URL and a dictionary of parameters, merges these
+ parameters with the existing parameters in the URL, and returns a new URL
+ with updated query parameters.
+
+ :param url: The original URL whose query parameters are to be updated,
+ including the scheme, netloc, path, and optional query string and fragment.
+ :type url: str
+ :param params: A dictionary containing the new parameters to be added or updated
+ in the query string of the given URL.
+ :type params: dict
+ :return: A new URL with updated query parameters after merging the original
+ and new parameters.
+ :rtype: str
+ """
  # Parse the original URL
  url_components = urlsplit(url)

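
Usage sketches for the two helpers above. The URL shown for add_query_params is illustrative, and parameter ordering in the rebuilt query string may differ:

    # get_graph is a thin wrapper around PBFHandler and accepts the same options.
    G, nodes, edges = get_graph(place="Costa Rica", network_type="drive", verbose=True)

    new_url = add_query_params("https://example.com/search?page=1",
                               {"page": 2, "q": "roads"})
    # e.g. "https://example.com/search?page=2&q=roads"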
@@ -9,6 +9,33 @@ from sibi_dst.utils import Logger


  class ClickHouseWriter:
+ """
+ Provides functionality to write a Dask DataFrame to a ClickHouse database using
+ a specified schema. This class handles the creation of tables, schema generation,
+ data transformation, and data insertion. It ensures compatibility between Dask
+ data types and ClickHouse types.
+
+ :ivar clickhouse_host: Host address of the ClickHouse database.
+ :type clickhouse_host: str
+ :ivar clickhouse_port: Port of the ClickHouse database.
+ :type clickhouse_port: int
+ :ivar clickhouse_dbname: Name of the database to connect to in ClickHouse.
+ :type clickhouse_dbname: str
+ :ivar clickhouse_user: Username for database authentication.
+ :type clickhouse_user: str
+ :ivar clickhouse_password: Password for database authentication.
+ :type clickhouse_password: str
+ :ivar clickhouse_table: Name of the table to store the data in.
+ :type clickhouse_table: str
+ :ivar logger: Logger instance for logging messages.
+ :type logger: logging.Logger
+ :ivar client: Instance of the ClickHouse database client.
+ :type client: clickhouse_connect.Client or None
+ :ivar df: Dask DataFrame to be written into ClickHouse.
+ :type df: dask.dataframe.DataFrame
+ :ivar order_by: Field or column name to use for table ordering.
+ :type order_by: str
+ """
  dtype_to_clickhouse = {
  'int64': 'Int64',
  'int32': 'Int32',
@@ -5,12 +5,43 @@ from sibi_dst.utils import Logger


  class DataUtils:
-
+ """
+ Utility class for data transformation, manipulation, and merging.
+
+ This class provides functionalities for transforming numeric and boolean columns, merging
+ lookup data, checking DataFrame emptiness, and converting columns to datetime format in
+ Pandas or Dask DataFrames. It is designed to handle data preprocessing steps efficiently
+ for both small-scale and large-scale datasets. Logging and debug options are available
+ to trace execution and monitor operations.
+
+ :ivar logger: Logger instance for logging messages.
+ :type logger: logging.Logger
+ :ivar debug: Flag to enable or disable debug mode.
+ :type debug: bool
+ """
  def __init__(self, logger=None, **kwargs):
  self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
  self.debug = kwargs.get('debug', False)

  def transform_numeric_cols(self, df, columns, fill_value=0, dtype=int):
+ """
+ This function transforms the specified numeric columns in the given dataframe by converting
+ their data types to the specified dtype, with an optional parameter for replacing missing
+ values. It first checks if the provided columns exist in the dataframe, processes each column
+ to replace non-numeric values with NaN, fills NaN values with the given fill_value, and finally
+ converts the column to the specified dtype.
+
+ :param df: DataFrame to be transformed.
+ :type df: dask.dataframe.DataFrame
+ :param columns: List of column names to be transformed.
+ :type columns: list[str]
+ :param fill_value: Value used to replace missing or invalid data. Default is 0.
+ :type fill_value: int or float
+ :param dtype: Target data type for the columns after transformation. Default is int.
+ :type dtype: type
+ :return: Transformed dataframe with the specified numeric columns converted and modified.
+ :rtype: dask.dataframe.DataFrame
+ """
  if not columns:
  self.logger.warning('No columns specified')
  self.logger.debug(f'Dataframe type:{type(df)}')
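
A sketch of the documented signature applied to a small Dask DataFrame (column names and values are illustrative):

    import pandas as pd
    import dask.dataframe as dd

    pdf = pd.DataFrame({"qty": ["3", None, "n/a"], "price": [1.5, None, 2.0]})
    ddf = dd.from_pandas(pdf, npartitions=1)

    du = DataUtils(debug=True)
    ddf = du.transform_numeric_cols(ddf, columns=["qty"], fill_value=0, dtype=int)
    # per the docstring: non-numeric values -> NaN -> fill_value, then cast to int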
@@ -12,6 +12,62 @@ from sibi_dst.utils import ParquetSaver


  class DataWrapper:
+ """
+ Utility class for handling file-based operations, including processing and saving data
+ in Parquet format, while managing a hierarchy of conditions such as overwrite, history
+ threshold, and missing file detection.
+
+ This class aims to simplify the process of managing large datasets stored in a filesystem.
+ It allows for controlled updates to data files based on parameters set by the user, with
+ support for different filesystem types and options.
+
+ It also provides features like logging actions, managing processing threads, generating
+ update plans, checking file age, and dynamically creating date ranges for data operations.
+
+ The design supports flexible integration with user-defined classes (dataclasses) to define
+ custom loading and processing behavior.
+
+ :ivar dataclass: The user-defined class for data processing.
+ :type dataclass: Type
+ :ivar date_field: The name of the date field in the user-defined class.
+ :type date_field: str
+ :ivar data_path: Base path for the dataset storage.
+ :type data_path: str
+ :ivar parquet_filename: File name for the Parquet file.
+ :type parquet_filename: str
+ :ivar start_date: Start date for processing.
+ :type start_date: datetime.date
+ :ivar end_date: End date for processing.
+ :type end_date: datetime.date
+ :ivar fs: File system object for managing files.
+ :type fs: Optional[fsspec.AbstractFileSystem]
+ :ivar filesystem_type: Type of the filesystem (e.g., "file", "s3").
+ :type filesystem_type: str
+ :ivar filesystem_options: Additional options for initializing the filesystem.
+ :type filesystem_options: Optional[Dict]
+ :ivar verbose: Flag to enable verbose logging.
+ :type verbose: bool
+ :ivar class_params: Parameters to initialize the dataclass.
+ :type class_params: Optional[Dict]
+ :ivar load_params: Additional parameters for loading functions.
+ :type load_params: Optional[Dict]
+ :ivar reverse_order: Flag to reverse the order of date range generation.
+ :type reverse_order: bool
+ :ivar overwrite: Whether to overwrite all files during processing.
+ :type overwrite: bool
+ :ivar ignore_missing: Whether to ignore missing files.
+ :type ignore_missing: bool
+ :ivar logger: Logger instance for logging information.
+ :type logger: Optional[Logger]
+ :ivar max_age_minutes: Maximum file age threshold in minutes.
+ :type max_age_minutes: int
+ :ivar history_days_threshold: Number of days for the history threshold.
+ :type history_days_threshold: int
+ :ivar show_progress: Flag to enable progress display.
+ :type show_progress: bool
+ :ivar timeout: Timeout in seconds for processing tasks with threads.
+ :type timeout: Optional[int]
+ """
  DEFAULT_MAX_AGE_MINUTES = 1440
  DEFAULT_HISTORY_DAYS_THRESHOLD = 30

@@ -80,7 +136,19 @@ class DataWrapper:
  yield date.date()

  def process(self):
- """Execute the update plan using 'update_priority' to determine processing order."""
+ """
+ Processes update tasks by generating an update plan, filtering required updates, and distributing
+ the workload across threads based on priority levels.
+
+ This method operates by assessing required updates through generated conditions,
+ grouping them by priority levels, and processing them in parallel threads.
+ Each thread handles the updates for a specific priority level, ensuring a streamlined approach
+ to handling the updates efficiently.
+
+ :raises TimeoutError: If a thread processing a priority level exceeds the allowed timeout duration.
+
+ :return: None
+ """
  update_plan_table = self.generate_update_plan_with_conditions()

  # Display the update plan table to the user if requested
@@ -171,7 +239,20 @@ class DataWrapper:
  return True #

  def process_date(self, date: datetime.date):
- """Process a specific date by regenerating data as necessary."""
+ """
+ Processes data for a given date and saves it as a Parquet file.
+
+ This method processes data for the specified date by loading the data
+ corresponding to that day, saving it into a structured storage format
+ (Parquet), and logging relevant information such as processing time
+ and errors that may occur during the process. It uses provided
+ dataclass and parameters to operate and ensures the data is stored
+ in a structured folder hierarchy.
+
+ :param date: The specific date for which data processing and saving should occur
+ :type date: datetime.date
+ :return: None
+ """
  folder = f'{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/'
  full_parquet_filename = f"{folder}{self.parquet_filename}"

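
The context line above fixes the on-disk layout. For example (the data_path value is illustrative):

    import datetime

    data_path = "warehouse/orders/"
    date = datetime.date(2025, 1, 7)
    folder = f"{data_path}{date.year}/{date.month:02d}/{date.day:02d}/"
    # -> "warehouse/orders/2025/01/07/", with parquet_filename written inside it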
@@ -196,10 +277,17 @@ class DataWrapper:

  def generate_update_plan_with_conditions(self):
  """
- Generate an update plan that evaluates files based on the specified hierarchy:
- 1. Overwrite (all files regenerated).
- 2. History threshold: Files within `history_days_threshold` are evaluated for `max_age_minutes`.
- 3. Missing files: Detect missing files, ignoring future dates.
+ Generates an update plan for data files based on specific conditions. The function evaluates the need for updating or
+ overwriting data files for a given date range. Conditions include file existence, whether the file falls within a
+ specified historical threshold, and the necessity to overwrite or handle missing files. A priority map is utilized to
+ assign priority levels to update categories.
+
+ :raises FileNotFoundError: If any file is referenced that does not exist and the ``ignore_missing`` property is set to False.
+ :raises AttributeError: If any required attribute like ``fs``, ``dataclass``, or others are not properly set or initialized.
+
+ :return: A Pandas DataFrame representing the update plan, where each row contains information about a date, the conditions
+ evaluated for that date, and the determined update priority.
+ :rtype: pandas.DataFrame
  """
  rows = []

@@ -8,6 +8,24 @@ from sibi_dst.utils import Logger


  class DateUtils:
+ """
+ Utility class for date-related operations.
+
+ The DateUtils class provides a variety of operations to manipulate and retrieve
+ information about dates, such as calculating week ranges, determining start or
+ end dates for specific periods (quarters, months, years), and dynamically
+ registering custom time period functions. It also supports parsing specific
+ periods for date range computations and ensuring the input date is correctly
+ converted to the desired format.
+
+ :ivar logger: Logger instance used for logging messages. Defaults to the logger
+ for the current class if not provided.
+ :type logger: Logger
+
+ :ivar _PERIOD_FUNCTIONS: Stores dynamically registered period functions that
+ return start and end dates.
+ :type _PERIOD_FUNCTIONS: Dict[str, Callable[[], Tuple[datetime.date, datetime.date]]]
+ """
  _PERIOD_FUNCTIONS: Dict[str, Callable[[], Tuple[datetime.date, datetime.date]]] = {}

  def __init__(self, logger=None):
@@ -127,6 +145,23 @@ class DateUtils:


  class BusinessDays:
+ """
+ Provides functionality for handling business days calculations with a custom
+ holiday list. The class includes methods for calculating the number of
+ business days, modifying dates by adding business days, and applying these
+ operations to Dask DataFrames.
+
+ :ivar logger: Logger instance for logging error, warning, and debug messages.
+ :type logger: logging.Logger
+ :ivar HOLIDAY_LIST: Dictionary mapping years to lists of holiday dates.
+ :type HOLIDAY_LIST: dict
+ :ivar bd_cal: Numpy busdaycalendar object containing holidays and week mask.
+ :type bd_cal: numpy.busdaycalendar
+ :ivar holidays: Array of holiday dates used by the business day calendar.
+ :type holidays: numpy.ndarray
+ :ivar week_mask: Boolean array indicating working days within a week.
+ :type week_mask: numpy.ndarray
+ """
  def __init__(self, holiday_list, logger):
  """
  Initialize a BusinessDays object with a given holiday list.
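
A sketch of the numpy machinery the attributes above describe (holiday dates and the year-keyed mapping are illustrative):

    import numpy as np

    holiday_list = {"2025": ["2025-01-01", "2025-04-11"]}
    holidays = [d for dates in holiday_list.values() for d in dates]
    bd_cal = np.busdaycalendar(weekmask="1111100", holidays=holidays)
    n_bdays = np.busday_count("2025-01-02", "2025-01-31", busdaycal=bd_cal)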
@@ -1,11 +1,28 @@
- # Copyright (c) 2023. ISTMO Center S.A. All Rights Reserved
- #
  import logging
  import os
  import sys


  class Logger:
+ """
+ Handles the creation, setup, and management of logging functionalities.
+
+ This class facilitates logging by creating and managing a logger instance with
+ customizable logging directory, name, and file. It ensures logs from a script
+ are stored in a well-defined directory and file, and provides various logging
+ methods for different log levels. The logger automatically formats and handles
+ log messages. Additionally, this class provides a class method to initialize a
+ logger with default behaviors.
+
+ :ivar log_dir: Path to the directory where log files are stored.
+ :type log_dir: str
+ :ivar logger_name: Name of the logger instance.
+ :type logger_name: str
+ :ivar log_file: Base name of the log file.
+ :type log_file: str
+ :ivar logger: The initialized logger instance used for logging messages.
+ :type logger: logging.Logger
+ """
  def __init__(self, log_dir, logger_name, log_file):
  self.log_dir = log_dir
  self.logger_name = logger_name
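
A construction sketch using the documented attributes (argument values are illustrative); default_logger is the class-method form used elsewhere in this diff, for example in DataUtils.__init__:

    log = Logger(log_dir="logs", logger_name="etl", log_file="etl")
    # or
    log = Logger.default_logger(logger_name="etl")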
@@ -38,6 +38,7 @@ class ParquetSaver:
  schema = self._define_schema()
  self._convert_dtypes(schema)
  self._save_dataframe_to_parquet(full_path, schema)
+ self.fs.close()

  def _define_schema(self) -> pa.Schema:
  """Define a PyArrow schema dynamically based on df_result column types."""
@@ -90,4 +90,7 @@ class StorageManager:
  """
  print("Rebuilding depot structure...")
  self.rebuild_depot_paths(depots, clear_existing=clear_existing)
- print("Rebuild complete.")
+ print("Rebuild complete.")
+
+ def get_fs_instance(self):
+ return fsspec.filesystem(self.fs_type, **self.fs_options)
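
The new helper returns an fsspec filesystem built from the instance's stored type and options; for example (values illustrative):

    import fsspec

    fs = fsspec.filesystem("file")                          # when fs_type is "file"
    # fs = fsspec.filesystem("s3", key="...", secret="...")  # when fs_type is "s3"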
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sibi-dst
- Version: 0.3.31
+ Version: 0.3.33
  Summary: Data Science Toolkit
  Author: Luis Valverde
  Author-email: lvalverdeb@gmail.com
@@ -20,6 +20,7 @@ Requires-Dist: django (>=5.1.4,<6.0.0)
  Requires-Dist: djangorestframework (>=3.15.2,<4.0.0)
  Requires-Dist: folium (>=0.19.4,<0.20.0)
  Requires-Dist: geopandas (>=1.0.1,<2.0.0)
+ Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
  Requires-Dist: httpx (>=0.27.2,<0.28.0)
  Requires-Dist: ipython (>=8.29.0,<9.0.0)
  Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
@@ -42,6 +43,7 @@ Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0)
  Requires-Dist: tornado (>=6.4.1,<7.0.0)
  Requires-Dist: tqdm (>=4.67.0,<5.0.0)
  Requires-Dist: uvicorn (>=0.34.0,<0.35.0)
+ Requires-Dist: uvicorn-worker (>=0.3.0,<0.4.0)
  Description-Content-Type: text/markdown

  # sibi-dst