sibi-dst 0.3.32__py3-none-any.whl → 0.3.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,4 @@
-# Copyright (c) 2023. ISTMO Center S.A. All Rights Reserved
-#
+
 import keyword
 import re
 from functools import lru_cache
@@ -49,13 +48,57 @@ apps_label = "datacubes"
 
 
 class DjangoSqlModelBuilder:
+    """
+    Handles the dynamic creation of Django ORM models based on database table structures.
+
+    This class takes input parameters such as the database connection and table
+    name, and dynamically maps the table's schema to a Django ORM model. The
+    resulting model can be used for ORM operations such as querying, saving,
+    and deleting records. The class uses Django's introspection features and
+    allows customization through its fields and methods.
+
+    :ivar connection_name: The name of the database connection being used.
+    :type connection_name: str
+    :ivar table: The name of the database table for which the model is being built.
+    :type table: str
+    :ivar model: The dynamically generated Django model, or None if not yet created.
+    :type model: type | None
+    """
     def __init__(self, **kwargs):
+        """
+        Initializes the instance by assigning default attributes and processing
+        the given keyword arguments through an internal utility method, setting
+        up the attributes needed for later use.
+
+        :param kwargs: Keyword arguments used by the internal parsing method to
+            populate the attributes of the class. The expected keys and their
+            usage are documented in ``__parse_builder``.
+        """
         self.connection_name = None
         self.table = None
         self.model = None
         self.__parse_builder(**kwargs)
 
     def __parse_builder(self, **kwargs):
+        """
+        Parses the provided keyword arguments and initializes the builder
+        properties. Validates that the required 'connection_name' and 'table'
+        values are present and sets the corresponding attributes, raising an
+        error if validation fails. This method is intended for internal use
+        when configuring the builder.
+
+        :param kwargs: Keyword arguments containing configuration values for
+            initializing the builder. Must include the 'connection_name'
+            and 'table' keys.
+        :type kwargs: dict
+        :return: The builder instance after initialization.
+        :rtype: DjangoSqlModelBuilder
+        :raises ValueError: If 'connection_name' or 'table' is not provided in
+            the keyword arguments.
+        """
         self.connection_name = kwargs.get("connection_name", None)
         self.table = kwargs.get("table", None)
         self.model = None
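
For illustration, a builder might be constructed as follows. This is a hedged sketch: it assumes a configured Django project, and the "replica" connection alias and "sales_invoices" table name are hypothetical, not taken from the source.

    # Hypothetical usage sketch; assumes settings.DATABASES defines a
    # "replica" alias and the database contains a "sales_invoices" table.
    builder = DjangoSqlModelBuilder(connection_name="replica", table="sales_invoices")
    DynamicModel = builder.build_model()
    print(DynamicModel.__name__)  # "SalesInvoices", per table2model's CamelCase rule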
@@ -67,6 +110,22 @@ class DjangoSqlModelBuilder:
 
     @lru_cache(maxsize=None)
     def build_model(self):
+        """
+        Builds and retrieves a model class with dynamically defined fields.
+
+        This method attempts to retrieve a model by its name and, if it does
+        not exist, creates a new model matching the specified table structure.
+        The model is either fetched or constructed from the provided field
+        data, and the result is cached across repeated calls to improve
+        performance and avoid redundant computation.
+
+        :raises LookupError: If the model cannot be fetched or created due to
+            an invalid lookup.
+
+        :return: A model class dynamically constructed or retrieved for the
+            specified table and fields.
+        :rtype: type
+        """
         model = None
         model_fields = self.get_model_fields()
         model_name = self.table2model(self.table)
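
The caching behaviour noted above can be observed directly; a hedged continuation of the earlier sketch:

    # Because build_model() is wrapped in @lru_cache, repeated calls on the
    # same builder should return the identical class object ("builder" is the
    # hypothetical instance from the previous sketch).
    first = builder.build_model()
    second = builder.build_model()
    assert first is second  # cached, no second round of introspection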
@@ -78,6 +137,25 @@ class DjangoSqlModelBuilder:
         return model
 
     def create_model(self, name, fields) -> type:
+        """
+        Creates a Django model class dynamically.
+
+        This method takes a model name and a dictionary of fields, dynamically
+        defines a Meta class carrying additional metadata for the model (such
+        as `db_table`, `managed`, and `app_label`), and then uses Python's
+        built-in `type()` function to generate and return the model class on
+        the fly.
+
+        :param name: The name of the model class to create.
+        :type name: str
+        :param fields: A dictionary mapping field names to their definitions in
+            Django's model field format. Each field definition should include
+            the field type and optional parameters.
+        :type fields: dict
+        :return: The dynamically created Django model class based on the
+            provided name and fields.
+        :rtype: type
+        """
         def parse_args(arg_string):
             arg_dict = {}
             # Match keyword arguments in the form key=value
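
The `type()`-based pattern the docstring describes can be shown framework-free. A minimal sketch, with plain attributes standing in for real Django field objects so it runs without a Django project (the table name is hypothetical; `app_label` reuses the `apps_label = "datacubes"` constant visible in the hunk header):

    # Define the Meta class, then let type() assemble the class dynamically.
    class Meta:
        db_table = "sales_invoices"   # hypothetical table name
        managed = False
        app_label = "datacubes"

    attrs = {
        "Meta": Meta,
        "__module__": __name__,
        "invoice_no": "CharField(max_length=30)",  # stand-in for a field object
    }
    SalesInvoices = type("SalesInvoices", (object,), attrs)
    print(SalesInvoices.Meta.db_table)  # sales_invoices

In the real builder the base class would be Django's models.Model rather than object.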
@@ -118,9 +196,32 @@ class DjangoSqlModelBuilder:
 
     @staticmethod
     def table2model(table_name):
+        """
+        Converts a database table name to a corresponding model name by
+        transforming it from snake_case to CamelCase: the table name is split
+        on underscores, the first letter of each part is capitalized, and the
+        parts are joined into a single string.
+
+        :param table_name: The name of the database table in snake_case format
+        :type table_name: str
+        :return: The equivalent model name in CamelCase format
+        :rtype: str
+        """
         return "".join([x.title() for x in table_name.split("_")])
 
     def get_model_fields(self):
+        """
+        Generates the model field definitions from a database table using
+        introspection. The method extracts information about columns, primary
+        keys, unique constraints, and additional metadata to define the fields
+        of the model.
+
+        :raises ValueError: If the specified connection or table is not found.
+        :raises Exception: For any database or introspection-related errors.
+
+        :returns: Dictionary containing the model field definitions based on
+            the table's structure and metadata.
+        :rtype: dict
+        """
         connection = connections[self.connection_name]
         if connection is None:
             raise ValueError("Connection %s not found" % self.connection_name)
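
The snake_case-to-CamelCase transform is shown verbatim in the hunk above and can be exercised standalone:

    def table2model(table_name):
        return "".join([x.title() for x in table_name.split("_")])

    print(table2model("customer_order_items"))  # CustomerOrderItems
    # Note that str.title() lowercases the rest of each part:
    print(table2model("API_keys"))              # ApiKeys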
@@ -265,7 +366,21 @@ class DjangoSqlModelBuilder:
     @staticmethod
     def normalize_col_name(col_name, used_column_names, is_relation):
         """
-        Modify the column name to make it Python-compatible as a field name
+        Normalizes a column name so that it is valid as a Python field name and
+        resolves conflicts with reserved words or previously used names. The
+        transformations ensure the column name:
+        - Is lowercase.
+        - Has unsuitable characters replaced with underscores.
+        - Does not start with a digit and does not collide with Python keywords.
+        - Does not collide with previously used column names.
+
+        :param col_name: The original column name from the schema.
+        :param used_column_names: A list of previously used column names, used to avoid naming collisions.
+        :param is_relation: A boolean indicating whether the column represents a relation (e.g., a foreign key).
+        :return: A tuple containing:
+            - The normalized column name (str).
+            - A dictionary (`field_params`) with any relevant information for the field's
+              database configuration, including the original column name when
+              transformations were applied.
+            - A list (`field_notes`) of strings explaining the applied transformations.
         """
         field_params = {}
         field_notes = []
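
A hedged, standalone approximation of the documented rules (the real method also handles relation columns and collision renaming; the `db_column` key mirrors Django's inspectdb convention and is an assumption here):

    import keyword
    import re

    def normalize_sketch(col_name):
        field_params, field_notes = {}, []
        new_name = col_name.lower()
        if new_name != col_name:
            field_notes.append("Field name made lowercase.")
        new_name, n = re.subn(r"\W", "_", new_name)  # unsuitable chars -> underscore
        if n:
            field_notes.append("Field renamed to remove unsuitable characters.")
        if keyword.iskeyword(new_name):
            new_name += "_field"
            field_notes.append("Field renamed because it was a Python reserved word.")
        if new_name[0].isdigit():
            new_name = "number_" + new_name
            field_notes.append("Field renamed because it wasn't a valid Python identifier.")
        if new_name != col_name:
            field_params["db_column"] = col_name  # assumed key, per inspectdb convention
        return new_name, field_params, field_notes

    print(normalize_sketch("Order ID"))
    # ('order_id', {'db_column': 'Order ID'}, ['Field name made lowercase.', ...])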
@@ -326,9 +441,20 @@ class DjangoSqlModelBuilder:
     @staticmethod
     def get_field_type(connection, row):
         """
-        Given the database connection, the table name, and the cursor row
-        description, this routine will return the given field type name, as
-        well as any additional keyword parameters and notes for the field.
+        Determines the type of a database field from its cursor description,
+        using the connection's introspection interface, and returns metadata
+        such as field parameters and additional notes.
+
+        The method extracts the field type from the database's introspection
+        interface and adds corresponding parameters (e.g., `max_length`,
+        `decimal_places`), along with notes when certain properties had to be
+        inferred or guessed.
+
+        :param connection: The database connection object used for introspection.
+        :type connection: Any
+        :param row: An object containing field metadata, such as type code,
+            display size, collation, precision, and scale.
+        :type row: Any
+        :return: A tuple containing the field type, its parameters, and any notes.
+        :rtype: tuple[str, dict, list[str]]
         """
         field_params = {}
         field_notes = []
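
A hedged sketch of the Django introspection pattern this docstring refers to, using introspection APIs that Django provides; it assumes a configured Django project, and the "default" alias and "sales_invoices" table are illustrative:

    from django.db import connections

    conn = connections["default"]
    with conn.cursor() as cursor:
        for row in conn.introspection.get_table_description(cursor, "sales_invoices"):
            # Map the driver-level type code to a Django field type name.
            field_type = conn.introspection.get_field_type(row.type_code, row)
            print(row.name, field_type)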
@@ -9,6 +9,26 @@ from sibi_dst.utils import Logger
 
 
 class HttpConfig(BaseModel):
+    """
+    Configuration for HTTP client operations, designed to manage and fetch data
+    from HTTP endpoints asynchronously. The class serves as a centralized
+    configuration and operation hub, encapsulating settings such as the base
+    URL, query parameters, API key, and logger. It uses `httpx` for HTTP
+    interactions and Dask for handling and transforming the resulting data.
+
+    :ivar base_url: The base URL for HTTP communication.
+    :type base_url: HttpUrl
+    :ivar params: Optional dictionary of query parameters to use with GET requests.
+    :type params: Optional[Dict[str, Any]]
+    :ivar logger: The logger instance for logging operations. If not provided,
+        a default logger is initialized using the class name.
+    :type logger: Optional[Logger]
+    :ivar timeout: The timeout for HTTP requests, in seconds. Defaults to 300.
+    :type timeout: Optional[int]
+    :ivar api_key: An optional secret API key for authorization. If present, it
+        populates the Authorization header of HTTP requests.
+    :type api_key: Optional[SecretStr]
+    """
     base_url: HttpUrl
     params: Optional[Dict[str, Any]] = Field(default_factory=dict)
     logger: Optional[Logger] = None
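
A hedged construction sketch: the field names follow the attribute list above, while the URL and key values are placeholders.

    config = HttpConfig(
        base_url="https://api.example.com/v1/metrics",  # placeholder endpoint
        params={"format": "json"},
        timeout=60,
        api_key="not-a-real-key",  # pydantic coerces this into a SecretStr
    )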
@@ -17,12 +37,43 @@ class HttpConfig(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     def __init__(self, logger=None, **data):
+        """
+        Initializes the configuration with a logger and other data parameters.
+
+        This constructor accepts an optional custom logger. If no logger is
+        supplied, a default logger specific to the class is created using the
+        Logger utility. The remaining keyword arguments initialize the model's
+        fields.
+
+        :param logger: Optional logger instance. If not provided, a default
+            logger is created using the class name as the logger name.
+        :type logger: logging.Logger, optional
+        :param data: Arbitrary keyword arguments containing the data used to
+            initialize the model's fields.
+        :type data: dict
+        """
         super().__init__(**data)
         # Initialize the logger if not provided
         self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
 
     async def fetch_data(self, **options) -> dd.DataFrame:
-        """Asynchronously fetch JSON data from HTTP endpoint, substituting options into the URL path."""
+        """
+        Fetches data from an HTTP JSON source and returns it as a Dask DataFrame.
+
+        This asynchronous method constructs a request URL from the provided
+        options and sends an HTTP GET request. The fetched JSON data is
+        normalized and converted to a Dask DataFrame for further use. Request
+        errors and JSON parsing errors are raised to the caller.
+
+        :param options: Arbitrary keyword arguments representing dynamic path
+            segments to append to the base URL.
+        :type options: dict
+        :return: A Dask DataFrame containing the structured data retrieved
+            from the HTTP JSON source.
+        :rtype: dd.DataFrame
+        :raises httpx.RequestError: If there is an issue with the HTTP request.
+        :raises ValueError: If there is an error parsing the JSON data.
+        """
         try:
             # Build URL with options as path segments
 
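
A hedged usage sketch, continuing from the `config` instance built earlier: the keyword arguments become URL path segments, so this would request something like `<base_url>/2024/01` (the exact composition depends on the implementation).

    import asyncio

    async def main():
        df = await config.fetch_data(year="2024", month="01")
        print(df.head())  # head() triggers computation for the preview

    asyncio.run(main())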
@@ -5,11 +5,39 @@ from sibi_dst.utils import Logger
 
 
 class ParquetFilterHandler(object):
+    """
+    Handles parquet filtering operations using Dask DataFrames.
+
+    This class applies complex filtering logic to Dask DataFrames based on the
+    specified filter criteria. It supports operations such as exact matches,
+    ranges, string pattern matches, and null checks, and it handles
+    datetime-related filtering, including date/time truncation and specific
+    date/time attributes.
+
+    :ivar logger: Logger object used for logging within the class. Defaults to
+        the class-level default logger.
+    :type logger: Logger
+    """
     def __init__(self, logger=None):
        self.logger = logger or Logger.default_logger(logger_name=self.__class__.__name__)
 
     @staticmethod
     def apply_filters_dask(df, filters):
+        """
+        Applies a set of filters to a Dask DataFrame, enabling complex
+        filtering operations such as comparisons, ranges, string matching, and
+        more. Handles special cases for datetime operations, including casting
+        and extracting specific datetime components for filtering.
+
+        :param df: Dask DataFrame to which the filters will be applied.
+        :type df: dask.dataframe.DataFrame
+        :param filters: Dictionary defining the filtering logic, where each key
+            specifies a column name and filter operation, and the value is the
+            corresponding filter value to apply.
+        :type filters: dict
+        :return: A filtered Dask DataFrame based on the logic defined in the filters.
+        :rtype: dask.dataframe.DataFrame
+        :raises ValueError: If an unsupported operation is encountered in the filters.
+        """
         dt_operators = ['date', 'time']
         date_operators = ['year', 'month', 'day', 'hour', 'minute', 'second', 'week_day']
         comparison_operators = [
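
A hedged usage sketch: the filter-key syntax is assumed to follow the Django-style "column__operation" convention suggested by the operator lists above, and a bare key is assumed to mean an exact match.

    import pandas as pd
    import dask.dataframe as dd

    pdf = pd.DataFrame({
        "status": ["active", "closed", "active"],
        "created_at": pd.to_datetime(["2024-01-05", "2024-02-10", "2024-03-01"]),
    })
    ddf = dd.from_pandas(pdf, npartitions=2)

    filters = {
        "status": "active",             # assumed exact match
        "created_at__month__gte": 2,    # assumed datetime-component comparison
    }
    result = ParquetFilterHandler.apply_filters_dask(ddf, filters)
    print(result.compute())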
@@ -11,6 +11,45 @@ from sibi_dst.utils import Logger
 
 
 class ParquetConfig(BaseModel):
+    """
+    Represents the configuration for managing and validating parquet file operations.
+
+    The `ParquetConfig` class provides the attributes and methods needed to
+    operate on parquet files in a file system: ensuring file paths and
+    extensions, validating storage paths and parameters, determining file
+    recency, and calculating the size of parquet files. Through its
+    integration with `fsspec` it can work against different file systems, and
+    it supports optional logging for storage path validation.
+
+    :ivar load_parquet: Indicates whether parquet data should be loaded, based
+        on the current configuration and validation.
+    :type load_parquet: bool
+    :ivar parquet_filename: The name of the parquet file; optional if folders are used.
+    :type parquet_filename: Optional[str]
+    :ivar parquet_storage_path: The base path for storing or retrieving parquet files.
+    :type parquet_storage_path: Optional[str]
+    :ivar parquet_full_path: The full path to a specific parquet file, derived
+        from the storage path and filename when applicable.
+    :type parquet_full_path: Optional[str]
+    :ivar parquet_folder_list: A list of folder paths to parquet data, derived
+        from the start and end dates if specified.
+    :type parquet_folder_list: Optional[List[str]]
+    :ivar parquet_size_bytes: The total size of the parquet files, in bytes.
+    :type parquet_size_bytes: int
+    :ivar parquet_max_age_minutes: The maximum acceptable age of the most
+        recent parquet file, in minutes.
+    :type parquet_max_age_minutes: int
+    :ivar parquet_is_recent: Indicates whether the parquet file is considered
+        recent under the `parquet_max_age_minutes` condition.
+    :type parquet_is_recent: bool
+    :ivar parquet_start_date: The start date for parquet file validation or file path generation.
+    :type parquet_start_date: Optional[str]
+    :ivar parquet_end_date: The end date for parquet file validation or file path generation.
+    :type parquet_end_date: Optional[str]
+    :ivar fs: The file system object used for storage operations, compliant with `fsspec`.
+    :type fs: Optional[fsspec.spec.AbstractFileSystem]
+    :ivar logger: A logger for handling logging operations.
+    :type logger: Optional[Logger]
+    """
     load_parquet: bool = False
     parquet_filename: Optional[str] = None
     parquet_storage_path: Optional[str] = None
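
A hedged construction sketch: the path and dates are placeholders, the field names follow the attribute list above, and the derived fields (`load_parquet`, `parquet_folder_list`, and so on) are assumed to be filled in by the validator described below.

    config = ParquetConfig(
        parquet_storage_path="file:///tmp/warehouse",  # placeholder path
        parquet_start_date="2024-01-01",
        parquet_end_date="2024-01-31",
    )
    print(config.load_parquet, config.parquet_folder_list)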
@@ -27,6 +66,20 @@ class ParquetConfig(BaseModel):
 
     @model_validator(mode='after')
     def check_parquet_params(self):
+        """
+        Validates and configures the parameters required for managing parquet
+        files. This includes configuring paths through `fsspec`, resolving file
+        storage paths, checking the validity of the dates related to parquet
+        files, ensuring proper parquet file extensions, and determining whether
+        existing parquet files are recent enough to be loaded.
+
+        :return: The current instance, with its attributes validated and
+            configured for handling parquet files.
+
+        :raises ValueError: If a required condition is not met, such as a
+            missing or invalid `parquet_storage_path`, only one of
+            `parquet_start_date` and `parquet_end_date` being provided, or a
+            `parquet_end_date` earlier than the `parquet_start_date`.
+        """
         # Configure paths based on fsspec
         if self.logger is None:
             self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
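
Per the validator's documented rules, supplying only one of the two date bounds should be rejected; a hedged sketch (pydantic's ValidationError subclasses ValueError, and the exact message is unknown):

    try:
        ParquetConfig(
            parquet_storage_path="file:///tmp/warehouse",
            parquet_start_date="2024-01-01",  # no parquet_end_date supplied
        )
    except ValueError as exc:
        print(f"rejected: {exc}")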
@@ -72,6 +125,23 @@ class ParquetConfig(BaseModel):
         return self
 
     def is_file_recent(self):
+        """
+        Determines whether the file at `parquet_full_path` is considered recent
+        based on its modification time and the configured maximum age limit.
+
+        The method first checks that the file exists at `parquet_full_path`;
+        if it does not, it returns False. If `parquet_max_age_minutes` is 0,
+        no maximum age applies and the method returns True. Otherwise, it
+        retrieves the file's last-modified time, computes the file's age
+        relative to the current time, and returns True if that age does not
+        exceed `parquet_max_age_minutes`, or False otherwise.
+
+        :return: Whether the file is considered recent, based on its
+            existence, modification time, and the maximum age limit.
+        :rtype: bool
+        """
         if not self.fs.exists(self.parquet_full_path):
             return False
         if self.parquet_max_age_minutes == 0:
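
A standalone sketch of the documented recency check, mirroring the naive `now()` comparison shown in the hunk; the path and age limit are placeholders, and the timezone normalization is an added precaution since some fsspec filesystems return tz-aware times.

    import datetime
    import fsspec

    fs = fsspec.filesystem("file")
    path = "/tmp/warehouse/data.parquet"  # placeholder
    max_age_minutes = 30

    if not fs.exists(path):
        recent = False
    elif max_age_minutes == 0:
        recent = True
    else:
        modified = fs.modified(path)
        if modified.tzinfo is not None:  # normalize tz-aware timestamps
            modified = modified.astimezone().replace(tzinfo=None)
        age = datetime.datetime.now() - modified
        recent = age <= datetime.timedelta(minutes=max_age_minutes)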
@@ -80,6 +150,24 @@ class ParquetConfig(BaseModel):
         return (datetime.datetime.now() - file_time) <= datetime.timedelta(minutes=self.parquet_max_age_minutes)
 
     def get_parquet_size_bytes(self):
+        """
+        Calculates the total size, in bytes, of all parquet files within the
+        folders listed in `parquet_folder_list`. The method iterates over each
+        folder in the list, applies a recursive wildcard search that matches
+        any level of nested directories, and sums the sizes of all parquet
+        files found using the file system's size retrieval method.
+
+        :raises AttributeError: If `fs` or `parquet_folder_list` is not set or
+            is improperly configured when the method is called.
+        :raises NotImplementedError: If the file system object does not
+            implement `fs.size` or `fs.glob`, or otherwise lacks the support
+            needed for these operations.
+
+        :return: The cumulative size, in bytes, of all parquet files located
+            in the folders listed in `parquet_folder_list`.
+        :rtype: int
+        """
         total_size = 0
         for folder in self.parquet_folder_list:
             # Use a double wildcard ** to match any level of nested directories
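
A hedged sketch of the documented recursive size scan; the folder paths are placeholders, and the exact glob pattern (here `**/*.parquet`) is an assumption based on the "double wildcard" comment above.

    import fsspec

    fs = fsspec.filesystem("file")
    folders = ["/tmp/warehouse/2024-01-01", "/tmp/warehouse/2024-01-02"]
    total_size = sum(
        fs.size(path)
        for folder in folders
        for path in fs.glob(f"{folder}/**/*.parquet")  # recursive match
    )
    print(f"{total_size} bytes")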
@@ -88,7 +176,14 @@ class ParquetConfig(BaseModel):
         return total_size
 
     def load_files(self):
-
+        """
+        Loads parquet files into a Dask DataFrame if parquet loading is
+        enabled, reading either from the list of parquet folder paths or from
+        the single configured parquet path.
+
+        :return: A Dask DataFrame containing the loaded parquet data.
+        :rtype: dask.dataframe.DataFrame
+        """
         if self.load_parquet:
             if self.parquet_folder_list:
                 return dd.read_parquet(self.parquet_folder_list, engine="pyarrow", filesystem=self.fs)
@@ -97,5 +192,14 @@ class ParquetConfig(BaseModel):
 
     @staticmethod
     def ensure_file_extension(filepath: str, extension: str) -> str:
+        """
+        Ensures that the given file path carries the desired extension. If the
+        file already has that extension, the path is returned unchanged;
+        otherwise, the extension is replaced with the given one.
+
+        :param filepath: The path to the file, as a string.
+        :param extension: The desired file extension, without the leading dot.
+        :return: The file path, guaranteed to carry the specified extension.
+        """
         path = Path(filepath)
         return str(path.with_suffix(f".{extension}")) if path.suffix != f".{extension}" else filepath
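
The pathlib logic is shown verbatim in the hunk and can be exercised standalone:

    from pathlib import Path

    def ensure_file_extension(filepath: str, extension: str) -> str:
        path = Path(filepath)
        return str(path.with_suffix(f".{extension}")) if path.suffix != f".{extension}" else filepath

    print(ensure_file_extension("out/data", "parquet"))          # out/data.parquet
    print(ensure_file_extension("out/data.csv", "parquet"))      # out/data.parquet
    print(ensure_file_extension("out/data.parquet", "parquet"))  # unchanged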
@@ -9,6 +9,23 @@ from ._sql_model_builder import SqlAlchemyModelBuilder
 
 
 class SqlAlchemyConnectionConfig(BaseModel):
+    """
+    Configuration class for managing a SQLAlchemy database connection.
+
+    This class holds the settings needed to establish a connection to a
+    database, validate that connection, and dynamically build a SQLAlchemy
+    model for a specific table when required. It initializes the database
+    engine from the provided connection URL and ensures that the connection
+    and table information are properly validated.
+
+    :ivar connection_url: The URL used to connect to the database.
+    :type connection_url: str
+    :ivar table: The name of the database table for which a model will be constructed.
+    :type table: Optional[str]
+    :ivar model: The dynamically built SQLAlchemy model for the specified table.
+    :type model: Any
+    :ivar engine: The SQLAlchemy engine instance reused for database connections.
+    :type engine: Optional[Any]
+    """
     connection_url: str
     table: Optional[str] = None
     model: Any = None
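
A hedged construction sketch: the SQLite URL and table name are placeholders, and the field names follow the attribute list above.

    conn_config = SqlAlchemyConnectionConfig(
        connection_url="sqlite:///example.db",  # placeholder URL
        table="sales_invoices",                 # placeholder table
    )
    print(conn_config.model)  # dynamically built SQLAlchemy model, per the docs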
@@ -8,6 +8,41 @@ from ._db_connection import SqlAlchemyConnectionConfig
 
 
 class SqlAlchemyLoadFromDb:
+    """
+    Provides functionality for loading data from a database into a Dask
+    DataFrame using SQLAlchemy, handling large datasets efficiently through
+    Dask's parallel computation.
+
+    The class is initialized with a database connection configuration, a query
+    configuration, optional parameters, and a logger. It executes a query
+    using the specified configurations and reads the results into a Dask
+    DataFrame, which is useful for processing and analyzing large-scale data.
+
+    :ivar df: Dask DataFrame that stores the loaded data.
+    :type df: dd.DataFrame
+    :ivar db_connection: Database connection configuration object, containing
+        details such as the table, model, and engine to use for the query.
+    :type db_connection: SqlAlchemyConnectionConfig
+    :ivar table_name: Name of the database table being queried.
+    :type table_name: str
+    :ivar model: SQLAlchemy model associated with the database connection.
+    :type model: sqlalchemy.ext.declarative.api.DeclarativeMeta
+    :ivar engine: SQLAlchemy engine used for executing queries.
+    :type engine: sqlalchemy.engine.base.Engine
+    :ivar logger: Logger instance for logging debug and error information.
+    :type logger: Logger
+    :ivar query_config: Query configuration, including query-related details
+        such as the SQL query or query settings.
+    :type query_config: QueryConfig
+    :ivar params_config: Parameters configuration, including filter parameters
+        for the query.
+    :type params_config: ParamsConfig
+    :ivar debug: Flag indicating whether debug mode is enabled.
+    :type debug: bool
+    :ivar chunk_size: Size of the data chunks processed at a time.
+    :type chunk_size: int
+    """
     df: dd.DataFrame = None
 
     def __init__(
@@ -19,7 +54,28 @@ class SqlAlchemyLoadFromDb:
         **kwargs,
     ):
         """
-        Initialize the loader with database connection, query, and parameters.
+        Initializes the loader, setting up the database connection, query
+        configuration, parameter configuration, and optional settings such as
+        debugging and logging, to manage SQLAlchemy-based database operations.
+
+        :param plugin_sqlalchemy:
+            The SQLAlchemy connection configuration object, which provides
+            connection details such as the engine, table name, and model
+            associated with the database operations.
+        :param plugin_query:
+            The query configuration object, used to define specific query
+            options or rules. Defaults to None.
+        :param plugin_params:
+            The parameters configuration object, used for any additional
+            parameterized settings or configurations. Defaults to None.
+        :param logger:
+            Optional logger instance. If not provided, a default logger is
+            created using the standard logging system.
+        :param kwargs:
+            Optional additional keyword arguments for customization, such as
+            `debug` mode or `chunk_size` for batch operations.
         """
         self.db_connection = plugin_sqlalchemy
         self.table_name = self.db_connection.table
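
A hedged end-to-end sketch, continuing from the `conn_config` instance built earlier; the QueryConfig and ParamsConfig construction details are not shown in this diff, so None is passed here and the keyword arguments follow the documented `kwargs` options.

    loader = SqlAlchemyLoadFromDb(
        plugin_sqlalchemy=conn_config,  # from the previous sketch
        plugin_query=None,
        plugin_params=None,
        debug=True,
        chunk_size=50_000,
    )
    df = loader.build_and_load()
    print(df.head())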
@@ -33,13 +89,35 @@ class SqlAlchemyLoadFromDb:
 
     def build_and_load(self) -> dd.DataFrame:
         """
-        Load data into a Dask DataFrame based on the query and parameters.
+        Builds and returns the resulting DataFrame by calling the internal
+        `_build_and_load` method, which processes and prepares the data
+        before it is returned as a Dask DataFrame.
+
+        :raises RuntimeError: If an error occurs during the build or load process.
+
+        :return: The processed data as a Dask DataFrame.
+        :rtype: dd.DataFrame
         """
         self._build_and_load()
         return self.df
 
     def _build_and_load(self) -> dd.DataFrame:
+        """
+        Builds and loads a Dask DataFrame from a SQLAlchemy-compatible source.
+
+        This method initializes a SQLAlchemyDask object with the provided
+        model, filters, engine URL, logger, chunk size, and debug
+        configuration, then attempts to load the data using SQLAlchemyDask's
+        ``read_frame`` method. If the data cannot be loaded or the query
+        returns no rows, it creates and returns an empty Dask DataFrame.
 
+        :raises Exception: On failure to load the data or to create the DataFrame.
+
+        :return: A Dask DataFrame containing the queried data, or an empty
+            DataFrame if the query returns no results or fails.
+        :rtype: dask.dataframe.DataFrame
+        """
         try:
             self.df = SQLAlchemyDask(
                 model=self.model,
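
A hedged sketch of the documented fallback: when the query yields no rows or loading fails, an empty Dask DataFrame is returned instead of raising to the caller.

    import pandas as pd
    import dask.dataframe as dd

    def empty_frame() -> dd.DataFrame:
        # One empty partition, mirroring the documented empty-result behaviour.
        return dd.from_pandas(pd.DataFrame(), npartitions=1)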