sibi-dst 0.3.32__py3-none-any.whl → 0.3.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,41 @@ from ._db_connection import SqlAlchemyConnectionConfig
8
8
 
9
9
 
10
10
  class SqlAlchemyLoadFromDb:
11
+ """
12
+ The SqlAlchemyLoadFromDb class provides functionality to load data from a
13
+ database using SQLAlchemy into a Dask DataFrame. It is capable of handling
14
+ large datasets efficiently by utilizing the Dask framework for parallel
15
+ computations.
16
+
17
+ This class is initialized with a database connection configuration, query
18
+ configuration, optional parameters, and a logger. It can execute a query
19
+ using the specified configurations and read the results into a Dask
20
+ DataFrame. This is useful for processing and analyzing large-scale data.
21
+
22
+ :ivar df: Dask DataFrame to store the loaded data.
23
+ :type df: dd.DataFrame
24
+ :ivar db_connection: Database connection configuration object, containing details
25
+ such as the table, model, and engine to be used for the query.
26
+ :type db_connection: SqlAlchemyConnectionConfig
27
+ :ivar table_name: Name of the database table being queried.
28
+ :type table_name: str
29
+ :ivar model: SQLAlchemy model associated with the database connection.
30
+ :type model: sqlalchemy.orm.DeclarativeMeta
31
+ :ivar engine: SQLAlchemy engine used for executing queries.
32
+ :type engine: sqlalchemy.engine.base.Engine
33
+ :ivar logger: Logger instance for logging debug and error information.
34
+ :type logger: Logger
35
+ :ivar query_config: Query configuration, including query-related details such
36
+ as the SQL query or query settings.
37
+ :type query_config: QueryConfig
38
+ :ivar params_config: Parameters configuration, including filter parameters for
39
+ the query.
40
+ :type params_config: ParamsConfig
41
+ :ivar debug: Debug flag indicating whether debug mode is enabled.
42
+ :type debug: bool
43
+ :ivar chunk_size: Size of data chunks to process at a time.
44
+ :type chunk_size: int
45
+ """
11
46
  df: dd.DataFrame = None
12
47
 
13
48
  def __init__(
@@ -19,7 +54,28 @@ class SqlAlchemyLoadFromDb:
19
54
  **kwargs,
20
55
  ):
21
56
  """
22
- Initialize the loader with database connection, query, and parameters.
57
+ Initializes an instance of the class, setting up a database connection,
58
+ query configuration, parameter configuration, and other optional settings
59
+ like debugging and logging. The class aims to manage the integration and
60
+ interaction with SQLAlchemy-based database operations.
61
+
62
+ :param plugin_sqlalchemy:
63
+ The SQLAlchemy connection configuration object, which provides
64
+ the connection details like engine, table name, and model
65
+ associated with the database operations.
66
+ :param plugin_query:
67
+ The query configuration object, used to define specific query
68
+ options or rules. Defaults to None.
69
+ :param plugin_params:
70
+ The parameters configuration object, used for any additional
71
+ parameterized settings or configurations. Defaults to None.
72
+ :param logger:
73
+ Optional logger instance for logging purposes. If not provided,
74
+ a default logger is instantiated using the standard logging system.
75
+ :param kwargs:
76
+ Optional additional keyword arguments for customization. Can
77
+ include optional settings like `debug` mode or `chunk_size`
78
+ for batch operations.
23
79
  """
24
80
  self.db_connection = plugin_sqlalchemy
25
81
  self.table_name = self.db_connection.table
@@ -33,13 +89,35 @@ class SqlAlchemyLoadFromDb:
33
89
 
34
90
  def build_and_load(self) -> dd.DataFrame:
35
91
  """
36
- Load data into a Dask DataFrame based on the query and parameters.
92
+ Builds and returns the resulting dataframe after calling the internal
93
+ build and load function. This method triggers the `_build_and_load`
94
+ function to process and prepare the data before returning it as
95
+ a dask dataframe.
96
+
97
+ :raises RuntimeError: If any error occurs during the build or load process.
98
+
99
+ :return: The processed data in a dask dataframe.
100
+ :rtype: dd.DataFrame
37
101
  """
38
102
  self._build_and_load()
39
103
  return self.df
40
104
 
41
105
  def _build_and_load(self) -> dd.DataFrame:
106
+ """
107
+ Builds and loads a Dask DataFrame from a SQLAlchemy-compatible source.
108
+
109
+ This method initializes a SQLAlchemyDask object with the provided model,
110
+ filters, engine URL, logger, chunk size, and debug configuration.
111
+ It attempts to load the data using the ``read_frame`` method of
112
+ SQLAlchemyDask. If the data cannot be loaded or the query returns
113
+ no rows, it creates and returns an empty Dask DataFrame.
42
114
 
115
+ :raises Exception: On failure to load data or to create a DataFrame.
116
+
117
+ :return: A Dask DataFrame object containing the queried data or an
118
+ empty DataFrame if the query returns no results or fails.
119
+ :rtype: dask.dataframe.DataFrame
120
+ """
43
121
  try:
44
122
  self.df = SQLAlchemyDask(
45
123
  model=self.model,
@@ -10,6 +10,28 @@ apps_label = "datacubes"
10
10
 
11
11
 
12
12
  class SqlAlchemyModelBuilder:
13
+ """
14
+ Provides functionality for building SQLAlchemy ORM models dynamically from
15
+ reflected database tables. This class is intended for use with a SQLAlchemy
16
+ engine and metadata to automatically generate ORM models for specified
17
+ database tables.
18
+
19
+ The primary purpose of this class is to simplify the process of creating
20
+ SQLAlchemy ORM models by reflecting tables from a connected database,
21
+ dynamically generating model classes, and handling relationships between
22
+ tables.
23
+
24
+ :ivar engine: SQLAlchemy engine connected to the database.
25
+ :type engine: Engine
26
+ :ivar table_name: Name of the table for which the model is generated.
27
+ :type table_name: str
28
+ :ivar metadata: SQLAlchemy MetaData instance for reflecting tables.
29
+ :type metadata: MetaData
30
+ :ivar table: Reflected SQLAlchemy Table object for the specified table name.
31
+ :type table: Optional[Table]
32
+ :ivar class_name: Dynamically normalized class name derived from table_name.
33
+ :type class_name: str
34
+ """
13
35
  _model_cache = {} # Local cache for model classes
14
36
 
15
37
  def __init__(self, engine, table_name):
@@ -27,6 +49,16 @@ class SqlAlchemyModelBuilder:
27
49
  self.class_name = self.normalize_class_name(self.table_name)
28
50
 
29
51
  def build_model(self) -> type:
52
+ """
53
+ Builds and returns a database model class corresponding to the specified table name.
54
+ The method checks if the model is already registered in the ORM's registry. If not,
55
+ it reflects the database schema of the specified table and dynamically creates the
56
+ model class.
57
+
58
+ :raises ValueError: If the specified table does not exist in the database.
59
+ :return: A database model class corresponding to the specified table name.
60
+ :rtype: type
61
+ """
30
62
  # Check if the model is already registered
31
63
  model = Base.registry._class_registry.get(self.class_name)
32
64
  if model:
@@ -42,10 +74,17 @@ class SqlAlchemyModelBuilder:
42
74
 
43
75
  def create_model(self) -> type:
44
76
  """
45
- Create a SQLAlchemy ORM model for the reflected table.
77
+ Generates a SQLAlchemy model class dynamically based on the specified table and
78
+ its columns. The method extracts column information, defines the necessary
79
+ attributes, and creates the model class if it doesn't already exist in the
80
+ SQLAlchemy base registry.
46
81
 
47
- Returns:
48
- type: Dynamically generated SQLAlchemy ORM model class.
82
+ :raises KeyError: If the table or table name does not exist in the provided
83
+ schema.
84
+ :raises Exception: If the model creation fails for any reason.
85
+
86
+ :return: The dynamically created or fetched model class.
87
+ :rtype: type
49
88
  """
50
89
  # Normalize the class name from the table name
51
90
  columns = self.get_columns(self.table)
@@ -70,13 +109,17 @@ class SqlAlchemyModelBuilder:
70
109
 
71
110
  def get_columns(self, table: Table):
72
111
  """
73
- Extract columns from the table and create corresponding SQLAlchemy fields.
74
-
75
- Args:
76
- table (Table): SQLAlchemy Table object.
77
-
78
- Returns:
79
- dict: Dictionary of column attributes.
112
+ Extracts and returns a dictionary of column names and their corresponding column
113
+ objects from a given table, excluding reserved names. Reserved names are used
114
+ internally and should not overlap with column names in the provided table. The
115
+ method ensures sanitized column names through normalization and filters out any
116
+ column matching reserved keywords.
117
+
118
+ :param table: The table object from which columns are to be extracted.
119
+ :type table: Table
120
+ :return: A dictionary containing the sanitized column names as keys and their
121
+ corresponding column objects as values, excluding reserved names.
122
+ :rtype: dict
80
123
  """
81
124
  columns = {}
82
125
  reserved_names = ["metadata", "class_", "table"]
@@ -89,11 +132,18 @@ class SqlAlchemyModelBuilder:
89
132
 
90
133
  def add_relationships(self, attrs, table: Table):
91
134
  """
92
- Add relationships to the model for foreign keys.
93
-
94
- Args:
95
- attrs (dict): Attributes of the dynamically created model.
96
- table (Table): SQLAlchemy Table object.
135
+ Adds relationships to the provided attributes dictionary for a given database table.
136
+
137
+ This method iterates through the foreign keys of the provided table, constructs
138
+ relationship attributes, and updates the attributes dictionary with relationships
139
+ that connect the current table to related tables.
140
+
141
+ :param attrs: Dictionary of attributes to which relationships will be added.
142
+ The dictionary will be updated with new relationship mappings.
143
+ :type attrs: dict
144
+ :param table: A database table object containing foreign key relationships.
145
+ The method will use this table to establish relationships.
146
+ :return: None
97
147
  """
98
148
  for fk in table.foreign_keys:
99
149
  related_table_name = fk.column.table.name
@@ -104,26 +154,37 @@ class SqlAlchemyModelBuilder:
104
154
  @staticmethod
105
155
  def normalize_class_name(table_name: str) -> str:
106
156
  """
107
- Normalize a table name into a valid Python class name.
108
-
109
- Args:
110
- table_name (str): Name of the table.
111
-
112
- Returns:
113
- str: Normalized class name.
157
+ Generate a normalized class name from a given table name by capitalizing
158
+ each word separated by underscores and concatenating them.
159
+
160
+ This static method takes a string representation of a table name, where
161
+ words are separated by underscores, and converts it into a PascalCase
162
+ class name. It processes the string by capitalizing the first letter of
163
+ each word (lowercasing the rest) and removing the underscores. The normalized class name
164
+ returned can be used programmatically for various purposes, such as
165
+ class generation or naming conventions.
166
+
167
+ :param table_name: The table name to normalize, with words separated by
168
+ underscores. E.g., 'sample_table' becomes 'SampleTable'.
169
+ :type table_name: str
170
+ :return: A normalized class name in PascalCase (upper camel case) format.
171
+ :rtype: str
114
172
  """
115
173
  return "".join(word.capitalize() for word in table_name.split("_"))
116
174
 
117
175
  @staticmethod
118
176
  def normalize_column_name(column_name: str) -> str:
119
177
  """
120
- Normalize a column name into a valid Python identifier.
121
-
122
- Args:
123
- column_name (str): Name of the column.
124
-
125
- Returns:
126
- str: Normalized column name.
178
+ Normalize a column name by replacing every non-word character with an
179
+ underscore and prefixing an underscore when the name starts with a digit,
180
+ while ensuring it does not collide with the reserved keywords 'class', 'def',
181
+ 'return', 'yield', or 'global'; on a collision, "_field" is appended to it.
182
+
183
+ :param column_name: The original name of the column to be normalized.
184
+ :type column_name: str
185
+ :return: A normalized column name that is safe and compatible for usage
186
+ in various contexts such as database columns or Python code.
187
+ :rtype: str
127
188
  """
128
189
  column_name = re.sub(r"\W|^(?=\d)", "_", column_name)
129
190
  if column_name in {"class", "def", "return", "yield", "global"}:
@@ -27,6 +27,36 @@ LOOKUP_SEP = "__"
27
27
 
28
28
 
29
29
  class ParamsConfig(BaseModel):
30
+ """
31
+ Defines a configuration model for parameters with functionality for parsing,
32
+ validation, and conversion of legacy filters.
33
+
34
+ This class extends BaseModel from Pydantic and is designed to handle multiple
35
+ sets of configurations, including field mappings, filters, dataframe parameters,
36
+ and dataframe options. It allows for flexible parsing of parameters across a
37
+ variety of supported structures and ensures that legacy filters can be
38
+ appropriately converted for compatibility.
39
+
40
+ :ivar field_map: Maps field names to their equivalent legacy field names.
41
+ :type field_map: Optional[Dict]
42
+ :ivar legacy_filters: Indicates whether legacy filters should be processed.
43
+ :type legacy_filters: bool
44
+ :ivar sticky_filters: Stores additional filters as key-value pairs that persist
45
+ across parameter parsing.
46
+ :type sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]]
47
+ :ivar filters: Holds all the current filters including sticky and dynamically
48
+ parsed filters.
49
+ :type filters: Dict[str, Union[str, Dict, bool, int, float, list, tuple]]
50
+ :ivar df_params: Contains parameters related to dataframe configurations in a
51
+ structured format.
52
+ :type df_params: Dict[str, Union[tuple, str, bool, None]]
53
+ :ivar df_options: Stores optional configurations for a dataframe, allowing for
54
+ additional behavior customization.
55
+ :type df_options: Dict[str, Union[bool, str, None]]
56
+ :ivar params: Dictionary of parameters provided for configuration, supporting
57
+ both basic and nested structures.
58
+ :type params: Dict[str, Union[str, bool, int, float, List[Union[str, int, bool, float]]]]
59
+ """
30
60
  field_map: Optional[Dict] = Field(default_factory=dict)
31
61
  legacy_filters: bool = False
32
62
  sticky_filters: Dict[str, Union[str, bool, int, float, list, tuple]] = Field(default_factory=dict)
@@ -42,6 +72,17 @@ class ParamsConfig(BaseModel):
42
72
  return self
43
73
 
44
74
  def parse_params(self, params):
75
+ """
76
+ Parses and separates the given parameters into specific categories such as dataframe parameters,
77
+ dataframe options, and filters. Updates existing class attributes with the parsed values,
78
+ retaining any sticky filters. Also handles the legacy filters if provided.
79
+
80
+ :param params: Dictionary containing parameters to process. These parameters can include specific
81
+ keys relevant for dataframe configuration (e.g., dataframe parameters, dataframe options)
82
+ as well as arbitrary filter settings.
83
+ :type params: dict
84
+ :return: None
85
+ """
45
86
  self.legacy_filters = params.pop('legacy_filters', self.legacy_filters)
46
87
  self.field_map = params.pop('field_map', self.field_map)
47
88
  self.sticky_filters = params.pop('params', self.sticky_filters)
@@ -60,6 +101,24 @@ class ParamsConfig(BaseModel):
60
101
  self.convert_legacy_filters()
61
102
 
62
103
  def convert_legacy_filters(self):
104
+ """
105
+ Converts legacy filter fields in the `self.filters` dictionary to their
106
+ modern equivalents using the mappings provided in `self.field_map`.
107
+ This method ensures backward compatibility for filters by automatically
108
+ translating the old field names into the current system.
109
+
110
+ The method returns immediately unless the `legacy_filters` flag is set and both
111
+ `field_map` and `filters` are non-empty. It then creates a reverse map of `field_map` for
112
+ efficient lookup, processes the key names within `self.filters`, and updates
113
+ them according to that mapping.
114
+
115
+ :raises KeyError: If any required dictionary key is missing during processing.
116
+
117
+ This method takes no arguments. It reads ``self.legacy_filters``, a boolean
118
+ flag indicating whether legacy filters are being used,
119
+ along with ``self.field_map`` and ``self.filters``.
120
+
121
+ """
63
122
  if not self.legacy_filters or not self.field_map or not self.filters:
64
123
  return
65
124
  # create a reverse map of the field_map
@@ -8,6 +8,20 @@ app_geo_locator_test_place = os.environ.get('GEO_LOCATOR_TEST_PLACE', "San Jose,
8
8
 
9
9
 
10
10
  class GeolocationService:
11
+ """
12
+ Provides geolocation services, such as forward and reverse geocoding.
13
+
14
+ This class is intended to interface with a geocoding service (e.g., Nominatim)
15
+ for performing geocoding operations. It initializes the geolocation service
16
+ based on a provided or default configuration and provides methods for geocoding
17
+ addresses or retrieving addresses from coordinates.
18
+
19
+ :ivar geolocator: Instance of the geocoding service used for geolocation tasks.
20
+ Will be `None` if initialization fails or is incomplete.
21
+ :type geolocator: Optional[Nominatim]
22
+ :ivar debug: Indicates whether debug messages are enabled.
23
+ :type debug: bool
24
+ """
11
25
  debug: bool = False
12
26
 
13
27
  def __init__(self, debug=False):
@@ -5,6 +5,16 @@ geolocator = None
5
5
 
6
6
 
7
7
  def get_geolocator():
8
+ """
9
+ Instantiate or retrieve the global geolocator instance
10
+ using the GeolocationService class. If the geolocator has already been
11
+ created, it will return the original global instance. Otherwise, it initializes
12
+ a new instance of the GeolocationService with debugging enabled and stores it
13
+ globally.
14
+
15
+ :return: The global instance of the GeolocationService
16
+ :rtype: GeolocationService
17
+ """
8
18
  global geolocator
9
19
  if geolocator is None:
10
20
  geolocator = GeolocationService(debug=True)
@@ -15,6 +25,23 @@ def get_geolocator():
15
25
 
16
26
 
17
27
  def get_address_by_coordinates(latitude, longitude, exactly_one=True):
28
+ """
29
+ Retrieves the address based on the provided geographic coordinates (latitude and
30
+ longitude). Utilizes the geopy library's geolocator to find and reverse-geocode
31
+ the location associated with the given coordinates. Returns a human-readable
32
+ address if available or an error message for specific conditions.
33
+
34
+ :param latitude: The latitude of the location to find the address for.
35
+ :type latitude: float
36
+ :param longitude: The longitude of the location to find the address for.
37
+ :type longitude: float
38
+ :param exactly_one: If true, ensures exactly one result is returned. If false,
39
+ returns a list of possible matches. Defaults to True.
40
+ :type exactly_one: bool, optional
41
+ :return: A string containing the human-readable address of the location or an
42
+ error message in case of failure.
43
+ :rtype: str
44
+ """
18
45
  geolocator = get_geolocator()
19
46
  try:
20
47
  location = geolocator.reverse((latitude, longitude), exactly_one=exactly_one)
@@ -28,10 +55,17 @@ def get_address_by_coordinates(latitude, longitude, exactly_one=True):
28
55
 
29
56
  def get_coordinates_for_address(address):
30
57
  """
31
- Geocode an address using a custom Nominatim server.
58
+ Gets geographical coordinates (latitude and longitude) along with the full formatted
59
+ address for a given address string. Makes use of a geolocation service to retrieve
60
+ the data and handles possible exceptions during the process.
61
+
62
+ :param address: The address as a string for which coordinates need to be determined.
63
+ :type address: str
32
64
 
33
- :param address: The address to geocode.
34
- :return: A dictionary with the location's latitude, longitude, and full address, or a message if an error occurs.
65
+ :return: A dictionary containing the full formatted address, latitude, and longitude
66
+ if the location is found. Otherwise, returns a string describing an error
67
+ or that no location was found.
68
+ :rtype: dict or str
35
69
  """
36
70
  geolocator = get_geolocator()
37
71
  try: