sibi-dst 0.3.63__py3-none-any.whl → 2025.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. sibi_dst/df_helper/_df_helper.py +186 -591
  2. sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -2
  3. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +161 -115
  4. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +291 -97
  5. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +34 -105
  6. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +175 -162
  7. sibi_dst/df_helper/core/__init__.py +0 -4
  8. sibi_dst/df_helper/core/_defaults.py +1 -50
  9. sibi_dst/df_helper/core/_query_config.py +2 -2
  10. sibi_dst/utils/__init__.py +0 -2
  11. sibi_dst/utils/data_wrapper.py +9 -12
  12. sibi_dst/utils/log_utils.py +15 -11
  13. sibi_dst/utils/update_planner.py +2 -0
  14. sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +325 -50
  15. sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +2 -2
  16. sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +330 -51
  17. sibi_dst/v3/__init__.py +0 -0
  18. sibi_dst/v3/backends/__init__.py +0 -0
  19. sibi_dst/v3/df_helper/__init__.py +0 -0
  20. sibi_dst/v3/df_helper/_df_helper.py +91 -0
  21. sibi_dst-2025.1.1.dist-info/METADATA +55 -0
  22. {sibi_dst-0.3.63.dist-info → sibi_dst-2025.1.1.dist-info}/RECORD +23 -26
  23. sibi_dst/df_helper/backends/django/__init__.py +0 -11
  24. sibi_dst/df_helper/backends/django/_db_connection.py +0 -88
  25. sibi_dst/df_helper/backends/django/_io_dask.py +0 -450
  26. sibi_dst/df_helper/backends/django/_load_from_db.py +0 -227
  27. sibi_dst/df_helper/backends/django/_sql_model_builder.py +0 -493
  28. sibi_dst/df_helper/backends/sqlalchemy/_filter_handler.py +0 -119
  29. sibi_dst/utils/airflow_manager.py +0 -212
  30. sibi_dst-0.3.63.dist-info/METADATA +0 -90
  31. {sibi_dst-0.3.63.dist-info → sibi_dst-2025.1.1.dist-info}/WHEEL +0 -0
@@ -1,493 +0,0 @@
1
-
2
- import keyword
3
- import re
4
- from functools import lru_cache
5
-
6
- from django.apps import apps
7
- from django.db import connections
8
- from django.db import models
9
- from django.db.models.constants import LOOKUP_SEP
10
-
11
# Lookup table from a Django field class name (as it appears in a generated
# field description string) to the field class itself. The relation fields at
# the end of the tuple may need special handling depending on the use case.
FIELD_MAP = {
    field_name: getattr(models, field_name)
    for field_name in (
        "AutoField",
        "BigAutoField",
        "BigIntegerField",
        "BinaryField",
        "BooleanField",
        "CharField",
        "DateField",
        "DateTimeField",
        "DecimalField",
        "DurationField",
        "EmailField",
        "FileField",
        "FilePathField",
        "FloatField",
        "ImageField",
        "IntegerField",
        "GenericIPAddressField",
        "NullBooleanField",
        "PositiveIntegerField",
        "PositiveSmallIntegerField",
        "SlugField",
        "SmallIntegerField",
        "TextField",
        "TimeField",
        "URLField",
        "UUIDField",
        "ForeignKey",
        "OneToOneField",
        "ManyToManyField",
    )
}

# App label that every model created on the fly is attached to. The app must
# be listed in INSTALLED_APPS in settings.py, otherwise Django raises an error
# when a model is reloaded.
apps_label = "datacubes"
48
-
49
-
50
class DjangoSqlModelBuilder:
    """
    Build an unmanaged Django model class for an existing database table.

    The table is introspected through the Django connection named by
    ``connection_name``; each column is turned into a textual field description
    (the same format ``inspectdb`` prints) which is then parsed back into real
    field instances attached to a dynamically created model class.

    :ivar connection_name: Alias of the Django database connection to introspect.
    :type connection_name: str
    :ivar table: Name of the database table the model is built for.
    :type table: str
    :ivar model: The model produced by :meth:`build_model`, or None if it has
        not been built yet.
    :type model: type | None
    """

    def __init__(self, **kwargs):
        """
        Store and validate the builder configuration.

        :param kwargs: Must contain ``connection_name`` and ``table``.
        :raises ValueError: If either required key is missing or empty.
        """
        self.connection_name = None
        self.table = None
        self.model = None
        self.__parse_builder(**kwargs)

    def __parse_builder(self, **kwargs):
        """
        Copy ``connection_name`` and ``table`` from ``kwargs`` onto the instance.

        :param kwargs: Configuration values; ``connection_name`` and ``table``
            are read, everything else is ignored.
        :type kwargs: dict
        :return: The builder itself.
        :rtype: self
        :raises ValueError: If ``connection_name`` or ``table`` is missing or empty.
        """
        self.connection_name = kwargs.get("connection_name")
        self.table = kwargs.get("table")
        self.model = None
        if not self.connection_name:
            raise ValueError("Connection name is required")
        if not self.table:
            raise ValueError("Table name is required")
        return self

    def build_model(self):
        """
        Return the Django model for the configured table, building it on first use.

        The model is looked up in the app registry under ``apps_label`` first and
        only created when that lookup fails. The result is remembered on
        ``self.model`` so repeated calls on the same builder do no further work.
        (A per-instance attribute is used instead of ``functools.lru_cache``,
        which would keep every builder instance alive for the process lifetime.)

        :return: The model class, or None when introspection produced no fields.
        :rtype: type | None
        """
        if self.model is None:
            model_fields = self.get_model_fields()
            if model_fields:
                model_name = self.table2model(self.table)
                try:
                    self.model = apps.get_model(apps_label, model_name)
                except LookupError:
                    self.model = self.create_model(model_name, model_fields)
        return self.model

    @staticmethod
    def _parse_field_desc(field_desc):
        """
        Split a field description string into its field class name and kwargs.

        The description is parsed as a Python call expression, so quoted strings,
        booleans and numbers are recovered with their real types and a trailing
        ``# comment`` is ignored. Positional arguments are ignored.

        :param field_desc: Text such as ``"models.CharField(max_length=10)  # note"``.
        :type field_desc: str
        :return: ``(field_class_name, keyword_arguments)``.
        :rtype: tuple[str, dict]
        :raises ValueError: If the text is not a call of a (possibly dotted) name.
        """
        import ast  # local import: the only place in this class that needs it

        try:
            # strip(): compile() in "eval" mode rejects leading indentation.
            call = ast.parse(field_desc.strip(), mode="eval").body
        except SyntaxError as exc:
            raise ValueError(f"Invalid field description: {field_desc!r}") from exc
        if not isinstance(call, ast.Call):
            raise ValueError(f"Invalid field description: {field_desc!r}")

        target = call.func
        if isinstance(target, ast.Attribute):
            field_type = target.attr
        elif isinstance(target, ast.Name):
            field_type = target.id
        else:
            raise ValueError(f"Invalid field description: {field_desc!r}")

        field_params = {}
        for kw in call.keywords:
            if kw.arg is None:  # a ``**mapping`` expansion carries no name
                continue
            try:
                field_params[kw.arg] = ast.literal_eval(kw.value)
            except ValueError:
                # Not a literal: keep the source text, as a plain string.
                field_params[kw.arg] = ast.unparse(kw.value)
        return field_type, field_params

    def create_model(self, name, fields) -> type:
        """
        Create an unmanaged Django model class from textual field descriptions.

        The model gets a ``Meta`` with ``db_table`` set to the configured table,
        ``managed = False`` and ``app_label = apps_label``.

        :param name: Name of the model class to create.
        :type name: str
        :param fields: Mapping of attribute name to a field description string
            as produced by :meth:`get_model_fields`.
        :type fields: dict
        :return: The new model class, or None when ``fields`` is empty.
        :rtype: type
        :raises KeyError: If a description names a field class not in ``FIELD_MAP``.
        :raises ValueError: If a description cannot be parsed.
        """
        if not fields:
            return None

        class Meta:
            db_table = self.table
            managed = False
            app_label = apps_label

        attrs = {
            "Meta": Meta,
            "__module__": f"{apps_label}.models",
            "objects": models.Manager(),
        }
        for field_name, field_desc in fields.items():
            field_type, field_params = self._parse_field_desc(field_desc)
            attrs[field_name] = FIELD_MAP[field_type](**field_params)
        return type(name, (models.Model,), attrs)

    @staticmethod
    def table2model(table_name):
        """
        Convert a snake_case table name into a CamelCase model name.

        :param table_name: Table name whose parts are separated by underscores.
        :type table_name: str
        :return: Each part title-cased and concatenated.
        :rtype: str
        """
        return "".join(part.title() for part in table_name.split("_"))

    def get_model_fields(self):
        """
        Introspect the configured table and describe each column as a field.

        Relations are deliberately not followed: every column is described as a
        plain field. A lone ``id`` AutoField primary key is omitted because
        Django adds it implicitly.

        :raises ValueError: If the table is not present on the connection.
        :returns: Mapping of attribute name to field description string
            (``"models.<Field>(<kwargs>) # notes"``), or None when the connection
            exposes no ``introspection`` attribute.
        :rtype: dict | None
        """
        connection = connections[self.connection_name]
        if connection is None:
            raise ValueError("Connection %s not found" % self.connection_name)
        introspection = getattr(connection, "introspection", None)
        if introspection is None:
            return None

        with connection.cursor() as cursor:
            table_names = (info.name for info in introspection.get_table_list(cursor))
            if self.table not in table_names:
                raise ValueError("Table %s not found" % self.table)

            try:
                constraints = introspection.get_constraints(cursor, self.table)
            except NotImplementedError:
                constraints = {}

            if hasattr(introspection, "get_primary_columns"):
                primary_key_columns = introspection.get_primary_columns(
                    cursor, self.table
                )
                primary_key_column = (
                    primary_key_columns[0] if primary_key_columns else None
                )
            else:
                primary_key_columns = []
                primary_key_column = introspection.get_primary_key_column(
                    cursor, self.table
                )

            # Only single-column unique constraints map onto ``unique=True``.
            unique_columns = {
                c["columns"][0]
                for c in constraints.values()
                if c["unique"] and len(c["columns"]) == 1
            }
            table_description = introspection.get_table_description(
                cursor, self.table
            )

            used_column_names = []  # attribute names handed out so far
            current_model = {}
            for row in table_description:
                column_name = row.name
                # is_relation is always False: model relations are not used.
                att_name, extra_params, comment_notes = self.normalize_col_name(
                    column_name, used_column_names, False
                )
                used_column_names.append(att_name)

                if column_name == primary_key_column:
                    extra_params["primary_key"] = True
                    if len(primary_key_columns) > 1:
                        comment_notes.append(
                            "The composite primary key (%s) found, that is not "
                            "supported. The first column is selected."
                            % ", ".join(primary_key_columns)
                        )
                elif column_name in unique_columns:
                    extra_params["unique"] = True

                field_type, field_params, field_notes = self.get_field_type(
                    connection, row
                )
                extra_params.update(field_params)
                comment_notes.extend(field_notes)

                if att_name == "id" and extra_params == {"primary_key": True}:
                    if field_type == "AutoField":
                        continue  # Django creates this field on its own
                    if (
                        field_type
                        == connection.features.introspected_field_types["AutoField"]
                    ):
                        comment_notes.append("AutoField?")

                if row.null_ok:
                    extra_params["blank"] = True
                    extra_params["null"] = True

                if hasattr(connection.features, "supports_comments") and row.comment:
                    extra_params["db_comment"] = row.comment

                prefix = "" if "." in field_type else "models."
                kwargs_text = ", ".join(
                    "%s=%r" % (k, v) for k, v in extra_params.items()
                )
                field_desc = "%s%s(%s)" % (prefix, field_type, kwargs_text)
                if comment_notes:
                    field_desc += " # " + " ".join(comment_notes)
                current_model[att_name] = field_desc
        return current_model

    @staticmethod
    def normalize_col_name(col_name, used_column_names, is_relation):
        """
        Turn a database column name into a usable, unique Python attribute name.

        The name is lower-cased, non-word characters and runs of underscores are
        collapsed to ``_``, and names that end in ``_``, are Python keywords,
        start with a digit or collide with an earlier name are adjusted. A
        leading underscore is intentionally kept.

        :param col_name: Column name as reported by the database.
        :param used_column_names: Attribute names already taken for this table.
        :param is_relation: Whether the column is a relation; a trailing ``_id``
            is then stripped, or ``db_column`` is recorded if there is none.
        :return: ``(attribute_name, field_params, field_notes)`` where
            ``field_params`` holds ``db_column`` when the name was changed and
            ``field_notes`` lists the reasons for each change.
        """
        field_params = {}
        field_notes = []

        new_name = col_name.lower()
        if new_name != col_name:
            field_notes.append("Field name made lowercase.")

        if is_relation:
            if new_name.endswith("_id"):
                new_name = new_name.removesuffix("_id")
            else:
                field_params["db_column"] = col_name

        new_name, num_repl = re.subn(r"\W", "_", new_name)
        if num_repl > 0:
            field_notes.append("Field renamed to remove unsuitable characters.")

        if LOOKUP_SEP in new_name:
            while LOOKUP_SEP in new_name:
                new_name = new_name.replace(LOOKUP_SEP, "_")
            if LOOKUP_SEP in col_name.lower():
                # Only note it when the separator was in the original name.
                field_notes.append(
                    "Field renamed because it contained more than one '_' in a row."
                )

        if new_name.endswith("_"):
            new_name = "%sfield" % new_name
            field_notes.append("Field renamed because it ended with '_'.")

        if keyword.iskeyword(new_name):
            new_name += "_field"
            field_notes.append("Field renamed because it was a Python reserved word.")

        # Slice instead of index so an empty name does not raise IndexError.
        if new_name[:1].isdigit():
            new_name = "number_%s" % new_name
            field_notes.append(
                "Field renamed because it wasn't a valid Python identifier."
            )

        if new_name in used_column_names:
            num = 0
            while "%s_%d" % (new_name, num) in used_column_names:
                num += 1
            new_name = "%s_%d" % (new_name, num)
            field_notes.append("Field renamed because of name conflict.")

        if col_name != new_name and field_notes:
            field_params["db_column"] = col_name

        return new_name, field_params, field_notes

    @staticmethod
    def get_field_type(connection, row):
        """
        Map an introspected column to a Django field class name and parameters.

        :param connection: Database connection whose ``introspection`` resolves
            the column's type code.
        :type connection: Any
        :param row: Column description exposing ``type_code``, ``display_size``,
            ``collation``, ``precision`` and ``scale``.
        :type row: Any
        :return: ``(field_type, field_params, field_notes)``; unknown type codes
            fall back to ``"TextField"`` with a note.
        :rtype: tuple[str, dict, list[str]]
        """
        field_params = {}
        field_notes = []

        try:
            field_type = connection.introspection.get_field_type(row.type_code, row)
        except KeyError:
            field_type = "TextField"
            field_notes.append("This field type is a guess.")

        if field_type == "CharField" and row.display_size:
            size = int(row.display_size)
            if size > 0:
                field_params["max_length"] = size

        if field_type in {"CharField", "TextField"} and row.collation:
            field_params["db_collation"] = row.collation

        if field_type == "DecimalField":
            if row.precision is None or row.scale is None:
                field_notes.append(
                    "max_digits and decimal_places have been guessed, as this "
                    "database handles decimal fields as float"
                )
            field_params["max_digits"] = (
                row.precision if row.precision is not None else 10
            )
            field_params["decimal_places"] = (
                row.scale if row.scale is not None else 5
            )

        return field_type, field_params, field_notes
@@ -1,119 +0,0 @@
1
- import datetime
2
-
3
- from sqlalchemy import func, cast
4
- from sqlalchemy.sql.sqltypes import Date, Time
5
-
6
-
7
class SqlAlchemyFilterHandler:
    """Translate Django-style lookup dictionaries into SQLAlchemy filters."""

    # Suffixes that CAST the column before comparing.
    _CAST_LOOKUPS = ("date", "time")
    # Suffixes passed straight to ``func.extract``.
    _EXTRACT_LOOKUPS = ("year", "month", "day", "hour", "minute", "second")
    _CASTINGS = _CAST_LOOKUPS + _EXTRACT_LOOKUPS + ("week_day",)

    # Comparison suffix -> builder of the SQLAlchemy condition.
    _OPERATIONS = {
        "exact": lambda col, val: col == val,
        "gt": lambda col, val: col > val,
        "gte": lambda col, val: col >= val,
        "lt": lambda col, val: col < val,
        "lte": lambda col, val: col <= val,
        "in": lambda col, val: col.in_(val),
        "range": lambda col, val: col.between(val[0], val[1]),
        "contains": lambda col, val: col.like(f"%{val}%"),
        "startswith": lambda col, val: col.like(f"{val}%"),
        "endswith": lambda col, val: col.like(f"%{val}"),
        "isnull": lambda col, val: col.is_(None) if val else col.isnot(None),
    }

    @staticmethod
    def _split_lookup(key):
        """Split ``field[__casting][__operation]`` into its three components."""
        field_name, *suffixes = key.split("__")
        casting, operation = None, "exact"
        if len(suffixes) == 1:
            # A single suffix is a comparison if it names one, else a casting.
            if suffixes[0] in SqlAlchemyFilterHandler._OPERATIONS:
                operation = suffixes[0]
            else:
                casting = suffixes[0]
        elif len(suffixes) == 2:
            casting, operation = suffixes
        elif suffixes:
            raise ValueError(f"Unsupported lookup: {key}")
        return field_name, casting, operation

    @staticmethod
    def _parse_filter_value(casting, value):
        """Convert ISO date strings to ``datetime.date`` for ``date`` lookups."""
        if casting != "date":
            return value
        if isinstance(value, str):
            return datetime.date.fromisoformat(value)
        if isinstance(value, (list, tuple)):
            return [
                datetime.date.fromisoformat(v) if isinstance(v, str) else v
                for v in value
            ]
        return value

    @staticmethod
    def apply_filters_sqlalchemy(query, model, filters):
        """
        Apply Django-like filters to an SQLAlchemy query.

        Each key has the form ``field``, ``field__operation``,
        ``field__casting`` or ``field__casting__operation``. The operation
        defaults to ``exact``. Castings are ``date``/``time`` (SQL CAST),
        ``year`` .. ``second`` (``EXTRACT``) and ``week_day``.

        Args:
            query: The base SQLAlchemy query.
            model: The SQLAlchemy model to filter.
            filters: A dictionary of filters with Django-like syntax.

        Returns:
            query: The filtered SQLAlchemy query.

        Raises:
            AttributeError: If a field does not exist on ``model``.
            ValueError: If a key uses an unknown casting or operation, or has
                more than three ``__``-separated parts.
        """
        handler = SqlAlchemyFilterHandler
        for key, value in filters.items():
            field_name, casting, operation = handler._split_lookup(key)

            # Compare against None explicitly: truth-testing a SQL expression
            # is undefined and raises TypeError for Core columns.
            column = getattr(model, field_name, None)
            if column is None:
                raise AttributeError(
                    f"Field '{field_name}' not found in model '{model.__name__}'"
                )

            # Reject unknown suffixes instead of silently filtering on
            # something other than what the caller asked for.
            if casting is not None and casting not in handler._CASTINGS:
                raise ValueError(f"Unsupported lookup: {key}")
            if operation not in handler._OPERATIONS:
                raise ValueError(f"Unsupported operation: {operation}")

            parsed_value = handler._parse_filter_value(casting, value)

            if casting == "date":
                column = cast(column, Date)
            elif casting == "time":
                column = cast(column, Time)
            elif casting in handler._EXTRACT_LOOKUPS:
                column = func.extract(casting, column)
            elif casting == "week_day":
                # Renders SQL strftime('%w', col). NOTE(review): this is the
                # SQLite spelling (text '0' = Sunday .. '6'); confirm the
                # target database and the value type callers pass.
                column = func.strftime("%w", column)

            query = query.filter(handler._OPERATIONS[operation](column, parsed_value))

        return query