etlplus 0.4.7__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ """
2
+ :mod:`etlplus.database.orm` module.
3
+
4
+ Dynamic SQLAlchemy model generation from YAML table specs.
5
+
6
+ Usage
7
+ -----
8
+ >>> from etlplus.database.orm import load_and_build_models
9
+ >>> registry = load_and_build_models('examples/configs/ddl_spec.yml')
10
+ >>> Customers = registry['dbo.Customers']
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ from typing import Any
17
+ from typing import Final
18
+
19
+ from sqlalchemy import Boolean
20
+ from sqlalchemy import CheckConstraint
21
+ from sqlalchemy import Date
22
+ from sqlalchemy import DateTime
23
+ from sqlalchemy import Enum
24
+ from sqlalchemy import Float
25
+ from sqlalchemy import ForeignKey
26
+ from sqlalchemy import ForeignKeyConstraint
27
+ from sqlalchemy import Index
28
+ from sqlalchemy import Integer
29
+ from sqlalchemy import LargeBinary
30
+ from sqlalchemy import Numeric
31
+ from sqlalchemy import PrimaryKeyConstraint
32
+ from sqlalchemy import String
33
+ from sqlalchemy import Text
34
+ from sqlalchemy import Time
35
+ from sqlalchemy import UniqueConstraint
36
+ from sqlalchemy import text
37
+ from sqlalchemy.dialects.postgresql import JSONB
38
+ from sqlalchemy.dialects.postgresql import UUID as PG_UUID
39
+ from sqlalchemy.orm import DeclarativeBase
40
+ from sqlalchemy.orm import mapped_column
41
+ from sqlalchemy.types import TypeEngine
42
+
43
+ from ..types import StrPath
44
+ from .schema import ForeignKeySpec
45
+ from .schema import TableSpec
46
+ from .schema import load_table_specs
47
+ from .types import ModelRegistry
48
+ from .types import TypeFactory
49
+
50
+ # SECTION: EXPORTS ========================================================== #
51
+
52
+
53
# Public names of :mod:`etlplus.database.orm`.
__all__ = [
    # Classes
    'Base',
    # Functions
    'build_models',
    'load_and_build_models',
    'resolve_type',
]
61
+
62
+
63
+ # SECTION: INTERNAL CONSTANTS =============================================== #
64
+
65
+ _TYPE_MAPPING: Final[dict[str, TypeFactory]] = {
66
+ 'int': lambda _: Integer(),
67
+ 'integer': lambda _: Integer(),
68
+ 'bigint': lambda _: Integer(),
69
+ 'smallint': lambda _: Integer(),
70
+ 'bool': lambda _: Boolean(),
71
+ 'boolean': lambda _: Boolean(),
72
+ 'uuid': lambda _: PG_UUID(as_uuid=True),
73
+ 'uniqueidentifier': lambda _: PG_UUID(as_uuid=True),
74
+ 'rowversion': lambda _: LargeBinary(),
75
+ 'varbinary': lambda _: LargeBinary(),
76
+ 'blob': lambda _: LargeBinary(),
77
+ 'text': lambda _: Text(),
78
+ 'string': lambda _: Text(),
79
+ 'varchar': lambda p: String(length=p[0]) if p else String(),
80
+ 'nvarchar': lambda p: String(length=p[0]) if p else String(),
81
+ 'char': lambda p: String(length=p[0] if p else 1),
82
+ 'nchar': lambda p: String(length=p[0] if p else 1),
83
+ 'numeric': lambda p: Numeric(
84
+ precision=p[0] if p else None,
85
+ scale=p[1] if len(p) > 1 else None,
86
+ ),
87
+ 'decimal': lambda p: Numeric(
88
+ precision=p[0] if p else None,
89
+ scale=p[1] if len(p) > 1 else None,
90
+ ),
91
+ 'float': lambda _: Float(),
92
+ 'real': lambda _: Float(),
93
+ 'double': lambda _: Float(),
94
+ 'datetime': lambda _: DateTime(timezone=True),
95
+ 'datetime2': lambda _: DateTime(timezone=True),
96
+ 'timestamp': lambda _: DateTime(timezone=True),
97
+ 'date': lambda _: Date(),
98
+ 'time': lambda _: Time(),
99
+ 'json': lambda _: JSONB(),
100
+ 'jsonb': lambda _: JSONB(),
101
+ }
102
+
103
+
104
+ # SECTION: CLASSES ========================================================== #
105
+
106
+
107
class Base(DeclarativeBase):
    """
    Base class for all ORM models.

    Serves as the default ``base`` argument of :func:`build_models` and
    :func:`load_and_build_models`.
    """

    # SQLAlchemy's ``__abstract__`` flag: this class is not mapped to a table
    # of its own.
    __abstract__ = True
111
+
112
+
113
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
114
+
115
+
116
+ def _class_name(
117
+ table: str,
118
+ ) -> str:
119
+ """
120
+ Convert table name to PascalCase class name.
121
+
122
+ Parameters
123
+ ----------
124
+ table : str
125
+ Table name.
126
+
127
+ Returns
128
+ -------
129
+ str
130
+ PascalCase class name.
131
+ """
132
+ parts = re.split(r'[^A-Za-z0-9]+', table)
133
+ return ''.join(p.capitalize() for p in parts if p)
134
+
135
+
136
+ def _parse_type_decl(
137
+ type_str: str,
138
+ ) -> tuple[str, list[int]]:
139
+ """
140
+ Parse a type declaration string into its name and parameters.
141
+
142
+ Parameters
143
+ ----------
144
+ type_str : str
145
+ Type declaration string, e.g., "varchar(255)".
146
+
147
+ Returns
148
+ -------
149
+ tuple[str, list[int]]
150
+ A tuple containing the type name and a list of integer parameters.
151
+ """
152
+ m = re.match(
153
+ r'^(?P<name>[A-Za-z0-9_]+)(?:\((?P<params>[^)]*)\))?$',
154
+ type_str.strip(),
155
+ )
156
+ if not m:
157
+ return type_str.lower(), []
158
+ name = m.group('name').lower()
159
+ params_raw = m.group('params')
160
+ if not params_raw:
161
+ return name, []
162
+ params = [p.strip() for p in params_raw.split(',') if p.strip()]
163
+ parsed: list[int] = []
164
+ for p in params:
165
+ try:
166
+ parsed.append(int(p))
167
+ except ValueError:
168
+ continue
169
+ return name, parsed
170
+
171
+
172
+ def _table_kwargs(
173
+ spec: TableSpec,
174
+ ) -> dict[str, str]:
175
+ """
176
+ Generate table keyword arguments based on the table specification.
177
+
178
+ Parameters
179
+ ----------
180
+ spec : TableSpec
181
+ Table specification.
182
+
183
+ Returns
184
+ -------
185
+ dict[str, str]
186
+ Dictionary of table keyword arguments.
187
+ """
188
+ kwargs: dict[str, str] = {}
189
+ if spec.schema_name:
190
+ kwargs['schema'] = spec.schema_name
191
+ return kwargs
192
+
193
+
194
+ # SECTION: FUNCTIONS ======================================================== #
195
+
196
+
197
def build_models(
    specs: list[TableSpec],
    *,
    base: type[DeclarativeBase] = Base,
) -> ModelRegistry:
    """
    Build SQLAlchemy ORM models from table specifications.

    One declarative class is created per spec and registered under the
    spec's fully qualified name. Composite primary keys, unique constraints,
    indexes, composite foreign keys, and column check constraints are
    attached through ``__table_args__``; single-column primary and foreign
    keys are declared on the column itself.

    Parameters
    ----------
    specs : list[TableSpec]
        List of table specifications.
    base : type[DeclarativeBase], optional
        Base class for the ORM models (default: :class:`Base`).

    Returns
    -------
    ModelRegistry
        Registry mapping fully qualified table names to ORM model classes.

    Notes
    -----
    Primary key columns are always created non-nullable, regardless of the
    column spec's ``nullable`` flag (which defaults to ``True``).

    A foreign key with exactly one local column but a different number of
    referenced columns matches neither the column-level nor the composite
    path and is therefore not emitted.
    """
    registry: ModelRegistry = {}

    for spec in specs:
        table_args: list[object] = []
        table_kwargs = _table_kwargs(spec)
        pk_cols = set(spec.primary_key.columns) if spec.primary_key else set()

        # Multi-column constraints cannot live on a single column, so they
        # are collected for ``__table_args__``.
        if spec.primary_key and len(spec.primary_key.columns) > 1:
            table_args.append(
                PrimaryKeyConstraint(
                    *spec.primary_key.columns,
                    name=spec.primary_key.name,
                ),
            )
        for uc in spec.unique_constraints:
            table_args.append(UniqueConstraint(*uc.columns, name=uc.name))
        for idx in spec.indexes:
            table_args.append(
                Index(
                    idx.name,
                    *idx.columns,
                    unique=idx.unique,
                    postgresql_where=text(idx.where) if idx.where else None,
                ),
            )
        for fk in spec.foreign_keys:
            if len(fk.columns) > 1:
                table_args.append(
                    ForeignKeyConstraint(
                        fk.columns,
                        [f'{fk.ref_table}.{c}' for c in fk.ref_columns],
                        ondelete=fk.ondelete,
                    ),
                )

        # Single-column foreign keys are attached directly to their column.
        fk_by_column = {
            fk.columns[0]: fk
            for fk in spec.foreign_keys
            if len(fk.columns) == 1 and len(fk.ref_columns) == 1
        }

        attrs: dict[str, object] = {'__tablename__': spec.table}

        for col in spec.columns:
            col_fk: ForeignKeySpec | None = fk_by_column.get(col.name)
            fk_args: list[ForeignKey] = (
                [
                    ForeignKey(
                        f'{col_fk.ref_table}.{col_fk.ref_columns[0]}',
                        ondelete=col_fk.ondelete,
                    ),
                ]
                if col_fk
                else []
            )
            # An enum list takes precedence over the declared SQL type.
            col_type: TypeEngine = (
                Enum(*col.enum, name=f'{spec.table}_{col.name}_enum')
                if col.enum
                else resolve_type(col.type)
            )

            kwargs: dict[str, Any] = {
                # An explicit ``nullable=True`` would override SQLAlchemy's
                # implicit NOT NULL for key columns, so force it off here.
                'nullable': col.nullable and col.name not in pk_cols,
                # Composite keys are declared via PrimaryKeyConstraint above.
                'primary_key': col.name in pk_cols and len(pk_cols) == 1,
                'unique': col.unique,
            }
            if col.default:
                kwargs['server_default'] = text(col.default)
            if col.identity:
                kwargs['autoincrement'] = True

            attrs[col.name] = mapped_column(*fk_args, type_=col_type, **kwargs)

            if col.check:
                table_args.append(
                    CheckConstraint(
                        col.check,
                        name=f'ck_{spec.table}_{col.name}',
                    ),
                )

        if table_args or table_kwargs:
            args_tuple = tuple(table_args)
            # Declarative expects positional items first and the keyword
            # dictionary, if any, as the last tuple element.
            attrs['__table_args__'] = (
                (*args_tuple, table_kwargs) if table_kwargs else args_tuple
            )

        cls_name = _class_name(spec.table)
        model_cls = type(cls_name, (base,), attrs)
        registry[spec.fq_name] = model_cls

    return registry
309
+
310
+
311
def load_and_build_models(
    path: StrPath,
    *,
    base: type[DeclarativeBase] = Base,
) -> ModelRegistry:
    """
    Read table specifications from a file and turn them into ORM models.

    Convenience wrapper chaining :func:`load_table_specs` and
    :func:`build_models`.

    Parameters
    ----------
    path : StrPath
        Path to the YAML file containing table specifications.
    base : type[DeclarativeBase], optional
        Base class for the ORM models (default: :class:`Base`).

    Returns
    -------
    ModelRegistry
        Registry mapping fully qualified table names to ORM model classes.
    """
    table_specs = load_table_specs(path)
    return build_models(table_specs, base=base)
332
+
333
+
334
def resolve_type(
    type_str: str,
) -> TypeEngine:
    """
    Translate a SQL type declaration into a SQLAlchemy :class:`TypeEngine`.

    Parameters
    ----------
    type_str : str
        String representation of the type declaration.

    Returns
    -------
    TypeEngine
        SQLAlchemy type instance for the declaration. Type names without an
        entry in the internal mapping resolve to :class:`Text`.
    """
    type_name, type_params = _parse_type_decl(type_str)
    make_type = _TYPE_MAPPING.get(type_name)
    return make_type(type_params) if make_type else Text()
@@ -0,0 +1,274 @@
1
+ """
2
+ :mod:`etlplus.database.schema` module.
3
+
4
+ Helpers for loading and translating YAML definitions of database table schema
5
+ specifications into Pydantic models for dynamic SQLAlchemy generation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+ from typing import Any
12
+ from typing import ClassVar
13
+
14
+ from pydantic import BaseModel
15
+ from pydantic import ConfigDict
16
+ from pydantic import Field
17
+
18
+ from ..file import File
19
+ from ..types import StrPath
20
+
21
+ # SECTION: EXPORTS ========================================================== #
22
+
23
+
24
# Public names of :mod:`etlplus.database.schema`.
__all__ = [
    # Classes
    'ColumnSpec',
    'ForeignKeySpec',
    'IdentitySpec',
    'IndexSpec',
    'PrimaryKeySpec',
    'UniqueConstraintSpec',
    'TableSpec',
    # Functions
    'load_table_specs',
]
36
+
37
+
38
+ # SECTION: CLASSES ========================================================== #
39
+
40
+
41
class ColumnSpec(BaseModel):
    """
    Column specification suitable for ODBC / SQLite DDL.

    Unknown keys are rejected (``extra='forbid'``).

    Attributes
    ----------
    model_config : ClassVar[ConfigDict]
        Pydantic model configuration.
    name : str
        Unquoted column name.
    type : str
        SQL type string, e.g., INT, NVARCHAR(100).
    nullable : bool
        True if NULL values are allowed (default: True).
    default : str | None
        Default value expression, or None if no default (default: None).
    identity : IdentitySpec | None
        Identity specification, or None if not an identity column
        (default: None).
    check : str | None
        Check constraint expression, or None if no check constraint
        (default: None).
    enum : list[str] | None
        List of allowed string values for enum-like columns, or None
        (default: None).
    unique : bool
        True if the column has a UNIQUE constraint (default: False).
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(extra='forbid')

    name: str
    type: str = Field(description='SQL type string, e.g., INT, NVARCHAR(100)')
    nullable: bool = True
    default: str | None = None
    # ``IdentitySpec`` is declared later in this module; the annotation is a
    # forward reference.
    identity: IdentitySpec | None = None
    check: str | None = None
    enum: list[str] | None = None
    unique: bool = False
77
+
78
+
79
class ForeignKeySpec(BaseModel):
    """
    Foreign key specification.

    Unknown keys are rejected (``extra='forbid'``).

    Attributes
    ----------
    model_config : ClassVar[ConfigDict]
        Pydantic model configuration.
    columns : list[str]
        List of local column names.
    ref_table : str
        Referenced table name.
    ref_columns : list[str]
        List of referenced column names.
    ondelete : str | None
        ON DELETE action, or None (default: None).
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(extra='forbid')

    columns: list[str]
    ref_table: str
    # NOTE(review): nothing here enforces that ``ref_columns`` has the same
    # length as ``columns`` -- confirm whether callers rely on that.
    ref_columns: list[str]
    ondelete: str | None = None
103
+
104
+
105
class IdentitySpec(BaseModel):
    """
    Identity specification.

    Unknown keys are rejected (``extra='forbid'``).

    Attributes
    ----------
    model_config : ClassVar[ConfigDict]
        Pydantic model configuration.
    seed : int | None
        Identity seed value; must be >= 1 when given (default: None).
    increment : int | None
        Identity increment value; must be >= 1 when given (default: None).
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(extra='forbid')

    # Both values are optional; ``ge=1`` rejects zero and negative numbers.
    seed: int | None = Field(default=None, ge=1)
    increment: int | None = Field(default=None, ge=1)
123
+
124
+
125
class IndexSpec(BaseModel):
    """
    Index specification.

    Unknown keys are rejected (``extra='forbid'``).

    Attributes
    ----------
    model_config : ClassVar[ConfigDict]
        Pydantic model configuration.
    name : str
        Index name.
    columns : list[str]
        List of column names included in the index.
    unique : bool
        True if the index is unique (default: False).
    where : str | None
        Optional WHERE clause for filtered indexes (default: None).
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(extra='forbid')

    name: str
    columns: list[str]
    unique: bool = False
    where: str | None = None
149
+
150
+
151
class PrimaryKeySpec(BaseModel):
    """
    Primary key specification.

    Unknown keys are rejected (``extra='forbid'``).

    Attributes
    ----------
    model_config : ClassVar[ConfigDict]
        Pydantic model configuration.
    name : str | None
        Primary key constraint name, or None if unnamed (default: None).
    columns : list[str]
        List of column names included in the primary key.
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(extra='forbid')

    name: str | None = None
    columns: list[str]
169
+
170
+
171
class UniqueConstraintSpec(BaseModel):
    """
    Unique constraint specification.

    Unknown keys are rejected (``extra='forbid'``).

    Attributes
    ----------
    model_config : ClassVar[ConfigDict]
        Pydantic model configuration.
    name : str | None
        Unique constraint name, or None if unnamed (default: None).
    columns : list[str]
        List of column names included in the unique constraint.
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(extra='forbid')

    name: str | None = None
    columns: list[str]
189
+
190
+
191
class TableSpec(BaseModel):
    """
    Table specification.

    Unknown keys are rejected (``extra='forbid'``). Two fields are declared
    with input aliases: ``table`` uses the alias ``name`` and
    ``schema_name`` uses the alias ``schema``.

    Attributes
    ----------
    model_config : ClassVar[ConfigDict]
        Pydantic model configuration.
    table : str
        Table name (input alias: ``name``).
    schema_name : str | None
        Schema name, or None if not specified (input alias: ``schema``;
        default: None).
    create_schema : bool
        Whether to create the schema if it does not exist (default: False).
    columns : list[ColumnSpec]
        List of column specifications.
    primary_key : PrimaryKeySpec | None
        Primary key specification, or None if no primary key
        (default: None).
    unique_constraints : list[UniqueConstraintSpec]
        List of unique constraint specifications (default: empty list).
    indexes : list[IndexSpec]
        List of index specifications (default: empty list).
    foreign_keys : list[ForeignKeySpec]
        List of foreign key specifications (default: empty list).
    """

    model_config: ClassVar[ConfigDict] = ConfigDict(extra='forbid')

    table: str = Field(alias='name')
    schema_name: str | None = Field(default=None, alias='schema')
    create_schema: bool = False
    columns: list[ColumnSpec]
    primary_key: PrimaryKeySpec | None = None
    # ``default_factory=list`` gives every instance its own empty list.
    unique_constraints: list[UniqueConstraintSpec] = Field(
        default_factory=list,
    )
    indexes: list[IndexSpec] = Field(default_factory=list)
    foreign_keys: list[ForeignKeySpec] = Field(default_factory=list)

    # -- Properties -- #

    @property
    def fq_name(self) -> str:
        """
        Fully qualified table name, including schema if specified.

        Returns
        -------
        str
            ``'<schema_name>.<table>'`` when a schema name is set, otherwise
            just the table name.
        """
        return (
            f'{self.schema_name}.{self.table}'
            if self.schema_name
            else self.table
        )
242
+
243
+
244
+ # SECTION: FUNCTIONS ======================================================== #
245
+
246
+
247
def load_table_specs(
    path: StrPath,
) -> list[TableSpec]:
    """
    Read a YAML file and validate its contents as table specifications.

    Three document shapes are accepted: a mapping with a ``table_schemas``
    key holding the specs, a top-level list of specs, or any other non-empty
    document, which is treated as a single spec.

    Parameters
    ----------
    path : StrPath
        Path to the YAML file containing table specifications.

    Returns
    -------
    list[TableSpec]
        A list of TableSpec instances parsed from the YAML file; empty when
        the file yields no data.
    """
    payload = File.read_file(Path(path))
    if not payload:
        return []

    raw_specs: list[Any]
    if isinstance(payload, list):
        raw_specs = payload
    elif isinstance(payload, dict) and 'table_schemas' in payload:
        # A present-but-empty ``table_schemas`` entry means "no tables".
        raw_specs = payload['table_schemas'] or []
    else:
        raw_specs = [payload]

    return list(map(TableSpec.model_validate, raw_specs))
@@ -0,0 +1,33 @@
1
+ """
2
+ :mod:`etlplus.database.types` module.
3
+
4
+ Shared type aliases leveraged across :mod:`etlplus.database` modules.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import Callable
10
+
11
+ from sqlalchemy.orm import DeclarativeBase
12
+ from sqlalchemy.types import TypeEngine
13
+
14
+ # SECTION: EXPORTS ========================================================== #
15
+
16
+
17
# Public names of :mod:`etlplus.database.types`.
__all__ = [
    # Type Aliases
    'ModelRegistry',
    'TypeFactory',
]
22
+
23
+
24
+ # SECTION: TYPE ALIASES ===================================================== #
25
+
26
+
27
+ # pylint: disable=invalid-name
28
+
29
# Registry mapping fully qualified table names to declarative classes.
# Declared with the ``type`` statement, which requires Python 3.12+.
type ModelRegistry = dict[str, type[DeclarativeBase]]

# Callable producing a SQLAlchemy TypeEngine from parsed parameters; the
# argument is the list of integer parameters of a type declaration.
type TypeFactory = Callable[[list[int]], TypeEngine]
etlplus/run.py CHANGED
@@ -142,10 +142,8 @@ def run(
142
142
  """
143
143
  Run a pipeline job defined in a YAML configuration.
144
144
 
145
- This mirrors the run-mode logic from ``etlplus.cli.cmd_pipeline``
146
- (without the list/summary modes). By default it reads the configuration
147
- from ``in/pipeline.yml``, but callers can provide an explicit
148
- ``config_path`` to override this.
145
+ By default it reads the configuration from ``in/pipeline.yml``, but callers
146
+ can provide an explicit ``config_path`` to override this.
149
147
 
150
148
  Parameters
151
149
  ----------
@@ -0,0 +1,5 @@
1
+ """
2
+ :mod:`etlplus.templates` package.
3
+
4
+ This package defines bundled Jinja2 templates for ``etlplus``.
5
+ """