etlplus 0.4.7__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -190,6 +190,8 @@ class PipelineConfig:
190
190
  Target connectors, parsed tolerantly.
191
191
  jobs : list[JobConfig]
192
192
  Job orchestration definitions.
193
+ table_schemas : list[dict[str, Any]]
194
+ Optional DDL-style table specifications used by the render command.
193
195
  """
194
196
 
195
197
  # -- Attributes -- #
@@ -208,6 +210,7 @@ class PipelineConfig:
208
210
  transforms: dict[str, dict[str, Any]] = field(default_factory=dict)
209
211
  targets: list[Connector] = field(default_factory=list)
210
212
  jobs: list[JobConfig] = field(default_factory=list)
213
+ table_schemas: list[dict[str, Any]] = field(default_factory=list)
211
214
 
212
215
  # -- Class Methods -- #
213
216
 
@@ -312,6 +315,13 @@ class PipelineConfig:
312
315
  # Jobs
313
316
  jobs = _build_jobs(raw)
314
317
 
318
+ # Table schemas (optional, tolerant pass-through structures).
319
+ table_schemas: list[dict[str, Any]] = []
320
+ for entry in raw.get('table_schemas', []) or []:
321
+ spec = maybe_mapping(entry)
322
+ if spec is not None:
323
+ table_schemas.append(dict(spec))
324
+
315
325
  return cls(
316
326
  name=name,
317
327
  version=version,
@@ -325,4 +335,5 @@ class PipelineConfig:
325
335
  transforms=transforms,
326
336
  targets=targets,
327
337
  jobs=jobs,
338
+ table_schemas=table_schemas,
328
339
  )
@@ -0,0 +1,42 @@
1
+ """
2
+ :mod:`etlplus.database` package.
3
+
4
+ Database utilities for:
5
+ - DDL rendering and schema management.
6
+ - Schema parsing from configuration files.
7
+ - Dynamic ORM generation.
8
+ - Database engine/session management.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from .ddl import load_table_spec
14
+ from .ddl import render_table_sql
15
+ from .ddl import render_tables
16
+ from .ddl import render_tables_to_string
17
+ from .engine import engine
18
+ from .engine import load_database_url_from_config
19
+ from .engine import make_engine
20
+ from .engine import session
21
+ from .orm import build_models
22
+ from .orm import load_and_build_models
23
+ from .schema import load_table_specs
24
+
25
+ # SECTION: EXPORTS ========================================================== #
26
+
27
+
28
+ __all__ = [
29
+ # Functions
30
+ 'build_models',
31
+ 'load_and_build_models',
32
+ 'load_database_url_from_config',
33
+ 'load_table_spec',
34
+ 'load_table_specs',
35
+ 'make_engine',
36
+ 'render_table_sql',
37
+ 'render_tables',
38
+ 'render_tables_to_string',
39
+ # Singletons
40
+ 'engine',
41
+ 'session',
42
+ ]
@@ -0,0 +1,311 @@
1
+ """
2
+ :mod:`etlplus.database.ddl` module.
3
+
4
+ DDL rendering utilities for pipeline table schemas.
5
+
6
+ Exposes helpers to load YAML/JSON table specs and render them into SQL via
7
+ Jinja templates. Mirrors the behavior of ``tools/render_ddl.py`` so the CLI
8
+ can emit DDLs without shelling out to that script.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import importlib.resources
14
+ import os
15
+ from collections.abc import Iterable
16
+ from collections.abc import Mapping
17
+ from pathlib import Path
18
+ from typing import Any
19
+ from typing import Final
20
+
21
+ from jinja2 import DictLoader
22
+ from jinja2 import Environment
23
+ from jinja2 import FileSystemLoader
24
+ from jinja2 import StrictUndefined
25
+
26
+ from ..file import File
27
+
28
+ # SECTION: EXPORTS ========================================================== #
29
+
30
+
31
+ __all__ = [
32
+ 'TEMPLATES',
33
+ 'load_table_spec',
34
+ 'render_table_sql',
35
+ 'render_tables',
36
+ 'render_tables_to_string',
37
+ ]
38
+
39
+
40
+ # SECTION: INTERNAL CONSTANTS =============================================== #
41
+
42
+
43
# Lower-case file suffixes (leading dot included) accepted for table specs.
_SUPPORTED_SPEC_SUFFIXES: Final[frozenset[str]] = frozenset(
    ('.json', '.yml', '.yaml'),
)
50
+
51
+
52
+ # SECTION: CONSTANTS ======================================================== #
53
+
54
+
55
# Maps each selectable template key to the filename of its bundled Jinja
# template.
TEMPLATES: Final[dict[str, str]] = dict(
    ddl='ddl.sql.j2',
    view='view.sql.j2',
)
59
+
60
+
61
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
62
+
63
+
64
def _load_template_text(
    filename: str,
) -> str:
    """
    Return the text of a template bundled with the package.

    Parameters
    ----------
    filename : str
        Template filename located inside the ``etlplus.templates`` package
        data folder.

    Returns
    -------
    str
        Raw template contents, decoded as UTF-8.

    Raises
    ------
    FileNotFoundError
        If the template file, or the ``etlplus.templates`` package that
        should contain it, cannot be located.
    """
    try:
        package_root = importlib.resources.files('etlplus.templates')
        return package_root.joinpath(filename).read_text(encoding='utf-8')
    except (
        FileNotFoundError,
        ModuleNotFoundError,
    ) as exc:  # pragma: no cover - deployment guard
        # ``files()`` imports the package first, so an install that lacks the
        # templates package fails with ModuleNotFoundError rather than
        # FileNotFoundError; report both as the documented error.
        raise FileNotFoundError(
            f'Could not load template {filename} '
            f'from etlplus.templates package data.',
        ) from exc
98
+
99
+
100
def _resolve_template(
    *,
    template_key: str | None,
    template_path: str | None,
) -> tuple[Environment, str]:
    """
    Return the Jinja environment and template name to render with.

    An explicit template file takes precedence over the bundled templates.
    The file comes from ``template_path`` or, when that is empty, from the
    ``TEMPLATE_NAME`` environment variable.

    Parameters
    ----------
    template_key : str | None
        Named template key bundled with the package. Falls back to
        ``'ddl'`` when empty or ``None``.
    template_path : str | None
        Explicit template file override.

    Returns
    -------
    tuple[Environment, str]
        Pair of configured Jinja environment and the template identifier
        to pass to ``Environment.get_template``.

    Raises
    ------
    FileNotFoundError
        If the provided template path does not point to an existing file.
    ValueError
        If the template key is unknown.
    """
    override = template_path or os.environ.get('TEMPLATE_NAME')
    if override:
        path = Path(override)
        # ``is_file`` rather than ``exists``: a directory exists but can
        # never be loaded as a template, so reject it here with the
        # documented error instead of failing later inside Jinja.
        if not path.is_file():
            raise FileNotFoundError(f'Template file not found: {path}')
        env = Environment(
            loader=FileSystemLoader(str(path.parent)),
            undefined=StrictUndefined,
            trim_blocks=True,
            lstrip_blocks=True,
        )
        return env, path.name

    key = (template_key or 'ddl').strip()
    if key not in TEMPLATES:
        choices = ', '.join(sorted(TEMPLATES))
        raise ValueError(
            f'Unknown template key "{key}". Choose from: {choices}',
        )

    # Bundled templates are read from package data and served from memory,
    # registered under their key.
    template_source = _load_template_text(TEMPLATES[key])
    env = Environment(
        loader=DictLoader({key: template_source}),
        undefined=StrictUndefined,
        trim_blocks=True,
        lstrip_blocks=True,
    )
    return env, key
158
+
159
+
160
+ # SECTION: FUNCTIONS ======================================================== #
161
+
162
+
163
def load_table_spec(
    path: Path | str,
) -> dict[str, Any]:
    """
    Read one table specification file and return it as a plain ``dict``.

    Parameters
    ----------
    path : Path | str
        Location of the JSON or YAML specification file.

    Returns
    -------
    dict[str, Any]
        Shallow copy of the parsed specification mapping.

    Raises
    ------
    ImportError
        If reading a non-YAML file fails on a missing dependency.
    RuntimeError
        If reading a YAML file fails on a missing dependency.
    TypeError
        If the parsed content is not a mapping.
    ValueError
        If the file suffix is not one of the supported suffixes.
    """
    source = Path(path)
    extension = source.suffix.lower()
    if extension not in _SUPPORTED_SPEC_SUFFIXES:
        raise ValueError('Spec must be .json, .yml, or .yaml')

    try:
        loaded = File.read_file(source)
    except ImportError as e:
        # Only YAML failures are translated; anything else propagates as-is.
        if extension not in {'.yml', '.yaml'}:
            raise
        raise RuntimeError(
            'Missing dependency: pyyaml is required for YAML specs.',
        ) from e

    if isinstance(loaded, Mapping):
        return dict(loaded)
    raise TypeError('Table spec must be a mapping')
210
+
211
+
212
def render_table_sql(
    spec: Mapping[str, Any],
    *,
    template: str | None = 'ddl',
    template_path: str | None = None,
) -> str:
    """
    Turn one table specification into SQL text.

    Parameters
    ----------
    spec : Mapping[str, Any]
        Table specification mapping, exposed to the template as ``spec``.
    template : str | None, optional
        Key of the bundled template to use (default: 'ddl').
    template_path : str | None, optional
        Path to a custom template file (overrides ``template``).

    Returns
    -------
    str
        Rendered SQL with trailing whitespace removed and exactly one
        newline appended.
    """
    environment, name = _resolve_template(
        template_key=template,
        template_path=template_path,
    )
    rendered = environment.get_template(name).render(spec=spec)
    return f'{rendered.rstrip()}\n'
241
+
242
+
243
def render_tables(
    specs: Iterable[Mapping[str, Any]],
    *,
    template: str | None = 'ddl',
    template_path: str | None = None,
) -> list[str]:
    """
    Render each table specification into its own SQL payload.

    Parameters
    ----------
    specs : Iterable[Mapping[str, Any]]
        Table specification mappings, rendered in iteration order.
    template : str | None, optional
        Key of the bundled template to use (default: 'ddl').
    template_path : str | None, optional
        Path to a custom template file (overrides ``template``).

    Returns
    -------
    list[str]
        One rendered SQL string per specification, in input order.
    """
    statements: list[str] = []
    for table_spec in specs:
        sql = render_table_sql(
            table_spec,
            template=template,
            template_path=template_path,
        )
        statements.append(sql)
    return statements
271
+
272
+
273
def render_tables_to_string(
    spec_paths: Iterable[Path | str],
    *,
    template: str | None = 'ddl',
    template_path: Path | str | None = None,
) -> str:
    """
    Load each specification file, render it, and join the SQL payloads.

    Parameters
    ----------
    spec_paths : Iterable[Path | str]
        Paths to table specification files, processed in iteration order.
    template : str | None, optional
        Template key bundled with ETLPlus. Defaults to ``'ddl'``.
    template_path : Path | str | None, optional
        Custom Jinja template to override the bundled templates.

    Returns
    -------
    str
        The rendered SQL payloads concatenated without any separator; an
        empty string when no paths are given.
    """
    # The renderer expects a plain string path (or None).
    custom_template = None if template_path is None else str(template_path)
    return ''.join(
        render_table_sql(
            load_table_spec(location),
            template=template,
            template_path=custom_template,
        )
        for location in spec_paths
    )
@@ -0,0 +1,146 @@
1
+ """
2
+ :mod:`etlplus.database.engine` module.
3
+
4
+ Lightweight engine/session factory with optional config-driven URL loading.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from collections.abc import Mapping
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from sqlalchemy import create_engine
15
+ from sqlalchemy.engine import Engine
16
+ from sqlalchemy.orm import sessionmaker
17
+
18
+ from ..file import File
19
+
20
+ # SECTION: EXPORTS ========================================================== #
21
+
22
+
23
+ __all__ = [
24
+ # Functions
25
+ 'load_database_url_from_config',
26
+ 'make_engine',
27
+ # Singletons
28
+ 'engine',
29
+ 'session',
30
+ ]
31
+
32
+
33
+ # SECTION: INTERNAL CONSTANTS =============================================== #
34
+
35
+
36
# Connection URL used when no explicit URL is supplied: the first non-empty
# value of the DATABASE_URL and DATABASE_DSN environment variables (checked
# in that order), otherwise an in-memory SQLite database.
DATABASE_URL: str = next(
    (
        value
        for value in map(os.getenv, ('DATABASE_URL', 'DATABASE_DSN'))
        if value
    ),
    'sqlite+pysqlite:///:memory:',
)
41
+
42
+
43
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
44
+
45
+
46
def _resolve_url_from_mapping(cfg: Mapping[str, Any]) -> str | None:
    """
    Return a URL/DSN from a mapping if present.

    The keys ``connection_string``, ``url`` and ``dsn`` are checked in that
    order and the first non-blank string wins. When none qualifies, a nested
    ``default`` mapping is searched the same way.

    Parameters
    ----------
    cfg : Mapping[str, Any]
        Configuration mapping potentially containing connection fields.

    Returns
    -------
    str | None
        Resolved URL/DSN string with surrounding whitespace removed, or
        ``None`` when nothing usable is found.
    """
    # Test each key on its own so that a blank or non-string value under an
    # earlier key cannot hide a usable value under a later one.
    for key in ('connection_string', 'url', 'dsn'):
        candidate = cfg.get(key)
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()

    # Some configs nest defaults.
    # E.g., databases: { mssql: { default: {...} } }
    default_cfg = cfg.get('default')
    if isinstance(default_cfg, Mapping):
        return _resolve_url_from_mapping(default_cfg)

    return None
71
+
72
+
73
+ # SECTION: FUNCTIONS ======================================================== #
74
+
75
+
76
def load_database_url_from_config(
    path: str | Path,
    *,
    name: str | None = None,
) -> str:
    """
    Read a config file and return the URL/DSN of one database entry.

    The file must parse to a mapping with a top-level ``databases``
    mapping. The entry selected by ``name`` may hold a
    ``connection_string``/``url``/``dsn`` field directly or inside a nested
    ``default`` block.

    Parameters
    ----------
    path : str | Path
        Location of the configuration file.
    name : str | None, optional
        Key of the entry under ``databases``; ``'default'`` is used when
        omitted or empty.

    Returns
    -------
    str
        Resolved database URL/DSN string.

    Raises
    ------
    KeyError
        If the ``databases`` mapping or the requested entry is missing.
    TypeError
        If the file content or the requested entry is not a mapping.
    ValueError
        If the entry holds no connection string/URL/DSN.
    """
    config = File.read_file(Path(path))
    if not isinstance(config, Mapping):
        raise TypeError('Database config must be a mapping')

    registry = config.get('databases')
    if not isinstance(registry, Mapping):
        raise KeyError('Config missing top-level "databases" mapping')

    entry_name = name or 'default'
    entry = registry.get(entry_name)
    if entry is None:
        raise KeyError(f'Database entry "{entry_name}" not found in config')
    if not isinstance(entry, Mapping):
        raise TypeError(f'Database entry "{entry_name}" must be a mapping')

    resolved = _resolve_url_from_mapping(entry)
    if resolved:
        return resolved
    raise ValueError(
        f'Database entry "{entry_name}" lacks connection_string/url/dsn',
    )
132
+
133
+
134
def make_engine(url: str | None = None, **engine_kwargs: Any) -> Engine:
    """
    Create a SQLAlchemy Engine, defaulting to env config if no URL given.

    Parameters
    ----------
    url : str | None, optional
        Database URL. When empty or ``None``, the module-level
        ``DATABASE_URL`` is used.
    **engine_kwargs : Any
        Extra keyword arguments forwarded to ``create_engine``.
        ``pool_pre_ping`` defaults to ``True`` unless supplied here.

    Returns
    -------
    Engine
        Engine returned by ``create_engine``.
    """
    resolved_url = url or DATABASE_URL
    # ``setdefault`` keeps pre-ping enabled by default while letting callers
    # pass ``pool_pre_ping`` themselves; forwarding it both explicitly and
    # via ``**engine_kwargs`` raised a duplicate-keyword TypeError.
    engine_kwargs.setdefault('pool_pre_ping', True)
    return create_engine(resolved_url, **engine_kwargs)
139
+
140
+
141
+ # SECTION: SINGLETONS ======================================================= #
142
+
143
+
144
# Module-level defaults, created at import time for callers that do not
# build their own: ``engine`` comes from ``make_engine()`` called without
# arguments, and ``session`` is a ``sessionmaker`` factory bound to it.
engine = make_engine()
session = sessionmaker(autocommit=False, autoflush=False, bind=engine)