etlplus 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +37 -0
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +51 -3
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +39 -28
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +96 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +40 -0
- etlplus/cli/commands.py +94 -61
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +40 -12
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +4 -7
- etlplus/database/README.md +48 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +1 -107
- etlplus/file/README.md +105 -0
- etlplus/file/__init__.py +25 -0
- etlplus/file/_imports.py +141 -0
- etlplus/file/_io.py +160 -0
- etlplus/file/accdb.py +78 -0
- etlplus/file/arrow.py +78 -0
- etlplus/file/avro.py +176 -0
- etlplus/file/bson.py +77 -0
- etlplus/file/cbor.py +78 -0
- etlplus/file/cfg.py +79 -0
- etlplus/file/conf.py +80 -0
- etlplus/file/core.py +322 -0
- etlplus/file/csv.py +79 -0
- etlplus/file/dat.py +78 -0
- etlplus/file/dta.py +77 -0
- etlplus/file/duckdb.py +78 -0
- etlplus/file/enums.py +343 -0
- etlplus/file/feather.py +111 -0
- etlplus/file/fwf.py +77 -0
- etlplus/file/gz.py +123 -0
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/ini.py +79 -0
- etlplus/file/ion.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/json.py +98 -0
- etlplus/file/log.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mdb.py +78 -0
- etlplus/file/msgpack.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/ndjson.py +108 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/orc.py +111 -0
- etlplus/file/parquet.py +113 -0
- etlplus/file/pb.py +78 -0
- etlplus/file/pbf.py +77 -0
- etlplus/file/properties.py +78 -0
- etlplus/file/proto.py +77 -0
- etlplus/file/psv.py +79 -0
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sqlite.py +78 -0
- etlplus/file/stub.py +84 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/tab.py +81 -0
- etlplus/file/toml.py +78 -0
- etlplus/file/tsv.py +80 -0
- etlplus/file/txt.py +102 -0
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xls.py +88 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xlsx.py +99 -0
- etlplus/file/xml.py +185 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/yaml.py +95 -0
- etlplus/file/zip.py +175 -0
- etlplus/file/zsav.py +77 -0
- etlplus/ops/README.md +50 -0
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +81 -99
- etlplus/{load.py → ops/load.py} +78 -101
- etlplus/{run.py → ops/run.py} +159 -127
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +53 -17
- etlplus/{validate.py → ops/validate.py} +22 -12
- etlplus/templates/README.md +46 -0
- etlplus/types.py +5 -4
- etlplus/utils.py +136 -2
- etlplus/workflow/README.md +52 -0
- etlplus/{config → workflow}/__init__.py +10 -23
- etlplus/{config → workflow}/connector.py +58 -44
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +105 -32
- etlplus/{config → workflow}/pipeline.py +59 -51
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
- etlplus-0.9.2.dist-info/RECORD +134 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- etlplus/file.py +0 -657
- etlplus/validation/__init__.py +0 -44
- etlplus-0.9.0.dist-info/RECORD +0 -65
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
:mod:`etlplus.
|
|
2
|
+
:mod:`etlplus.ops.validate` module.
|
|
3
3
|
|
|
4
4
|
Validate dicts and lists of dicts using simple, schema-like rules.
|
|
5
5
|
|
|
@@ -11,8 +11,8 @@ Highlights
|
|
|
11
11
|
----------
|
|
12
12
|
- Centralized type map and helpers for clarity and reuse.
|
|
13
13
|
- Consistent error wording; field and item paths like ``[2].email``.
|
|
14
|
-
- Small, focused public API with
|
|
15
|
-
|
|
14
|
+
- Small, focused public API with :func:`load_data`, :func:`validate_field`,
|
|
15
|
+
:func:`validate`.
|
|
16
16
|
|
|
17
17
|
Examples
|
|
18
18
|
--------
|
|
@@ -34,11 +34,11 @@ from typing import Final
|
|
|
34
34
|
from typing import Literal
|
|
35
35
|
from typing import TypedDict
|
|
36
36
|
|
|
37
|
+
from ..types import JSONData
|
|
38
|
+
from ..types import Record
|
|
39
|
+
from ..types import StrAnyMap
|
|
40
|
+
from ..types import StrPath
|
|
37
41
|
from .load import load_data
|
|
38
|
-
from .types import JSONData
|
|
39
|
-
from .types import Record
|
|
40
|
-
from .types import StrAnyMap
|
|
41
|
-
from .types import StrPath
|
|
42
42
|
|
|
43
43
|
# SECTION: EXPORTS ========================================================== #
|
|
44
44
|
|
|
@@ -66,7 +66,7 @@ TYPE_MAP: Final[dict[str, type | tuple[type, ...]]] = {
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
|
|
69
|
-
# SECTION:
|
|
69
|
+
# SECTION: TYPED DICTS ====================================================== #
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
class FieldRules(TypedDict, total=False):
|
|
@@ -279,11 +279,15 @@ def _type_matches(
|
|
|
279
279
|
bool
|
|
280
280
|
``True`` if the value matches the expected type; ``False`` if not.
|
|
281
281
|
"""
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
282
|
+
if expected == 'number':
|
|
283
|
+
return _is_number(value)
|
|
284
|
+
if expected == 'integer':
|
|
285
|
+
return isinstance(value, int) and not isinstance(value, bool)
|
|
286
|
+
if expected == 'boolean':
|
|
287
|
+
return isinstance(value, bool)
|
|
285
288
|
|
|
286
|
-
|
|
289
|
+
py_type = TYPE_MAP.get(expected)
|
|
290
|
+
return isinstance(value, py_type) if py_type else False
|
|
287
291
|
|
|
288
292
|
|
|
289
293
|
def _validate_record(
|
|
@@ -330,6 +334,9 @@ def _validate_record(
|
|
|
330
334
|
# SECTION: FUNCTIONS ======================================================== #
|
|
331
335
|
|
|
332
336
|
|
|
337
|
+
# -- Helpers -- #
|
|
338
|
+
|
|
339
|
+
|
|
333
340
|
def validate_field(
|
|
334
341
|
value: Any,
|
|
335
342
|
rules: StrAnyMap | FieldRules,
|
|
@@ -425,6 +432,9 @@ def validate_field(
|
|
|
425
432
|
return {'valid': len(errors) == 0, 'errors': errors}
|
|
426
433
|
|
|
427
434
|
|
|
435
|
+
# -- Orchestration -- #
|
|
436
|
+
|
|
437
|
+
|
|
428
438
|
def validate(
|
|
429
439
|
source: StrPath | JSONData,
|
|
430
440
|
rules: RulesMap | None = None,
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# `etlplus.templates` Subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
|
|
4
|
+
|
|
5
|
+
- Provides Jinja2 templates for DDL and view generation
|
|
6
|
+
- Supports templated SQL for multiple database backends
|
|
7
|
+
- Includes helpers for rendering templates with schema metadata
|
|
8
|
+
|
|
9
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
10
|
+
|
|
11
|
+
- [`etlplus.templates` Subpackage](#etlplustemplates-subpackage)
|
|
12
|
+
- [Available Templates](#available-templates)
|
|
13
|
+
- [Rendering Templates](#rendering-templates)
|
|
14
|
+
- [Example: Rendering a DDL Template](#example-rendering-a-ddl-template)
|
|
15
|
+
- [See Also](#see-also)
|
|
16
|
+
|
|
17
|
+
## Available Templates
|
|
18
|
+
|
|
19
|
+
- `ddl.sql.j2`: Generic DDL (CREATE TABLE) template
|
|
20
|
+
- `view.sql.j2`: Generic view creation template
|
|
21
|
+
|
|
22
|
+
## Rendering Templates
|
|
23
|
+
|
|
24
|
+
Use the helpers to render templates with your schema or table metadata:
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from etlplus.templates import render_template
|
|
28
|
+
|
|
29
|
+
sql = render_template("ddl.sql.j2", schema=my_schema)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Example: Rendering a DDL Template
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from etlplus.templates import render_template
|
|
36
|
+
|
|
37
|
+
schema = {"name": "users", "columns": [ ... ]}
|
|
38
|
+
sql = render_template("ddl.sql.j2", schema=schema)
|
|
39
|
+
print(sql)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## See Also
|
|
43
|
+
|
|
44
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
45
|
+
- DDL template in [ddl.sql.j2](ddl.sql.j2)
|
|
46
|
+
- View template in [view.sql.j2](view.sql.j2)
|
etlplus/types.py
CHANGED
|
@@ -11,8 +11,9 @@ Notes
|
|
|
11
11
|
|
|
12
12
|
See Also
|
|
13
13
|
--------
|
|
14
|
-
- :mod:`etlplus.api.types` for HTTP-specific aliases
|
|
15
|
-
- :mod:`etlplus.
|
|
14
|
+
- :mod:`etlplus.api.types` for HTTP-specific aliases and data classes
|
|
15
|
+
- :mod:`etlplus.workflow.types` for workflow-specific aliases and TypedDict
|
|
16
|
+
surfaces
|
|
16
17
|
|
|
17
18
|
Examples
|
|
18
19
|
--------
|
|
@@ -193,8 +194,8 @@ type AggregateSpec = StrAnyMap
|
|
|
193
194
|
|
|
194
195
|
# -- Pipelines -- #
|
|
195
196
|
|
|
196
|
-
# Unified pipeline step spec consumed by :mod:`etlplus.transform`.
|
|
197
|
-
type StepSpec = FilterSpec | MapSpec | SelectSpec | SortSpec
|
|
197
|
+
# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
|
|
198
|
+
type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
|
|
198
199
|
|
|
199
200
|
# Collections of steps
|
|
200
201
|
|
etlplus/utils.py
CHANGED
|
@@ -8,6 +8,7 @@ from __future__ import annotations
|
|
|
8
8
|
|
|
9
9
|
import json
|
|
10
10
|
from collections.abc import Callable
|
|
11
|
+
from collections.abc import Iterable
|
|
11
12
|
from collections.abc import Mapping
|
|
12
13
|
from typing import Any
|
|
13
14
|
from typing import TypeVar
|
|
@@ -25,6 +26,7 @@ __all__ = [
|
|
|
25
26
|
# Mapping utilities
|
|
26
27
|
'cast_str_dict',
|
|
27
28
|
'coerce_dict',
|
|
29
|
+
'deep_substitute',
|
|
28
30
|
'maybe_mapping',
|
|
29
31
|
# Float coercion
|
|
30
32
|
'to_float',
|
|
@@ -39,7 +41,8 @@ __all__ = [
|
|
|
39
41
|
# Generic number coercion
|
|
40
42
|
'to_number',
|
|
41
43
|
# Text processing
|
|
42
|
-
'
|
|
44
|
+
'normalize_choice',
|
|
45
|
+
'normalize_str',
|
|
43
46
|
]
|
|
44
47
|
|
|
45
48
|
|
|
@@ -56,6 +59,52 @@ Num = TypeVar('Num', int, float)
|
|
|
56
59
|
# -- Data Utilities -- #
|
|
57
60
|
|
|
58
61
|
|
|
62
|
+
def deep_substitute(
|
|
63
|
+
value: Any,
|
|
64
|
+
vars_map: StrAnyMap | None,
|
|
65
|
+
env_map: Mapping[str, str] | None,
|
|
66
|
+
) -> Any:
|
|
67
|
+
"""
|
|
68
|
+
Recursively substitute ``${VAR}`` tokens in nested structures.
|
|
69
|
+
|
|
70
|
+
Only strings are substituted; other types are returned as-is.
|
|
71
|
+
|
|
72
|
+
Parameters
|
|
73
|
+
----------
|
|
74
|
+
value : Any
|
|
75
|
+
The value to perform substitutions on.
|
|
76
|
+
vars_map : StrAnyMap | None
|
|
77
|
+
Mapping of variable names to replacement values (lower precedence).
|
|
78
|
+
env_map : Mapping[str, str] | None
|
|
79
|
+
Mapping of environment variables overriding ``vars_map`` values
|
|
80
|
+
(higher precedence).
|
|
81
|
+
|
|
82
|
+
Returns
|
|
83
|
+
-------
|
|
84
|
+
Any
|
|
85
|
+
New structure with substitutions applied where tokens were found.
|
|
86
|
+
"""
|
|
87
|
+
substitutions = _prepare_substitutions(vars_map, env_map)
|
|
88
|
+
|
|
89
|
+
def _apply(node: Any) -> Any:
|
|
90
|
+
match node:
|
|
91
|
+
case str():
|
|
92
|
+
return _replace_tokens(node, substitutions)
|
|
93
|
+
case Mapping():
|
|
94
|
+
return {k: _apply(v) for k, v in node.items()}
|
|
95
|
+
case list() | tuple() as seq:
|
|
96
|
+
apply = [_apply(item) for item in seq]
|
|
97
|
+
return apply if isinstance(seq, list) else tuple(apply)
|
|
98
|
+
case set():
|
|
99
|
+
return {_apply(item) for item in node}
|
|
100
|
+
case frozenset():
|
|
101
|
+
return frozenset(_apply(item) for item in node)
|
|
102
|
+
case _:
|
|
103
|
+
return node
|
|
104
|
+
|
|
105
|
+
return _apply(value)
|
|
106
|
+
|
|
107
|
+
|
|
59
108
|
def cast_str_dict(
|
|
60
109
|
mapping: StrAnyMap | None,
|
|
61
110
|
) -> dict[str, str]:
|
|
@@ -372,7 +421,7 @@ def to_number(
|
|
|
372
421
|
# -- Text Processing -- #
|
|
373
422
|
|
|
374
423
|
|
|
375
|
-
def
|
|
424
|
+
def normalize_str(
|
|
376
425
|
value: str | None,
|
|
377
426
|
) -> str:
|
|
378
427
|
"""
|
|
@@ -392,6 +441,36 @@ def normalized_str(
|
|
|
392
441
|
return (value or '').strip().lower()
|
|
393
442
|
|
|
394
443
|
|
|
444
|
+
def normalize_choice(
|
|
445
|
+
value: str | None,
|
|
446
|
+
*,
|
|
447
|
+
mapping: Mapping[str, str],
|
|
448
|
+
default: str,
|
|
449
|
+
normalize: Callable[[str | None], str] = normalize_str,
|
|
450
|
+
) -> str:
|
|
451
|
+
"""
|
|
452
|
+
Normalize a string choice using a mapping and fallback.
|
|
453
|
+
|
|
454
|
+
Parameters
|
|
455
|
+
----------
|
|
456
|
+
value : str | None
|
|
457
|
+
Input value to normalize.
|
|
458
|
+
mapping : Mapping[str, str]
|
|
459
|
+
Mapping of acceptable normalized inputs to output values.
|
|
460
|
+
default : str
|
|
461
|
+
Default return value when input is missing or unrecognized.
|
|
462
|
+
normalize : Callable[[str | None], str], optional
|
|
463
|
+
Normalization function applied to *value*. Defaults to
|
|
464
|
+
:func:`normalize_str`.
|
|
465
|
+
|
|
466
|
+
Returns
|
|
467
|
+
-------
|
|
468
|
+
str
|
|
469
|
+
Normalized mapped value or ``default``.
|
|
470
|
+
"""
|
|
471
|
+
return mapping.get(normalize(value), default)
|
|
472
|
+
|
|
473
|
+
|
|
395
474
|
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
396
475
|
|
|
397
476
|
|
|
@@ -425,6 +504,61 @@ def _clamp(
|
|
|
425
504
|
return value
|
|
426
505
|
|
|
427
506
|
|
|
507
|
+
def _prepare_substitutions(
|
|
508
|
+
vars_map: StrAnyMap | None,
|
|
509
|
+
env_map: Mapping[str, Any] | None,
|
|
510
|
+
) -> tuple[tuple[str, Any], ...]:
|
|
511
|
+
"""
|
|
512
|
+
Merge variable and environment maps into an ordered substitutions list.
|
|
513
|
+
|
|
514
|
+
Parameters
|
|
515
|
+
----------
|
|
516
|
+
vars_map : StrAnyMap | None
|
|
517
|
+
Mapping of variable names to replacement values (lower precedence).
|
|
518
|
+
env_map : Mapping[str, Any] | None
|
|
519
|
+
Environment-backed values that override entries from ``vars_map``.
|
|
520
|
+
|
|
521
|
+
Returns
|
|
522
|
+
-------
|
|
523
|
+
tuple[tuple[str, Any], ...]
|
|
524
|
+
Immutable sequence of ``(name, value)`` pairs suitable for token
|
|
525
|
+
replacement.
|
|
526
|
+
"""
|
|
527
|
+
if not vars_map and not env_map:
|
|
528
|
+
return ()
|
|
529
|
+
merged: dict[str, Any] = {**(vars_map or {}), **(env_map or {})}
|
|
530
|
+
return tuple(merged.items())
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _replace_tokens(
|
|
534
|
+
text: str,
|
|
535
|
+
substitutions: Iterable[tuple[str, Any]],
|
|
536
|
+
) -> str:
|
|
537
|
+
"""
|
|
538
|
+
Replace ``${VAR}`` tokens in ``text`` using ``substitutions``.
|
|
539
|
+
|
|
540
|
+
Parameters
|
|
541
|
+
----------
|
|
542
|
+
text : str
|
|
543
|
+
Input string that may contain ``${VAR}`` tokens.
|
|
544
|
+
substitutions : Iterable[tuple[str, Any]]
|
|
545
|
+
Sequence of ``(name, value)`` pairs used for token replacement.
|
|
546
|
+
|
|
547
|
+
Returns
|
|
548
|
+
-------
|
|
549
|
+
str
|
|
550
|
+
Updated text with replacements applied.
|
|
551
|
+
"""
|
|
552
|
+
if not substitutions:
|
|
553
|
+
return text
|
|
554
|
+
out = text
|
|
555
|
+
for name, replacement in substitutions:
|
|
556
|
+
token = f'${{{name}}}'
|
|
557
|
+
if token in out:
|
|
558
|
+
out = out.replace(token, str(replacement))
|
|
559
|
+
return out
|
|
560
|
+
|
|
561
|
+
|
|
428
562
|
def _coerce_float(
|
|
429
563
|
value: object,
|
|
430
564
|
) -> float | None:
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# `etlplus.workflow` Subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.workflow` subpackage: configuration helpers for connectors,
|
|
4
|
+
pipelines, jobs, and profiles.
|
|
5
|
+
|
|
6
|
+
- Provides classes and utilities for managing ETL pipeline configuration
|
|
7
|
+
- Supports YAML/JSON config loading and validation
|
|
8
|
+
- Includes helpers for connectors, jobs, pipelines, and profiles
|
|
9
|
+
- Exposes type definitions for config schemas
|
|
10
|
+
|
|
11
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
12
|
+
|
|
13
|
+
- [`etlplus.workflow` Subpackage](#etlplusworkflow-subpackage)
|
|
14
|
+
- [Supported Configuration Types](#supported-configuration-types)
|
|
15
|
+
- [Loading and Validating Configs](#loading-and-validating-configs)
|
|
16
|
+
- [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
|
|
17
|
+
- [See Also](#see-also)
|
|
18
|
+
|
|
19
|
+
## Supported Configuration Types
|
|
20
|
+
|
|
21
|
+
- **Connector**: Connection details for databases, files, or APIs
|
|
22
|
+
- **Job**: ETL job definitions and scheduling
|
|
23
|
+
- **Pipeline**: End-to-end pipeline configuration
|
|
24
|
+
- **Profile**: User or environment-specific settings
|
|
25
|
+
|
|
26
|
+
## Loading and Validating Configs
|
|
27
|
+
|
|
28
|
+
Use the provided classes to load and validate configuration files:
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from etlplus.workflow import PipelineConfig
|
|
32
|
+
|
|
33
|
+
cfg = PipelineConfig.from_yaml("pipeline.yml")
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
- Supports YAML and JSON formats
|
|
37
|
+
- Validates against expected schema
|
|
38
|
+
|
|
39
|
+
## Example: Loading a Pipeline Config
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from etlplus.workflow import PipelineConfig
|
|
43
|
+
|
|
44
|
+
pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
|
|
45
|
+
print(pipeline)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## See Also
|
|
49
|
+
|
|
50
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
51
|
+
- Config type definitions in [types.py](types.py)
|
|
52
|
+
- Job dependency ordering helpers in [dag.py](dag.py)
|
|
@@ -1,17 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
|
-
:mod:`etlplus.
|
|
2
|
+
:mod:`etlplus.workflow` package.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
This package defines models for data sources/targets ("connectors"), APIs,
|
|
7
|
-
pagination/rate limits, pipeline orchestration, and related utilities. The
|
|
8
|
-
parsers are permissive (accepting ``Mapping[str, Any]``) and normalize to
|
|
9
|
-
concrete types without raising on unknown/optional fields.
|
|
10
|
-
|
|
11
|
-
Notes
|
|
12
|
-
-----
|
|
13
|
-
- The models use ``@dataclass(slots=True)`` and avoid mutating inputs.
|
|
14
|
-
- TypedDicts are editor/type-checking hints and are not enforced at runtime.
|
|
4
|
+
Job workflow helpers.
|
|
15
5
|
"""
|
|
16
6
|
|
|
17
7
|
from __future__ import annotations
|
|
@@ -21,6 +11,7 @@ from .connector import ConnectorApi
|
|
|
21
11
|
from .connector import ConnectorDb
|
|
22
12
|
from .connector import ConnectorFile
|
|
23
13
|
from .connector import parse_connector
|
|
14
|
+
from .dag import topological_sort_jobs
|
|
24
15
|
from .jobs import ExtractRef
|
|
25
16
|
from .jobs import JobConfig
|
|
26
17
|
from .jobs import LoadRef
|
|
@@ -28,29 +19,25 @@ from .jobs import TransformRef
|
|
|
28
19
|
from .jobs import ValidationRef
|
|
29
20
|
from .pipeline import PipelineConfig
|
|
30
21
|
from .pipeline import load_pipeline_config
|
|
31
|
-
from .profile import ProfileConfig
|
|
32
|
-
from .types import ConnectorType
|
|
33
22
|
|
|
34
23
|
# SECTION: EXPORTS ========================================================== #
|
|
35
24
|
|
|
36
25
|
|
|
37
26
|
__all__ = [
|
|
38
|
-
#
|
|
39
|
-
'Connector',
|
|
40
|
-
'ConnectorType',
|
|
27
|
+
# Data Classes
|
|
41
28
|
'ConnectorApi',
|
|
42
29
|
'ConnectorDb',
|
|
43
30
|
'ConnectorFile',
|
|
44
|
-
'parse_connector',
|
|
45
|
-
# Jobs / Refs
|
|
46
31
|
'ExtractRef',
|
|
47
32
|
'JobConfig',
|
|
48
33
|
'LoadRef',
|
|
34
|
+
'PipelineConfig',
|
|
49
35
|
'TransformRef',
|
|
50
36
|
'ValidationRef',
|
|
51
|
-
#
|
|
52
|
-
'PipelineConfig',
|
|
37
|
+
# Functions
|
|
53
38
|
'load_pipeline_config',
|
|
54
|
-
|
|
55
|
-
'
|
|
39
|
+
'parse_connector',
|
|
40
|
+
'topological_sort_jobs',
|
|
41
|
+
# Type Aliases
|
|
42
|
+
'Connector',
|
|
56
43
|
]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
:mod:`etlplus.
|
|
2
|
+
:mod:`etlplus.workflow.connector` module.
|
|
3
3
|
|
|
4
4
|
A module defining configuration types for data source/target connectors in ETL
|
|
5
5
|
pipelines. A "connector" is any I/O endpoint:
|
|
@@ -11,25 +11,26 @@ pipelines. A "connector" is any I/O endpoint:
|
|
|
11
11
|
|
|
12
12
|
Examples
|
|
13
13
|
--------
|
|
14
|
-
- Use
|
|
15
|
-
|
|
16
|
-
- Use the
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
- Use :class:`ConnectorApi`/:class:`ConnectorFile`/:class:`ConnectorDb` when
|
|
15
|
+
you want the concrete dataclasses.
|
|
16
|
+
- Use the :class:`Connector` union for typing a value that can be any
|
|
17
|
+
connector.
|
|
18
|
+
- Use :func:`parse_connector` to construct a connector instance from a
|
|
19
|
+
generic mapping that includes a *type* key.
|
|
19
20
|
|
|
20
21
|
Notes
|
|
21
22
|
-----
|
|
22
23
|
- TypedDict shapes are editor hints; runtime parsing remains permissive
|
|
23
|
-
|
|
24
|
-
- TypedDicts referenced in :mod:`etlplus.
|
|
25
|
-
|
|
24
|
+
(from_obj accepts Mapping[str, Any]).
|
|
25
|
+
- TypedDicts referenced in :mod:`etlplus.workflow.types` remain editor hints.
|
|
26
|
+
Runtime parsing stays permissive and tolerant.
|
|
26
27
|
|
|
27
28
|
See Also
|
|
28
29
|
--------
|
|
29
30
|
- TypedDict shapes for editor hints (not enforced at runtime):
|
|
30
|
-
:mod:`etlplus.
|
|
31
|
-
:mod:`etlplus.
|
|
32
|
-
:mod:`etlplus.
|
|
31
|
+
:class:`etlplus.workflow.types.ConnectorApiConfigMap`,
|
|
32
|
+
:class:`etlplus.workflow.types.ConnectorDbConfigMap`,
|
|
33
|
+
:class:`etlplus.workflow.types.ConnectorFileConfigMap`.
|
|
33
34
|
"""
|
|
34
35
|
|
|
35
36
|
from __future__ import annotations
|
|
@@ -59,7 +60,7 @@ if TYPE_CHECKING: # Editor-only typing hints to avoid runtime imports
|
|
|
59
60
|
|
|
60
61
|
|
|
61
62
|
__all__ = [
|
|
62
|
-
# Classes
|
|
63
|
+
# Data Classes
|
|
63
64
|
'ConnectorApi',
|
|
64
65
|
'ConnectorDb',
|
|
65
66
|
'ConnectorFile',
|
|
@@ -70,6 +71,40 @@ __all__ = [
|
|
|
70
71
|
]
|
|
71
72
|
|
|
72
73
|
|
|
74
|
+
# SECTION: INTERNAL FUNCTIONS ============================================== #
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _require_name(
|
|
78
|
+
obj: StrAnyMap,
|
|
79
|
+
*,
|
|
80
|
+
kind: str,
|
|
81
|
+
) -> str:
|
|
82
|
+
"""
|
|
83
|
+
Extract and validate the ``name`` field from connector mappings.
|
|
84
|
+
|
|
85
|
+
Parameters
|
|
86
|
+
----------
|
|
87
|
+
obj : StrAnyMap
|
|
88
|
+
Connector mapping with a ``name`` entry.
|
|
89
|
+
kind : str
|
|
90
|
+
Connector kind used in the error message.
|
|
91
|
+
|
|
92
|
+
Returns
|
|
93
|
+
-------
|
|
94
|
+
str
|
|
95
|
+
Valid connector name.
|
|
96
|
+
|
|
97
|
+
Raises
|
|
98
|
+
------
|
|
99
|
+
TypeError
|
|
100
|
+
If ``name`` is missing or not a string.
|
|
101
|
+
"""
|
|
102
|
+
name = obj.get('name')
|
|
103
|
+
if not isinstance(name, str):
|
|
104
|
+
raise TypeError(f'Connector{kind} requires a "name" (str)')
|
|
105
|
+
return name
|
|
106
|
+
|
|
107
|
+
|
|
73
108
|
# SECTION: DATA CLASSES ===================================================== #
|
|
74
109
|
|
|
75
110
|
|
|
@@ -83,12 +118,12 @@ class ConnectorApi:
|
|
|
83
118
|
name : str
|
|
84
119
|
Unique connector name.
|
|
85
120
|
type : ConnectorType
|
|
86
|
-
Connector kind literal, always ``
|
|
121
|
+
Connector kind literal, always ``'api'``.
|
|
87
122
|
url : str | None
|
|
88
123
|
Direct absolute URL (when not using ``service``/``endpoint`` refs).
|
|
89
124
|
method : str | None
|
|
90
125
|
Optional HTTP method; typically omitted for sources (defaults to
|
|
91
|
-
GET) and used for targets (e.g., ``
|
|
126
|
+
GET) and used for targets (e.g., ``'post'``).
|
|
92
127
|
headers : dict[str, str]
|
|
93
128
|
Additional request headers.
|
|
94
129
|
query_params : dict[str, Any]
|
|
@@ -111,7 +146,7 @@ class ConnectorApi:
|
|
|
111
146
|
|
|
112
147
|
# Direct form
|
|
113
148
|
url: str | None = None
|
|
114
|
-
# Optional HTTP method; typically omitted for sources (defaults to GET
|
|
149
|
+
# Optional HTTP method; typically omitted for sources (defaults to GET)
|
|
115
150
|
# at runtime) and used for targets (e.g., 'post', 'put').
|
|
116
151
|
method: str | None = None
|
|
117
152
|
headers: dict[str, str] = field(default_factory=dict)
|
|
@@ -150,15 +185,8 @@ class ConnectorApi:
|
|
|
150
185
|
-------
|
|
151
186
|
Self
|
|
152
187
|
Parsed connector instance.
|
|
153
|
-
|
|
154
|
-
Raises
|
|
155
|
-
------
|
|
156
|
-
TypeError
|
|
157
|
-
If ``name`` is missing or invalid.
|
|
158
188
|
"""
|
|
159
|
-
name = obj
|
|
160
|
-
if not isinstance(name, str):
|
|
161
|
-
raise TypeError('ConnectorApi requires a "name" (str)')
|
|
189
|
+
name = _require_name(obj, kind='Api')
|
|
162
190
|
headers = cast_str_dict(obj.get('headers'))
|
|
163
191
|
|
|
164
192
|
return cls(
|
|
@@ -185,7 +213,7 @@ class ConnectorDb:
|
|
|
185
213
|
name : str
|
|
186
214
|
Unique connector name.
|
|
187
215
|
type : ConnectorType
|
|
188
|
-
Connector kind literal, always ``
|
|
216
|
+
Connector kind literal, always ``'database'``.
|
|
189
217
|
connection_string : str | None
|
|
190
218
|
Connection string/DSN for the database.
|
|
191
219
|
query : str | None
|
|
@@ -193,7 +221,7 @@ class ConnectorDb:
|
|
|
193
221
|
table : str | None
|
|
194
222
|
Target/source table name (optional).
|
|
195
223
|
mode : str | None
|
|
196
|
-
Load mode hint (e.g., ``
|
|
224
|
+
Load mode hint (e.g., ``'append'``, ``'replace'``) — future use.
|
|
197
225
|
"""
|
|
198
226
|
|
|
199
227
|
# -- Attributes -- #
|
|
@@ -232,15 +260,8 @@ class ConnectorDb:
|
|
|
232
260
|
-------
|
|
233
261
|
Self
|
|
234
262
|
Parsed connector instance.
|
|
235
|
-
|
|
236
|
-
Raises
|
|
237
|
-
------
|
|
238
|
-
TypeError
|
|
239
|
-
If ``name`` is missing or invalid.
|
|
240
263
|
"""
|
|
241
|
-
name = obj
|
|
242
|
-
if not isinstance(name, str):
|
|
243
|
-
raise TypeError('ConnectorDb requires a "name" (str)')
|
|
264
|
+
name = _require_name(obj, kind='Db')
|
|
244
265
|
|
|
245
266
|
return cls(
|
|
246
267
|
name=name,
|
|
@@ -262,9 +283,9 @@ class ConnectorFile:
|
|
|
262
283
|
name : str
|
|
263
284
|
Unique connector name.
|
|
264
285
|
type : ConnectorType
|
|
265
|
-
Connector kind literal, always ``
|
|
286
|
+
Connector kind literal, always ``'file'``.
|
|
266
287
|
format : str | None
|
|
267
|
-
File format (e.g., ``
|
|
288
|
+
File format (e.g., ``'json'``, ``'csv'``).
|
|
268
289
|
path : str | None
|
|
269
290
|
File path or URI.
|
|
270
291
|
options : dict[str, Any]
|
|
@@ -306,15 +327,8 @@ class ConnectorFile:
|
|
|
306
327
|
-------
|
|
307
328
|
Self
|
|
308
329
|
Parsed connector instance.
|
|
309
|
-
|
|
310
|
-
Raises
|
|
311
|
-
------
|
|
312
|
-
TypeError
|
|
313
|
-
If ``name`` is missing or invalid.
|
|
314
330
|
"""
|
|
315
|
-
name = obj
|
|
316
|
-
if not isinstance(name, str):
|
|
317
|
-
raise TypeError('ConnectorFile requires a "name" (str)')
|
|
331
|
+
name = _require_name(obj, kind='File')
|
|
318
332
|
|
|
319
333
|
return cls(
|
|
320
334
|
name=name,
|