etlplus 0.14.3__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. etlplus/README.md +4 -4
  2. etlplus/api/README.md +33 -2
  3. etlplus/api/auth.py +1 -1
  4. etlplus/api/config.py +5 -10
  5. etlplus/api/endpoint_client.py +4 -4
  6. etlplus/api/pagination/config.py +1 -1
  7. etlplus/api/pagination/paginator.py +6 -7
  8. etlplus/api/rate_limiting/config.py +4 -4
  9. etlplus/api/rate_limiting/rate_limiter.py +1 -1
  10. etlplus/api/retry_manager.py +2 -2
  11. etlplus/api/transport.py +1 -1
  12. etlplus/api/types.py +99 -0
  13. etlplus/api/utils.py +6 -2
  14. etlplus/cli/README.md +2 -2
  15. etlplus/cli/commands.py +75 -42
  16. etlplus/cli/constants.py +1 -1
  17. etlplus/cli/handlers.py +33 -15
  18. etlplus/cli/io.py +2 -2
  19. etlplus/cli/main.py +2 -2
  20. etlplus/cli/state.py +4 -7
  21. etlplus/connector/__init__.py +43 -0
  22. etlplus/connector/api.py +161 -0
  23. etlplus/connector/connector.py +26 -0
  24. etlplus/connector/core.py +132 -0
  25. etlplus/connector/database.py +122 -0
  26. etlplus/connector/enums.py +52 -0
  27. etlplus/connector/file.py +120 -0
  28. etlplus/connector/types.py +40 -0
  29. etlplus/connector/utils.py +122 -0
  30. etlplus/database/README.md +2 -2
  31. etlplus/database/ddl.py +2 -2
  32. etlplus/database/engine.py +19 -3
  33. etlplus/database/orm.py +2 -0
  34. etlplus/enums.py +1 -33
  35. etlplus/file/README.md +2 -2
  36. etlplus/file/_imports.py +1 -0
  37. etlplus/file/_io.py +52 -4
  38. etlplus/file/accdb.py +3 -2
  39. etlplus/file/arrow.py +3 -2
  40. etlplus/file/avro.py +3 -2
  41. etlplus/file/bson.py +3 -2
  42. etlplus/file/cbor.py +3 -2
  43. etlplus/file/cfg.py +3 -2
  44. etlplus/file/conf.py +3 -2
  45. etlplus/file/core.py +11 -8
  46. etlplus/file/csv.py +3 -2
  47. etlplus/file/dat.py +3 -2
  48. etlplus/file/dta.py +3 -2
  49. etlplus/file/duckdb.py +3 -2
  50. etlplus/file/enums.py +1 -1
  51. etlplus/file/feather.py +3 -2
  52. etlplus/file/fwf.py +3 -2
  53. etlplus/file/gz.py +3 -2
  54. etlplus/file/hbs.py +3 -2
  55. etlplus/file/hdf5.py +3 -2
  56. etlplus/file/ini.py +3 -2
  57. etlplus/file/ion.py +3 -2
  58. etlplus/file/jinja2.py +3 -2
  59. etlplus/file/json.py +5 -16
  60. etlplus/file/log.py +3 -2
  61. etlplus/file/mat.py +3 -2
  62. etlplus/file/mdb.py +3 -2
  63. etlplus/file/msgpack.py +3 -2
  64. etlplus/file/mustache.py +3 -2
  65. etlplus/file/nc.py +3 -2
  66. etlplus/file/ndjson.py +3 -2
  67. etlplus/file/numbers.py +3 -2
  68. etlplus/file/ods.py +3 -2
  69. etlplus/file/orc.py +3 -2
  70. etlplus/file/parquet.py +3 -2
  71. etlplus/file/pb.py +3 -2
  72. etlplus/file/pbf.py +3 -2
  73. etlplus/file/properties.py +3 -2
  74. etlplus/file/proto.py +3 -2
  75. etlplus/file/psv.py +3 -2
  76. etlplus/file/rda.py +3 -2
  77. etlplus/file/rds.py +3 -2
  78. etlplus/file/sas7bdat.py +3 -2
  79. etlplus/file/sav.py +3 -2
  80. etlplus/file/sqlite.py +3 -2
  81. etlplus/file/stub.py +1 -0
  82. etlplus/file/sylk.py +3 -2
  83. etlplus/file/tab.py +3 -2
  84. etlplus/file/toml.py +3 -2
  85. etlplus/file/tsv.py +3 -2
  86. etlplus/file/txt.py +4 -3
  87. etlplus/file/vm.py +3 -2
  88. etlplus/file/wks.py +3 -2
  89. etlplus/file/xls.py +3 -2
  90. etlplus/file/xlsm.py +3 -2
  91. etlplus/file/xlsx.py +3 -2
  92. etlplus/file/xml.py +9 -3
  93. etlplus/file/xpt.py +3 -2
  94. etlplus/file/yaml.py +5 -16
  95. etlplus/file/zip.py +3 -2
  96. etlplus/file/zsav.py +3 -2
  97. etlplus/ops/extract.py +13 -1
  98. etlplus/ops/load.py +15 -2
  99. etlplus/ops/run.py +18 -13
  100. etlplus/ops/transform.py +2 -2
  101. etlplus/ops/utils.py +6 -35
  102. etlplus/ops/validate.py +3 -3
  103. etlplus/templates/README.md +2 -2
  104. etlplus/types.py +3 -2
  105. etlplus/utils.py +163 -29
  106. etlplus/{config → workflow}/README.md +6 -6
  107. etlplus/workflow/__init__.py +32 -0
  108. etlplus/{dag.py → workflow/dag.py} +6 -4
  109. etlplus/{config → workflow}/jobs.py +101 -38
  110. etlplus/{config → workflow}/pipeline.py +59 -51
  111. etlplus/{config → workflow}/profile.py +8 -5
  112. {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/METADATA +4 -4
  113. etlplus-0.16.0.dist-info/RECORD +141 -0
  114. {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/WHEEL +1 -1
  115. etlplus/config/__init__.py +0 -56
  116. etlplus/config/connector.py +0 -372
  117. etlplus/config/types.py +0 -204
  118. etlplus/config/utils.py +0 -120
  119. etlplus-0.14.3.dist-info/RECORD +0 -135
  120. {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/entry_points.txt +0 -0
  121. {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/licenses/LICENSE +0 -0
  122. {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/top_level.txt +0 -0
etlplus/file/yaml.py CHANGED
@@ -18,18 +18,17 @@ Notes
18
18
  from __future__ import annotations
19
19
 
20
20
  from pathlib import Path
21
- from typing import cast
22
21
 
23
22
  from ..types import JSONData
24
- from ..types import JSONDict
25
- from ..types import JSONList
26
23
  from ..utils import count_records
27
24
  from ._imports import get_yaml
25
+ from ._io import coerce_record_payload
28
26
 
29
27
  # SECTION: EXPORTS ========================================================== #
30
28
 
31
29
 
32
30
  __all__ = [
31
+ # Functions
33
32
  'read',
34
33
  'write',
35
34
  ]
@@ -42,7 +41,7 @@ def read(
42
41
  path: Path,
43
42
  ) -> JSONData:
44
43
  """
45
- Read YAML content from ``path``.
44
+ Read YAML content from *path*.
46
45
 
47
46
  Validates that the YAML root is a dict or a list of dicts.
48
47
 
@@ -64,17 +63,7 @@ def read(
64
63
  with path.open('r', encoding='utf-8') as handle:
65
64
  loaded = get_yaml().safe_load(handle)
66
65
 
67
- if isinstance(loaded, dict):
68
- return cast(JSONDict, loaded)
69
- if isinstance(loaded, list):
70
- if all(isinstance(item, dict) for item in loaded):
71
- return cast(JSONList, loaded)
72
- raise TypeError(
73
- 'YAML array must contain only objects (dicts) when loading',
74
- )
75
- raise TypeError(
76
- 'YAML root must be an object or an array of objects when loading',
77
- )
66
+ return coerce_record_payload(loaded, format_name='YAML')
78
67
 
79
68
 
80
69
  def write(
@@ -82,7 +71,7 @@ def write(
82
71
  data: JSONData,
83
72
  ) -> int:
84
73
  """
85
- Write ``data`` as YAML to ``path`` and return record count.
74
+ Write *data* as YAML to *path* and return record count.
86
75
 
87
76
  Parameters
88
77
  ----------
etlplus/file/zip.py CHANGED
@@ -20,6 +20,7 @@ from .enums import infer_file_format_and_compression
20
20
 
21
21
 
22
22
  __all__ = [
23
+ # Functions
23
24
  'read',
24
25
  'write',
25
26
  ]
@@ -89,7 +90,7 @@ def read(
89
90
  path: Path,
90
91
  ) -> JSONData:
91
92
  """
92
- Read ZIP content from ``path`` and parse the inner payload(s).
93
+ Read ZIP content from *path* and parse the inner payload(s).
93
94
 
94
95
  Parameters
95
96
  ----------
@@ -140,7 +141,7 @@ def write(
140
141
  data: JSONData,
141
142
  ) -> int:
142
143
  """
143
- Write ``data`` to ZIP at ``path`` and return record count.
144
+ Write *data* to ZIP at *path* and return record count.
144
145
 
145
146
  Parameters
146
147
  ----------
etlplus/file/zsav.py CHANGED
@@ -27,6 +27,7 @@ from . import stub
27
27
 
28
28
 
29
29
  __all__ = [
30
+ # Functions
30
31
  'read',
31
32
  'write',
32
33
  ]
@@ -39,7 +40,7 @@ def read(
39
40
  path: Path,
40
41
  ) -> JSONList:
41
42
  """
42
- Read ZSAV content from ``path``.
43
+ Read ZSAV content from *path*.
43
44
 
44
45
  Parameters
45
46
  ----------
@@ -59,7 +60,7 @@ def write(
59
60
  data: JSONData,
60
61
  ) -> int:
61
62
  """
62
- Write ``data`` to ZSAV file at ``path`` and return record count.
63
+ Write *data* to ZSAV file at *path* and return record count.
63
64
 
64
65
  Parameters
65
66
  ----------
etlplus/ops/extract.py CHANGED
@@ -12,7 +12,7 @@ from typing import cast
12
12
 
13
13
  from ..api import HttpMethod
14
14
  from ..api.utils import resolve_request
15
- from ..enums import DataConnectorType
15
+ from ..connector import DataConnectorType
16
16
  from ..file import File
17
17
  from ..file import FileFormat
18
18
  from ..types import JSONData
@@ -20,6 +20,18 @@ from ..types import JSONDict
20
20
  from ..types import JSONList
21
21
  from ..types import StrPath
22
22
 
23
+ # SECTION: EXPORTS ========================================================== #
24
+
25
+
26
+ __all__ = [
27
+ # Functions
28
+ 'extract',
29
+ 'extract_from_api',
30
+ 'extract_from_database',
31
+ 'extract_from_file',
32
+ ]
33
+
34
+
23
35
  # SECTION: FUNCTIONS ======================================================== #
24
36
 
25
37
 
etlplus/ops/load.py CHANGED
@@ -14,7 +14,7 @@ from typing import cast
14
14
 
15
15
  from ..api import HttpMethod
16
16
  from ..api.utils import resolve_request
17
- from ..enums import DataConnectorType
17
+ from ..connector import DataConnectorType
18
18
  from ..file import File
19
19
  from ..file import FileFormat
20
20
  from ..types import JSONData
@@ -23,6 +23,19 @@ from ..types import JSONList
23
23
  from ..types import StrPath
24
24
  from ..utils import count_records
25
25
 
26
+ # SECTION: EXPORTS ========================================================== #
27
+
28
+
29
+ __all__ = [
30
+ # Functions
31
+ 'load',
32
+ 'load_data',
33
+ 'load_to_api',
34
+ 'load_to_database',
35
+ 'load_to_file',
36
+ ]
37
+
38
+
26
39
  # SECTION: INTERNAL FUNCTIONS ============================================== #
27
40
 
28
41
 
@@ -30,7 +43,7 @@ def _parse_json_string(
30
43
  raw: str,
31
44
  ) -> JSONData:
32
45
  """
33
- Parse JSON data from ``raw`` text.
46
+ Parse JSON data from *raw* text.
34
47
 
35
48
  Parameters
36
49
  ----------
etlplus/ops/run.py CHANGED
@@ -20,8 +20,7 @@ from ..api import RequestOptions
20
20
  from ..api import compose_api_request_env
21
21
  from ..api import compose_api_target_env
22
22
  from ..api import paginate_with_client
23
- from ..config import load_pipeline_config
24
- from ..enums import DataConnectorType
23
+ from ..connector import DataConnectorType
25
24
  from ..file import FileFormat
26
25
  from ..types import JSONData
27
26
  from ..types import JSONDict
@@ -29,6 +28,7 @@ from ..types import PipelineConfig
29
28
  from ..types import StrPath
30
29
  from ..types import Timeout
31
30
  from ..utils import print_json
31
+ from ..workflow import load_pipeline_config
32
32
  from .extract import extract
33
33
  from .load import load
34
34
  from .transform import transform
@@ -94,7 +94,7 @@ def run(
94
94
  Run a pipeline job defined in a YAML configuration.
95
95
 
96
96
  By default it reads the configuration from ``in/pipeline.yml``, but callers
97
- can provide an explicit ``config_path`` to override this.
97
+ can provide an explicit *config_path* to override this.
98
98
 
99
99
  Parameters
100
100
  ----------
@@ -162,9 +162,12 @@ def run(
162
162
  # can monkeypatch this class on etlplus.ops.run.
163
163
  ClientClass = EndpointClient # noqa: N806
164
164
  client = ClientClass(
165
- base_url=cast(str, env['base_url']),
165
+ base_url=cast(str, env.get('base_url')),
166
166
  base_path=cast(str | None, env.get('base_path')),
167
- endpoints=cast(dict[str, str], env['endpoints_map']),
167
+ endpoints=cast(
168
+ dict[str, str],
169
+ env.get('endpoints_map', {}),
170
+ ),
168
171
  retry=env.get('retry'),
169
172
  retry_network_errors=bool(
170
173
  env.get('retry_network_errors', False),
@@ -173,7 +176,7 @@ def run(
173
176
  )
174
177
  data = paginate_with_client(
175
178
  client,
176
- cast(str, env['endpoint_key']),
179
+ cast(str, env.get('endpoint_key')),
177
180
  env.get('params'),
178
181
  env.get('headers'),
179
182
  env.get('timeout'),
@@ -276,12 +279,14 @@ def run(
276
279
  if not url_t:
277
280
  raise ValueError('API target missing "url"')
278
281
  kwargs_t: dict[str, Any] = {}
279
- if env_t.get('headers'):
280
- kwargs_t['headers'] = cast(dict[str, str], env_t['headers'])
282
+ headers = env_t.get('headers')
283
+ if headers:
284
+ kwargs_t['headers'] = cast(dict[str, str], headers)
281
285
  if env_t.get('timeout') is not None:
282
- kwargs_t['timeout'] = env_t['timeout']
283
- if env_t.get('session') is not None:
284
- kwargs_t['session'] = env_t['session']
286
+ kwargs_t['timeout'] = env_t.get('timeout')
287
+ session = env_t.get('session')
288
+ if session is not None:
289
+ kwargs_t['session'] = session
285
290
  result = load(
286
291
  data,
287
292
  'api',
@@ -323,11 +328,11 @@ def run_pipeline(
323
328
  Parameters
324
329
  ----------
325
330
  source_type : DataConnectorType | str | None, optional
326
- Connector type for extraction. When ``None``, ``source`` is assumed
331
+ Connector type for extraction. When ``None``, *source* is assumed
327
332
  to be pre-loaded data and extraction is skipped.
328
333
  source : StrPath | JSONData | None, optional
329
334
  Data source for extraction or the pre-loaded payload when
330
- ``source_type`` is ``None``.
335
+ *source_type* is ``None``.
331
336
  operations : PipelineConfig | None, optional
332
337
  Transform configuration passed to :func:`etlplus.ops.transform`.
333
338
  target_type : DataConnectorType | str | None, optional
etlplus/ops/transform.py CHANGED
@@ -110,7 +110,7 @@ def _agg_count(
110
110
  present: int,
111
111
  ) -> int:
112
112
  """
113
- Return the provided presence count ``present``.
113
+ Return the provided presence count *present*.
114
114
 
115
115
  Parameters
116
116
  ----------
@@ -120,7 +120,7 @@ def _agg_count(
120
120
  Returns
121
121
  -------
122
122
  int
123
- The provided presence count ``present``.
123
+ The provided presence count *present*.
124
124
  """
125
125
  return present
126
126
 
etlplus/ops/utils.py CHANGED
@@ -7,13 +7,11 @@ The helpers defined here embrace a "high cohesion, low coupling" design by
7
7
  isolating normalization, configuration, and logging responsibilities. The
8
8
  resulting surface keeps ``maybe_validate`` focused on orchestration while
9
9
  offloading ancillary concerns to composable helpers.
10
-
11
10
  """
12
11
 
13
12
  from __future__ import annotations
14
13
 
15
14
  from collections.abc import Callable
16
- from collections.abc import Mapping
17
15
  from dataclasses import dataclass
18
16
  from types import MappingProxyType
19
17
  from typing import Any
@@ -23,7 +21,7 @@ from typing import TypedDict
23
21
  from typing import cast
24
22
 
25
23
  from ..types import StrAnyMap
26
- from ..utils import normalized_str
24
+ from ..utils import normalize_choice
27
25
 
28
26
  # SECTION: TYPED DICTIONARIES =============================================== #
29
27
 
@@ -207,14 +205,14 @@ def maybe_validate(
207
205
  Returns
208
206
  -------
209
207
  Any
210
- ``payload`` when validation is skipped or when severity is ``"warn"``
208
+ *payload* when validation is skipped or when severity is ``"warn"``
211
209
  and the validation fails. Returns the validator ``data`` payload when
212
210
  validation succeeds.
213
211
 
214
212
  Raises
215
213
  ------
216
214
  ValueError
217
- Raised when validation fails and ``severity`` is ``"error"``.
215
+ Raised when validation fails and *severity* is ``"error"``.
218
216
 
219
217
  Examples
220
218
  --------
@@ -320,7 +318,7 @@ def _normalize_phase(
320
318
  """
321
319
  return cast(
322
320
  ValidationPhase,
323
- _normalize_choice(
321
+ normalize_choice(
324
322
  value,
325
323
  mapping=_PHASE_CHOICES,
326
324
  default='before_transform',
@@ -346,7 +344,7 @@ def _normalize_severity(
346
344
  """
347
345
  return cast(
348
346
  ValidationSeverity,
349
- _normalize_choice(
347
+ normalize_choice(
350
348
  value,
351
349
  mapping=_SEVERITY_CHOICES,
352
350
  default='error',
@@ -372,7 +370,7 @@ def _normalize_window(
372
370
  """
373
371
  return cast(
374
372
  ValidationWindow,
375
- _normalize_choice(
373
+ normalize_choice(
376
374
  value,
377
375
  mapping=_WINDOW_CHOICES,
378
376
  default='both',
@@ -380,33 +378,6 @@ def _normalize_window(
380
378
  )
381
379
 
382
380
 
383
- def _normalize_choice(
384
- value: str | None,
385
- *,
386
- mapping: Mapping[str, str],
387
- default: str,
388
- ) -> str:
389
- """
390
- Normalize a text value against a mapping with a default fallback.
391
-
392
- Parameters
393
- ----------
394
- value : str | None
395
- Input text to normalize.
396
- mapping : Mapping[str, str]
397
- Mapping of accepted values to normalized outputs.
398
- default : str
399
- Default to return when input is missing or unrecognized.
400
-
401
- Returns
402
- -------
403
- str
404
- Normalized value.
405
- """
406
- normalized = normalized_str(value)
407
- return mapping.get(normalized, default)
408
-
409
-
410
381
  def _rule_name(
411
382
  rules: Ruleset,
412
383
  ) -> str | None:
etlplus/ops/validate.py CHANGED
@@ -11,8 +11,8 @@ Highlights
11
11
  ----------
12
12
  - Centralized type map and helpers for clarity and reuse.
13
13
  - Consistent error wording; field and item paths like ``[2].email``.
14
- - Small, focused public API with ``load_data``, ``validate_field``,
15
- ``validate``.
14
+ - Small, focused public API with :func:`load_data`, :func:`validate_field`,
15
+ :func:`validate`.
16
16
 
17
17
  Examples
18
18
  --------
@@ -66,7 +66,7 @@ TYPE_MAP: Final[dict[str, type | tuple[type, ...]]] = {
66
66
  }
67
67
 
68
68
 
69
- # SECTION: CLASSES ========================================================== #
69
+ # SECTION: TYPED DICTS ====================================================== #
70
70
 
71
71
 
72
72
  class FieldRules(TypedDict, total=False):
etlplus/templates/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # etlplus.templates subpackage
1
+ # `etlplus.templates` Subpackage
2
2
 
3
3
  Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
4
4
 
@@ -8,7 +8,7 @@ Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpe
8
8
 
9
9
  Back to project overview: see the top-level [README](../../README.md).
10
10
 
11
- - [etlplus.templates subpackage](#etlpustemplates-subpackage)
11
+ - [`etlplus.templates` Subpackage](#etlplus-templates-subpackage)
12
12
  - [Available Templates](#available-templates)
13
13
  - [Rendering Templates](#rendering-templates)
14
14
  - [Example: Rendering a DDL Template](#example-rendering-a-ddl-template)
etlplus/types.py CHANGED
@@ -11,8 +11,9 @@ Notes
11
11
 
12
12
  See Also
13
13
  --------
14
- - :mod:`etlplus.api.types` for HTTP-specific aliases
15
- - :mod:`etlplus.config.types` for TypedDict surfaces
14
+ - :mod:`etlplus.api.types` for HTTP-specific aliases and data classes
15
+ - :mod:`etlplus.connector.types` for connector-specific aliases and TypedDict
16
+ surfaces
16
17
 
17
18
  Examples
18
19
  --------