etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0

etlplus/config/pipeline.py
@@ -0,0 +1,339 @@
+ """
+ :mod:`etlplus.config.pipeline` module.
+
+ Pipeline configuration model and helpers for job orchestration.
+
+ Notes
+ -----
+ - Loads from dicts or YAML and builds typed models for sources, targets, and
+   jobs.
+ - Connector parsing is unified (``parse_connector``) and tolerant; unknown or
+   malformed entries are skipped.
+ - Optional variable substitution merges ``profile.env`` (lower precedence)
+   with the provided/environment variables (higher precedence).
+ """
+
+ from __future__ import annotations
+
+ import os
+ from collections.abc import Mapping
+ from dataclasses import dataclass
+ from dataclasses import field
+ from pathlib import Path
+ from typing import Any
+ from typing import Self
+
+ from ..api import ApiConfig
+ from ..enums import FileFormat
+ from ..file import File
+ from ..types import StrAnyMap
+ from ..utils import coerce_dict
+ from ..utils import maybe_mapping
+ from .connector import Connector
+ from .connector import parse_connector
+ from .jobs import JobConfig
+ from .profile import ProfileConfig
+ from .utils import deep_substitute
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = ['PipelineConfig', 'load_pipeline_config']
+
+
+ def _build_jobs(
+     raw: StrAnyMap,
+ ) -> list[JobConfig]:
+     """
+     Return a list of ``JobConfig`` objects parsed from the mapping.
+
+     Parameters
+     ----------
+     raw : StrAnyMap
+         Raw pipeline mapping.
+
+     Returns
+     -------
+     list[JobConfig]
+         Parsed job configurations.
+     """
+     jobs: list[JobConfig] = []
+     for job_raw in raw.get('jobs', []) or []:
+         job_cfg = JobConfig.from_obj(job_raw)
+         if job_cfg is not None:
+             jobs.append(job_cfg)
+
+     return jobs
+
+
+ def _build_sources(
+     raw: StrAnyMap,
+ ) -> list[Connector]:
+     """
+     Return a list of source connectors parsed from the mapping.
+
+     Parameters
+     ----------
+     raw : StrAnyMap
+         Raw pipeline mapping.
+
+     Returns
+     -------
+     list[Connector]
+         Parsed source connectors.
+     """
+     return _build_connectors(raw, 'sources')
+
+
+ def _build_targets(
+     raw: StrAnyMap,
+ ) -> list[Connector]:
+     """
+     Return a list of target connectors parsed from the mapping.
+
+     Parameters
+     ----------
+     raw : StrAnyMap
+         Raw pipeline mapping.
+
+     Returns
+     -------
+     list[Connector]
+         Parsed target connectors.
+     """
+     return _build_connectors(raw, 'targets')
+
+
+ def _build_connectors(
+     raw: StrAnyMap,
+     key: str,
+ ) -> list[Connector]:
+     """
+     Return parsed connectors from ``raw[key]`` using tolerant parsing.
+
+     Unknown or malformed entries are skipped to preserve permissiveness.
+
+     Parameters
+     ----------
+     raw : StrAnyMap
+         Raw pipeline mapping.
+     key : str
+         List-containing top-level key (``"sources"`` or ``"targets"``).
+
+     Returns
+     -------
+     list[Connector]
+         Constructed connector instances (malformed entries skipped).
+     """
+     items: list[Connector] = []
+     for obj in raw.get(key, []) or []:
+         if not (entry := maybe_mapping(obj)):
+             continue
+         try:
+             items.append(parse_connector(entry))
+         except TypeError:
+             # Skip unsupported types or malformed entries.
+             continue
+
+     return items
+
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def load_pipeline_config(
+     path: Path | str,
+     *,
+     substitute: bool = False,
+     env: Mapping[str, str] | None = None,
+ ) -> PipelineConfig:
+     """
+     Load a pipeline YAML file into a ``PipelineConfig`` instance.
+
+     Delegates to ``PipelineConfig.from_yaml`` for construction and optional
+     variable substitution.
+     """
+     return PipelineConfig.from_yaml(path, substitute=substitute, env=env)
+
+
+ # SECTION: CLASSES ========================================================== #
+
+
+ @dataclass(kw_only=True, slots=True)
+ class PipelineConfig:
+     """
+     Configuration for the data processing pipeline.
+
+     Attributes
+     ----------
+     name : str | None
+         Optional pipeline name.
+     version : str | None
+         Optional pipeline version string.
+     profile : ProfileConfig
+         Pipeline profile defaults and environment.
+     vars : dict[str, Any]
+         Named variables available for substitution.
+     apis : dict[str, ApiConfig]
+         Named API configurations.
+     databases : dict[str, dict[str, Any]]
+         Pass-through database config structures.
+     file_systems : dict[str, dict[str, Any]]
+         Pass-through filesystem config structures.
+     sources : list[Connector]
+         Source connectors, parsed tolerantly.
+     validations : dict[str, dict[str, Any]]
+         Validation rule set definitions.
+     transforms : dict[str, dict[str, Any]]
+         Transform pipeline definitions.
+     targets : list[Connector]
+         Target connectors, parsed tolerantly.
+     jobs : list[JobConfig]
+         Job orchestration definitions.
+     table_schemas : list[dict[str, Any]]
+         Optional DDL-style table specifications used by the render command.
+     """
+
+     # -- Attributes -- #
+
+     name: str | None = None
+     version: str | None = None
+     profile: ProfileConfig = field(default_factory=ProfileConfig)
+     vars: dict[str, Any] = field(default_factory=dict)
+
+     apis: dict[str, ApiConfig] = field(default_factory=dict)
+     databases: dict[str, dict[str, Any]] = field(default_factory=dict)
+     file_systems: dict[str, dict[str, Any]] = field(default_factory=dict)
+
+     sources: list[Connector] = field(default_factory=list)
+     validations: dict[str, dict[str, Any]] = field(default_factory=dict)
+     transforms: dict[str, dict[str, Any]] = field(default_factory=dict)
+     targets: list[Connector] = field(default_factory=list)
+     jobs: list[JobConfig] = field(default_factory=list)
+     table_schemas: list[dict[str, Any]] = field(default_factory=list)
+
+     # -- Class Methods -- #
+
+     @classmethod
+     def from_yaml(
+         cls,
+         path: Path | str,
+         *,
+         substitute: bool = False,
+         env: Mapping[str, str] | None = None,
+     ) -> Self:
+         """
+         Parse a YAML file into a ``PipelineConfig`` instance.
+
+         Parameters
+         ----------
+         path : Path | str
+             Path to the YAML file.
+         substitute : bool, optional
+             Perform variable substitution after the initial parse. Defaults
+             to ``False``.
+         env : Mapping[str, str] | None, optional
+             Environment mapping used for substitution; if omitted, use
+             ``os.environ``. Defaults to ``None``.
+
+         Returns
+         -------
+         Self
+             Parsed pipeline configuration.
+
+         Raises
+         ------
+         TypeError
+             If the YAML root is not a mapping/object.
+         """
+         raw = File(Path(path), FileFormat.YAML).read_yaml()
+         if not isinstance(raw, dict):
+             raise TypeError('Pipeline YAML must have a mapping/object root')
+
+         cfg = cls.from_dict(raw)
+
+         if substitute:
+             # Merge order: profile.env first (lowest), then provided env or
+             # os.environ (highest). External env overrides profile defaults.
+             base_env = dict(getattr(cfg.profile, 'env', {}) or {})
+             external = dict(env) if env is not None else dict(os.environ)
+             env_map = base_env | external
+             resolved = deep_substitute(raw, cfg.vars, env_map)
+             cfg = cls.from_dict(resolved)
+
+         return cfg
+
+     @classmethod
+     def from_dict(
+         cls,
+         raw: StrAnyMap,
+     ) -> Self:
+         """
+         Parse a mapping into a ``PipelineConfig`` instance.
+
+         Parameters
+         ----------
+         raw : StrAnyMap
+             Raw pipeline mapping.
+
+         Returns
+         -------
+         Self
+             Parsed pipeline configuration.
+         """
+         # Basic metadata
+         name = raw.get('name')
+         version = raw.get('version')
+
+         # Profile and vars
+         prof_raw = maybe_mapping(raw.get('profile')) or {}
+         profile = ProfileConfig.from_obj(prof_raw)
+         vars_map: dict[str, Any] = coerce_dict(raw.get('vars'))
+
+         # APIs
+         apis: dict[str, ApiConfig] = {}
+         api_block = maybe_mapping(raw.get('apis')) or {}
+         for api_name, api_obj in api_block.items():
+             apis[str(api_name)] = ApiConfig.from_obj(api_obj)
+
+         # Databases and file systems (pass-through structures)
+         databases = coerce_dict(raw.get('databases'))
+         file_systems = coerce_dict(raw.get('file_systems'))
+
+         # Sources
+         sources = _build_sources(raw)
+
+         # Validations/Transforms
+         validations = coerce_dict(raw.get('validations'))
+         transforms = coerce_dict(raw.get('transforms'))
+
+         # Targets
+         targets = _build_targets(raw)
+
+         # Jobs
+         jobs = _build_jobs(raw)
+
+         # Table schemas (optional, tolerant pass-through structures).
+         table_schemas: list[dict[str, Any]] = []
+         for entry in raw.get('table_schemas', []) or []:
+             spec = maybe_mapping(entry)
+             if spec is not None:
+                 table_schemas.append(dict(spec))
+
+         return cls(
+             name=name,
+             version=version,
+             profile=profile,
+             vars=vars_map,
+             apis=apis,
+             databases=databases,
+             file_systems=file_systems,
+             sources=sources,
+             validations=validations,
+             transforms=transforms,
+             targets=targets,
+             jobs=jobs,
+             table_schemas=table_schemas,
+         )
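
To make the substitution semantics concrete, here is a minimal usage sketch
against the API above. The file name pipeline.yml and the API_KEY variable are
illustrative assumptions, not part of the package:

    from etlplus.config.pipeline import load_pipeline_config

    # Parse only; any placeholders in the YAML are left unresolved.
    cfg = load_pipeline_config('pipeline.yml')

    # Parse with substitution. profile.env supplies defaults (lowest
    # precedence); the explicit env mapping, or os.environ when env is
    # omitted, overrides them (highest precedence).
    cfg = load_pipeline_config(
        'pipeline.yml',
        substitute=True,
        env={'API_KEY': 'token-from-caller'},
    )
    print(cfg.name, len(cfg.sources), len(cfg.jobs))
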
etlplus/config/profile.py
@@ -0,0 +1,78 @@
+ """
+ :mod:`etlplus.config.profile` module.
+
+ Profile model for pipeline-level defaults and environment.
+
+ Notes
+ -----
+ - Accepts ``Mapping[str, Any]`` and normalizes to concrete types.
+ - Environment values are coerced to strings.
+ """
+
+ from __future__ import annotations
+
+ from collections.abc import Mapping
+ from dataclasses import dataclass
+ from dataclasses import field
+ from typing import Self
+
+ from ..types import StrAnyMap
+ from ..utils import cast_str_dict
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = ['ProfileConfig']
+
+
+ # SECTION: CLASSES ========================================================== #
+
+
+ @dataclass(kw_only=True, slots=True)
+ class ProfileConfig:
+     """
+     Configuration for pipeline profiles.
+
+     Attributes
+     ----------
+     default_target : str | None
+         Default target name for jobs that omit an explicit target.
+     env : dict[str, str]
+         Environment variables available for substitution.
+     """
+
+     # -- Attributes -- #
+
+     default_target: str | None = None
+     env: dict[str, str] = field(default_factory=dict)
+
+     # -- Class Methods -- #
+
+     @classmethod
+     def from_obj(
+         cls,
+         obj: StrAnyMap | None,
+     ) -> Self:
+         """
+         Parse a mapping into a ``ProfileConfig`` instance.
+
+         Parameters
+         ----------
+         obj : StrAnyMap | None
+             Mapping with optional profile fields, or ``None``.
+
+         Returns
+         -------
+         Self
+             Parsed profile configuration; non-mapping input yields a default
+             instance. All ``env`` values are coerced to strings.
+         """
+         if not isinstance(obj, Mapping):
+             return cls()
+
+         # Coerce all env values to strings using shared helper.
+         env = cast_str_dict(obj.get('env'))
+
+         return cls(
+             default_target=obj.get('default_target'),
+             env=env,
+         )
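
A short sketch of the tolerant parsing described above. The field values are
invented, and the exact behavior of cast_str_dict (assumed here to apply
str() to each value) is an assumption:

    from etlplus.config.profile import ProfileConfig

    # Non-mapping input falls back to a default instance.
    assert ProfileConfig.from_obj(None) == ProfileConfig()

    # env values are coerced to strings.
    profile = ProfileConfig.from_obj(
        {'default_target': 'warehouse', 'env': {'PORT': 5432}}
    )
    print(profile.default_target)  # warehouse
    print(profile.env)             # e.g. {'PORT': '5432'} if str()-coerced
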
etlplus/config/types.py
@@ -0,0 +1,204 @@
+ """
+ :mod:`etlplus.config.types` module.
+
+ Type aliases and editor-only TypedDicts for :mod:`etlplus.config`.
+
+ These types improve IDE autocomplete and static analysis while the runtime
+ parsers remain permissive.
+
+ Notes
+ -----
+ - TypedDicts in this module are intentionally ``total=False`` and are not
+   enforced at runtime.
+ - ``*.from_obj`` constructors accept ``Mapping[str, Any]`` and perform
+   tolerant parsing and light casting. This keeps the runtime permissive while
+   improving autocomplete and static analysis for contributors.
+
+ Examples
+ --------
+ >>> from etlplus.config.types import ConnectorDbConfigMap, ConnectorFileConfigMap
+ >>> src: ConnectorFileConfigMap = {
+ ...     "type": "file",
+ ...     "path": "/data/input.csv",
+ ... }
+ >>> tgt: ConnectorDbConfigMap = {
+ ...     "type": "database",
+ ...     "connection_string": "postgresql://user:pass@localhost/db",
+ ... }
+ >>> from etlplus.api import RetryPolicy
+ >>> rp: RetryPolicy = {"max_attempts": 3, "backoff": 0.5}
+ """
+
+ from __future__ import annotations
+
+ from collections.abc import Mapping
+ from typing import Any
+ from typing import Literal
+ from typing import TypedDict
+
+ from ..api import PaginationConfigMap
+ from ..api import RateLimitConfigMap
+ from ..types import StrAnyMap
+
+ # SECTION: EXPORTS ========================================================= #
+
+
+ __all__ = [
+     # Type aliases
+     'ConnectorType',
+     # 'PaginationType',
+     # TypedDicts
+     'ApiProfileDefaultsMap',
+     'ApiProfileConfigMap',
+     'ApiConfigMap',
+     'EndpointMap',
+     'ConnectorApiConfigMap',
+     'ConnectorDbConfigMap',
+     'ConnectorFileConfigMap',
+ ]
+
+
+ # SECTION: TYPE ALIASES ===================================================== #
+
+
+ # Literal type for supported connector kinds
+ type ConnectorType = Literal['api', 'database', 'file']
+
+ # Literal type for supported pagination kinds
+ # type PaginationType = Literal['page', 'offset', 'cursor']
+
+
+ # SECTION: TYPED DICTS ====================================================== #
+
+
+ class ApiConfigMap(TypedDict, total=False):
+     """
+     Top-level API config shape parsed by ``ApiConfig.from_obj``.
+
+     Either provide a ``base_url`` with optional ``headers`` and
+     ``endpoints``, or provide ``profiles`` with at least one profile having
+     a ``base_url``.
+
+     See Also
+     --------
+     - etlplus.config.api.ApiConfig.from_obj: parses this mapping
+     """
+
+     base_url: str
+     headers: StrAnyMap
+     endpoints: Mapping[str, EndpointMap | str]
+     profiles: Mapping[str, ApiProfileConfigMap]
+
+
+ class ApiProfileConfigMap(TypedDict, total=False):
+     """
+     Shape accepted for a profile entry under ``ApiConfigMap.profiles``.
+
+     Notes
+     -----
+     ``base_url`` is required at runtime when profiles are provided.
+
+     See Also
+     --------
+     - etlplus.config.api.ApiProfileConfig.from_obj: parses this mapping
+     """
+
+     base_url: str
+     headers: StrAnyMap
+     base_path: str
+     auth: StrAnyMap
+     defaults: ApiProfileDefaultsMap
+
+
+ class ApiProfileDefaultsMap(TypedDict, total=False):
+     """
+     Defaults block available under a profile (all keys optional).
+
+     Notes
+     -----
+     The runtime expects header values to be ``str``; typing remains
+     permissive.
+
+     See Also
+     --------
+     - etlplus.config.api.ApiProfileConfig.from_obj: consumes this block
+     - etlplus.config.pagination.PaginationConfig.from_obj: parses pagination
+     - etlplus.api.rate_limiting.RateLimitConfig.from_obj: parses rate_limit
+     """
+
+     headers: StrAnyMap
+     pagination: PaginationConfigMap | StrAnyMap
+     rate_limit: RateLimitConfigMap | StrAnyMap
+
+
+ class ConnectorApiConfigMap(TypedDict, total=False):
+     """
+     Shape accepted by ``ConnectorApi.from_obj`` (all keys optional).
+
+     See Also
+     --------
+     - etlplus.config.connector.ConnectorApi.from_obj
+     """
+
+     name: str
+     type: ConnectorType
+     url: str
+     method: str
+     headers: StrAnyMap
+     query_params: StrAnyMap
+     pagination: PaginationConfigMap
+     rate_limit: RateLimitConfigMap
+     api: str
+     endpoint: str
+
+
+ class ConnectorDbConfigMap(TypedDict, total=False):
+     """
+     Shape accepted by ``ConnectorDb.from_obj`` (all keys optional).
+
+     See Also
+     --------
+     - etlplus.config.connector.ConnectorDb.from_obj
+     """
+
+     name: str
+     type: ConnectorType
+     connection_string: str
+     query: str
+     table: str
+     mode: str
+
+
+ class ConnectorFileConfigMap(TypedDict, total=False):
+     """
+     Shape accepted by ``ConnectorFile.from_obj`` (all keys optional).
+
+     See Also
+     --------
+     - etlplus.config.connector.ConnectorFile.from_obj
+     """
+
+     name: str
+     type: ConnectorType
+     format: str
+     path: str
+     options: StrAnyMap
+
+
+ class EndpointMap(TypedDict, total=False):
+     """
+     Shape accepted by ``EndpointConfig.from_obj``.
+
+     One of ``path`` or ``url`` should be provided.
+
+     See Also
+     --------
+     - etlplus.config.api.EndpointConfig.from_obj: parses this mapping
+     """
+
+     path: str
+     url: str
+     method: str
+     path_params: StrAnyMap
+     query_params: StrAnyMap
+     body: Any
+     pagination: PaginationConfigMap
+     rate_limit: RateLimitConfigMap
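
Because these TypedDicts are not enforced at runtime, their value is at the
editor and type-checker level. A small sketch of annotating config literals;
the endpoint name and URL are invented for illustration:

    from etlplus.config.types import ApiConfigMap, EndpointMap

    users: EndpointMap = {
        'path': '/users',
        'method': 'GET',
        'query_params': {'active': 'true'},
    }

    api: ApiConfigMap = {
        'base_url': 'https://api.example.com',
        'headers': {'Accept': 'application/json'},
        'endpoints': {'users': users},
    }

    # A type checker flags typos such as 'bas_url' in these literals, while
    # the tolerant from_obj parsers accept the same mappings unchanged at
    # runtime.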