etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,372 @@
1
+ """
2
+ :mod:`etlplus.config.connector` module.
3
+
4
+ A module defining configuration types for data source/target connectors in ETL
5
+ pipelines. A "connector" is any I/O endpoint:
6
+
7
+ - file (local/remote file systems)
8
+ - database
9
+ - REST API service/endpoint
10
+ - (future) queues, streams, etc.
11
+
12
+ Examples
13
+ --------
14
+ - Use ``ConnectorApi``/``ConnectorFile``/``ConnectorDb`` when you want the
15
+ concrete dataclasses.
16
+ - Use the ``Connector`` union for typing a value that can be any connector.
17
+ - Use ``parse_connector(obj)`` to construct a connector instance from a generic
18
+ mapping that includes a ``type`` key.
19
+
20
+ Notes
21
+ -----
22
+ - TypedDict shapes are editor hints; runtime parsing remains permissive
23
+ (from_obj accepts Mapping[str, Any]).
24
+ - TypedDicts referenced in :mod:`etlplus.config.types` remain editor hints.
25
+ Runtime parsing stays permissive and tolerant.
26
+
27
+ See Also
28
+ --------
29
+ - TypedDict shapes for editor hints (not enforced at runtime):
30
+ :mod:`etlplus.config.types.ConnectorApiConfigMap`,
31
+ :mod:`etlplus.config.types.ConnectorDbConfigMap`,
32
+ :mod:`etlplus.config.types.ConnectorFileConfigMap`.
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ from collections.abc import Mapping
38
+ from dataclasses import dataclass
39
+ from dataclasses import field
40
+ from typing import TYPE_CHECKING
41
+ from typing import Any
42
+ from typing import Self
43
+ from typing import overload
44
+
45
+ from ..api import PaginationConfig
46
+ from ..api import RateLimitConfig
47
+ from ..types import StrAnyMap
48
+ from ..utils import cast_str_dict
49
+ from ..utils import coerce_dict
50
+
51
+ if TYPE_CHECKING: # Editor-only typing hints to avoid runtime imports
52
+ from .types import ConnectorApiConfigMap
53
+ from .types import ConnectorDbConfigMap
54
+ from .types import ConnectorFileConfigMap
55
+ from .types import ConnectorType
56
+
57
+
58
# SECTION: EXPORTS ========================================================== #


# Names re-exported as this module's public API; ``from ... import *`` and
# documentation tooling both honor this list.
__all__ = [
    # Classes
    'ConnectorApi',
    'ConnectorDb',
    'ConnectorFile',
    # Functions
    'parse_connector',
    # Type aliases
    'Connector',
]
71
+
72
+
73
+ # SECTION: DATA CLASSES ===================================================== #
74
+
75
+
76
@dataclass(kw_only=True, slots=True)
class ConnectorApi:
    """
    Configuration for an API-based data connector.

    Supports two mutually compatible forms: a *direct* form (absolute
    ``url`` plus request settings) and a *reference* form (``api`` +
    ``endpoint`` pointing into the pipeline's top-level ``apis`` block).

    Attributes
    ----------
    name : str
        Unique connector name.
    type : ConnectorType
        Connector kind literal, always ``"api"``.
    url : str | None
        Direct absolute URL (when not using ``service``/``endpoint`` refs).
    method : str | None
        Optional HTTP method; typically omitted for sources (GET at
        runtime) and set for targets (e.g., ``"post"``).
    headers : dict[str, str]
        Additional request headers.
    query_params : dict[str, Any]
        Default query parameters.
    pagination : PaginationConfig | None
        Pagination settings (optional).
    rate_limit : RateLimitConfig | None
        Rate limiting settings (optional).
    api : str | None
        Service reference into the pipeline ``apis`` block (a.k.a.
        ``service``).
    endpoint : str | None
        Endpoint name within the referenced service.
    """

    # -- Attributes -- #

    name: str
    type: ConnectorType = 'api'

    # Direct form
    url: str | None = None
    # HTTP method is optional; sources default to GET at runtime while
    # targets usually set one explicitly ('post', 'put', ...).
    method: str | None = None
    headers: dict[str, str] = field(default_factory=dict)
    query_params: dict[str, Any] = field(default_factory=dict)
    pagination: PaginationConfig | None = None
    rate_limit: RateLimitConfig | None = None

    # Reference form (to top-level APIs/endpoints)
    api: str | None = None
    endpoint: str | None = None

    # -- Class Methods -- #

    @classmethod
    @overload
    def from_obj(cls, obj: ConnectorApiConfigMap) -> Self: ...

    @classmethod
    @overload
    def from_obj(cls, obj: StrAnyMap) -> Self: ...

    @classmethod
    def from_obj(
        cls,
        obj: StrAnyMap,
    ) -> Self:
        """
        Parse a mapping into a ``ConnectorApi`` instance.

        Parameters
        ----------
        obj : StrAnyMap
            Mapping with at least ``name``.

        Returns
        -------
        Self
            Parsed connector instance.

        Raises
        ------
        TypeError
            If ``name`` is missing or invalid.
        """
        raw_name = obj.get('name')
        if not isinstance(raw_name, str):
            raise TypeError('ConnectorApi requires a "name" (str)')

        # Accept 'service' as a legacy/alternate spelling of 'api'.
        service_ref = obj.get('api') or obj.get('service')

        return cls(
            name=raw_name,
            type='api',
            url=obj.get('url'),
            method=obj.get('method'),
            headers=cast_str_dict(obj.get('headers')),
            query_params=coerce_dict(obj.get('query_params')),
            pagination=PaginationConfig.from_obj(obj.get('pagination')),
            rate_limit=RateLimitConfig.from_obj(obj.get('rate_limit')),
            api=service_ref,
            endpoint=obj.get('endpoint'),
        )
176
+
177
+
178
+ @dataclass(kw_only=True, slots=True)
179
+ class ConnectorDb:
180
+ """
181
+ Configuration for a database-based data connector.
182
+
183
+ Attributes
184
+ ----------
185
+ name : str
186
+ Unique connector name.
187
+ type : ConnectorType
188
+ Connector kind literal, always ``"database"``.
189
+ connection_string : str | None
190
+ Connection string/DSN for the database.
191
+ query : str | None
192
+ Query to execute for extraction (optional).
193
+ table : str | None
194
+ Target/source table name (optional).
195
+ mode : str | None
196
+ Load mode hint (e.g., ``"append"``, ``"replace"``) — future use.
197
+ """
198
+
199
+ # -- Attributes -- #
200
+
201
+ name: str
202
+ type: ConnectorType = 'database'
203
+ connection_string: str | None = None
204
+ query: str | None = None
205
+ table: str | None = None
206
+ mode: str | None = None # append|replace|upsert (future)
207
+
208
+ # -- Class Methods -- #
209
+
210
+ @classmethod
211
+ @overload
212
+ def from_obj(cls, obj: ConnectorDbConfigMap) -> Self: ...
213
+
214
+ @classmethod
215
+ @overload
216
+ def from_obj(cls, obj: StrAnyMap) -> Self: ...
217
+
218
+ @classmethod
219
+ def from_obj(
220
+ cls,
221
+ obj: StrAnyMap,
222
+ ) -> Self:
223
+ """
224
+ Parse a mapping into a ``ConnectorDb`` instance.
225
+
226
+ Parameters
227
+ ----------
228
+ obj : StrAnyMap
229
+ Mapping with at least ``name``.
230
+
231
+ Returns
232
+ -------
233
+ Self
234
+ Parsed connector instance.
235
+
236
+ Raises
237
+ ------
238
+ TypeError
239
+ If ``name`` is missing or invalid.
240
+ """
241
+ name = obj.get('name')
242
+ if not isinstance(name, str):
243
+ raise TypeError('ConnectorDb requires a "name" (str)')
244
+
245
+ return cls(
246
+ name=name,
247
+ type='database',
248
+ connection_string=obj.get('connection_string'),
249
+ query=obj.get('query'),
250
+ table=obj.get('table'),
251
+ mode=obj.get('mode'),
252
+ )
253
+
254
+
255
@dataclass(kw_only=True, slots=True)
class ConnectorFile:
    """
    Configuration for a file-based data connector.

    Attributes
    ----------
    name : str
        Unique connector name.
    type : ConnectorType
        Connector kind literal, always ``"file"``.
    format : str | None
        File format (e.g., ``"json"``, ``"csv"``).
    path : str | None
        File path or URI.
    options : dict[str, Any]
        Reader/writer format options.
    """

    # -- Attributes -- #

    name: str
    type: ConnectorType = 'file'
    format: str | None = None
    path: str | None = None
    options: dict[str, Any] = field(default_factory=dict)

    # -- Class Methods -- #

    @classmethod
    @overload
    def from_obj(cls, obj: ConnectorFileConfigMap) -> Self: ...

    @classmethod
    @overload
    def from_obj(cls, obj: StrAnyMap) -> Self: ...

    @classmethod
    def from_obj(
        cls,
        obj: StrAnyMap,
    ) -> Self:
        """
        Parse a mapping into a ``ConnectorFile`` instance.

        Parameters
        ----------
        obj : StrAnyMap
            Mapping with at least ``name``.

        Returns
        -------
        Self
            Parsed connector instance.

        Raises
        ------
        TypeError
            If ``name`` is missing or invalid.
        """
        raw_name = obj.get('name')
        if not isinstance(raw_name, str):
            raise TypeError('ConnectorFile requires a "name" (str)')

        # coerce_dict keeps parsing tolerant: non-mapping 'options' become {}.
        return cls(
            name=raw_name,
            type='file',
            format=obj.get('format'),
            path=obj.get('path'),
            options=coerce_dict(obj.get('options')),
        )
326
+
327
+
328
+ # SECTION: FUNCTIONS ======================================================== #
329
+
330
+
331
+ def parse_connector(obj: Mapping[str, Any]) -> Connector:
332
+ """
333
+ Dispatch to a concrete connector constructor based on ``type``.
334
+
335
+ Parameters
336
+ ----------
337
+ obj : Mapping[str, Any]
338
+ Mapping with at least ``name`` and ``type``.
339
+
340
+ Returns
341
+ -------
342
+ Connector
343
+ Concrete connector instance.
344
+
345
+ Raises
346
+ ------
347
+ TypeError
348
+ If ``type`` is unsupported or missing.
349
+
350
+ Notes
351
+ -----
352
+ Delegates to the tolerant ``from_obj`` constructors for each connector
353
+ kind.
354
+ """
355
+ match str(obj.get('type', '')).casefold():
356
+ case 'file':
357
+ return ConnectorFile.from_obj(obj)
358
+ case 'database':
359
+ return ConnectorDb.from_obj(obj)
360
+ case 'api':
361
+ return ConnectorApi.from_obj(obj)
362
+ case _:
363
+ raise TypeError(
364
+ 'Unsupported connector type; '
365
+ 'expected one of {file, database, api}',
366
+ )
367
+
368
+
369
# SECTION: TYPED ALIASES (post-class definitions) ========================= #

# Type alias representing any supported connector. Declared after the class
# definitions (PEP 695 ``type`` statement) so every member of the union is
# already bound when the alias is created.
type Connector = ConnectorApi | ConnectorDb | ConnectorFile
etlplus/config/jobs.py ADDED
@@ -0,0 +1,311 @@
1
+ """
2
+ :mod:`etlplus.config.jobs` module.
3
+
4
+ Data classes modeling job orchestration references (extract, validate,
5
+ transform, load).
6
+
7
+ Notes
8
+ -----
9
+ - Lightweight references used inside ``PipelineConfig`` to avoid storing
10
+ large nested structures.
11
+ - All attributes are simple and optional where appropriate, keeping parsing
12
+ tolerant.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass
18
+ from dataclasses import field
19
+ from typing import Any
20
+ from typing import Self
21
+
22
+ from ..utils import coerce_dict
23
+ from ..utils import maybe_mapping
24
+
25
# SECTION: EXPORTS ========================================================== #


# Public API of this module: the job orchestration reference dataclasses.
__all__ = [
    'ExtractRef',
    'JobConfig',
    'LoadRef',
    'TransformRef',
    'ValidationRef',
]
35
+
36
+
37
+ # SECTION: TYPE ALIASES ===================================================== #
38
+
39
+
40
+ # SECTION: CLASSES ========================================================== #
41
+
42
+
43
@dataclass(kw_only=True, slots=True)
class ExtractRef:
    """
    Reference to a data source for extraction.

    Attributes
    ----------
    source : str
        Name of the source connector.
    options : dict[str, Any]
        Optional extract-time options (e.g., query parameter overrides).
    """

    # -- Attributes -- #

    source: str
    options: dict[str, Any] = field(default_factory=dict)

    # -- Class Methods -- #

    @classmethod
    def from_obj(
        cls,
        obj: Any,
    ) -> Self | None:
        """Parse a mapping into an :class:`ExtractRef` instance.

        Parameters
        ----------
        obj : Any
            Mapping with ``source`` and optional ``options``.

        Returns
        -------
        Self | None
            Parsed reference, or ``None`` when *obj* is not a mapping or
            lacks a string ``source``.
        """
        mapping = maybe_mapping(obj)
        if not mapping:
            return None
        src = mapping.get('source')
        if not isinstance(src, str):
            return None
        return cls(source=src, options=coerce_dict(mapping.get('options')))
90
+
91
+
92
@dataclass(kw_only=True, slots=True)
class JobConfig:
    """
    Configuration for a data processing job.

    Attributes
    ----------
    name : str
        Unique job name.
    description : str | None
        Optional human-friendly description.
    extract : ExtractRef | None
        Extraction reference.
    validate : ValidationRef | None
        Validation reference.
    transform : TransformRef | None
        Transform reference.
    load : LoadRef | None
        Load reference.
    """

    # -- Attributes -- #

    name: str
    description: str | None = None
    extract: ExtractRef | None = None
    validate: ValidationRef | None = None
    transform: TransformRef | None = None
    load: LoadRef | None = None

    # -- Class Methods -- #

    @classmethod
    def from_obj(
        cls,
        obj: Any,
    ) -> Self | None:
        """Parse a mapping into a :class:`JobConfig` instance.

        Parameters
        ----------
        obj : Any
            Mapping describing a job block.

        Returns
        -------
        Self | None
            Parsed job configuration, or ``None`` when *obj* is not a
            mapping or lacks a string ``name``.
        """
        data = maybe_mapping(obj)
        if not data:
            return None
        job_name = data.get('name')
        if not isinstance(job_name, str):
            return None

        # Coerce a non-string description rather than rejecting the job.
        desc = data.get('description')
        if desc is not None and not isinstance(desc, str):
            desc = str(desc)

        return cls(
            name=job_name,
            description=desc,
            extract=ExtractRef.from_obj(data.get('extract')),
            validate=ValidationRef.from_obj(data.get('validate')),
            transform=TransformRef.from_obj(data.get('transform')),
            load=LoadRef.from_obj(data.get('load')),
        )
160
+
161
+
162
@dataclass(kw_only=True, slots=True)
class LoadRef:
    """
    Reference to a data target for loading.

    Attributes
    ----------
    target : str
        Name of the target connector.
    overrides : dict[str, Any]
        Optional load-time overrides (e.g., headers).
    """

    # -- Attributes -- #

    target: str
    overrides: dict[str, Any] = field(default_factory=dict)

    # -- Class Methods -- #

    @classmethod
    def from_obj(
        cls,
        obj: Any,
    ) -> Self | None:
        """Parse a mapping into a :class:`LoadRef` instance.

        Parameters
        ----------
        obj : Any
            Mapping with ``target`` and optional ``overrides``.

        Returns
        -------
        Self | None
            Parsed reference, or ``None`` when *obj* is not a mapping or
            lacks a string ``target``.
        """
        mapping = maybe_mapping(obj)
        if not mapping:
            return None
        tgt = mapping.get('target')
        if not isinstance(tgt, str):
            return None
        return cls(target=tgt, overrides=coerce_dict(mapping.get('overrides')))
209
+
210
+
211
@dataclass(kw_only=True, slots=True)
class TransformRef:
    """
    Reference to a transformation pipeline.

    Attributes
    ----------
    pipeline : str
        Name of the transformation pipeline.
    """

    # -- Attributes -- #

    pipeline: str

    # -- Class Methods -- #

    @classmethod
    def from_obj(
        cls,
        obj: Any,
    ) -> Self | None:
        """Parse a mapping into a :class:`TransformRef` instance.

        Parameters
        ----------
        obj : Any
            Mapping with ``pipeline``.

        Returns
        -------
        Self | None
            Parsed reference, or ``None`` when *obj* is not a mapping or
            lacks a string ``pipeline``.
        """
        data = maybe_mapping(obj)
        if data and isinstance(data.get('pipeline'), str):
            return cls(pipeline=data['pipeline'])
        return None
252
+
253
+
254
@dataclass(kw_only=True, slots=True)
class ValidationRef:
    """
    Reference to a validation rule set.

    Attributes
    ----------
    ruleset : str
        Name of the validation rule set.
    severity : str | None
        Severity level (``"warn"`` or ``"error"``).
    phase : str | None
        Execution phase (``"before_transform"``, ``"after_transform"``,
        or ``"both"``).
    """

    # -- Attributes -- #

    ruleset: str
    severity: str | None = None  # warn|error
    phase: str | None = None  # before_transform|after_transform|both

    # -- Class Methods -- #

    @classmethod
    def from_obj(
        cls,
        obj: Any,
    ) -> Self | None:
        """Parse a mapping into a :class:`ValidationRef` instance.

        Parameters
        ----------
        obj : Any
            Mapping with ``ruleset`` plus optional metadata.

        Returns
        -------
        Self | None
            Parsed reference, or ``None`` when *obj* is not a mapping or
            lacks a string ``ruleset``.
        """
        data = maybe_mapping(obj)
        if not data:
            return None
        rules = data.get('ruleset')
        if not isinstance(rules, str):
            return None

        def _as_str(value: Any) -> str | None:
            # Coerce non-string metadata to str; preserve None as-is.
            if value is None or isinstance(value, str):
                return value
            return str(value)

        return cls(
            ruleset=rules,
            severity=_as_str(data.get('severity')),
            phase=_as_str(data.get('phase')),
        )