etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/enums.py ADDED
@@ -0,0 +1,414 @@
1
+ """
2
+ :mod:`etlplus.enums` module.
3
+
4
+ Shared enumeration types used across ETLPlus modules.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import enum
10
+ import operator as _op
11
+ from statistics import fmean
12
+ from typing import Self
13
+
14
+ from .types import AggregateFunc
15
+ from .types import OperatorFunc
16
+ from .types import StrStrMap
17
+
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'AggregateName',
23
+ 'CoercibleStrEnum',
24
+ 'DataConnectorType',
25
+ 'FileFormat',
26
+ 'HttpMethod',
27
+ 'OperatorName',
28
+ 'PipelineStep',
29
+ 'coerce_data_connector_type',
30
+ 'coerce_file_format',
31
+ 'coerce_http_method',
32
+ ]
33
+
34
+
35
+ # SECTION: CLASSES ========================================================== #
36
+
37
+
38
+ class CoercibleStrEnum(enum.StrEnum):
39
+ """
40
+ StrEnum with ergonomic helpers.
41
+
42
+ Provides a DRY, class-level :meth:`coerce` that normalizes inputs and
43
+ produces consistent, informative error messages. Also exposes
44
+ :meth:`choices` for UI/validation and :meth:`try_coerce` for soft parsing.
45
+
46
+ Notes
47
+ -----
48
+ - Values are normalized via ``str(value).strip().casefold()``.
49
+ - Error messages enumerate allowed values for easier debugging.
50
+ """
51
+
52
+ # -- Class Methods -- #
53
+
54
+ @classmethod
55
+ def aliases(cls) -> StrStrMap:
56
+ """
57
+ Return a mapping of common aliases for each enum member.
58
+
59
+ Subclasses may override this method to provide custom aliases.
60
+
61
+ Returns
62
+ -------
63
+ StrStrMap
64
+ A mapping of alias names to their corresponding enum member names.
65
+ """
66
+ return {}
67
+
68
+ @classmethod
69
+ def choices(cls) -> tuple[str, ...]:
70
+ """
71
+ Return the allowed string values for this enum.
72
+
73
+ Returns
74
+ -------
75
+ tuple[str, ...]
76
+ A tuple of allowed string values for this enum.
77
+ """
78
+ return tuple(member.value for member in cls)
79
+
80
+ @classmethod
81
+ def coerce(cls, value: Self | str | object) -> Self:
82
+ """
83
+ Convert an enum member or string-like input to a member of ``cls``.
84
+
85
+ Parameters
86
+ ----------
87
+ value : Self | str | object
88
+ An existing enum member or a text value to normalize.
89
+
90
+ Returns
91
+ -------
92
+ Self
93
+ The corresponding enum member.
94
+
95
+ Raises
96
+ ------
97
+ ValueError
98
+ If the value cannot be coerced into a valid member.
99
+ """
100
+ if isinstance(value, cls):
101
+ return value
102
+ try:
103
+ normalized = str(value).strip().casefold()
104
+ resolved = cls.aliases().get(normalized, normalized)
105
+ return cls(resolved) # type: ignore[arg-type]
106
+ except (ValueError, TypeError) as e:
107
+ allowed = ', '.join(cls.choices())
108
+ raise ValueError(
109
+ f'Invalid {cls.__name__} value: {value!r}. Allowed: {allowed}',
110
+ ) from e
111
+
112
+ @classmethod
113
+ def try_coerce(
114
+ cls,
115
+ value: object,
116
+ ) -> Self | None:
117
+ """
118
+ Best-effort parse; return ``None`` on failure instead of raising.
119
+
120
+ Parameters
121
+ ----------
122
+ value : object
123
+ An existing enum member or a text value to normalize.
124
+
125
+ Returns
126
+ -------
127
+ Self | None
128
+ The corresponding enum member, or ``None`` if coercion fails.
129
+ """
130
+ try:
131
+ return cls.coerce(value)
132
+ except ValueError:
133
+ return None
134
+
135
+
136
+ # SECTION: ENUMS ============================================================ #
137
+
138
+
139
+ class AggregateName(CoercibleStrEnum):
140
+ """Supported aggregations with helpers."""
141
+
142
+ # -- Constants -- #
143
+
144
+ AVG = 'avg'
145
+ COUNT = 'count'
146
+ MAX = 'max'
147
+ MIN = 'min'
148
+ SUM = 'sum'
149
+
150
+ # -- Class Methods -- #
151
+
152
+ @property
153
+ def func(self) -> AggregateFunc:
154
+ """
155
+ Get the aggregation function for this aggregation type.
156
+
157
+ Returns
158
+ -------
159
+ AggregateFunc
160
+ The aggregation function corresponding to this aggregation type.
161
+ """
162
+ if self is AggregateName.COUNT:
163
+ return lambda xs, n: n
164
+ if self is AggregateName.MAX:
165
+ return lambda xs, n: (max(xs) if xs else None)
166
+ if self is AggregateName.MIN:
167
+ return lambda xs, n: (min(xs) if xs else None)
168
+ if self is AggregateName.SUM:
169
+ return lambda xs, n: sum(xs)
170
+
171
+ # AVG
172
+ return lambda xs, n: (fmean(xs) if xs else 0.0)
173
+
174
+
175
class DataConnectorType(CoercibleStrEnum):
    """Kinds of data connector: API, database, or file."""

    # -- Constants -- #

    API = 'api'
    DATABASE = 'database'
    FILE = 'file'

    # -- Class Methods -- #

    @classmethod
    def aliases(cls) -> StrStrMap:
        """
        Return the alternative spellings accepted for connector types.

        Returns
        -------
        StrStrMap
            Mapping from alias text to the member value it stands for.
        """
        return {
            **dict.fromkeys(('http', 'https', 'rest'), 'api'),
            'db': 'database',
            **dict.fromkeys(('filesystem', 'fs'), 'file'),
        }
204
+
205
+
206
class FileFormat(CoercibleStrEnum):
    """File formats: CSV, JSON, XML, and YAML."""

    # -- Constants -- #

    CSV = 'csv'
    JSON = 'json'
    XML = 'xml'
    YAML = 'yaml'

    # -- Class Methods -- #

    @classmethod
    def aliases(cls) -> StrStrMap:
        """
        Return the alternative spellings accepted for file formats.

        Returns
        -------
        StrStrMap
            Mapping from alias text (a shorthand or a MIME type) to the
            member value it stands for.
        """
        pairs = (
            # Common shorthand
            ('yml', 'yaml'),
            # MIME types
            ('text/csv', 'csv'),
            ('application/json', 'json'),
            ('application/xml', 'xml'),
        )
        return dict(pairs)
236
+
237
+
238
class HttpMethod(CoercibleStrEnum):
    """HTTP request methods, stored as lowercase verb names."""

    # -- Constants -- #

    CONNECT = 'connect'
    DELETE = 'delete'
    GET = 'get'
    HEAD = 'head'
    OPTIONS = 'options'
    PATCH = 'patch'
    POST = 'post'
    PUT = 'put'
    TRACE = 'trace'

    # -- Getters -- #

    @property
    def allows_body(self) -> bool:
        """
        Tell whether this method is treated as carrying a request body.

        Returns
        -------
        bool
            ``True`` for ``POST``, ``PUT``, and ``PATCH``; ``False`` for
            every other method.

        Notes
        -----
        - The RFCs do not strictly forbid a body on some other methods (for
          example ``DELETE``), but many servers and clients do not expect
          one, so only the three methods above are reported.
        """
        body_methods = (HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH)
        return self in body_methods
267
+
268
+
269
class OperatorName(CoercibleStrEnum):
    """Names of the supported comparison operators."""

    # -- Constants -- #

    EQ = 'eq'
    NE = 'ne'
    GT = 'gt'
    GTE = 'gte'
    LT = 'lt'
    LTE = 'lte'
    IN = 'in'
    CONTAINS = 'contains'

    # -- Getters -- #

    @property
    def func(self) -> OperatorFunc:
        """
        Look up the two-argument callable that implements this operator.

        Returns
        -------
        OperatorFunc
            For the six ordering/equality members, the matching function
            from :mod:`operator`. ``IN`` evaluates ``a in b`` and
            ``CONTAINS`` evaluates ``b in a``.
        """
        dispatch = {
            OperatorName.EQ: _op.eq,
            OperatorName.NE: _op.ne,
            OperatorName.GT: _op.gt,
            OperatorName.GTE: _op.ge,
            OperatorName.LT: _op.lt,
            OperatorName.LTE: _op.le,
            OperatorName.IN: lambda a, b: a in b,
            OperatorName.CONTAINS: lambda a, b: b in a,
        }
        return dispatch[self]

    # -- Class Methods -- #

    @classmethod
    def aliases(cls) -> StrStrMap:
        """
        Return the symbolic spellings accepted for operators.

        Returns
        -------
        StrStrMap
            Mapping from an operator symbol to the member value it stands
            for.
        """
        return {
            **dict.fromkeys(('==', '='), 'eq'),
            **dict.fromkeys(('!=', '<>'), 'ne'),
            **dict.fromkeys(('>=', '≥'), 'gte'),
            **dict.fromkeys(('<=', '≤'), 'lte'),
            '>': 'gt',
            '<': 'lt',
        }
337
+
338
+
339
class PipelineStep(CoercibleStrEnum):
    """Names of the pipeline steps, with a fixed ordering."""

    # -- Constants -- #

    FILTER = 'filter'
    MAP = 'map'
    SELECT = 'select'
    SORT = 'sort'
    AGGREGATE = 'aggregate'

    # -- Getters -- #

    @property
    def order(self) -> int:
        """
        Report where this step falls in the execution order.

        Returns
        -------
        int
            The position recorded for this step in the module-level
            ``_PIPELINE_ORDER_INDEX`` table.
        """
        position = _PIPELINE_ORDER_INDEX[self]
        return position
363
+
364
+
365
+ # SECTION: INTERNAL CONSTANTS ============================================== #
366
+
367
+
368
# Lookup table behind ``PipelineStep.order``. It is built once at import time
# so the property is a plain dictionary access; positions start at 0.
_PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
    step: position
    for position, step in enumerate(
        (
            PipelineStep.FILTER,
            PipelineStep.MAP,
            PipelineStep.SELECT,
            PipelineStep.SORT,
            PipelineStep.AGGREGATE,
        ),
    )
}
376
+
377
+
378
+ # SECTION: FUNCTIONS ======================================================== #
379
+
380
+
381
def coerce_data_connector_type(
    connector: DataConnectorType | str,
) -> DataConnectorType:
    """
    Convert a connector name to a :class:`DataConnectorType` member.

    Retained for backward compatibility; new code should call
    :meth:`DataConnectorType.coerce` directly.

    Parameters
    ----------
    connector : DataConnectorType | str
        A member, or text naming a member value or alias.

    Returns
    -------
    DataConnectorType
        The matching member.
    """
    connector_type = DataConnectorType.coerce(connector)
    return connector_type
391
+
392
+
393
def coerce_file_format(
    file_format: FileFormat | str,
) -> FileFormat:
    """
    Convert a file format name to a :class:`FileFormat` member.

    Retained for backward compatibility; new code should call
    :meth:`FileFormat.coerce` directly.

    Parameters
    ----------
    file_format : FileFormat | str
        A member, or text naming a member value or alias.

    Returns
    -------
    FileFormat
        The matching member.
    """
    resolved_format = FileFormat.coerce(file_format)
    return resolved_format
403
+
404
+
405
def coerce_http_method(
    http_method: HttpMethod | str,
) -> HttpMethod:
    """
    Convert an HTTP verb name to an :class:`HttpMethod` member.

    Retained for backward compatibility; new code should call
    :meth:`HttpMethod.coerce` directly.

    Parameters
    ----------
    http_method : HttpMethod | str
        A member, or text naming a member value.

    Returns
    -------
    HttpMethod
        The matching member.
    """
    resolved_method = HttpMethod.coerce(http_method)
    return resolved_method
etlplus/extract.py ADDED
@@ -0,0 +1,218 @@
1
+ """
2
+ :mod:`etlplus.extract` module.
3
+
4
+ Helpers to extract data from files, databases, and REST APIs.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+ from typing import cast
12
+
13
+ import requests # type: ignore[import]
14
+
15
+ from .enums import DataConnectorType
16
+ from .enums import FileFormat
17
+ from .enums import HttpMethod
18
+ from .enums import coerce_data_connector_type
19
+ from .enums import coerce_file_format
20
+ from .file import File
21
+ from .types import JSONData
22
+ from .types import JSONDict
23
+ from .types import JSONList
24
+ from .types import StrPath
25
+
26
+ # SECTION: FUNCTIONS ======================================================== #
27
+
28
+
29
+ # -- File Extraction -- #
30
+
31
+
32
def extract_from_file(
    file_path: StrPath,
    file_format: FileFormat | str | None = FileFormat.JSON,
) -> JSONData:
    """
    Read (semi-)structured data from a local file.

    Parameters
    ----------
    file_path : StrPath
        Location of the file to read.
    file_format : FileFormat | str | None, optional
        Format to parse the file as. Defaults to ``FileFormat.JSON``. Pass
        ``None`` explicitly to leave format detection to :class:`File`.

    Returns
    -------
    JSONData
        Whatever :meth:`File.read` returns for the file.
    """
    source = Path(file_path)

    # ``None`` is forwarded untouched so that ``File`` decides the format;
    # anything else is validated first. Existence and format checks are left
    # to the file module.
    resolved = None if file_format is None else coerce_file_format(file_format)
    return File(source, resolved).read()
62
+
63
+
64
+ # -- Database Extraction (Placeholder) -- #
65
+
66
+
67
def extract_from_database(
    connection_string: str,
) -> JSONList:
    """
    Stand in for database extraction, which is not implemented yet.

    No connection is opened; the function only reports that the feature is
    unavailable.

    Parameters
    ----------
    connection_string : str
        Database connection string. It is echoed back in the result.

    Returns
    -------
    JSONList
        A single-item list holding a ``message``, the ``connection_string``
        that was passed in, and a ``note`` about installing drivers.
    """
    notice = dict(
        message='Database extraction not yet implemented',
        connection_string=connection_string,
        note='Install database-specific drivers to enable this feature',
    )
    return [notice]
97
+
98
+
99
+ # -- REST API Extraction -- #
100
+
101
+
102
def extract_from_api(
    url: str,
    method: HttpMethod | str = HttpMethod.GET,
    **kwargs: Any,
) -> JSONData:
    """
    Extract data from a REST API.

    Parameters
    ----------
    url : str
        API endpoint URL.
    method : HttpMethod | str, optional
        HTTP method to use. Defaults to ``GET``.
    **kwargs : Any
        Extra arguments forwarded to the underlying request call. Two keys
        are consumed here instead of being forwarded as-is: ``timeout``
        (defaults to ``10.0`` when absent) and ``session`` (an object used
        in place of the ``requests`` module, e.g. a pre-configured
        :class:`requests.Session`).

    Returns
    -------
    JSONData
        For responses whose content type contains ``application/json``: the
        decoded object, a list of objects, a list of ``{'value': item}``
        wrappers when any array item is not an object, or
        ``{'value': payload}`` for any other JSON value. For other content
        types, or a body that fails to decode, a mapping with the raw
        ``content`` text and the ``content_type``.

    Raises
    ------
    ValueError
        If ``method`` cannot be coerced to an :class:`HttpMethod`.
    TypeError
        If the requester (the ``session``, or the ``requests`` module when
        no session is given) has no callable attribute named after the
        HTTP method (for example, ``get``).

    Notes
    -----
    ``response.raise_for_status()`` is called before the body is inspected,
    so any error it raises for an unsuccessful response propagates to the
    caller.
    """
    http_method = HttpMethod.coerce(method)

    # Apply a conservative timeout to guard against hanging requests.
    timeout = kwargs.pop('timeout', 10.0)
    session = kwargs.pop('session', None)
    requester = session or requests

    request_callable = getattr(requester, http_method.value, None)
    if not callable(request_callable):
        # The trailing space matters: the two literals are concatenated.
        raise TypeError(
            'Session object must supply a callable '
            f'"{http_method.value}" method',
        )

    response = request_callable(url, timeout=timeout, **kwargs)
    response.raise_for_status()

    content_type = response.headers.get('content-type', '').lower()
    if 'application/json' in content_type:
        try:
            payload: Any = response.json()
        except ValueError:
            # Malformed JSON despite content-type; fall back to text
            return {
                'content': response.text,
                'content_type': content_type,
            }
        if isinstance(payload, dict):
            return cast(JSONDict, payload)
        if isinstance(payload, list):
            if all(isinstance(x, dict) for x in payload):
                return cast(JSONList, payload)
            # Coerce non-dict array items into objects for consistency
            return [{'value': x} for x in payload]
        # Fallback: wrap scalar JSON
        return {'value': payload}

    return {'content': response.text, 'content_type': content_type}
170
+
171
+
172
+ # -- Orchestration -- #
173
+
174
+
175
def extract(
    source_type: DataConnectorType | str,
    source: StrPath,
    file_format: FileFormat | str | None = None,
    **kwargs: Any,
) -> JSONData:
    """
    Dispatch an extraction to the file, database, or API extractor.

    Parameters
    ----------
    source_type : DataConnectorType | str
        Kind of source to read from.
    source : StrPath
        File path, connection string, or API URL, depending on
        ``source_type``.
    file_format : FileFormat | str | None, optional
        Passed on to :func:`extract_from_file` for file sources only; it is
        not used for databases or APIs.
    **kwargs : Any
        Passed on to :func:`extract_from_api` for API sources only; the
        file and database branches do not receive them.

    Returns
    -------
    JSONData
        The result of the selected extractor.

    Raises
    ------
    ValueError
        If ``source_type`` is not a supported connector type.
    """
    connector_type = coerce_data_connector_type(source_type)

    if connector_type == DataConnectorType.FILE:
        # ``file_format`` may be None; it is handed over as given.
        return extract_from_file(source, file_format)
    if connector_type == DataConnectorType.DATABASE:
        return extract_from_database(str(source))
    if connector_type == DataConnectorType.API:
        # The HTTP method comes from ``kwargs`` when given; otherwise the
        # API extractor's own default applies.
        return extract_from_api(str(source), **kwargs)

    # Coercion above already rejects unknown values; this guard is kept for
    # defensive programming.
    raise ValueError(f'Invalid source type: {source_type}')