etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/load.py ADDED
@@ -0,0 +1,336 @@
1
+ """
2
+ :mod:`etlplus.load` module.
3
+
4
+ Helpers to load data into files, databases, and REST APIs.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Any
13
+ from typing import cast
14
+
15
+ import requests # type: ignore[import]
16
+
17
+ from .enums import DataConnectorType
18
+ from .enums import FileFormat
19
+ from .enums import HttpMethod
20
+ from .enums import coerce_data_connector_type
21
+ from .enums import coerce_file_format
22
+ from .enums import coerce_http_method
23
+ from .file import File
24
+ from .types import JSONData
25
+ from .types import JSONDict
26
+ from .types import JSONList
27
+ from .types import StrPath
28
+ from .utils import count_records
29
+
30
+ # SECTION: INTERNAL FUNCTIONS ============================================== #
31
+
32
+
33
def _parse_json_string(
    raw: str,
) -> JSONData:
    """
    Decode ``raw`` as JSON and require a record-shaped result.

    Parameters
    ----------
    raw : str
        JSON text to decode.

    Returns
    -------
    JSONData
        A single JSON object, or a list whose items are all JSON objects.

    Raises
    ------
    ValueError
        If ``raw`` is not valid JSON, if the root value is neither an object
        nor an array, or if an array holds any non-object item.
    """
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise ValueError(f'Invalid data source: {raw}') from exc

    # Scalar roots (numbers, strings, booleans, null) are never accepted.
    if not isinstance(decoded, (dict, list)):
        raise ValueError(
            'JSON root must be an object or array when parsing string',
        )

    if isinstance(decoded, dict):
        return cast(JSONDict, decoded)

    # Arrays are accepted only when every element is an object; an empty
    # array passes trivially.
    if any(not isinstance(item, dict) for item in decoded):
        raise ValueError(
            'JSON array must contain only objects (dicts) when parsing string',
        )
    return cast(JSONList, decoded)
70
+
71
+
72
+ # SECTION: FUNCTIONS ======================================================== #
73
+
74
+
75
+ # -- Data Loading -- #
76
+
77
+
78
def load_data(
    source: StrPath | JSONData,
) -> JSONData:
    """
    Load data from a file path, JSON string, stdin, or direct object.

    Parameters
    ----------
    source : StrPath | JSONData
        Data source to load. A ``dict`` or ``list`` is returned as-is. A
        :class:`pathlib.Path` is read as a JSON file. A string is handled as
        follows: ``'-'`` reads JSON text from standard input; a string that
        names an existing path is read as a JSON file; anything else (or a
        file that cannot be read as JSON) is parsed as raw JSON text.

    Returns
    -------
    JSONData
        Parsed object or list of objects.

    Raises
    ------
    TypeError
        If `source` is not a string, path, or JSON-like object.
    ValueError
        If a string source has to be parsed as JSON text and is rejected by
        the JSON string parser.
    """
    if isinstance(source, (dict, list)):
        return cast(JSONData, source)

    if isinstance(source, Path):
        return File(source, FileFormat.JSON).read_json()

    if isinstance(source, str):
        # Special case: '-' means read JSON from stdin (Unix convention).
        if source == '-':
            return _parse_json_string(sys.stdin.read())

        candidate = Path(source)
        # Probing an arbitrary JSON string as a path can itself fail (for
        # example a name longer than the filesystem allows). Treat any such
        # failure as "not a path" so the raw-JSON fallback still runs.
        try:
            is_existing_path = candidate.exists()
        except (OSError, ValueError):
            is_existing_path = False

        if is_existing_path:
            try:
                return File(candidate, FileFormat.JSON).read_json()
            except (OSError, json.JSONDecodeError, ValueError):
                # Fall back to treating the string as raw JSON content.
                pass
        return _parse_json_string(source)

    raise TypeError(
        'source must be a mapping, sequence of mappings, path, or JSON string',
    )
123
+
124
+
125
+ # -- File Loading -- #
126
+
127
+
128
def load_to_file(
    data: JSONData,
    file_path: StrPath,
    file_format: FileFormat | str | None = None,
) -> JSONDict:
    """
    Write ``data`` to a local file, creating parent directories as needed.

    Parameters
    ----------
    data : JSONData
        Data to write.
    file_path : StrPath
        Destination file path.
    file_format : FileFormat | str | None, optional
        Output format. When ``None``, ``File`` is constructed without a
        format and the reported format is derived from the filename
        extension (JSON when there is no extension).

    Returns
    -------
    JSONDict
        Dictionary with ``status``, ``message``, and ``records`` keys, where
        ``records`` is the value returned by the file writer.
    """
    path = Path(file_path)
    path.parent.mkdir(parents=True, exist_ok=True)

    if file_format is not None:
        fmt = coerce_file_format(file_format)
        records = File(path, fmt).write(data)
    else:
        # Write first, then work out which format the extension implies so
        # the status message below can be chosen.
        records = File(path).write(data)
        suffix = path.suffix.lstrip('.').lower()
        fmt = FileFormat.JSON if not suffix else coerce_file_format(suffix)

    # A CSV write that produced zero records gets a dedicated message.
    nothing_written = fmt is FileFormat.CSV and records == 0
    if nothing_written:
        message = 'No data to write'
    else:
        message = f'Data loaded to {path}'

    return {
        'status': 'success',
        'message': message,
        'records': records,
    }
172
+
173
+
174
+ # -- Database Loading (Placeholder) -- #
175
+
176
+
177
def load_to_database(
    data: JSONData,
    connection_string: str,
) -> JSONDict:
    """
    Report a database load without performing one (placeholder).

    Notes
    -----
    Nothing is written to any database. The function only counts the
    records in ``data`` and returns a ``not_implemented`` result.

    Parameters
    ----------
    data : JSONData
        Data that would be loaded.
    connection_string : str
        Database connection string; echoed back unchanged in the result.

    Returns
    -------
    JSONDict
        Dictionary with ``status``, ``message``, ``connection_string``,
        ``records``, and ``note`` keys.
    """
    # NOTE(review): the connection string is returned verbatim; if it can
    # carry credentials, callers should avoid logging this result as-is.
    result: JSONDict = {
        'status': 'not_implemented',
        'message': 'Database loading not yet implemented',
        'connection_string': connection_string,
        'records': count_records(data),
        'note': 'Install database-specific drivers to enable this feature',
    }
    return result
210
+
211
+
212
+ # -- REST API Loading -- #
213
+
214
+
215
def load_to_api(
    data: JSONData,
    url: str,
    method: HttpMethod | str,
    **kwargs: Any,
) -> JSONDict:
    """
    Load data to a REST API.

    Parameters
    ----------
    data : JSONData
        Data to send as the JSON request body.
    url : str
        API endpoint URL.
    method : HttpMethod | str
        HTTP method to use.
    **kwargs : Any
        Extra arguments forwarded to the request call. Two keys are consumed
        here and not forwarded as-is:

        - ``timeout``: request timeout, defaulting to ``10.0`` when absent.
        - ``session``: optional session-like object used instead of the
          ``requests`` module; it must expose a callable attribute named
          after the HTTP method's value.

    Returns
    -------
    JSONDict
        Dictionary with ``status``, ``status_code``, ``message``,
        ``response`` (decoded JSON when available, otherwise the response
        text), ``records``, and ``method`` keys.

    Raises
    ------
    TypeError
        If the requester does not supply a callable for the HTTP method.

    Notes
    -----
    ``response.raise_for_status()`` is called before the body is decoded, so
    error responses propagate whatever exception that call raises.
    """
    http_method = coerce_http_method(method)

    # Apply a conservative timeout to guard against hanging requests.
    timeout = kwargs.pop('timeout', 10.0)
    session = kwargs.pop('session', None)
    # Test against None rather than truthiness: a session-like object that
    # happens to evaluate falsy must still be honored, not silently replaced
    # by the module-level ``requests`` API.
    requester = requests if session is None else session

    request_callable = getattr(requester, http_method.value, None)
    if not callable(request_callable):
        raise TypeError(
            'Session object must supply a '
            f'callable "{http_method.value}" method',
        )

    response = request_callable(url, json=data, timeout=timeout, **kwargs)
    response.raise_for_status()

    # Try JSON first, fall back to text.
    try:
        payload: Any = response.json()
    except ValueError:
        payload = response.text

    return {
        'status': 'success',
        'status_code': response.status_code,
        'message': f'Data loaded to {url}',
        'response': payload,
        'records': count_records(data),
        'method': http_method.value.upper(),
    }
276
+
277
+
278
+ # -- Orchestration -- #
279
+
280
+
281
def load(
    source: StrPath | JSONData,
    target_type: DataConnectorType | str,
    target: StrPath,
    file_format: FileFormat | str | None = None,
    method: HttpMethod | str | None = None,
    **kwargs: Any,
) -> JSONData:
    """
    Resolve ``source`` into data and dispatch it to the matching loader.

    Parameters
    ----------
    source : StrPath | JSONData
        Data source to load.
    target_type : DataConnectorType | str
        Kind of target: file, database, or API.
    target : StrPath
        Target location (file path, connection string, or API URL). It is
        converted with ``str`` for database and API targets.
    file_format : FileFormat | str | None, optional
        File format for file targets; passed through unchanged, so ``None``
        leaves inference to the file loader.
    method : HttpMethod | str | None, optional
        HTTP method for API targets. ``HttpMethod.POST`` is used when
        omitted.
    **kwargs : Any
        Additional arguments forwarded to the API loader only; file and
        database targets do not receive them.

    Returns
    -------
    JSONData
        Result dictionary produced by the selected loader.

    Raises
    ------
    ValueError
        If the coerced `target_type` matches none of the supported values.
    """
    data = load_data(source)
    connector = coerce_data_connector_type(target_type)

    if connector == DataConnectorType.FILE:
        return load_to_file(data, target, file_format)

    if connector == DataConnectorType.DATABASE:
        return load_to_database(data, str(target))

    if connector == DataConnectorType.API:
        api_method = HttpMethod.POST if method is None else method
        return load_to_api(
            data,
            str(target),
            method=api_method,
            **kwargs,
        )

    # Coercion above is expected to reject unknown values already; this is
    # an explicit last-resort guard.
    raise ValueError(f'Invalid target type: {target_type}')
etlplus/mixins.py ADDED
@@ -0,0 +1,62 @@
1
+ """
2
+ :mod:`etlplus.mixins` module.
3
+
4
+ Shared mixin utilities used across configuration and API layers.
5
+
6
+ Notes
7
+ -----
8
+ - Mixins are stateless helpers.
9
+ - ``__slots__`` prevents accidental attribute mutation at runtime.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Final
15
+
16
+ # SECTION: EXPORTS ========================================================== #
17
+
18
+
19
+ __all__ = ['BoundsWarningsMixin']
20
+
21
+
22
+ # SECTION: CLASSES ========================================================== #
23
+
24
+
25
class BoundsWarningsMixin:
    """
    Collect human-readable warnings in a list instead of raising.

    The class declares empty ``__slots__`` and holds no instance state; its
    only helper is a static method.

    Examples
    --------
    >>> warnings: list[str] = []
    >>> BoundsWarningsMixin._warn_if(True, 'oops', warnings)
    >>> warnings
    ['oops']
    """

    __slots__ = ()

    # Unbound ``list.append``; invoked as ``_APPEND(bucket, message)``.
    _APPEND: Final = list.append

    # -- Static Methods -- #

    @staticmethod
    def _warn_if(
        condition: bool,
        message: str,
        bucket: list[str],
    ) -> None:
        """
        Record ``message`` in ``bucket`` when ``condition`` is truthy.

        Parameters
        ----------
        condition : bool
            Whether the warning should be recorded.
        message : str
            Warning text to append.
        bucket : list[str]
            List that accumulates warnings; mutated in place.
        """
        if not condition:
            return
        BoundsWarningsMixin._APPEND(bucket, message)
etlplus/py.typed ADDED
File without changes