etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/validate.py ADDED
@@ -0,0 +1,493 @@
1
+ """
2
+ :mod:`etlplus.validate` module.
3
+
4
+ Validate dicts and lists of dicts using simple, schema-like rules.
5
+
6
+ This module provides a very small validation primitive that is intentionally
7
+ runtime-friendly (no heavy schema engines) and pairs with ETLPlus' JSON-like
8
+ types. It focuses on clear error messages and predictable behavior.
9
+
10
+ Highlights
11
+ ----------
12
+ - Centralized type map and helpers for clarity and reuse.
13
+ - Consistent error wording; field and item paths like ``[2].email``.
14
+ - Small, focused public API with ``load_data``, ``validate_field``,
15
+ ``validate``.
16
+
17
+ Examples
18
+ --------
19
+ >>> rules = {
20
+ ... 'name': {'required': True, 'type': 'string', 'minLength': 1},
21
+ ... 'age': {'type': 'integer', 'min': 0},
22
+ ... }
23
+ >>> data = {'name': 'Ada', 'age': 28}
24
+ >>> validate(data, rules)['valid']
25
+ True
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import re
31
+ from collections.abc import Mapping
32
+ from typing import Any
33
+ from typing import Final
34
+ from typing import Literal
35
+ from typing import TypedDict
36
+
37
+ from .load import load_data
38
+ from .types import JSONData
39
+ from .types import Record
40
+ from .types import StrAnyMap
41
+ from .types import StrPath
42
+
43
+ # SECTION: EXPORTS ========================================================== #
44
+
45
+
46
# Names exported by ``from etlplus.validate import *``.
__all__ = [
    'FieldRules',
    'FieldValidation',
    'Validation',
    'validate_field',
    'validate',
]
53
+
54
+
55
+ # SECTION: CONSTANTS ======================================================== #
56
+
57
+
58
# Map the logical JSON-like type names to the Python runtime type(s) that are
# handed to ``isinstance`` when a value's type is checked.
# NOTE: ``bool`` is a subclass of ``int`` in Python, so a bare ``isinstance``
# test against the 'integer' or 'number' entries also accepts ``True``/``False``.
TYPE_MAP: Final[dict[str, type | tuple[type, ...]]] = {
    'string': str,
    'number': (int, float),
    'integer': int,
    'boolean': bool,
    'array': list,
    'object': dict,
}
67
+
68
+
69
+ # SECTION: CLASSES ========================================================== #
70
+
71
+
72
class FieldRules(TypedDict, total=False):
    """
    Constraints that may be placed on one field.

    Every key is optional; leaving a key out means that constraint is not
    applied.

    Attributes
    ----------
    required : bool
        When true, a ``None`` (missing) value is reported as an error.
    type : Literal['string', 'number', 'integer', 'boolean', 'array', 'object']
        Logical JSON-like type name the value is expected to match.
    min : float
        Smallest accepted numeric value (inclusive).
    max : float
        Largest accepted numeric value (inclusive).
    minLength : int
        Smallest accepted string length (inclusive).
    maxLength : int
        Largest accepted string length (inclusive).
    pattern : str
        Regular expression that must be found somewhere in a string value.
    enum : list[Any]
        Exhaustive list of accepted values.
    """

    required: bool
    type: Literal[
        'string', 'number', 'integer',
        'boolean', 'array', 'object',
    ]
    min: float
    max: float
    minLength: int
    maxLength: int
    pattern: str
    enum: list[Any]
95
+
96
class FieldValidation(TypedDict):
    """
    Outcome of checking one value against its field rules.

    Attributes
    ----------
    valid : bool
        ``True`` when no rule was violated.
    errors : list[str]
        Human-readable messages, one per violation; empty when ``valid``.
    """

    valid: bool
    errors: list[str]
110
+
111
+
112
class Validation(TypedDict):
    """
    Outcome of validating a whole record or list of records.

    Attributes
    ----------
    valid : bool
        ``True`` when no error was recorded for any field or item.
    errors : list[str]
        Flat list of messages, each prefixed with the field or item path.
    field_errors : dict[str, list[str]]
        Messages grouped by field key (for list input the key carries the
        item index, e.g. ``[2].email``).
    data : JSONData | None
        The loaded data that was checked; ``None`` when loading failed.
    """

    valid: bool
    errors: list[str]
    field_errors: dict[str, list[str]]
    data: JSONData | None
132
+
133
+
134
+ # SECTION: TYPE ALIASES ===================================================== #
135
+
136
+
137
# Rules for a whole record: maps each field name to that field's rule set.
type RulesMap = Mapping[str, FieldRules]
138
+
139
+
140
+ # SECTION: INTERNAL FUNCTIONS ============================================== #
141
+
142
+
143
+ def _coerce_rule(
144
+ rules: StrAnyMap,
145
+ key: str,
146
+ coercer: type[int] | type[float],
147
+ type_desc: str,
148
+ errors: list[str],
149
+ ) -> int | float | None:
150
+ """
151
+ Extract and coerce a rule value, recording an error.
152
+
153
+ Returns None when the key is absent.
154
+
155
+ Parameters
156
+ ----------
157
+ rules : StrAnyMap
158
+ The rules dictionary.
159
+ key : str
160
+ The key to extract.
161
+ coercer : type[int] | type[float]
162
+ The type to coerce to (int or float).
163
+ type_desc : str
164
+ Description of the expected type for error messages.
165
+ errors : list[str]
166
+ List to append error messages to.
167
+
168
+ Returns
169
+ -------
170
+ int | float | None
171
+ The coerced value, or None if the key is absent.
172
+ """
173
+ if key not in rules:
174
+ return None
175
+
176
+ try:
177
+ val = rules.get(key)
178
+ if val is None:
179
+ return None
180
+ # Calling the type as a coercer is fine at runtime
181
+ return coercer(val) # type: ignore[call-arg]
182
+ except (TypeError, ValueError):
183
+ errors.append(f"Rule '{key}' must be {type_desc}")
184
+ return None
185
+
186
+
187
def _get_int_rule(
    rules: StrAnyMap,
    key: str,
    errors: list[str],
) -> int | None:
    """
    Read an integer-valued rule, noting an error if it cannot be coerced.

    Parameters
    ----------
    rules : StrAnyMap
        The rules dictionary.
    key : str
        Name of the rule to read.
    errors : list[str]
        Collector that receives a message when coercion fails.

    Returns
    -------
    int | None
        The rule as an ``int``, or ``None`` when no usable value is present.
    """
    value = _coerce_rule(rules, key, int, 'an integer', errors)
    if value is None:
        return None

    return int(value)
214
+
215
+
216
def _get_numeric_rule(
    rules: StrAnyMap,
    key: str,
    errors: list[str],
) -> float | None:
    """
    Read a numeric rule as a float, noting an error if it cannot be coerced.

    Parameters
    ----------
    rules : StrAnyMap
        The rules dictionary.
    key : str
        Name of the rule to read.
    errors : list[str]
        Collector that receives a message when coercion fails.

    Returns
    -------
    float | None
        The rule as a ``float``, or ``None`` when no usable value is present.
    """
    value = _coerce_rule(rules, key, float, 'numeric', errors)
    if value is None:
        return None

    return float(value)
243
+
244
+
245
+ def _is_number(value: Any) -> bool:
246
+ """
247
+ Return True if value is an int/float but not a bool.
248
+
249
+ Parameters
250
+ ----------
251
+ value : Any
252
+ Value to test.
253
+
254
+ Returns
255
+ -------
256
+ bool
257
+ ``True`` if value is a number, else ``False``.
258
+ """
259
+ return isinstance(value, (int, float)) and not isinstance(value, bool)
260
+
261
+
262
def _type_matches(
    value: Any,
    expected: str,
) -> bool:
    """
    Check if a value matches an expected JSON-like type.

    Booleans only ever match ``'boolean'``: although ``bool`` is a subclass
    of ``int`` in Python, ``True``/``False`` are not accepted as
    ``'integer'`` or ``'number'``.

    Parameters
    ----------
    value : Any
        Value to test.
    expected : str
        Expected logical type name ('string', 'number', 'integer', 'boolean',
        'array', 'object').

    Returns
    -------
    bool
        ``True`` if the value matches the expected type; ``False`` if not,
        or if ``expected`` is not a known type name.
    """
    py_type = TYPE_MAP.get(expected)
    if py_type is None:
        return False

    # isinstance(True, int) is True, so bools need an explicit carve-out to
    # keep them from satisfying the numeric types.
    if isinstance(value, bool):
        return expected == 'boolean'

    return isinstance(value, py_type)
287
+
288
+
289
def _validate_record(
    record: Record,
    rules: RulesMap,
    idx: int | None = None,
) -> tuple[list[str], dict[str, list[str]]]:
    """
    Check every ruled field of one record and collect the failures.

    Parameters
    ----------
    record : Record
        The record to validate.
    rules : RulesMap
        Rules keyed by field name; only these fields are inspected.
    idx : int | None, optional
        Position of the record in its list. When given, field keys take the
        form ``"[i].field"``; otherwise the bare field name is used.

    Returns
    -------
    tuple[list[str], dict[str, list[str]]]
        ``(errors, field_errors)``: the flat messages, each prefixed with
        its field key, and the same messages grouped per field key.
    """
    messages: list[str] = []
    by_field: dict[str, list[str]] = {}

    for name, spec in rules.items():
        # A field missing from the record is validated as ``None``.
        outcome = validate_field(record.get(name), spec)
        if not outcome['valid']:
            label = name if idx is None else f'[{idx}].{name}'
            by_field[label] = outcome['errors']
            messages += [f'{label}: {text}' for text in outcome['errors']]

    return messages, by_field
328
+
329
+
330
+ # SECTION: FUNCTIONS ======================================================== #
331
+
332
+
333
def validate_field(
    value: Any,
    rules: StrAnyMap | FieldRules,
) -> FieldValidation:
    """
    Validate a single value against field rules.

    Parameters
    ----------
    value : Any
        The value to validate. ``None`` is treated as missing.
    rules : StrAnyMap | FieldRules
        Rule dictionary. Supported keys include ``required``, ``type``,
        ``min``, ``max``, ``minLength``, ``maxLength``, ``pattern``, and
        ``enum``.

    Returns
    -------
    FieldValidation
        Result with ``valid`` and a list of ``errors``.

    Notes
    -----
    If ``required`` is ``False`` or absent and the value is ``None``, the
    field is considered valid without further checks.

    A failed type check does not stop validation; the remaining checks that
    apply to the value's actual Python type still run, so several errors may
    be reported at once. ``min``/``max`` apply only to non-bool numbers and
    ``minLength``/``maxLength``/``pattern`` only to strings.
    """
    errors: list[str] = []

    # ``None`` means "missing": an error only for required fields, otherwise
    # valid with nothing further to check.
    if value is None:
        if bool(rules.get('required', False)):
            errors.append('Field is required')
            return {'valid': False, 'errors': errors}
        return {'valid': True, 'errors': []}

    # Type check.
    expected_type = rules.get('type')
    if isinstance(expected_type, str):
        if not _type_matches(value, expected_type):
            errors.append(
                f'Expected type {expected_type}, got {type(value).__name__}',
            )

    # Numeric range checks.
    if _is_number(value):
        # Compare the value directly instead of via float(value): Python
        # compares int and float exactly, whereas float() raises
        # OverflowError for very large ints and loses precision above 2**53.
        min_v = _get_numeric_rule(rules, 'min', errors)
        if min_v is not None and value < min_v:
            errors.append(f'Value {value} is less than minimum {min_v}')
        max_v = _get_numeric_rule(rules, 'max', errors)
        if max_v is not None and value > max_v:
            errors.append(f'Value {value} is greater than maximum {max_v}')

    # String checks.
    if isinstance(value, str):
        min_len = _get_int_rule(rules, 'minLength', errors)
        if min_len is not None and len(value) < min_len:
            errors.append(
                f'Length {len(value)} is less than minimum {min_len}',
            )
        max_len = _get_int_rule(rules, 'maxLength', errors)
        if max_len is not None and len(value) > max_len:
            errors.append(
                f'Length {len(value)} is greater than maximum {max_len}',
            )
        if 'pattern' in rules:
            pattern = rules.get('pattern')
            if isinstance(pattern, str):
                try:
                    regex = re.compile(pattern)
                except re.error as e:
                    errors.append(f'Rule "pattern" is not a valid regex: {e}')
                else:
                    # ``search`` matches anywhere in the string; anchor the
                    # pattern with ^...$ to require a full match.
                    if not regex.search(value):
                        errors.append(
                            f'Value does not match pattern {pattern}',
                        )
            else:
                errors.append("Rule 'pattern' must be a string")

    # Enum check.
    if 'enum' in rules:
        enum_vals = rules.get('enum')
        if isinstance(enum_vals, list):
            if value not in enum_vals:
                errors.append(
                    f'Value {value} not in allowed values {enum_vals}',
                )
        else:
            errors.append("Rule 'enum' must be a list")

    return {'valid': len(errors) == 0, 'errors': errors}
426
+
427
+
428
def validate(
    source: StrPath | JSONData,
    rules: RulesMap | None = None,
) -> Validation:
    """
    Load data from a source and check it against per-field rules.

    Parameters
    ----------
    source : StrPath | JSONData
        Data source to validate; it is passed to ``load_data``.
    rules : RulesMap | None, optional
        Field rules keyed by field name. When ``None`` or empty, no checks
        run and the loaded data is reported as valid.

    Returns
    -------
    Validation
        Structured result with keys ``valid``, ``errors``, ``field_errors``,
        and ``data``. If ``load_data`` raises ``ValueError``, ``data`` is
        ``None`` and the failure is reported in ``errors``.
    """
    try:
        data = load_data(source)
    except ValueError as exc:
        return {
            'valid': False,
            'errors': [f'Failed to load data: {exc}'],
            'field_errors': {},
            'data': None,
        }

    errors: list[str] = []
    field_errors: dict[str, list[str]] = {}

    if rules and isinstance(data, dict):
        # Single record: field keys carry no index prefix.
        errors, field_errors = _validate_record(data, rules)
    elif rules and isinstance(data, list):
        for pos, entry in enumerate(data):
            if isinstance(entry, dict):
                item_errors, item_field_errors = _validate_record(
                    entry, rules, pos,
                )
                errors.extend(item_errors)
                field_errors.update(item_field_errors)
            else:
                # Non-dict items cannot be checked field by field; flag the
                # item itself under its index.
                label = f'[{pos}]'
                text = 'Item is not an object (expected dict)'
                errors.append(f'{label}: {text}')
                field_errors.setdefault(label, []).append(text)

    return {
        'valid': not errors,
        'errors': errors,
        'field_errors': field_errors,
        'data': data,
    }
etlplus/validation/__init__.py ADDED
@@ -0,0 +1,44 @@
1
+ """
2
+ :mod:`etlplus.validation` package.
3
+
4
+ Conditional validation utilities used across the ETL pipeline.
5
+
6
+ The package intentionally exposes a single helper, :func:`maybe_validate`, to
7
+ keep the public API compact and predictable. Supporting logic lives in
8
+ ``etlplus.validation.utils`` where validation configuration is normalized,
9
+ reducing the likelihood of phase/option mismatches.
10
+
11
+ Examples
12
+ --------
13
+ >>> from etlplus.validation import maybe_validate
14
+ >>> payload = {'name': 'Alice'}
15
+ >>> rules = {'required': ['name']}
16
+ >>> def validator(data, config):
17
+ ... missing = [field for field in config['required'] if field not in data]
18
+ ... return {'valid': not missing, 'errors': missing, 'data': data}
19
+ >>> maybe_validate(
20
+ ... payload,
21
+ ... when='both',
22
+ ... enabled=True,
23
+ ... rules=rules,
24
+ ... phase='before_transform',
25
+ ... severity='warn',
26
+ ... validate_fn=validator,
27
+ ... print_json_fn=lambda message: message,
28
+ ... )
29
+ {'name': 'Alice'}
30
+
31
+ See Also
32
+ --------
33
+ - :mod:`etlplus.validation.utils` for implementation details and helper
34
+ utilities.
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ from .utils import maybe_validate
40
+
41
+ # SECTION: EXPORTS ========================================================== #
42
+
43
+
44
# The package's public surface is the single helper ``maybe_validate``.
__all__ = ['maybe_validate']