etlplus-0.9.1-py3-none-any.whl → etlplus-0.9.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. etlplus/README.md +37 -0
  2. etlplus/__init__.py +1 -26
  3. etlplus/api/README.md +51 -3
  4. etlplus/api/__init__.py +10 -0
  5. etlplus/api/config.py +39 -28
  6. etlplus/api/endpoint_client.py +3 -3
  7. etlplus/api/enums.py +51 -0
  8. etlplus/api/pagination/client.py +1 -1
  9. etlplus/api/rate_limiting/config.py +13 -1
  10. etlplus/api/rate_limiting/rate_limiter.py +8 -11
  11. etlplus/api/request_manager.py +11 -6
  12. etlplus/api/transport.py +14 -2
  13. etlplus/api/types.py +96 -6
  14. etlplus/{run_helpers.py → api/utils.py} +209 -153
  15. etlplus/cli/README.md +40 -0
  16. etlplus/cli/commands.py +76 -43
  17. etlplus/cli/constants.py +1 -1
  18. etlplus/cli/handlers.py +40 -12
  19. etlplus/cli/io.py +2 -2
  20. etlplus/cli/main.py +1 -1
  21. etlplus/cli/state.py +4 -7
  22. etlplus/database/README.md +48 -0
  23. etlplus/database/ddl.py +1 -1
  24. etlplus/database/engine.py +19 -3
  25. etlplus/database/orm.py +2 -0
  26. etlplus/database/schema.py +1 -1
  27. etlplus/enums.py +1 -157
  28. etlplus/file/README.md +105 -0
  29. etlplus/file/__init__.py +25 -0
  30. etlplus/file/_imports.py +141 -0
  31. etlplus/file/_io.py +160 -0
  32. etlplus/file/accdb.py +78 -0
  33. etlplus/file/arrow.py +78 -0
  34. etlplus/file/avro.py +176 -0
  35. etlplus/file/bson.py +77 -0
  36. etlplus/file/cbor.py +78 -0
  37. etlplus/file/cfg.py +79 -0
  38. etlplus/file/conf.py +80 -0
  39. etlplus/file/core.py +322 -0
  40. etlplus/file/csv.py +79 -0
  41. etlplus/file/dat.py +78 -0
  42. etlplus/file/dta.py +77 -0
  43. etlplus/file/duckdb.py +78 -0
  44. etlplus/file/enums.py +343 -0
  45. etlplus/file/feather.py +111 -0
  46. etlplus/file/fwf.py +77 -0
  47. etlplus/file/gz.py +123 -0
  48. etlplus/file/hbs.py +78 -0
  49. etlplus/file/hdf5.py +78 -0
  50. etlplus/file/ini.py +79 -0
  51. etlplus/file/ion.py +78 -0
  52. etlplus/file/jinja2.py +78 -0
  53. etlplus/file/json.py +98 -0
  54. etlplus/file/log.py +78 -0
  55. etlplus/file/mat.py +78 -0
  56. etlplus/file/mdb.py +78 -0
  57. etlplus/file/msgpack.py +78 -0
  58. etlplus/file/mustache.py +78 -0
  59. etlplus/file/nc.py +78 -0
  60. etlplus/file/ndjson.py +108 -0
  61. etlplus/file/numbers.py +75 -0
  62. etlplus/file/ods.py +79 -0
  63. etlplus/file/orc.py +111 -0
  64. etlplus/file/parquet.py +113 -0
  65. etlplus/file/pb.py +78 -0
  66. etlplus/file/pbf.py +77 -0
  67. etlplus/file/properties.py +78 -0
  68. etlplus/file/proto.py +77 -0
  69. etlplus/file/psv.py +79 -0
  70. etlplus/file/rda.py +78 -0
  71. etlplus/file/rds.py +78 -0
  72. etlplus/file/sas7bdat.py +78 -0
  73. etlplus/file/sav.py +77 -0
  74. etlplus/file/sqlite.py +78 -0
  75. etlplus/file/stub.py +84 -0
  76. etlplus/file/sylk.py +77 -0
  77. etlplus/file/tab.py +81 -0
  78. etlplus/file/toml.py +78 -0
  79. etlplus/file/tsv.py +80 -0
  80. etlplus/file/txt.py +102 -0
  81. etlplus/file/vm.py +78 -0
  82. etlplus/file/wks.py +77 -0
  83. etlplus/file/xls.py +88 -0
  84. etlplus/file/xlsm.py +79 -0
  85. etlplus/file/xlsx.py +99 -0
  86. etlplus/file/xml.py +185 -0
  87. etlplus/file/xpt.py +78 -0
  88. etlplus/file/yaml.py +95 -0
  89. etlplus/file/zip.py +175 -0
  90. etlplus/file/zsav.py +77 -0
  91. etlplus/ops/README.md +50 -0
  92. etlplus/ops/__init__.py +61 -0
  93. etlplus/{extract.py → ops/extract.py} +81 -99
  94. etlplus/{load.py → ops/load.py} +78 -101
  95. etlplus/{run.py → ops/run.py} +159 -127
  96. etlplus/{transform.py → ops/transform.py} +75 -68
  97. etlplus/{validation → ops}/utils.py +53 -17
  98. etlplus/{validate.py → ops/validate.py} +22 -12
  99. etlplus/templates/README.md +46 -0
  100. etlplus/types.py +5 -4
  101. etlplus/utils.py +136 -2
  102. etlplus/workflow/README.md +52 -0
  103. etlplus/{config → workflow}/__init__.py +10 -23
  104. etlplus/{config → workflow}/connector.py +58 -44
  105. etlplus/workflow/dag.py +105 -0
  106. etlplus/{config → workflow}/jobs.py +105 -32
  107. etlplus/{config → workflow}/pipeline.py +59 -51
  108. etlplus/{config → workflow}/profile.py +8 -5
  109. etlplus/workflow/types.py +115 -0
  110. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
  111. etlplus-0.9.2.dist-info/RECORD +134 -0
  112. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
  113. etlplus/config/types.py +0 -204
  114. etlplus/config/utils.py +0 -120
  115. etlplus/file.py +0 -657
  116. etlplus/validation/__init__.py +0 -44
  117. etlplus-0.9.1.dist-info/RECORD +0 -65
  118. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
  119. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
  120. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
etlplus/file/xpt.py ADDED
@@ -0,0 +1,78 @@
+ """
+ :mod:`etlplus.file.xpt` module.
+
+ Helpers for reading/writing SAS Transport (XPT) files.
+
+ Notes
+ -----
+ - A SAS Transport (XPT) file is a standardized file format used to transfer
+   SAS datasets between different systems.
+ - Common cases:
+     - Sharing datasets between different SAS installations.
+     - Archiving datasets in a platform-independent format.
+     - Importing/exporting data to/from statistical software that supports XPT.
+ - Rule of thumb:
+     - If you need to work with XPT files, use this module for reading
+       and writing.
+ """
+
+ from __future__ import annotations
+
+ from pathlib import Path
+
+ from ..types import JSONData
+ from ..types import JSONList
+ from . import stub
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     'read',
+     'write',
+ ]
+
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def read(
+     path: Path,
+ ) -> JSONList:
+     """
+     Read XPT content from ``path``.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the XPT file on disk.
+
+     Returns
+     -------
+     JSONList
+         The list of dictionaries read from the XPT file.
+     """
+     return stub.read(path, format_name='XPT')
+
+
+ def write(
+     path: Path,
+     data: JSONData,
+ ) -> int:
+     """
+     Write ``data`` to XPT file at ``path`` and return record count.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the XPT file on disk.
+     data : JSONData
+         Data to write as XPT file. Should be a list of dictionaries or a
+         single dictionary.
+
+     Returns
+     -------
+     int
+         The number of rows written to the XPT file.
+     """
+     return stub.write(path, data, format_name='XPT')
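
The XPT module (like the other stub-backed formats added here, e.g. `zsav`) delegates to `etlplus.file.stub`, so the call pattern is uniform. A minimal, hypothetical sketch — `study.xpt` is a placeholder path, and depending on the release `stub.read`/`stub.write` may raise for formats that are not yet fully supported:

```python
# Hypothetical usage sketch for etlplus.file.xpt. 'study.xpt' is a
# placeholder; stub.read/stub.write perform the actual XPT handling
# and may raise if the format or its optional dependency is unavailable.
from pathlib import Path

from etlplus.file import xpt

rows = xpt.read(Path('study.xpt'))         # JSONList: list of dicts
count = xpt.write(Path('copy.xpt'), rows)  # number of rows written
print(f'round-tripped {count} rows')
```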
etlplus/file/yaml.py ADDED
@@ -0,0 +1,95 @@
+ """
+ :mod:`etlplus.file.yaml` module.
+
+ Helpers for reading/writing YAML Ain't Markup Language (YAML) files.
+
+ Notes
+ -----
+ - A YAML file is a human-readable data serialization format.
+ - Common cases:
+     - Configuration files.
+     - Data exchange between languages with different data structures.
+     - Complex data storage.
+ - Rule of thumb:
+     - If the file follows the YAML specification, use this module for
+       reading and writing.
+ """
+
+ from __future__ import annotations
+
+ from pathlib import Path
+
+ from ..types import JSONData
+ from ..utils import count_records
+ from ._imports import get_yaml
+ from ._io import coerce_record_payload
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     'read',
+     'write',
+ ]
+
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def read(
+     path: Path,
+ ) -> JSONData:
+     """
+     Read YAML content from ``path``.
+
+     Validates that the YAML root is a dict or a list of dicts.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the YAML file on disk.
+
+     Returns
+     -------
+     JSONData
+         The structured data read from the YAML file.
+
+     Raises
+     ------
+     TypeError
+         If the YAML root is not an object or an array of objects.
+     """
+     with path.open('r', encoding='utf-8') as handle:
+         loaded = get_yaml().safe_load(handle)
+
+     return coerce_record_payload(loaded, format_name='YAML')
+
+
+ def write(
+     path: Path,
+     data: JSONData,
+ ) -> int:
+     """
+     Write ``data`` as YAML to ``path`` and return record count.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the YAML file on disk.
+     data : JSONData
+         Data to write as YAML.
+
+     Returns
+     -------
+     int
+         The number of records written.
+     """
+     with path.open('w', encoding='utf-8') as handle:
+         get_yaml().safe_dump(
+             data,
+             handle,
+             sort_keys=False,
+             allow_unicode=True,
+             default_flow_style=False,
+         )
+     return count_records(data)
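
Since `read` validates the YAML root via `coerce_record_payload` and `write` reports a count via `count_records`, a round-trip is easy to sanity-check. A sketch, assuming PyYAML is installed (it is resolved lazily through `get_yaml`) and that `count_records` returns the list length for list payloads:

```python
# Sketch: round-trip records through etlplus.file.yaml.
# The module is imported under an alias to avoid shadowing PyYAML itself.
from pathlib import Path

from etlplus.file import yaml as yaml_file

path = Path('people.yaml')
written = yaml_file.write(path, [{'name': 'Alice'}, {'name': 'Bob'}])
records = yaml_file.read(path)  # raises TypeError for non-record roots
assert written == 2
assert records == [{'name': 'Alice'}, {'name': 'Bob'}]
```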
etlplus/file/zip.py ADDED
@@ -0,0 +1,175 @@
+ """
+ :mod:`etlplus.file.zip` module.
+
+ Helpers for reading/writing ZIP files.
+ """
+
+ from __future__ import annotations
+
+ import tempfile
+ import zipfile
+ from pathlib import Path
+
+ from ..types import JSONData
+ from ..types import JSONDict
+ from .enums import CompressionFormat
+ from .enums import FileFormat
+ from .enums import infer_file_format_and_compression
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     'read',
+     'write',
+ ]
+
+
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+ def _resolve_format(
+     filename: str,
+ ) -> FileFormat:
+     """
+     Resolve the inner file format from a filename.
+
+     Parameters
+     ----------
+     filename : str
+         The name of the file inside the ZIP archive.
+
+     Returns
+     -------
+     FileFormat
+         The inferred inner file format.
+
+     Raises
+     ------
+     ValueError
+         If the file format cannot be inferred from the filename.
+     """
+     fmt, compression = infer_file_format_and_compression(filename)
+     if compression is not None and compression is not CompressionFormat.ZIP:
+         raise ValueError(f'Unexpected compression in archive: {filename}')
+     if fmt is None:
+         raise ValueError(
+             f'Cannot infer file format from compressed file {filename!r}',
+         )
+     return fmt
+
+
+ def _extract_payload(
+     entry: zipfile.ZipInfo,
+     archive: zipfile.ZipFile,
+ ) -> bytes:
+     """
+     Extract an archive entry into memory.
+
+     Parameters
+     ----------
+     entry : zipfile.ZipInfo
+         The ZIP archive entry.
+     archive : zipfile.ZipFile
+         The opened ZIP archive.
+
+     Returns
+     -------
+     bytes
+         The raw payload.
+     """
+     with archive.open(entry, 'r') as handle:
+         return handle.read()
+
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def read(
+     path: Path,
+ ) -> JSONData:
+     """
+     Read ZIP content from ``path`` and parse the inner payload(s).
+
+     Parameters
+     ----------
+     path : Path
+         Path to the ZIP file on disk.
+
+     Returns
+     -------
+     JSONData
+         Parsed payload.
+
+     Raises
+     ------
+     ValueError
+         If the ZIP archive is empty.
+     """
+     with zipfile.ZipFile(path, 'r') as archive:
+         entries = [entry for entry in archive.infolist() if not entry.is_dir()]
+         if not entries:
+             raise ValueError(f'ZIP archive is empty: {path}')
+
+         if len(entries) == 1:
+             entry = entries[0]
+             fmt = _resolve_format(entry.filename)
+             payload = _extract_payload(entry, archive)
+             with tempfile.TemporaryDirectory() as tmpdir:
+                 tmp_path = Path(tmpdir) / Path(entry.filename).name
+                 tmp_path.write_bytes(payload)
+                 from .core import File
+
+                 return File(tmp_path, fmt).read()
+
+         results: JSONDict = {}
+         for entry in entries:
+             fmt = _resolve_format(entry.filename)
+             payload = _extract_payload(entry, archive)
+             with tempfile.TemporaryDirectory() as tmpdir:
+                 tmp_path = Path(tmpdir) / Path(entry.filename).name
+                 tmp_path.write_bytes(payload)
+                 from .core import File
+
+                 results[entry.filename] = File(tmp_path, fmt).read()
+         return results
+
+
+ def write(
+     path: Path,
+     data: JSONData,
+ ) -> int:
+     """
+     Write ``data`` to ZIP at ``path`` and return record count.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the ZIP file on disk.
+     data : JSONData
+         Data to write.
+
+     Returns
+     -------
+     int
+         Number of records written.
+     """
+     fmt = _resolve_format(path.name)
+     inner_name = Path(path.name).with_suffix('').name
+
+     with tempfile.TemporaryDirectory() as tmpdir:
+         tmp_path = Path(tmpdir) / inner_name
+         from .core import File
+
+         count = File(tmp_path, fmt).write(data)
+         payload = tmp_path.read_bytes()
+
+     path.parent.mkdir(parents=True, exist_ok=True)
+     with zipfile.ZipFile(
+         path,
+         'w',
+         compression=zipfile.ZIP_DEFLATED,
+     ) as archive:
+         archive.writestr(inner_name, payload)
+
+     return count
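
Two details worth noting in this module: the deferred `from .core import File` imports (since `core.File` dispatches back to these format modules, importing it lazily presumably sidesteps a circular import), and the fact that the inner format is inferred from the archive name itself, so `data.json.zip` holds a single `data.json` entry. A hedged round-trip sketch under those assumptions:

```python
# Sketch: write records into a ZIP archive and parse them back out.
# 'out/data.json.zip' is a placeholder; the '.json' part tells write()
# which inner format to produce, and read() re-parses the single entry.
from pathlib import Path

from etlplus.file import zip as zip_file

archive = Path('out/data.json.zip')
count = zip_file.write(archive, [{'id': 1}, {'id': 2}])
payload = zip_file.read(archive)
assert count == 2
assert payload == [{'id': 1}, {'id': 2}]
```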
etlplus/file/zsav.py ADDED
@@ -0,0 +1,77 @@
+ """
+ :mod:`etlplus.file.zsav` module.
+
+ Helpers for reading/writing compressed SPSS (ZSAV) data files.
+
+ Notes
+ -----
+ - A ZSAV file is a compressed binary file format used by SPSS to store
+   datasets, including variables, labels, and data types.
+ - Common cases:
+     - Reading compressed data for analysis in Python.
+     - Writing processed data back to compressed SPSS format.
+ - Rule of thumb:
+     - If you need to work with compressed SPSS data files, use this module for
+       reading and writing.
+ """
+
+ from __future__ import annotations
+
+ from pathlib import Path
+
+ from ..types import JSONData
+ from ..types import JSONList
+ from . import stub
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     'read',
+     'write',
+ ]
+
+
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ def read(
+     path: Path,
+ ) -> JSONList:
+     """
+     Read ZSAV content from ``path``.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the ZSAV file on disk.
+
+     Returns
+     -------
+     JSONList
+         The list of dictionaries read from the ZSAV file.
+     """
+     return stub.read(path, format_name='ZSAV')
+
+
+ def write(
+     path: Path,
+     data: JSONData,
+ ) -> int:
+     """
+     Write ``data`` to ZSAV file at ``path`` and return record count.
+
+     Parameters
+     ----------
+     path : Path
+         Path to the ZSAV file on disk.
+     data : JSONData
+         Data to write as ZSAV file. Should be a list of dictionaries or a
+         single dictionary.
+
+     Returns
+     -------
+     int
+         The number of rows written to the ZSAV file.
+     """
+     return stub.write(path, data, format_name='ZSAV')
etlplus/ops/README.md ADDED
@@ -0,0 +1,50 @@
+ # etlplus.ops subpackage
+
+ Documentation for the `etlplus.ops` subpackage: data validation utilities and helpers.
+
+ - Provides flexible data validation for ETL pipelines
+ - Supports type checking, required fields, and custom rules
+ - Includes utilities for rule definition and validation logic
+
+ Back to project overview: see the top-level [README](../../README.md).
+
+ - [etlplus.ops subpackage](#etlplusops-subpackage)
+   - [Validation Features](#validation-features)
+   - [Defining Validation Rules](#defining-validation-rules)
+   - [Example: Validating Data](#example-validating-data)
+   - [See Also](#see-also)
+
+ ## Validation Features
+
+ - Type checking (string, number, boolean, etc.)
+ - Required/optional fields
+ - Enum and pattern validation
+ - Custom rule support
+
+ ## Defining Validation Rules
+
+ Validation rules are defined as dictionaries specifying field types, requirements, and constraints:
+
+ ```python
+ rules = {
+     "name": {"type": "string", "required": True},
+     "age": {"type": "number", "min": 0, "max": 120},
+ }
+ ```
+
+ ## Example: Validating Data
+
+ ```python
+ from etlplus.ops import validate
+
+ result = validate({"name": "Alice", "age": 30}, rules)
+ if result["valid"]:
+     print("Data is valid!")
+ else:
+     print(result["errors"])
+ ```
+
+ ## See Also
+
+ - Top-level CLI and library usage in the main [README](../../README.md)
+ - Validation utilities in [utils.py](utils.py)
etlplus/ops/__init__.py ADDED
@@ -0,0 +1,61 @@
+ """
+ :mod:`etlplus.ops` package.
+
+ Data operations helpers.
+
+ Importing :mod:`etlplus.ops` exposes the coarse-grained helpers most users care
+ about: ``extract``, ``transform``, ``load``, ``validate``, ``run``, and
+ ``run_pipeline``. Each helper delegates to the richer modules under
+ ``etlplus.ops.*`` while presenting a compact public API surface. Conditional
+ validation orchestration is available via
+ :func:`etlplus.ops.utils.maybe_validate`. The legacy compatibility module
+ :mod:`etlplus.validation` is deprecated in favor of this package.
+
+ Examples
+ --------
+ >>> from etlplus.ops import extract, transform
+ >>> raw = extract('file', 'input.json')
+ >>> curated = transform(raw, {'select': ['id', 'name']})
+
+ >>> from etlplus.ops.utils import maybe_validate
+ >>> payload = {'name': 'Alice'}
+ >>> rules = {'required': ['name']}
+ >>> def validator(data, config):
+ ...     missing = [field for field in config['required'] if field not in data]
+ ...     return {'valid': not missing, 'errors': missing, 'data': data}
+ >>> maybe_validate(
+ ...     payload,
+ ...     when='both',
+ ...     enabled=True,
+ ...     rules=rules,
+ ...     phase='before_transform',
+ ...     severity='warn',
+ ...     validate_fn=validator,
+ ...     print_json_fn=lambda message: message,
+ ... )
+ {'name': 'Alice'}
+
+ See Also
+ --------
+ :mod:`etlplus.ops.run`
+ :mod:`etlplus.ops.utils`
+ """
+
+ from .extract import extract
+ from .load import load
+ from .run import run
+ from .run import run_pipeline
+ from .transform import transform
+ from .validate import validate
+
+ # SECTION: EXPORTS ========================================================== #
+
+
+ __all__ = [
+     'extract',
+     'load',
+     'run',
+     'run_pipeline',
+     'transform',
+     'validate',
+ ]