etlplus 0.12.12__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. etlplus/README.md +2 -2
  2. etlplus/__init__.py +1 -26
  3. etlplus/api/README.md +2 -2
  4. etlplus/api/__init__.py +10 -0
  5. etlplus/api/config.py +36 -20
  6. etlplus/api/endpoint_client.py +3 -3
  7. etlplus/api/enums.py +51 -0
  8. etlplus/api/pagination/client.py +1 -1
  9. etlplus/api/rate_limiting/config.py +13 -1
  10. etlplus/api/rate_limiting/rate_limiter.py +8 -11
  11. etlplus/api/request_manager.py +11 -6
  12. etlplus/api/transport.py +14 -2
  13. etlplus/api/types.py +7 -6
  14. etlplus/{run_helpers.py → api/utils.py} +209 -153
  15. etlplus/cli/README.md +2 -2
  16. etlplus/cli/handlers.py +19 -9
  17. etlplus/config/README.md +31 -33
  18. etlplus/config/__init__.py +9 -32
  19. etlplus/config/types.py +0 -64
  20. etlplus/dag.py +103 -0
  21. etlplus/database/README.md +2 -2
  22. etlplus/enums.py +0 -32
  23. etlplus/file/README.md +2 -2
  24. etlplus/file/enums.py +1 -1
  25. etlplus/{validation → ops}/README.md +2 -2
  26. etlplus/ops/__init__.py +61 -0
  27. etlplus/{extract.py → ops/extract.py} +78 -94
  28. etlplus/{load.py → ops/load.py} +73 -93
  29. etlplus/{run.py → ops/run.py} +153 -118
  30. etlplus/{transform.py → ops/transform.py} +75 -68
  31. etlplus/{validation → ops}/utils.py +80 -15
  32. etlplus/{validate.py → ops/validate.py} +19 -9
  33. etlplus/templates/README.md +2 -2
  34. etlplus/types.py +2 -2
  35. etlplus/workflow/README.md +52 -0
  36. etlplus/workflow/__init__.py +43 -0
  37. etlplus/{config → workflow}/connector.py +17 -16
  38. etlplus/workflow/dag.py +105 -0
  39. etlplus/{config → workflow}/jobs.py +31 -15
  40. etlplus/{config → workflow}/pipeline.py +11 -3
  41. etlplus/{config → workflow}/profile.py +8 -5
  42. etlplus/workflow/types.py +115 -0
  43. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/METADATA +91 -60
  44. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/RECORD +49 -43
  45. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/WHEEL +1 -1
  46. etlplus/validation/__init__.py +0 -44
  47. etlplus/{config → workflow}/utils.py +0 -0
  48. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/entry_points.txt +0 -0
  49. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/licenses/LICENSE +0 -0
  50. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/top_level.txt +0 -0
etlplus/config/README.md CHANGED
@@ -1,52 +1,50 @@
1
- # etlplus.config subpackage
1
+ # `etlplus.config` Subpackage
2
2
 
3
- Documentation for the `etlplus.config` subpackage: configuration helpers for connectors, pipelines,
4
- jobs, and profiles.
3
+ Documentation for the `etlplus.config` subpackage: type definitions and config shape helpers for
4
+ ETLPlus.
5
5
 
6
- - Provides classes and utilities for managing ETL pipeline configuration
7
- - Supports YAML/JSON config loading and validation
8
- - Includes helpers for connectors, jobs, pipelines, and profiles
9
- - Exposes type definitions for config schemas
6
+ - Exposes TypedDict-based config schemas for API profiles and endpoints
7
+ - Provides exported type aliases for API configuration maps
8
+ - Designed for Python 3.13 typing and editor assistance (runtime parsing lives elsewhere)
10
9
 
11
10
  Back to project overview: see the top-level [README](../../README.md).
12
11
 
13
- - [etlplus.config subpackage](#etlplusconfig-subpackage)
14
- - [Supported Configuration Types](#supported-configuration-types)
15
- - [Loading and Validating Configs](#loading-and-validating-configs)
16
- - [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
12
+ - [`etlplus.config` Subpackage](#etlplusconfig-subpackage)
13
+ - [Modules](#modules)
14
+ - [Exported Types](#exported-types)
15
+ - [Example: Typing an API Config](#example-typing-an-api-config)
17
16
  - [See Also](#see-also)
18
17
 
19
- ## Supported Configuration Types
18
+ ## Modules
20
19
 
21
- - **Connector**: Connection details for databases, files, or APIs
22
- - **Job**: ETL job definitions and scheduling
23
- - **Pipeline**: End-to-end pipeline configuration
24
- - **Profile**: User or environment-specific settings
20
+ - `etlplus.config.__init__`: package exports and high-level package notes
21
+ - `etlplus.config.types`: TypedDict-based config schemas
25
22
 
26
- ## Loading and Validating Configs
23
+ ## Exported Types
27
24
 
28
- Use the provided classes to load and validate configuration files:
25
+ - `ApiConfigMap`: top-level API config shape
26
+ - `ApiProfileConfigMap`: per-profile API config shape
27
+ - `ApiProfileDefaultsMap`: defaults block within a profile
28
+ - `EndpointMap`: endpoint config shape
29
29
 
30
- ```python
31
- from etlplus.config import PipelineConfig
32
-
33
- cfg = PipelineConfig.from_yaml("pipeline.yml")
34
- ```
35
-
36
- - Supports YAML and JSON formats
37
- - Validates against expected schema
38
-
39
- ## Example: Loading a Pipeline Config
30
+ ## Example: Typing an API Config
40
31
 
41
32
  ```python
42
- from etlplus.config import PipelineConfig
43
-
44
- pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
45
- print(pipeline)
33
+ from etlplus.config import ApiConfigMap
34
+
35
+ api_cfg: ApiConfigMap = {
36
+ "base_url": "https://example.test",
37
+ "headers": {"Authorization": "Bearer token"},
38
+ "endpoints": {
39
+ "users": {
40
+ "path": "/users",
41
+ "method": "GET",
42
+ },
43
+ },
44
+ }
46
45
  ```
47
46
 
48
47
  ## See Also
49
48
 
50
49
  - Top-level CLI and library usage in the main [README](../../README.md)
51
50
  - Config type definitions in [types.py](types.py)
52
- - Config utilities in [utils.py](utils.py)
etlplus/config/__init__.py CHANGED
@@ -16,41 +16,18 @@ Notes
16
16
 
17
17
  from __future__ import annotations
18
18
 
19
- from .connector import Connector
20
- from .connector import ConnectorApi
21
- from .connector import ConnectorDb
22
- from .connector import ConnectorFile
23
- from .connector import parse_connector
24
- from .jobs import ExtractRef
25
- from .jobs import JobConfig
26
- from .jobs import LoadRef
27
- from .jobs import TransformRef
28
- from .jobs import ValidationRef
29
- from .pipeline import PipelineConfig
30
- from .pipeline import load_pipeline_config
31
- from .profile import ProfileConfig
32
- from .types import ConnectorType
19
+ from .types import ApiConfigMap
20
+ from .types import ApiProfileConfigMap
21
+ from .types import ApiProfileDefaultsMap
22
+ from .types import EndpointMap
33
23
 
34
24
  # SECTION: EXPORTS ========================================================== #
35
25
 
36
26
 
37
27
  __all__ = [
38
- # Connectors
39
- 'Connector',
40
- 'ConnectorType',
41
- 'ConnectorApi',
42
- 'ConnectorDb',
43
- 'ConnectorFile',
44
- 'parse_connector',
45
- # Jobs / Refs
46
- 'ExtractRef',
47
- 'JobConfig',
48
- 'LoadRef',
49
- 'TransformRef',
50
- 'ValidationRef',
51
- # Pipeline
52
- 'PipelineConfig',
53
- 'load_pipeline_config',
54
- # Profile
55
- 'ProfileConfig',
28
+ # Typed Dicts
29
+ 'ApiConfigMap',
30
+ 'ApiProfileConfigMap',
31
+ 'ApiProfileDefaultsMap',
32
+ 'EndpointMap',
56
33
  ]
etlplus/config/types.py CHANGED
@@ -33,7 +33,6 @@ from __future__ import annotations
33
33
 
34
34
  from collections.abc import Mapping
35
35
  from typing import Any
36
- from typing import Literal
37
36
  from typing import TypedDict
38
37
 
39
38
  from ..api import PaginationConfigMap
@@ -44,26 +43,17 @@ from ..types import StrAnyMap
44
43
 
45
44
 
46
45
  __all__ = [
47
- # Type aliases
48
- 'ConnectorType',
49
- # 'PaginationType',
50
46
  # TypedDicts
51
47
  'ApiProfileDefaultsMap',
52
48
  'ApiProfileConfigMap',
53
49
  'ApiConfigMap',
54
50
  'EndpointMap',
55
- 'ConnectorApiConfigMap',
56
- 'ConnectorDbConfigMap',
57
- 'ConnectorFileConfigMap',
58
51
  ]
59
52
 
60
53
 
61
54
  # SECTION: TYPE ALIASES ===================================================== #
62
55
 
63
56
 
64
- # Literal type for supported connector kinds
65
- type ConnectorType = Literal['api', 'database', 'file']
66
-
67
57
  # Literal type for supported pagination kinds
68
58
  # type PaginationType = Literal['page', 'offset', 'cursor']
69
59
 
@@ -129,60 +119,6 @@ class ApiProfileDefaultsMap(TypedDict, total=False):
129
119
  rate_limit: RateLimitConfigMap | StrAnyMap
130
120
 
131
121
 
132
- class ConnectorApiConfigMap(TypedDict, total=False):
133
- """
134
- Shape accepted by ConnectorApi.from_obj (all keys optional).
135
-
136
- See Also
137
- --------
138
- - etlplus.config.connector.ConnectorApi.from_obj
139
- """
140
-
141
- name: str
142
- type: ConnectorType
143
- url: str
144
- method: str
145
- headers: StrAnyMap
146
- query_params: StrAnyMap
147
- pagination: PaginationConfigMap
148
- rate_limit: RateLimitConfigMap
149
- api: str
150
- endpoint: str
151
-
152
-
153
- class ConnectorDbConfigMap(TypedDict, total=False):
154
- """
155
- Shape accepted by ConnectorDb.from_obj (all keys optional).
156
-
157
- See Also
158
- --------
159
- - etlplus.config.connector.ConnectorDb.from_obj
160
- """
161
-
162
- name: str
163
- type: ConnectorType
164
- connection_string: str
165
- query: str
166
- table: str
167
- mode: str
168
-
169
-
170
- class ConnectorFileConfigMap(TypedDict, total=False):
171
- """
172
- Shape accepted by ConnectorFile.from_obj (all keys optional).
173
-
174
- See Also
175
- --------
176
- - etlplus.config.connector.ConnectorFile.from_obj
177
- """
178
-
179
- name: str
180
- type: ConnectorType
181
- format: str
182
- path: str
183
- options: StrAnyMap
184
-
185
-
186
122
  class EndpointMap(TypedDict, total=False):
187
123
  """
188
124
  Shape accepted by EndpointConfig.from_obj.
etlplus/dag.py ADDED
@@ -0,0 +1,103 @@
1
+ """
2
+ :mod:`etlplus.dag` module.
3
+
4
+ Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
5
+ ``depends_on``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections import deque
11
+ from dataclasses import dataclass
12
+
13
+ from .config.jobs import JobConfig
14
+
15
+ # SECTION: EXPORTS ========================================================== #
16
+
17
+
18
+ __all__ = [
19
+ 'DagError',
20
+ 'topological_sort_jobs',
21
+ ]
22
+
23
+
24
+ # SECTION: ERRORS =========================================================== #
25
+
26
+
27
+ @dataclass(slots=True)
28
+ class DagError(ValueError):
29
+ """
30
+ Raised when the job dependency graph is invalid.
31
+
32
+ Attributes
33
+ ----------
34
+ message : str
35
+ Error message.
36
+ """
37
+
38
+ # -- Attributes -- #
39
+
40
+ message: str
41
+
42
+ # -- Magic Methods (Object Representation) -- #
43
+
44
+ def __str__(self) -> str:
45
+ return self.message
46
+
47
+
48
+ # SECTION: FUNCTIONS ======================================================== #
49
+
50
+
51
+ def topological_sort_jobs(
52
+ jobs: list[JobConfig],
53
+ ) -> list[JobConfig]:
54
+ """
55
+ Return jobs in topological order based on ``depends_on``.
56
+
57
+ Parameters
58
+ ----------
59
+ jobs : list[JobConfig]
60
+ List of job configurations to sort.
61
+
62
+ Returns
63
+ -------
64
+ list[JobConfig]
65
+ Jobs sorted in topological order.
66
+
67
+ Raises
68
+ ------
69
+ DagError
70
+ If a dependency is missing, self-referential, or when a cycle is
71
+ detected.
72
+ """
73
+ index = {job.name: job for job in jobs}
74
+ edges: dict[str, set[str]] = {name: set() for name in index}
75
+ indegree: dict[str, int] = {name: 0 for name in index}
76
+
77
+ for job in jobs:
78
+ for dep in job.depends_on:
79
+ if dep not in index:
80
+ raise DagError(
81
+ f'Unknown dependency "{dep}" in job "{job.name}"',
82
+ )
83
+ if dep == job.name:
84
+ raise DagError(f'Job "{job.name}" depends on itself')
85
+ if job.name not in edges[dep]:
86
+ edges[dep].add(job.name)
87
+ indegree[job.name] += 1
88
+
89
+ queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
90
+ ordered: list[str] = []
91
+
92
+ while queue:
93
+ name = queue.popleft()
94
+ ordered.append(name)
95
+ for child in sorted(edges[name]):
96
+ indegree[child] -= 1
97
+ if indegree[child] == 0:
98
+ queue.append(child)
99
+
100
+ if len(ordered) != len(jobs):
101
+ raise DagError('Dependency cycle detected')
102
+
103
+ return [index[name] for name in ordered]
etlplus/database/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # etlplus.database subpackage
1
+ # `etlplus.database` Subpackage
2
2
 
3
3
  Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
4
4
 
@@ -9,7 +9,7 @@ Documentation for the `etlplus.database` subpackage: database engine, schema, an
9
9
 
10
10
  Back to project overview: see the top-level [README](../../README.md).
11
11
 
12
- - [etlplus.database subpackage](#etlplusdatabase-subpackage)
12
+ - [`etlplus.database` Subpackage](#etlplusdatabase-subpackage)
13
13
  - [Database Engine and Connections](#database-engine-and-connections)
14
14
  - [Schema and DDL Helpers](#schema-and-ddl-helpers)
15
15
  - [ORM Utilities](#orm-utilities)
etlplus/enums.py CHANGED
@@ -23,7 +23,6 @@ __all__ = [
23
23
  'AggregateName',
24
24
  'CoercibleStrEnum',
25
25
  'DataConnectorType',
26
- 'HttpMethod',
27
26
  'OperatorName',
28
27
  'PipelineStep',
29
28
  ]
@@ -200,37 +199,6 @@ class DataConnectorType(CoercibleStrEnum):
200
199
  }
201
200
 
202
201
 
203
- class HttpMethod(CoercibleStrEnum):
204
- """Supported HTTP verbs that accept JSON payloads."""
205
-
206
- # -- Constants -- #
207
-
208
- CONNECT = 'connect'
209
- DELETE = 'delete'
210
- GET = 'get'
211
- HEAD = 'head'
212
- OPTIONS = 'options'
213
- PATCH = 'patch'
214
- POST = 'post'
215
- PUT = 'put'
216
- TRACE = 'trace'
217
-
218
- # -- Getters -- #
219
-
220
- @property
221
- def allows_body(self) -> bool:
222
- """
223
- Whether the method typically allows a request body.
224
-
225
- Notes
226
- -----
227
- - RFCs do not strictly forbid bodies on some other methods (e.g.,
228
- ``DELETE``), but many servers/clients do not expect them. We mark
229
- ``POST``, ``PUT``, and ``PATCH`` as True.
230
- """
231
- return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
232
-
233
-
234
202
  class OperatorName(CoercibleStrEnum):
235
203
  """Supported comparison operators with helpers."""
236
204
 
etlplus/file/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # etlplus.file subpackage
1
+ # `etlplus.file` Subpackage
2
2
 
3
3
  Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
4
4
  and writing data files.
@@ -11,7 +11,7 @@ and writing data files.
11
11
 
12
12
  Back to project overview: see the top-level [README](../../README.md).
13
13
 
14
- - [etlplus.file subpackage](#etlplusfile-subpackage)
14
+ - [`etlplus.file` Subpackage](#etlplusfile-subpackage)
15
15
  - [Supported File Formats](#supported-file-formats)
16
16
  - [Inferring File Format and Compression](#inferring-file-format-and-compression)
17
17
  - [Reading and Writing Files](#reading-and-writing-files)
etlplus/file/enums.py CHANGED
@@ -123,7 +123,7 @@ class FileFormat(CoercibleStrEnum):
123
123
  RDS = 'rds' # R data file
124
124
  SAS7BDAT = 'sas7bdat' # SAS data file
125
125
  SAV = 'sav' # SPSS data file
126
- SYLK = 'sylk' # Symbolic Link (SYmbolic LinK)
126
+ SYLK = 'sylk' # Symbolic Link
127
127
  XPT = 'xpt' # SAS Transport file
128
128
  ZSAV = 'zsav' # Compressed SPSS data file
129
129
 
@@ -1,4 +1,4 @@
1
- # etlplus.validation subpackage
1
+ # etlplus.ops subpackage
2
2
 
3
3
  Documentation for the `etlplus.validation` subpackage: data validation utilities and helpers.
4
4
 
@@ -8,7 +8,7 @@ Documentation for the `etlplus.validation` subpackage: data validation utilities
8
8
 
9
9
  Back to project overview: see the top-level [README](../../README.md).
10
10
 
11
- - [etlplus.validation subpackage](#etlplusvalidation-subpackage)
11
+ - [etlplus.ops subpackage](#etlplusops-subpackage)
12
12
  - [Validation Features](#validation-features)
13
13
  - [Defining Validation Rules](#defining-validation-rules)
14
14
  - [Example: Validating Data](#example-validating-data)
etlplus/ops/__init__.py ADDED
@@ -0,0 +1,61 @@
1
+ """
2
+ :mod:`etlplus.ops` package.
3
+
4
+ Data operations helpers.
5
+
6
+ Importing :mod:`etlplus.ops` exposes the coarse-grained helpers most users care
7
+ about: ``extract``, ``transform``, ``load``, ``validate``, ``run``, and
8
+ ``run_pipeline``. Each helper delegates to the richer modules under
9
+ ``etlplus.ops.*`` while presenting a compact public API surface. Conditional
10
+ validation orchestration is available via
11
+ :func:`etlplus.ops.utils.maybe_validate`. The legacy compatibility module
12
+ :mod:`etlplus.ops.__init__validation` is deprecated in favor of this package.
13
+
14
+ Examples
15
+ --------
16
+ >>> from etlplus.ops import extract, transform
17
+ >>> raw = extract('file', 'input.json')
18
+ >>> curated = transform(raw, {'select': ['id', 'name']})
19
+
20
+ >>> from etlplus.ops.utils import maybe_validate
21
+ >>> payload = {'name': 'Alice'}
22
+ >>> rules = {'required': ['name']}
23
+ >>> def validator(data, config):
24
+ ... missing = [field for field in config['required'] if field not in data]
25
+ ... return {'valid': not missing, 'errors': missing, 'data': data}
26
+ >>> maybe_validate(
27
+ ... payload,
28
+ ... when='both',
29
+ ... enabled=True,
30
+ ... rules=rules,
31
+ ... phase='before_transform',
32
+ ... severity='warn',
33
+ ... validate_fn=validator,
34
+ ... print_json_fn=lambda message: message,
35
+ ... )
36
+ {'name': 'Alice'}
37
+
38
+ See Also
39
+ --------
40
+ :mod:`etlplus.ops.run`
41
+ :mod:`etlplus.ops.utils`
42
+ """
43
+
44
+ from .extract import extract
45
+ from .load import load
46
+ from .run import run
47
+ from .run import run_pipeline
48
+ from .transform import transform
49
+ from .validate import validate
50
+
51
+ # SECTION: EXPORTS ========================================================== #
52
+
53
+
54
+ __all__ = [
55
+ 'extract',
56
+ 'load',
57
+ 'run',
58
+ 'run_pipeline',
59
+ 'transform',
60
+ 'validate',
61
+ ]