etlplus 0.9.2__py3-none-any.whl → 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. etlplus/__init__.py +26 -1
  2. etlplus/api/README.md +3 -51
  3. etlplus/api/__init__.py +0 -10
  4. etlplus/api/config.py +28 -39
  5. etlplus/api/endpoint_client.py +3 -3
  6. etlplus/api/pagination/client.py +1 -1
  7. etlplus/api/rate_limiting/config.py +1 -13
  8. etlplus/api/rate_limiting/rate_limiter.py +11 -8
  9. etlplus/api/request_manager.py +6 -11
  10. etlplus/api/transport.py +2 -14
  11. etlplus/api/types.py +6 -96
  12. etlplus/cli/commands.py +43 -76
  13. etlplus/cli/constants.py +1 -1
  14. etlplus/cli/handlers.py +12 -40
  15. etlplus/cli/io.py +2 -2
  16. etlplus/cli/main.py +1 -1
  17. etlplus/cli/state.py +7 -4
  18. etlplus/{workflow → config}/__init__.py +23 -10
  19. etlplus/{workflow → config}/connector.py +44 -58
  20. etlplus/{workflow → config}/jobs.py +32 -105
  21. etlplus/{workflow → config}/pipeline.py +51 -59
  22. etlplus/{workflow → config}/profile.py +5 -8
  23. etlplus/config/types.py +204 -0
  24. etlplus/config/utils.py +120 -0
  25. etlplus/database/ddl.py +1 -1
  26. etlplus/database/engine.py +3 -19
  27. etlplus/database/orm.py +0 -2
  28. etlplus/database/schema.py +1 -1
  29. etlplus/enums.py +288 -0
  30. etlplus/{ops/extract.py → extract.py} +99 -81
  31. etlplus/file.py +652 -0
  32. etlplus/{ops/load.py → load.py} +101 -78
  33. etlplus/{ops/run.py → run.py} +127 -159
  34. etlplus/{api/utils.py → run_helpers.py} +153 -209
  35. etlplus/{ops/transform.py → transform.py} +68 -75
  36. etlplus/types.py +4 -5
  37. etlplus/utils.py +2 -136
  38. etlplus/{ops/validate.py → validate.py} +12 -22
  39. etlplus/validation/__init__.py +44 -0
  40. etlplus/{ops → validation}/utils.py +17 -53
  41. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/METADATA +17 -210
  42. etlplus-0.10.2.dist-info/RECORD +65 -0
  43. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/WHEEL +1 -1
  44. etlplus/README.md +0 -37
  45. etlplus/api/enums.py +0 -51
  46. etlplus/cli/README.md +0 -40
  47. etlplus/database/README.md +0 -48
  48. etlplus/file/README.md +0 -105
  49. etlplus/file/__init__.py +0 -25
  50. etlplus/file/_imports.py +0 -141
  51. etlplus/file/_io.py +0 -160
  52. etlplus/file/accdb.py +0 -78
  53. etlplus/file/arrow.py +0 -78
  54. etlplus/file/avro.py +0 -176
  55. etlplus/file/bson.py +0 -77
  56. etlplus/file/cbor.py +0 -78
  57. etlplus/file/cfg.py +0 -79
  58. etlplus/file/conf.py +0 -80
  59. etlplus/file/core.py +0 -322
  60. etlplus/file/csv.py +0 -79
  61. etlplus/file/dat.py +0 -78
  62. etlplus/file/dta.py +0 -77
  63. etlplus/file/duckdb.py +0 -78
  64. etlplus/file/enums.py +0 -343
  65. etlplus/file/feather.py +0 -111
  66. etlplus/file/fwf.py +0 -77
  67. etlplus/file/gz.py +0 -123
  68. etlplus/file/hbs.py +0 -78
  69. etlplus/file/hdf5.py +0 -78
  70. etlplus/file/ini.py +0 -79
  71. etlplus/file/ion.py +0 -78
  72. etlplus/file/jinja2.py +0 -78
  73. etlplus/file/json.py +0 -98
  74. etlplus/file/log.py +0 -78
  75. etlplus/file/mat.py +0 -78
  76. etlplus/file/mdb.py +0 -78
  77. etlplus/file/msgpack.py +0 -78
  78. etlplus/file/mustache.py +0 -78
  79. etlplus/file/nc.py +0 -78
  80. etlplus/file/ndjson.py +0 -108
  81. etlplus/file/numbers.py +0 -75
  82. etlplus/file/ods.py +0 -79
  83. etlplus/file/orc.py +0 -111
  84. etlplus/file/parquet.py +0 -113
  85. etlplus/file/pb.py +0 -78
  86. etlplus/file/pbf.py +0 -77
  87. etlplus/file/properties.py +0 -78
  88. etlplus/file/proto.py +0 -77
  89. etlplus/file/psv.py +0 -79
  90. etlplus/file/rda.py +0 -78
  91. etlplus/file/rds.py +0 -78
  92. etlplus/file/sas7bdat.py +0 -78
  93. etlplus/file/sav.py +0 -77
  94. etlplus/file/sqlite.py +0 -78
  95. etlplus/file/stub.py +0 -84
  96. etlplus/file/sylk.py +0 -77
  97. etlplus/file/tab.py +0 -81
  98. etlplus/file/toml.py +0 -78
  99. etlplus/file/tsv.py +0 -80
  100. etlplus/file/txt.py +0 -102
  101. etlplus/file/vm.py +0 -78
  102. etlplus/file/wks.py +0 -77
  103. etlplus/file/xls.py +0 -88
  104. etlplus/file/xlsm.py +0 -79
  105. etlplus/file/xlsx.py +0 -99
  106. etlplus/file/xml.py +0 -185
  107. etlplus/file/xpt.py +0 -78
  108. etlplus/file/yaml.py +0 -95
  109. etlplus/file/zip.py +0 -175
  110. etlplus/file/zsav.py +0 -77
  111. etlplus/ops/README.md +0 -50
  112. etlplus/ops/__init__.py +0 -61
  113. etlplus/templates/README.md +0 -46
  114. etlplus/workflow/README.md +0 -52
  115. etlplus/workflow/dag.py +0 -105
  116. etlplus/workflow/types.py +0 -115
  117. etlplus-0.9.2.dist-info/RECORD +0 -134
  118. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/entry_points.txt +0 -0
  119. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/licenses/LICENSE +0 -0
  120. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,204 @@
1
+ """
2
+ :mod:`etlplus.config.types` module.
3
+
4
+ Type aliases and editor-only TypedDicts for :mod:`etlplus.config`.
5
+
6
+ These types improve IDE autocomplete and static analysis while the runtime
7
+ parsers remain permissive.
8
+
9
+ Notes
10
+ -----
11
+ - TypedDicts in this module are intentionally ``total=False`` and are not
12
+ enforced at runtime.
13
+ - ``*.from_obj`` constructors accept ``Mapping[str, Any]`` and perform
14
+ tolerant parsing and light casting. This keeps the runtime permissive while
15
+ improving autocomplete and static analysis for contributors.
16
+
17
+ Examples
18
+ --------
19
+ >>> from etlplus.config import Connector
20
+ >>> src: Connector = {
21
+ ...     "type": "file",
22
+ ...     "path": "/data/input.csv",
23
+ ... }
24
+ >>> tgt: Connector = {
25
+ ...     "type": "database",
26
+ ...     "connection_string": "postgresql://user:pass@localhost/db",
27
+ ... }
28
+ >>> from etlplus.api import RetryPolicy
29
+ >>> rp: RetryPolicy = {"max_attempts": 3, "backoff": 0.5}
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ from collections.abc import Mapping
35
+ from typing import Any
36
+ from typing import Literal
37
+ from typing import TypedDict
38
+
39
+ from ..api import PaginationConfigMap
40
+ from ..api import RateLimitConfigMap
41
+ from ..types import StrAnyMap
42
+
43
+ # SECTION: EXPORTS ========================================================= #
44
+
45
+
46
+ __all__ = [
47
+ # Type aliases
48
+ 'ConnectorType',
49
+ # 'PaginationType',
50
+ # TypedDicts
51
+ 'ApiProfileDefaultsMap',
52
+ 'ApiProfileConfigMap',
53
+ 'ApiConfigMap',
54
+ 'EndpointMap',
55
+ 'ConnectorApiConfigMap',
56
+ 'ConnectorDbConfigMap',
57
+ 'ConnectorFileConfigMap',
58
+ ]
59
+
60
+
61
+ # SECTION: TYPE ALIASES ===================================================== #
62
+
63
+
64
+ # Literal type for supported connector kinds
65
+ type ConnectorType = Literal['api', 'database', 'file']
66
+
67
+ # Literal type for supported pagination kinds
68
+ # type PaginationType = Literal['page', 'offset', 'cursor']
69
+
70
+
71
+ # SECTION: TYPED DICTS ====================================================== #
72
+
73
+
74
class ApiConfigMap(TypedDict, total=False):
    """
    Editor-facing shape of the top-level API configuration mapping.

    Two layouts are described: a flat one that carries 'base_url'
    (optionally with 'headers' and 'endpoints'), or a 'profiles' mapping in
    which at least one profile supplies a 'base_url'. Every key is optional
    for the type checker because the class is declared ``total=False``.

    See Also
    --------
    - etlplus.config.api.ApiConfig.from_obj: parses this mapping
    """

    # Flat layout.
    base_url: str
    headers: StrAnyMap
    # Endpoint name -> endpoint mapping, or a plain string.
    endpoints: Mapping[str, EndpointMap | str]
    # Profile layout: profile name -> profile mapping.
    profiles: Mapping[str, ApiProfileConfigMap]
90
+
91
+
92
class ApiProfileConfigMap(TypedDict, total=False):
    """
    Shape of one named profile stored under ``ApiConfigMap.profiles``.

    Notes
    -----
    All keys are optional for static analysis (``total=False``), but
    `base_url` is required at runtime when profiles are provided.

    See Also
    --------
    - etlplus.config.api.ApiProfileConfig.from_obj: parses this mapping
    """

    base_url: str
    headers: StrAnyMap
    base_path: str
    auth: StrAnyMap
    # Nested defaults block; see ApiProfileDefaultsMap.
    defaults: ApiProfileDefaultsMap
110
+
111
+
112
class ApiProfileDefaultsMap(TypedDict, total=False):
    """
    Shape of the optional ``defaults`` block nested inside a profile.

    Notes
    -----
    Runtime expects header values to be str; typing remains permissive.

    See Also
    --------
    - etlplus.config.api.ApiProfileConfig.from_obj: consumes this block
    - etlplus.config.pagination.PaginationConfig.from_obj: parses pagination
    - etlplus.api.rate_limiting.RateLimitConfig.from_obj: parses rate_limit
    """

    headers: StrAnyMap
    # Either the dedicated typed mapping or any string-keyed mapping.
    pagination: PaginationConfigMap | StrAnyMap
    rate_limit: RateLimitConfigMap | StrAnyMap
130
+
131
+
132
class ConnectorApiConfigMap(TypedDict, total=False):
    """
    Mapping shape that ConnectorApi.from_obj accepts.

    No key is mandatory for the type checker (``total=False``).

    See Also
    --------
    - etlplus.config.connector.ConnectorApi.from_obj
    """

    # Identification.
    name: str
    type: ConnectorType

    # Request description.
    url: str
    method: str
    headers: StrAnyMap
    query_params: StrAnyMap
    pagination: PaginationConfigMap
    rate_limit: RateLimitConfigMap

    # String-valued 'api' / 'endpoint' keys.
    api: str
    endpoint: str
151
+
152
+
153
class ConnectorDbConfigMap(TypedDict, total=False):
    """
    Mapping shape that ConnectorDb.from_obj accepts.

    No key is mandatory for the type checker (``total=False``).

    See Also
    --------
    - etlplus.config.connector.ConnectorDb.from_obj
    """

    # Identification.
    name: str
    type: ConnectorType

    # Database settings, all plain strings.
    connection_string: str
    query: str
    table: str
    mode: str
168
+
169
+
170
class ConnectorFileConfigMap(TypedDict, total=False):
    """
    Mapping shape that ConnectorFile.from_obj accepts.

    No key is mandatory for the type checker (``total=False``).

    See Also
    --------
    - etlplus.config.connector.ConnectorFile.from_obj
    """

    # Identification.
    name: str
    type: ConnectorType

    # File settings.
    format: str
    path: str
    options: StrAnyMap
184
+
185
+
186
class EndpointMap(TypedDict, total=False):
    """
    Mapping shape that EndpointConfig.from_obj accepts.

    One of 'path' or 'url' should be provided. The type checker treats every
    key as optional (``total=False``).

    See Also
    --------
    - etlplus.config.api.EndpointConfig.from_obj: parses this mapping
    """

    # Location: supply one of these two.
    path: str
    url: str

    # Request description.
    method: str
    path_params: StrAnyMap
    query_params: StrAnyMap
    body: Any

    # Per-endpoint pagination and rate-limit mappings.
    pagination: PaginationConfigMap
    rate_limit: RateLimitConfigMap
@@ -0,0 +1,120 @@
1
+ """
2
+ :mod:`etlplus.config.utils` module.
3
+
4
+ A module defining utility helpers for ETL pipeline configuration.
5
+
6
+ Notes
7
+ -----
8
+ - Inputs to parsers favor ``Mapping[str, Any]`` to remain permissive and
9
+ avoid unnecessary copies; normalization returns concrete types.
10
+ - Substitution is shallow for strings and recursive for containers.
11
+ - Numeric coercion helpers are intentionally forgiving: invalid values
12
+ become ``None`` rather than raising.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Iterable
18
+ from collections.abc import Mapping
19
+ from typing import Any
20
+
21
+ from ..types import StrAnyMap
22
+
23
+ # SECTION: EXPORTS ========================================================== #
24
+
25
+
26
+ __all__ = [
27
+ # Functions
28
+ 'deep_substitute',
29
+ ]
30
+
31
+
32
+ # SECTION: FUNCTIONS ======================================================== #
33
+
34
+
35
def deep_substitute(
    value: Any,
    vars_map: StrAnyMap | None,
    env_map: Mapping[str, str] | None,
) -> Any:
    """
    Expand ``${VAR}`` tokens throughout a possibly nested value.

    Strings have their tokens replaced. Mappings, lists, tuples, sets, and
    frozensets are rebuilt with the expansion applied to every contained
    value; mapping keys are left untouched and mappings come back as plain
    ``dict`` objects. Any other object is returned unchanged.

    Parameters
    ----------
    value : Any
        The value to perform substitutions on.
    vars_map : StrAnyMap | None
        Variable names mapped to replacement values (lower precedence).
    env_map : Mapping[str, str] | None
        Environment values; an entry here wins over a same-named entry in
        ``vars_map`` (higher precedence).

    Returns
    -------
    Any
        A new structure with substitutions applied where tokens were found.
    """
    # Merge once up front; the walker below reuses the same pairs for
    # every string it meets.
    pairs = _prepare_substitutions(vars_map, env_map)

    def _walk(item: Any) -> Any:
        if isinstance(item, str):
            return _replace_tokens(item, pairs)
        if isinstance(item, Mapping):
            return {key: _walk(val) for key, val in item.items()}
        if isinstance(item, list):
            return [_walk(member) for member in item]
        if isinstance(item, tuple):
            return tuple(_walk(member) for member in item)
        if isinstance(item, set):
            return {_walk(member) for member in item}
        if isinstance(item, frozenset):
            return frozenset(_walk(member) for member in item)
        # Scalars and unrecognized objects pass through as-is.
        return item

    return _walk(value)
79
+
80
+
81
+ # SECTION: INTERNAL FUNCTIONS ============================================== #
82
+
83
+
84
def _prepare_substitutions(
    vars_map: StrAnyMap | None,
    env_map: Mapping[str, Any] | None,
) -> tuple[tuple[str, Any], ...]:
    """
    Combine both maps into one immutable sequence of substitution pairs.

    Parameters
    ----------
    vars_map : StrAnyMap | None
        Variable names mapped to replacement values (lower precedence).
    env_map : Mapping[str, Any] | None
        Environment-backed values; these replace same-named entries taken
        from ``vars_map``.

    Returns
    -------
    tuple[tuple[str, Any], ...]
        ``(name, value)`` pairs in merge order: ``vars_map`` keys first,
        then keys found only in ``env_map``. Empty when both inputs are
        ``None`` or empty.
    """
    if not (vars_map or env_map):
        return ()
    combined: dict[str, Any] = dict(vars_map or {})
    # Applied second so environment values override variables of the same
    # name while keeping the variable's original position.
    combined.update(env_map or {})
    return tuple(combined.items())
107
+
108
+
109
def _replace_tokens(
    text: str,
    substitutions: Iterable[tuple[str, Any]],
) -> str:
    """
    Replace each ``${name}`` token in ``text`` with its mapped value.

    Pairs are applied one after another to the running result, so text
    inserted by an earlier pair can itself be expanded by a later pair.
    Tokens with no matching pair are left in place.

    Parameters
    ----------
    text : str
        The string to scan for tokens.
    substitutions : Iterable[tuple[str, Any]]
        ``(name, value)`` pairs; each value is converted with ``str``
        before insertion.

    Returns
    -------
    str
        ``text`` with every matching token replaced.
    """
    if not substitutions:
        return text
    result = text
    for key, new_value in substitutions:
        placeholder = '${{{}}}'.format(key)
        if placeholder not in result:
            continue
        result = result.replace(placeholder, str(new_value))
    return result
etlplus/database/ddl.py CHANGED
@@ -203,7 +203,7 @@ def load_table_spec(
203
203
  raise ValueError('Spec must be .json, .yml, or .yaml')
204
204
 
205
205
  try:
206
- spec = File(spec_path).read()
206
+ spec = File.read_file(spec_path)
207
207
  except ImportError as e:
208
208
  if suffix in {'.yml', '.yaml'}:
209
209
  raise RuntimeError(
@@ -113,7 +113,7 @@ def load_database_url_from_config(
113
113
  ValueError
114
114
  If no connection string/URL/DSN is found for the specified entry.
115
115
  """
116
- cfg = File(Path(path)).read()
116
+ cfg = File.read_file(Path(path))
117
117
  if not isinstance(cfg, Mapping):
118
118
  raise TypeError('Database config must be a mapping')
119
119
 
@@ -136,25 +136,9 @@ def load_database_url_from_config(
136
136
  return url
137
137
 
138
138
 
139
- def make_engine(
140
- url: str | None = None,
141
- **engine_kwargs: Any,
142
- ) -> Engine:
143
- """
144
- Create a SQLAlchemy Engine, defaulting to env config if no URL given.
145
-
146
- Parameters
147
- ----------
148
- url : str | None, optional
149
- Database URL/DSN string. When omitted, ``DATABASE_URL`` is used.
150
- **engine_kwargs : Any
151
- Extra keyword arguments forwarded to ``create_engine``.
139
+ def make_engine(url: str | None = None, **engine_kwargs: Any) -> Engine:
140
+ """Create a SQLAlchemy Engine, defaulting to env config if no URL given."""
152
141
 
153
- Returns
154
- -------
155
- Engine
156
- Configured SQLAlchemy engine instance.
157
- """
158
142
  resolved_url = url or DATABASE_URL
159
143
  return create_engine(resolved_url, pool_pre_ping=True, **engine_kwargs)
160
144
 
etlplus/database/orm.py CHANGED
@@ -201,14 +201,12 @@ def build_models(
201
201
  ) -> ModelRegistry:
202
202
  """
203
203
  Build SQLAlchemy ORM models from table specifications.
204
-
205
204
  Parameters
206
205
  ----------
207
206
  specs : list[TableSpec]
208
207
  List of table specifications.
209
208
  base : type[DeclarativeBase], optional
210
209
  Base class for the ORM models (default: :class:`Base`).
211
-
212
210
  Returns
213
211
  -------
214
212
  ModelRegistry
@@ -260,7 +260,7 @@ def load_table_specs(
260
260
  list[TableSpec]
261
261
  A list of TableSpec instances parsed from the YAML file.
262
262
  """
263
- data = File(Path(path)).read()
263
+ data = File.read_file(Path(path))
264
264
  if not data:
265
265
  return []
266
266