etlplus 0.12.10__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. etlplus/README.md +1 -1
  2. etlplus/__init__.py +1 -26
  3. etlplus/api/__init__.py +10 -0
  4. etlplus/api/config.py +36 -20
  5. etlplus/api/endpoint_client.py +3 -3
  6. etlplus/api/enums.py +51 -0
  7. etlplus/api/pagination/client.py +1 -1
  8. etlplus/api/rate_limiting/config.py +13 -1
  9. etlplus/api/rate_limiting/rate_limiter.py +8 -11
  10. etlplus/api/request_manager.py +11 -6
  11. etlplus/api/transport.py +14 -2
  12. etlplus/api/types.py +7 -6
  13. etlplus/{run_helpers.py → api/utils.py} +205 -153
  14. etlplus/cli/handlers.py +17 -7
  15. etlplus/config/jobs.py +14 -4
  16. etlplus/dag.py +103 -0
  17. etlplus/enums.py +0 -32
  18. etlplus/file/cfg.py +2 -2
  19. etlplus/file/conf.py +2 -2
  20. etlplus/file/dta.py +77 -0
  21. etlplus/file/enums.py +10 -4
  22. etlplus/file/hbs.py +78 -0
  23. etlplus/file/hdf5.py +78 -0
  24. etlplus/file/jinja2.py +78 -0
  25. etlplus/file/mat.py +78 -0
  26. etlplus/file/mustache.py +78 -0
  27. etlplus/file/nc.py +78 -0
  28. etlplus/file/numbers.py +75 -0
  29. etlplus/file/ods.py +79 -0
  30. etlplus/file/properties.py +13 -13
  31. etlplus/file/rda.py +78 -0
  32. etlplus/file/rds.py +78 -0
  33. etlplus/file/sas7bdat.py +78 -0
  34. etlplus/file/sav.py +77 -0
  35. etlplus/file/sylk.py +77 -0
  36. etlplus/file/toml.py +1 -1
  37. etlplus/file/vm.py +78 -0
  38. etlplus/file/wks.py +77 -0
  39. etlplus/file/xlsm.py +79 -0
  40. etlplus/file/xpt.py +78 -0
  41. etlplus/file/zsav.py +77 -0
  42. etlplus/{validation → ops}/README.md +2 -2
  43. etlplus/ops/__init__.py +61 -0
  44. etlplus/{extract.py → ops/extract.py} +78 -94
  45. etlplus/{load.py → ops/load.py} +73 -93
  46. etlplus/{run.py → ops/run.py} +140 -110
  47. etlplus/{transform.py → ops/transform.py} +75 -68
  48. etlplus/{validation → ops}/utils.py +80 -15
  49. etlplus/{validate.py → ops/validate.py} +19 -9
  50. etlplus/types.py +2 -2
  51. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/METADATA +91 -60
  52. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/RECORD +56 -35
  53. etlplus/validation/__init__.py +0 -44
  54. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/WHEEL +0 -0
  55. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/entry_points.txt +0 -0
  56. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/licenses/LICENSE +0 -0
  57. {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/top_level.txt +0 -0
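The headline change in 0.14.3 is the package reorganization: the stage modules (extract, transform, load, validate, run) move from the package root into a new `etlplus.ops` subpackage, and the helpers in `run_helpers.py` move to `etlplus.api.utils`. A minimal import-migration sketch follows; it assumes the old paths are removed rather than shimmed (consistent with `etlplus/__init__.py` shrinking by 26 lines) and uses `load_to_file` and `run`, which the diffs below confirm exist at the new paths:

```python
# Before (0.12.10): stage helpers imported from the package root.
from etlplus.load import load_to_file
from etlplus.run import run

# After (0.14.3): the same helpers live under the new ops subpackage.
from etlplus.ops.load import load_to_file
from etlplus.ops.run import run, run_pipeline  # run_pipeline is new in 0.14.x
```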
etlplus/{load.py → ops/load.py} +73 -93

```diff
@@ -1,5 +1,5 @@
  """
- :mod:`etlplus.load` module.
+ :mod:`etlplus.ops.load` module.

  Helpers to load data into files, databases, and REST APIs.
  """
@@ -12,17 +12,16 @@ from pathlib import Path
  from typing import Any
  from typing import cast

- import requests  # type: ignore[import]
-
- from .enums import DataConnectorType
- from .enums import HttpMethod
- from .file import File
- from .file import FileFormat
- from .types import JSONData
- from .types import JSONDict
- from .types import JSONList
- from .types import StrPath
- from .utils import count_records
+ from ..api import HttpMethod
+ from ..api.utils import resolve_request
+ from ..enums import DataConnectorType
+ from ..file import File
+ from ..file import FileFormat
+ from ..types import JSONData
+ from ..types import JSONDict
+ from ..types import JSONList
+ from ..types import StrPath
+ from ..utils import count_records

  # SECTION: INTERNAL FUNCTIONS ============================================== #

@@ -69,7 +68,7 @@ def _parse_json_string(
  # SECTION: FUNCTIONS ======================================================== #


- # -- Data Loading -- #
+ # -- Helpers -- #


  def load_data(
@@ -119,58 +118,59 @@ def load_data(
      )


- # -- File Loading -- #
-
-
- def load_to_file(
+ def load_to_api(
      data: JSONData,
-     file_path: StrPath,
-     file_format: FileFormat | str | None = None,
+     url: str,
+     method: HttpMethod | str,
+     **kwargs: Any,
  ) -> JSONDict:
      """
-     Persist data to a local file.
+     Load data to a REST API.

      Parameters
      ----------
      data : JSONData
-         Data to write.
-     file_path : StrPath
-         Target file path.
-     file_format : FileFormat | str | None, optional
-         Output format. If omitted (None), the format is inferred from the
-         filename extension.
+         Data to send as JSON.
+     url : str
+         API endpoint URL.
+     method : HttpMethod | str
+         HTTP method to use.
+     **kwargs : Any
+         Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
+         When omitted, ``timeout`` defaults to 10 seconds.

      Returns
      -------
      JSONDict
-         Result dictionary with status and record count.
+         Result dictionary including response payload or text.
      """
-     path = Path(file_path)
-     path.parent.mkdir(parents=True, exist_ok=True)
+     # Apply a conservative timeout to guard against hanging requests.
+     timeout = kwargs.pop('timeout', 10.0)
+     session = kwargs.pop('session', None)
+     request_callable, timeout, http_method = resolve_request(
+         method,
+         session=session,
+         timeout=timeout,
+     )
+     response = request_callable(url, json=data, timeout=timeout, **kwargs)
+     response.raise_for_status()

-     # If no explicit format is provided, let File infer from extension.
-     if file_format is None:
-         records = File(path).write(data)
-         ext = path.suffix.lstrip('.').lower()
-         fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
-     else:
-         fmt = FileFormat.coerce(file_format)
-         records = File(path, fmt).write(data)
-     if fmt is FileFormat.CSV and records == 0:
-         message = 'No data to write'
-     else:
-         message = f'Data loaded to {path}'
+     # Try JSON first, fall back to text.
+     try:
+         payload: Any = response.json()
+     except ValueError:
+         payload = response.text

      return {
          'status': 'success',
-         'message': message,
-         'records': records,
+         'status_code': response.status_code,
+         'message': f'Data loaded to {url}',
+         'response': payload,
+         'records': count_records(data),
+         'method': http_method.value.upper(),
      }


- # -- Database Loading (Placeholder) -- #
-
-
  def load_to_database(
      data: JSONData,
      connection_string: str,
@@ -206,69 +206,49 @@ def load_to_database(
      }


- # -- REST API Loading -- #
-
-
- def load_to_api(
+ def load_to_file(
      data: JSONData,
-     url: str,
-     method: HttpMethod | str,
-     **kwargs: Any,
+     file_path: StrPath,
+     file_format: FileFormat | str | None = None,
  ) -> JSONDict:
      """
-     Load data to a REST API.
+     Persist data to a local file.

      Parameters
      ----------
      data : JSONData
-         Data to send as JSON.
-     url : str
-         API endpoint URL.
-     method : HttpMethod | str
-         HTTP method to use.
-     **kwargs : Any
-         Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
+         Data to write.
+     file_path : StrPath
+         Target file path.
+     file_format : FileFormat | str | None, optional
+         Output format. If omitted (None), the format is inferred from the
+         filename extension.

      Returns
      -------
      JSONDict
-         Result dictionary including response payload or text.
-
-     Raises
-     ------
-     TypeError
-         If the session object is not valid.
+         Result dictionary with status and record count.
      """
-     http_method = HttpMethod.coerce(method)
-
-     # Apply a conservative timeout to guard against hanging requests.
-     timeout = kwargs.pop('timeout', 10.0)
-     session = kwargs.pop('session', None)
-     requester = session or requests
-
-     request_callable = getattr(requester, http_method.value, None)
-     if not callable(request_callable):
-         raise TypeError(
-             'Session object must supply a '
-             f'callable "{http_method.value}" method',
-         )
-
-     response = request_callable(url, json=data, timeout=timeout, **kwargs)
-     response.raise_for_status()
+     path = Path(file_path)
+     path.parent.mkdir(parents=True, exist_ok=True)

-     # Try JSON first, fall back to text.
-     try:
-         payload: Any = response.json()
-     except ValueError:
-         payload = response.text
+     # If no explicit format is provided, let File infer from extension.
+     if file_format is None:
+         records = File(path).write(data)
+         ext = path.suffix.lstrip('.').lower()
+         fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
+     else:
+         fmt = FileFormat.coerce(file_format)
+         records = File(path, fmt).write(data)
+     if fmt is FileFormat.CSV and records == 0:
+         message = 'No data to write'
+     else:
+         message = f'Data loaded to {path}'

      return {
          'status': 'success',
-         'status_code': response.status_code,
-         'message': f'Data loaded to {url}',
-         'response': payload,
-         'records': count_records(data),
-         'method': http_method.value.upper(),
+         'message': message,
+         'records': records,
      }

```
etlplus/{run.py → ops/run.py} +140 -110

```diff
@@ -1,5 +1,5 @@
  """
- :mod:`etlplus.run` module.
+ :mod:`etlplus.ops.run` module.

  A module for running ETL jobs defined in YAML configurations.
  """
@@ -9,126 +9,78 @@ from __future__ import annotations
  from collections.abc import Mapping
  from typing import Any
  from typing import Final
- from typing import TypedDict
  from typing import cast
  from urllib.parse import urlsplit
  from urllib.parse import urlunsplit

- import requests  # type: ignore[import]
-
- from .api import EndpointClient  # noqa: F401 (re-exported for tests)
- from .api import PaginationConfigMap
- from .api import RequestOptions
- from .api import RetryPolicy
- from .api import Url
- from .config import load_pipeline_config
- from .enums import DataConnectorType
+ from ..api import EndpointClient  # noqa: F401 (re-exported for tests)
+ from ..api import HttpMethod
+ from ..api import PaginationConfigMap
+ from ..api import RequestOptions
+ from ..api import compose_api_request_env
+ from ..api import compose_api_target_env
+ from ..api import paginate_with_client
+ from ..config import load_pipeline_config
+ from ..enums import DataConnectorType
+ from ..file import FileFormat
+ from ..types import JSONData
+ from ..types import JSONDict
+ from ..types import PipelineConfig
+ from ..types import StrPath
+ from ..types import Timeout
+ from ..utils import print_json
  from .extract import extract
  from .load import load
- from .run_helpers import compose_api_request_env
- from .run_helpers import compose_api_target_env
- from .run_helpers import paginate_with_client
  from .transform import transform
- from .types import JSONDict
- from .types import Timeout
- from .utils import print_json
+ from .utils import maybe_validate
  from .validate import validate
- from .validation.utils import maybe_validate

  # SECTION: EXPORTS ========================================================== #


- __all__ = ['run']
-
-
- # SECTION: TYPED DICTS ====================================================== #
-
+ __all__ = [
+     # Functions
+     'run',
+     'run_pipeline',
+ ]

- class BaseApiHttpEnv(TypedDict, total=False):
-     """
-     Common HTTP request environment for API interactions.
-
-     Fields shared by both source-side and target-side API operations.
-     """

-     # Request details
-     url: Url | None
-     headers: dict[str, str]
-     timeout: Timeout
-
-     # Session
-     session: requests.Session | None
-
-
- class ApiRequestEnv(BaseApiHttpEnv, total=False):
-     """
-     Composed request environment for API sources.
+ # SECTION: CONSTANTS ======================================================== #

-     Returned by ``compose_api_request_env`` (run_helpers) and consumed by the
-     API extract branch. Values are fully merged with endpoint/API defaults and
-     job-level overrides, preserving the original precedence and behavior.
-     """

-     # Client
-     use_endpoints: bool
-     base_url: str | None
-     base_path: str | None
-     endpoints_map: dict[str, str] | None
-     endpoint_key: str | None
+ DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'

-     # Request
-     params: dict[str, Any]
-     pagination: PaginationConfigMap | None
-     sleep_seconds: float

-     # Reliability
-     retry: RetryPolicy | None
-     retry_network_errors: bool
+ # SECTION: INTERNAL FUNCTIONS =============================================== #


- class ApiTargetEnv(BaseApiHttpEnv, total=False):
-     """
-     Composed request environment for API targets.
-
-     Returned by ``compose_api_target_env`` (run_helpers) and consumed by the
-     API load branch. Values are merged from the target object, optional
-     API/endpoint reference, and job-level overrides, preserving original
-     precedence and behavior.
-
-     Notes
-     -----
-     - Precedence for inherited values matches original logic:
-       overrides -> target -> API profile defaults.
-     - Target composition does not include pagination/rate-limit/retry since
-       loads are single-request operations; only headers/timeout/session
-       apply.
+ def _resolve_validation_config(
+     job_obj: Any,
+     cfg: Any,
+ ) -> tuple[bool, dict[str, Any], str, str]:
      """
+     Resolve validation settings for a job with safe defaults.

-     # Request
-     method: str | None
-
-
- class SessionConfig(TypedDict, total=False):
-     """
-     Minimal session configuration schema accepted by this runner.
+     Parameters
+     ----------
+     job_obj : Any
+         Job configuration object.
+     cfg : Any
+         Pipeline configuration object with validations.

-     Keys mirror common requests.Session options; all are optional.
+     Returns
+     -------
+     tuple[bool, dict[str, Any], str, str]
+         Tuple of (enabled, rules, severity, phase).
      """
+     val_ref = job_obj.validate
+     if val_ref is None:
+         return False, {}, 'error', 'before_transform'

-     headers: Mapping[str, Any]
-     params: Mapping[str, Any]
-     auth: Any  # (user, pass) tuple or requests-compatible auth object
-     verify: bool | str
-     cert: Any  # str or (cert, key)
-     proxies: Mapping[str, Any]
-     cookies: Mapping[str, Any]
-     trust_env: bool
-
-
- # SECTION: CONSTANTS ======================================================== #
-
-
- DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
+     rules = cfg.validations.get(val_ref.ruleset, {})
+     severity = (val_ref.severity or 'error').lower()
+     phase = (val_ref.phase or 'before_transform').lower()
+     return True, rules, severity, phase


  # SECTION: FUNCTIONS ======================================================== #
@@ -207,7 +159,7 @@ def run(
          and env.get('endpoint_key')
      ):
          # Construct client using module-level EndpointClient so tests
-         # can monkeypatch this class on etlplus.run.
+         # can monkeypatch this class on etlplus.ops.run.
          ClientClass = EndpointClient  # noqa: N806
          client = ClientClass(
              base_url=cast(str, env['base_url']),
@@ -263,19 +215,10 @@ def run(
          # keep explicit guard for defensive programming.
          raise ValueError(f'Unsupported source type: {stype_raw}')

-     # DRY: unified validation helper (pre/post transform)
-     val_ref = job_obj.validate
-     enabled_validation = val_ref is not None
-     if enabled_validation:
-         # Type narrowing for static checkers
-         assert val_ref is not None
-         rules = cfg.validations.get(val_ref.ruleset, {})
-         severity = (val_ref.severity or 'error').lower()
-         phase = (val_ref.phase or 'before_transform').lower()
-     else:
-         rules = {}
-         severity = 'error'
-         phase = 'before_transform'
+     enabled_validation, rules, severity, phase = _resolve_validation_config(
+         job_obj,
+         cfg,
+     )

      # Pre-transform validation (if configured).
      data = maybe_validate(
@@ -361,3 +304,90 @@ def run(
      # Return the terminal load result directly; callers (e.g., CLI) can wrap
      # it in their own envelope when needed.
      return cast(JSONDict, result)
+
+
+ def run_pipeline(
+     *,
+     source_type: DataConnectorType | str | None = None,
+     source: StrPath | JSONData | None = None,
+     operations: PipelineConfig | None = None,
+     target_type: DataConnectorType | str | None = None,
+     target: StrPath | None = None,
+     file_format: FileFormat | str | None = None,
+     method: HttpMethod | str | None = None,
+     **kwargs: Any,
+ ) -> JSONData:
+     """
+     Run a single extract-transform-load flow without a YAML config.
+
+     Parameters
+     ----------
+     source_type : DataConnectorType | str | None, optional
+         Connector type for extraction. When ``None``, ``source`` is assumed
+         to be pre-loaded data and extraction is skipped.
+     source : StrPath | JSONData | None, optional
+         Data source for extraction or the pre-loaded payload when
+         ``source_type`` is ``None``.
+     operations : PipelineConfig | None, optional
+         Transform configuration passed to :func:`etlplus.ops.transform`.
+     target_type : DataConnectorType | str | None, optional
+         Connector type for loading. When ``None``, load is skipped and the
+         transformed data is returned.
+     target : StrPath | None, optional
+         Target for loading (file path, connection string, or API URL).
+     file_format : FileFormat | str | None, optional
+         File format for file sources/targets (forwarded to extract/load).
+     method : HttpMethod | str | None, optional
+         HTTP method for API loads (forwarded to :func:`etlplus.ops.load`).
+     **kwargs : Any
+         Extra keyword arguments forwarded to extract/load for API options
+         (headers, timeout, session, etc.).
+
+     Returns
+     -------
+     JSONData
+         Transformed data or the load result payload.
+
+     Raises
+     ------
+     TypeError
+         Raised when extracted data is not a dict or list of dicts and no
+         target is specified.
+     ValueError
+         Raised when required source/target inputs are missing.
+     """
+     if source_type is None:
+         if source is None:
+             raise ValueError('source or source_type is required')
+         data = source
+     else:
+         if source is None:
+             raise ValueError('source is required when source_type is set')
+         data = extract(
+             source_type,
+             cast(StrPath, source),
+             file_format=file_format,
+             **kwargs,
+         )
+
+     if operations:
+         data = transform(data, operations)
+
+     if target_type is None:
+         if not isinstance(data, (dict, list)):
+             raise TypeError(
+                 f'Expected data to be dict or list of dicts, '
+                 f'got {type(data).__name__}',
+             )
+         return data
+     if target is None:
+         raise ValueError('target is required when target_type is set')
+
+     return load(
+         data,
+         target_type,
+         target,
+         file_format=file_format,
+         method=method,
+         **kwargs,
+     )
```
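The new `run_pipeline` entry point runs a one-off extract-transform-load flow without a YAML config, replacing the removed TypedDict plumbing with plain keyword arguments. A sketch under stated assumptions: the string connector type `'file'` is an illustrative placeholder, since the concrete `DataConnectorType` values and the `PipelineConfig` shape are defined elsewhere in the package:

```python
from etlplus.ops.run import run_pipeline

# Pre-loaded data, no target: the (optionally transformed) data is returned.
data = run_pipeline(source=[{'id': 1}, {'id': 2}])

# File-to-file flow; 'file' stands in for the real DataConnectorType value,
# and a PipelineConfig mapping would go in `operations` to transform rows.
result = run_pipeline(
    source_type='file',
    source='in/users.csv',
    operations=None,
    target_type='file',
    target='out/users.json',
)
```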