etlplus 0.9.2__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. etlplus/__init__.py +26 -1
  2. etlplus/api/README.md +3 -51
  3. etlplus/api/__init__.py +0 -10
  4. etlplus/api/config.py +28 -39
  5. etlplus/api/endpoint_client.py +3 -3
  6. etlplus/api/pagination/client.py +1 -1
  7. etlplus/api/rate_limiting/config.py +1 -13
  8. etlplus/api/rate_limiting/rate_limiter.py +11 -8
  9. etlplus/api/request_manager.py +6 -11
  10. etlplus/api/transport.py +2 -14
  11. etlplus/api/types.py +6 -96
  12. etlplus/cli/commands.py +43 -76
  13. etlplus/cli/constants.py +1 -1
  14. etlplus/cli/handlers.py +12 -40
  15. etlplus/cli/io.py +2 -2
  16. etlplus/cli/main.py +1 -1
  17. etlplus/cli/state.py +7 -4
  18. etlplus/{workflow → config}/__init__.py +23 -10
  19. etlplus/{workflow → config}/connector.py +44 -58
  20. etlplus/{workflow → config}/jobs.py +32 -105
  21. etlplus/{workflow → config}/pipeline.py +51 -59
  22. etlplus/{workflow → config}/profile.py +5 -8
  23. etlplus/config/types.py +204 -0
  24. etlplus/config/utils.py +120 -0
  25. etlplus/database/ddl.py +1 -1
  26. etlplus/database/engine.py +3 -19
  27. etlplus/database/orm.py +0 -2
  28. etlplus/database/schema.py +1 -1
  29. etlplus/enums.py +266 -0
  30. etlplus/{ops/extract.py → extract.py} +99 -81
  31. etlplus/file.py +652 -0
  32. etlplus/{ops/load.py → load.py} +101 -78
  33. etlplus/{ops/run.py → run.py} +127 -159
  34. etlplus/{api/utils.py → run_helpers.py} +153 -209
  35. etlplus/{ops/transform.py → transform.py} +68 -75
  36. etlplus/types.py +4 -5
  37. etlplus/utils.py +2 -136
  38. etlplus/{ops/validate.py → validate.py} +12 -22
  39. etlplus/validation/__init__.py +44 -0
  40. etlplus/{ops → validation}/utils.py +17 -53
  41. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/METADATA +17 -210
  42. etlplus-0.10.1.dist-info/RECORD +65 -0
  43. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/WHEEL +1 -1
  44. etlplus/README.md +0 -37
  45. etlplus/api/enums.py +0 -51
  46. etlplus/cli/README.md +0 -40
  47. etlplus/database/README.md +0 -48
  48. etlplus/file/README.md +0 -105
  49. etlplus/file/__init__.py +0 -25
  50. etlplus/file/_imports.py +0 -141
  51. etlplus/file/_io.py +0 -160
  52. etlplus/file/accdb.py +0 -78
  53. etlplus/file/arrow.py +0 -78
  54. etlplus/file/avro.py +0 -176
  55. etlplus/file/bson.py +0 -77
  56. etlplus/file/cbor.py +0 -78
  57. etlplus/file/cfg.py +0 -79
  58. etlplus/file/conf.py +0 -80
  59. etlplus/file/core.py +0 -322
  60. etlplus/file/csv.py +0 -79
  61. etlplus/file/dat.py +0 -78
  62. etlplus/file/dta.py +0 -77
  63. etlplus/file/duckdb.py +0 -78
  64. etlplus/file/enums.py +0 -343
  65. etlplus/file/feather.py +0 -111
  66. etlplus/file/fwf.py +0 -77
  67. etlplus/file/gz.py +0 -123
  68. etlplus/file/hbs.py +0 -78
  69. etlplus/file/hdf5.py +0 -78
  70. etlplus/file/ini.py +0 -79
  71. etlplus/file/ion.py +0 -78
  72. etlplus/file/jinja2.py +0 -78
  73. etlplus/file/json.py +0 -98
  74. etlplus/file/log.py +0 -78
  75. etlplus/file/mat.py +0 -78
  76. etlplus/file/mdb.py +0 -78
  77. etlplus/file/msgpack.py +0 -78
  78. etlplus/file/mustache.py +0 -78
  79. etlplus/file/nc.py +0 -78
  80. etlplus/file/ndjson.py +0 -108
  81. etlplus/file/numbers.py +0 -75
  82. etlplus/file/ods.py +0 -79
  83. etlplus/file/orc.py +0 -111
  84. etlplus/file/parquet.py +0 -113
  85. etlplus/file/pb.py +0 -78
  86. etlplus/file/pbf.py +0 -77
  87. etlplus/file/properties.py +0 -78
  88. etlplus/file/proto.py +0 -77
  89. etlplus/file/psv.py +0 -79
  90. etlplus/file/rda.py +0 -78
  91. etlplus/file/rds.py +0 -78
  92. etlplus/file/sas7bdat.py +0 -78
  93. etlplus/file/sav.py +0 -77
  94. etlplus/file/sqlite.py +0 -78
  95. etlplus/file/stub.py +0 -84
  96. etlplus/file/sylk.py +0 -77
  97. etlplus/file/tab.py +0 -81
  98. etlplus/file/toml.py +0 -78
  99. etlplus/file/tsv.py +0 -80
  100. etlplus/file/txt.py +0 -102
  101. etlplus/file/vm.py +0 -78
  102. etlplus/file/wks.py +0 -77
  103. etlplus/file/xls.py +0 -88
  104. etlplus/file/xlsm.py +0 -79
  105. etlplus/file/xlsx.py +0 -99
  106. etlplus/file/xml.py +0 -185
  107. etlplus/file/xpt.py +0 -78
  108. etlplus/file/yaml.py +0 -95
  109. etlplus/file/zip.py +0 -175
  110. etlplus/file/zsav.py +0 -77
  111. etlplus/ops/README.md +0 -50
  112. etlplus/ops/__init__.py +0 -61
  113. etlplus/templates/README.md +0 -46
  114. etlplus/workflow/README.md +0 -52
  115. etlplus/workflow/dag.py +0 -105
  116. etlplus/workflow/types.py +0 -115
  117. etlplus-0.9.2.dist-info/RECORD +0 -134
  118. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/entry_points.txt +0 -0
  119. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/licenses/LICENSE +0 -0
  120. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/top_level.txt +0 -0
etlplus/{ops/load.py → load.py}

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.ops.load` module.
+:mod:`etlplus.load` module.
 
 Helpers to load data into files, databases, and REST APIs.
 """
```
```diff
@@ -12,16 +12,20 @@ from pathlib import Path
 from typing import Any
 from typing import cast
 
-from ..api import HttpMethod
-from ..api.utils import resolve_request
-from ..enums import DataConnectorType
-from ..file import File
-from ..file import FileFormat
-from ..types import JSONData
-from ..types import JSONDict
-from ..types import JSONList
-from ..types import StrPath
-from ..utils import count_records
+import requests  # type: ignore[import]
+
+from .enums import DataConnectorType
+from .enums import FileFormat
+from .enums import HttpMethod
+from .enums import coerce_data_connector_type
+from .enums import coerce_file_format
+from .enums import coerce_http_method
+from .file import File
+from .types import JSONData
+from .types import JSONDict
+from .types import JSONList
+from .types import StrPath
+from .utils import count_records
 
 # SECTION: INTERNAL FUNCTIONS ============================================== #
 
```
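This import hunk is the heart of the 0.9 → 0.10 restructuring: the `etlplus.ops.*` modules and the `etlplus.file` package collapse into flat top-level modules, and enum coercion moves from classmethods (`FileFormat.coerce`) to free functions (`coerce_file_format`, etc.) in the new `etlplus.enums`. A hedged caller-side sketch, assuming the package-absolute equivalents of the relative imports shown above:

```python
# Caller-side imports against 0.10.1; 0.9.2 equivalents noted in comments.
# Absolute paths are inferred from the relative imports in the hunk above.
from etlplus.enums import FileFormat          # was etlplus.file.FileFormat
from etlplus.enums import HttpMethod          # was etlplus.api.HttpMethod
from etlplus.enums import coerce_file_format  # was FileFormat.coerce(...)
from etlplus.file import File                 # file/ package is now file.py
from etlplus.load import load_to_file         # was etlplus.ops.load
```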
```diff
@@ -68,7 +72,7 @@ def _parse_json_string(
 # SECTION: FUNCTIONS ======================================================== #
 
 
-# -- Helpers -- #
+# -- Data Loading -- #
 
 
 def load_data(
```
```diff
@@ -97,7 +101,7 @@ def load_data(
         return cast(JSONData, source)
 
     if isinstance(source, Path):
-        return File(source, FileFormat.JSON).read()
+        return File(source, FileFormat.JSON).read_json()
 
     if isinstance(source, str):
         # Special case: '-' means read JSON from STDIN (Unix convention).
@@ -107,7 +111,7 @@
         candidate = Path(source)
         if candidate.exists():
             try:
-                return File(candidate, FileFormat.JSON).read()
+                return File(candidate, FileFormat.JSON).read_json()
             except (OSError, json.JSONDecodeError, ValueError):
                 # Fall back to treating the string as raw JSON content.
                 pass
```
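Beyond the rename from `File.read()` to the format-explicit `File.read_json()`, the surrounding context documents `load_data()`'s input contract: dicts and lists pass through, `Path` objects and existing file paths are read as JSON, `'-'` reads STDIN, and anything else is parsed as raw JSON text. A hedged usage sketch (the file path is hypothetical):

```python
from etlplus.load import load_data

load_data([{'id': 1}, {'id': 2}])  # dict/list: returned as-is
load_data('data/users.json')       # existing path: File(...).read_json()
load_data('{"id": 3}')             # fallback: parsed as raw JSON content
# load_data('-') reads JSON from STDIN, per the Unix convention noted above.
```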
```diff
@@ -118,59 +122,58 @@
     )
 
 
-def load_to_api(
+# -- File Loading -- #
+
+
+def load_to_file(
     data: JSONData,
-    url: str,
-    method: HttpMethod | str,
-    **kwargs: Any,
+    file_path: StrPath,
+    file_format: FileFormat | str | None = None,
 ) -> JSONDict:
     """
-    Load data to a REST API.
+    Persist data to a local file.
 
     Parameters
     ----------
     data : JSONData
-        Data to send as JSON.
-    url : str
-        API endpoint URL.
-    method : HttpMethod | str
-        HTTP method to use.
-    **kwargs : Any
-        Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
-        When omitted, ``timeout`` defaults to 10 seconds.
+        Data to write.
+    file_path : StrPath
+        Target file path.
+    file_format : FileFormat | str | None, optional
+        Output format. If omitted (None), the format is inferred from the
+        filename extension.
 
     Returns
     -------
     JSONDict
-        Result dictionary including response payload or text.
+        Result dictionary with status and record count.
     """
-    # Apply a conservative timeout to guard against hanging requests.
-    timeout = kwargs.pop('timeout', 10.0)
-    session = kwargs.pop('session', None)
-    request_callable, timeout, http_method = resolve_request(
-        method,
-        session=session,
-        timeout=timeout,
-    )
-    response = request_callable(url, json=data, timeout=timeout, **kwargs)
-    response.raise_for_status()
+    path = Path(file_path)
+    path.parent.mkdir(parents=True, exist_ok=True)
 
-    # Try JSON first, fall back to text.
-    try:
-        payload: Any = response.json()
-    except ValueError:
-        payload = response.text
+    # If no explicit format is provided, let File infer from extension.
+    if file_format is None:
+        records = File(path).write(data)
+        ext = path.suffix.lstrip('.').lower()
+        fmt = coerce_file_format(ext) if ext else FileFormat.JSON
+    else:
+        fmt = coerce_file_format(file_format)
+        records = File(path, fmt).write(data)
+    if fmt is FileFormat.CSV and records == 0:
+        message = 'No data to write'
+    else:
+        message = f'Data loaded to {path}'
 
     return {
         'status': 'success',
-        'status_code': response.status_code,
-        'message': f'Data loaded to {url}',
-        'response': payload,
-        'records': count_records(data),
-        'method': http_method.value.upper(),
+        'message': message,
+        'records': records,
     }
 
 
+# -- Database Loading (Placeholder) -- #
+
+
 def load_to_database(
     data: JSONData,
     connection_string: str,
```
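`load_to_file()` moves from `etlplus.ops.load` to `etlplus.load` and swaps `FileFormat.coerce` for `coerce_file_format`, but its behavior is otherwise intact: parent directories are created, the format falls back to JSON when no extension is available, and an empty CSV write reports 'No data to write'. A hedged usage sketch (output path hypothetical):

```python
from etlplus.load import load_to_file

result = load_to_file([{'id': 1}], 'out/users.csv', file_format='csv')
# Expected shape, per the return statement above:
# {'status': 'success', 'message': 'Data loaded to out/users.csv', 'records': 1}
```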
```diff
@@ -206,49 +209,69 @@ def load_to_database(
     }
 
 
-def load_to_file(
+# -- REST API Loading -- #
+
+
+def load_to_api(
     data: JSONData,
-    file_path: StrPath,
-    file_format: FileFormat | str | None = None,
+    url: str,
+    method: HttpMethod | str,
+    **kwargs: Any,
 ) -> JSONDict:
     """
-    Persist data to a local file.
+    Load data to a REST API.
 
     Parameters
     ----------
     data : JSONData
-        Data to write.
-    file_path : StrPath
-        Target file path.
-    file_format : FileFormat | str | None, optional
-        Output format. If omitted (None), the format is inferred from the
-        filename extension.
+        Data to send as JSON.
+    url : str
+        API endpoint URL.
+    method : HttpMethod | str
+        HTTP method to use.
+    **kwargs : Any
+        Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
 
     Returns
     -------
     JSONDict
-        Result dictionary with status and record count.
+        Result dictionary including response payload or text.
+
+    Raises
+    ------
+    TypeError
+        If the session object is not valid.
     """
-    path = Path(file_path)
-    path.parent.mkdir(parents=True, exist_ok=True)
+    http_method = coerce_http_method(method)
 
-    # If no explicit format is provided, let File infer from extension.
-    if file_format is None:
-        records = File(path).write(data)
-        ext = path.suffix.lstrip('.').lower()
-        fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
-    else:
-        fmt = FileFormat.coerce(file_format)
-        records = File(path, fmt).write(data)
-    if fmt is FileFormat.CSV and records == 0:
-        message = 'No data to write'
-    else:
-        message = f'Data loaded to {path}'
+    # Apply a conservative timeout to guard against hanging requests.
+    timeout = kwargs.pop('timeout', 10.0)
+    session = kwargs.pop('session', None)
+    requester = session or requests
+
+    request_callable = getattr(requester, http_method.value, None)
+    if not callable(request_callable):
+        raise TypeError(
+            'Session object must supply a '
+            f'callable "{http_method.value}" method',
+        )
+
+    response = request_callable(url, json=data, timeout=timeout, **kwargs)
+    response.raise_for_status()
+
+    # Try JSON first, fall back to text.
+    try:
+        payload: Any = response.json()
+    except ValueError:
+        payload = response.text
 
     return {
         'status': 'success',
-        'message': message,
-        'records': records,
+        'status_code': response.status_code,
+        'message': f'Data loaded to {url}',
+        'response': payload,
+        'records': count_records(data),
+        'method': http_method.value.upper(),
     }
 
 
```
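`load_to_api()` drops the `resolve_request` helper: it now coerces the verb with `coerce_http_method`, then looks that verb up as an attribute on the supplied session (or on the `requests` module), raising `TypeError` if no such callable exists. The 10-second default timeout survives the rewrite. A hedged sketch (URL hypothetical):

```python
import requests

from etlplus.load import load_to_api

# Any object with a callable attribute named after the coerced verb works
# as a session; a real requests.Session qualifies.
with requests.Session() as session:
    result = load_to_api(
        [{'id': 1}],
        'https://api.example.com/v1/items',
        'post',
        session=session,
        timeout=5.0,  # overrides the 10-second default
    )
# result includes 'status_code', 'response', 'records', and 'method' ('POST').
```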
```diff
@@ -293,7 +316,7 @@ def load(
     """
    data = load_data(source)
 
-    match DataConnectorType.coerce(target_type):
+    match coerce_data_connector_type(target_type):
         case DataConnectorType.FILE:
             # Prefer explicit format if provided, else infer from filename.
             return load_to_file(data, target, file_format)
@@ -308,6 +331,6 @@ def load(
                 **kwargs,
             )
         case _:
-            # :meth:`coerce` already raises for invalid connector types, but
-            # keep explicit guard for defensive programming.
+            # `coerce_data_connector_type` covers invalid entries, but keep
+            # explicit guard.
             raise ValueError(f'Invalid target type: {target_type}')
```
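`load()` now routes through `coerce_data_connector_type`, which raises on unknown connector strings before the defensive `case _` can fire. A hedged dispatch sketch (targets hypothetical):

```python
from etlplus.load import load

data = [{'id': 1}]
load(data, 'file', 'out/users.json')                                # -> load_to_file
load(data, 'api', 'https://api.example.com/items', method='post')  # -> load_to_api
load(data, 'database', 'sqlite:///out/etl.db')                     # -> load_to_database (placeholder)
```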
etlplus/{ops/run.py → run.py}

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.ops.run` module.
+:mod:`etlplus.run` module.
 
 A module for running ETL jobs defined in YAML configurations.
 """
```
```diff
@@ -9,78 +9,127 @@ from __future__ import annotations
 from collections.abc import Mapping
 from typing import Any
 from typing import Final
+from typing import TypedDict
 from typing import cast
 from urllib.parse import urlsplit
 from urllib.parse import urlunsplit
 
-from ..api import EndpointClient  # noqa: F401 (re-exported for tests)
-from ..api import HttpMethod
-from ..api import PaginationConfigMap
-from ..api import RequestOptions
-from ..api import compose_api_request_env
-from ..api import compose_api_target_env
-from ..api import paginate_with_client
-from ..enums import DataConnectorType
-from ..file import FileFormat
-from ..types import JSONData
-from ..types import JSONDict
-from ..types import PipelineConfig
-from ..types import StrPath
-from ..types import Timeout
-from ..utils import print_json
-from ..workflow import load_pipeline_config
+import requests  # type: ignore[import]
+
+from .api import EndpointClient  # noqa: F401 (re-exported for tests)
+from .api import PaginationConfigMap
+from .api import RequestOptions
+from .api import RetryPolicy
+from .api import Url
+from .config import load_pipeline_config
+from .enums import DataConnectorType
+from .enums import coerce_data_connector_type
 from .extract import extract
 from .load import load
+from .run_helpers import compose_api_request_env
+from .run_helpers import compose_api_target_env
+from .run_helpers import paginate_with_client
 from .transform import transform
-from .utils import maybe_validate
+from .types import JSONDict
+from .types import Timeout
+from .utils import print_json
 from .validate import validate
+from .validation.utils import maybe_validate
 
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
-    # Functions
-    'run',
-    'run_pipeline',
-]
+__all__ = ['run']
 
 
-# SECTION: CONSTANTS ======================================================== #
+# SECTION: TYPED DICTS ====================================================== #
 
 
-DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
+class BaseApiHttpEnv(TypedDict, total=False):
+    """
+    Common HTTP request environment for API interactions.
 
+    Fields shared by both source-side and target-side API operations.
+    """
+
+    # Request details
+    url: Url | None
+    headers: dict[str, str]
+    timeout: Timeout
 
-# SECTION: INTERNAL FUNCTIONS =============================================== #
+    # Session
+    session: requests.Session | None
 
 
-def _resolve_validation_config(
-    job_obj: Any,
-    cfg: Any,
-) -> tuple[bool, dict[str, Any], str, str]:
+class ApiRequestEnv(BaseApiHttpEnv, total=False):
     """
-    Resolve validation settings for a job with safe defaults.
+    Composed request environment for API sources.
 
-    Parameters
-    ----------
-    job_obj : Any
-        Job configuration object.
-    cfg : Any
-        Pipeline configuration object with validations.
+    Returned by ``compose_api_request_env`` (run_helpers) and consumed by the
+    API extract branch. Values are fully merged with endpoint/API defaults and
+    job-level overrides, preserving the original precedence and behavior.
+    """
 
-    Returns
-    -------
-    tuple[bool, dict[str, Any], str, str]
-        Tuple of (enabled, rules, severity, phase).
+    # Client
+    use_endpoints: bool
+    base_url: str | None
+    base_path: str | None
+    endpoints_map: dict[str, str] | None
+    endpoint_key: str | None
+
+    # Request
+    params: dict[str, Any]
+    pagination: PaginationConfigMap | None
+    sleep_seconds: float
+
+    # Reliability
+    retry: RetryPolicy | None
+    retry_network_errors: bool
+
+
+class ApiTargetEnv(BaseApiHttpEnv, total=False):
     """
-    val_ref = job_obj.validate
-    if val_ref is None:
-        return False, {}, 'error', 'before_transform'
+    Composed request environment for API targets.
+
+    Returned by ``compose_api_target_env`` (run_helpers) and consumed by the
+    API load branch. Values are merged from the target object, optional
+    API/endpoint reference, and job-level overrides, preserving original
+    precedence and behavior.
+
+    Notes
+    -----
+    - Precedence for inherited values matches original logic:
+      overrides -> target -> API profile defaults.
+    - Target composition does not include pagination/rate-limit/retry since
+      loads are single-request operations; only headers/timeout/session
+      apply.
+    """
+
+    # Request
+    method: str | None
+
+
+class SessionConfig(TypedDict, total=False):
+    """
+    Minimal session configuration schema accepted by this runner.
+
+    Keys mirror common requests.Session options; all are optional.
+    """
+
+    headers: Mapping[str, Any]
+    params: Mapping[str, Any]
+    auth: Any  # (user, pass) tuple or requests-compatible auth object
+    verify: bool | str
+    cert: Any  # str or (cert, key)
+    proxies: Mapping[str, Any]
+    cookies: Mapping[str, Any]
+    trust_env: bool
+
+
+# SECTION: CONSTANTS ======================================================== #
+
 
-    rules = cfg.validations.get(val_ref.ruleset, {})
-    severity = (val_ref.severity or 'error').lower()
-    phase = (val_ref.phase or 'before_transform').lower()
-    return True, rules, severity, phase
+DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
 
 
 # SECTION: FUNCTIONS ======================================================== #
```
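The three new TypedDicts formalize what was previously implicit: `compose_api_request_env`/`compose_api_target_env` return these shapes, and `SessionConfig` names the `requests.Session` options the runner accepts. A hedged sketch of a `SessionConfig` and one plausible way to apply it, assuming the class remains importable from `etlplus.run` (it is not re-exported via `__all__`):

```python
import requests

from etlplus.run import SessionConfig

# total=False: any subset of keys is valid. The token is a placeholder.
cfg: SessionConfig = {
    'headers': {'Authorization': 'Bearer <TOKEN>'},
    'verify': True,
    'trust_env': False,
}

# One plausible application onto a requests.Session; not necessarily how
# the runner wires it internally.
session = requests.Session()
session.headers.update(cfg.get('headers', {}))
session.verify = cfg.get('verify', True)
session.trust_env = cfg.get('trust_env', True)
```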
```diff
@@ -136,7 +185,8 @@ def run(
 
     data: Any
     stype_raw = getattr(source_obj, 'type', None)
-    match DataConnectorType.coerce(stype_raw or ''):
+    stype = coerce_data_connector_type(stype_raw or '')
+    match stype:
         case DataConnectorType.FILE:
             path = getattr(source_obj, 'path', None)
             fmt = ex_opts.get('format') or getattr(
```
```diff
@@ -159,15 +209,12 @@
                 and env.get('endpoint_key')
             ):
                 # Construct client using module-level EndpointClient so tests
-                # can monkeypatch this class on etlplus.ops.run.
+                # can monkeypatch this class on etlplus.run.
                 ClientClass = EndpointClient  # noqa: N806
                 client = ClientClass(
-                    base_url=cast(str, env.get('base_url')),
+                    base_url=cast(str, env['base_url']),
                     base_path=cast(str | None, env.get('base_path')),
-                    endpoints=cast(
-                        dict[str, str],
-                        env.get('endpoints_map', {}),
-                    ),
+                    endpoints=cast(dict[str, str], env['endpoints_map']),
                     retry=env.get('retry'),
                     retry_network_errors=bool(
                         env.get('retry_network_errors', False),
@@ -176,7 +223,7 @@
                 )
                 data = paginate_with_client(
                     client,
-                    cast(str, env.get('endpoint_key')),
+                    cast(str, env['endpoint_key']),
                     env.get('params'),
                     env.get('headers'),
                     env.get('timeout'),
```
```diff
@@ -214,14 +261,23 @@ def run(
                 sleep_seconds=cast(float, env.get('sleep_seconds', 0.0)),
             )
         case _:
-            # :meth:`coerce` already raises for invalid connector types, but
-            # keep explicit guard for defensive programming.
+            # ``coerce_data_connector_type`` already raises for invalid
+            # connector types; this branch is defensive only.
             raise ValueError(f'Unsupported source type: {stype_raw}')
 
-    enabled_validation, rules, severity, phase = _resolve_validation_config(
-        job_obj,
-        cfg,
-    )
+    # DRY: unified validation helper (pre/post transform)
+    val_ref = job_obj.validate
+    enabled_validation = val_ref is not None
+    if enabled_validation:
+        # Type narrowing for static checkers
+        assert val_ref is not None
+        rules = cfg.validations.get(val_ref.ruleset, {})
+        severity = (val_ref.severity or 'error').lower()
+        phase = (val_ref.phase or 'before_transform').lower()
+    else:
+        rules = {}
+        severity = 'error'
+        phase = 'before_transform'
 
     # Pre-transform validation (if configured).
     data = maybe_validate(
```
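`_resolve_validation_config` is gone as a separate helper, but its logic and defaults (severity `'error'`, phase `'before_transform'`) are inlined verbatim. Restated as a standalone sketch for readers comparing the two versions:

```python
def resolve_validation(job_obj, cfg):
    """Hypothetical free-function restatement of the inlined logic above."""
    val_ref = job_obj.validate
    if val_ref is None:
        return False, {}, 'error', 'before_transform'
    rules = cfg.validations.get(val_ref.ruleset, {})
    severity = (val_ref.severity or 'error').lower()
    phase = (val_ref.phase or 'before_transform').lower()
    return True, rules, severity, phase
```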
```diff
@@ -262,7 +318,8 @@ def run(
     overrides = job_obj.load.overrides or {}
 
     ttype_raw = getattr(target_obj, 'type', None)
-    match DataConnectorType.coerce(ttype_raw or ''):
+    ttype = coerce_data_connector_type(ttype_raw or '')
+    match ttype:
         case DataConnectorType.FILE:
             path = overrides.get('path') or getattr(target_obj, 'path', None)
             fmt = overrides.get('format') or getattr(
```
279
336
  if not url_t:
280
337
  raise ValueError('API target missing "url"')
281
338
  kwargs_t: dict[str, Any] = {}
282
- headers = env_t.get('headers')
283
- if headers:
284
- kwargs_t['headers'] = cast(dict[str, str], headers)
339
+ if env_t.get('headers'):
340
+ kwargs_t['headers'] = cast(dict[str, str], env_t['headers'])
285
341
  if env_t.get('timeout') is not None:
286
- kwargs_t['timeout'] = env_t.get('timeout')
287
- session = env_t.get('session')
288
- if session is not None:
289
- kwargs_t['session'] = session
342
+ kwargs_t['timeout'] = env_t['timeout']
343
+ if env_t.get('session') is not None:
344
+ kwargs_t['session'] = env_t['session']
290
345
  result = load(
291
346
  data,
292
347
  'api',
@@ -302,97 +357,10 @@ def run(
302
357
  )
303
358
  result = load(data, 'database', str(conn))
304
359
  case _:
305
- # :meth:`coerce` already raises for invalid connector types, but
306
- # keep explicit guard for defensive programming.
360
+ # ``coerce_data_connector_type`` already raises for invalid
361
+ # connector types; this branch is defensive only.
307
362
  raise ValueError(f'Unsupported target type: {ttype_raw}')
308
363
 
309
364
  # Return the terminal load result directly; callers (e.g., CLI) can wrap
310
365
  # it in their own envelope when needed.
311
366
  return cast(JSONDict, result)
312
-
313
-
314
- def run_pipeline(
315
- *,
316
- source_type: DataConnectorType | str | None = None,
317
- source: StrPath | JSONData | None = None,
318
- operations: PipelineConfig | None = None,
319
- target_type: DataConnectorType | str | None = None,
320
- target: StrPath | None = None,
321
- file_format: FileFormat | str | None = None,
322
- method: HttpMethod | str | None = None,
323
- **kwargs: Any,
324
- ) -> JSONData:
325
- """
326
- Run a single extract-transform-load flow without a YAML config.
327
-
328
- Parameters
329
- ----------
330
- source_type : DataConnectorType | str | None, optional
331
- Connector type for extraction. When ``None``, ``source`` is assumed
332
- to be pre-loaded data and extraction is skipped.
333
- source : StrPath | JSONData | None, optional
334
- Data source for extraction or the pre-loaded payload when
335
- ``source_type`` is ``None``.
336
- operations : PipelineConfig | None, optional
337
- Transform configuration passed to :func:`etlplus.ops.transform`.
338
- target_type : DataConnectorType | str | None, optional
339
- Connector type for loading. When ``None``, load is skipped and the
340
- transformed data is returned.
341
- target : StrPath | None, optional
342
- Target for loading (file path, connection string, or API URL).
343
- file_format : FileFormat | str | None, optional
344
- File format for file sources/targets (forwarded to extract/load).
345
- method : HttpMethod | str | None, optional
346
- HTTP method for API loads (forwarded to :func:`etlplus.ops.load`).
347
- **kwargs : Any
348
- Extra keyword arguments forwarded to extract/load for API options
349
- (headers, timeout, session, etc.).
350
-
351
- Returns
352
- -------
353
- JSONData
354
- Transformed data or the load result payload.
355
-
356
- Raises
357
- ------
358
- TypeError
359
- Raised when extracted data is not a dict or list of dicts and no
360
- target is specified.
361
- ValueError
362
- Raised when required source/target inputs are missing.
363
- """
364
- if source_type is None:
365
- if source is None:
366
- raise ValueError('source or source_type is required')
367
- data = source
368
- else:
369
- if source is None:
370
- raise ValueError('source is required when source_type is set')
371
- data = extract(
372
- source_type,
373
- cast(StrPath, source),
374
- file_format=file_format,
375
- **kwargs,
376
- )
377
-
378
- if operations:
379
- data = transform(data, operations)
380
-
381
- if target_type is None:
382
- if not isinstance(data, (dict, list)):
383
- raise TypeError(
384
- f'Expected data to be dict or list of dicts, '
385
- f'got {type(data).__name__}',
386
- )
387
- return data
388
- if target is None:
389
- raise ValueError('target is required when target_type is set')
390
-
391
- return load(
392
- data,
393
- target_type,
394
- target,
395
- file_format=file_format,
396
- method=method,
397
- **kwargs,
398
- )
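The removal of `run_pipeline()` (and the shrinking of `__all__` to `['run']`) is the one breaking API change in this diff: the YAML-free, one-shot ETL entry point no longer exists. Since its body simply chained `extract` → `transform` → `load`, callers can recover the behavior by composing the flat-layout functions directly. A hedged migration sketch (paths and the transform config are hypothetical):

```python
from etlplus.extract import extract
from etlplus.load import load
from etlplus.transform import transform

operations = {}  # PipelineConfig mapping; its schema is not shown in this diff

data = extract('file', 'in/users.json', file_format='json')
if operations:  # run_pipeline skipped transform when no operations were given
    data = transform(data, operations)
result = load(data, 'file', 'out/users.csv', file_format='csv')
```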