etlplus 0.12.13__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  """
2
- :mod:`etlplus.extract` module.
2
+ :mod:`etlplus.ops.extract` module.
3
3
 
4
4
  Helpers to extract data from files, databases, and REST APIs.
5
5
  """
@@ -10,56 +10,81 @@ from pathlib import Path
10
10
  from typing import Any
11
11
  from typing import cast
12
12
 
13
- import requests # type: ignore[import]
14
-
15
- from .enums import DataConnectorType
16
- from .enums import HttpMethod
17
- from .file import File
18
- from .file import FileFormat
19
- from .types import JSONData
20
- from .types import JSONDict
21
- from .types import JSONList
22
- from .types import StrPath
13
+ from ..api.utils import resolve_request
14
+ from ..enums import DataConnectorType
15
+ from ..enums import HttpMethod
16
+ from ..file import File
17
+ from ..file import FileFormat
18
+ from ..types import JSONData
19
+ from ..types import JSONDict
20
+ from ..types import JSONList
21
+ from ..types import StrPath
23
22
 
24
23
  # SECTION: FUNCTIONS ======================================================== #
25
24
 
26
25
 
27
- # -- File Extraction -- #
28
-
29
-
30
- def extract_from_file(
31
- file_path: StrPath,
32
- file_format: FileFormat | str | None = FileFormat.JSON,
26
+ def extract_from_api(
27
+ url: str,
28
+ method: HttpMethod | str = HttpMethod.GET,
29
+ **kwargs: Any,
33
30
  ) -> JSONData:
34
31
  """
35
- Extract (semi-)structured data from a local file.
32
+ Extract data from a REST API.
36
33
 
37
34
  Parameters
38
35
  ----------
39
- file_path : StrPath
40
- Source file path.
41
- file_format : FileFormat | str | None, optional
42
- File format to parse. If ``None``, infer from the filename
43
- extension. Defaults to `'json'` for backward compatibility when
44
- explicitly provided.
36
+ url : str
37
+ API endpoint URL.
38
+ method : HttpMethod | str, optional
39
+ HTTP method to use. Defaults to ``GET``.
40
+ **kwargs : Any
41
+ Extra arguments forwarded to the underlying ``requests`` call
42
+ (for example, ``timeout``). To use a pre-configured
43
+ :class:`requests.Session`, provide it via ``session``.
44
+ When omitted, ``timeout`` defaults to 10 seconds.
45
45
 
46
46
  Returns
47
47
  -------
48
48
  JSONData
49
- Parsed data as a mapping or a list of mappings.
50
- """
51
- path = Path(file_path)
52
-
53
- # If no explicit format is provided, let File infer from extension.
54
- if file_format is None:
55
- return File(path, None).read()
56
- fmt = FileFormat.coerce(file_format)
49
+ Parsed JSON payload, or a fallback object with raw text.
57
50
 
58
- # Let file module perform existence and format validation.
59
- return File(path, fmt).read()
51
+ Raises
52
+ ------
53
+ TypeError
54
+ If a provided ``session`` does not expose the required HTTP
55
+ method (for example, ``get``).
56
+ """
57
+ timeout = kwargs.pop('timeout', None)
58
+ session = kwargs.pop('session', None)
59
+ request_callable, timeout, _ = resolve_request(
60
+ method,
61
+ session=session,
62
+ timeout=timeout,
63
+ )
64
+ response = request_callable(url, timeout=timeout, **kwargs)
65
+ response.raise_for_status()
60
66
 
67
+ content_type = response.headers.get('content-type', '').lower()
68
+ if 'application/json' in content_type:
69
+ try:
70
+ payload: Any = response.json()
71
+ except ValueError:
72
+ # Malformed JSON despite content-type; fall back to text
73
+ return {
74
+ 'content': response.text,
75
+ 'content_type': content_type,
76
+ }
77
+ if isinstance(payload, dict):
78
+ return cast(JSONDict, payload)
79
+ if isinstance(payload, list):
80
+ if all(isinstance(x, dict) for x in payload):
81
+ return cast(JSONList, payload)
82
+ # Coerce non-dict array items into objects for consistency
83
+ return [{'value': x} for x in payload]
84
+ # Fallback: wrap scalar JSON
85
+ return {'value': payload}
61
86
 
62
- # -- Database Extraction (Placeholder) -- #
87
+ return {'content': response.text, 'content_type': content_type}
63
88
 
64
89
 
65
90
  def extract_from_database(
@@ -94,77 +119,36 @@ def extract_from_database(
94
119
  ]
95
120
 
96
121
 
97
- # -- REST API Extraction -- #
98
-
99
-
100
- def extract_from_api(
101
- url: str,
102
- method: HttpMethod | str = HttpMethod.GET,
103
- **kwargs: Any,
122
+ def extract_from_file(
123
+ file_path: StrPath,
124
+ file_format: FileFormat | str | None = FileFormat.JSON,
104
125
  ) -> JSONData:
105
126
  """
106
- Extract data from a REST API.
127
+ Extract (semi-)structured data from a local file.
107
128
 
108
129
  Parameters
109
130
  ----------
110
- url : str
111
- API endpoint URL.
112
- method : HttpMethod | str, optional
113
- HTTP method to use. Defaults to ``GET``.
114
- **kwargs : Any
115
- Extra arguments forwarded to the underlying ``requests`` call
116
- (for example, ``timeout``). To use a pre-configured
117
- :class:`requests.Session`, provide it via ``session``.
131
+ file_path : StrPath
132
+ Source file path.
133
+ file_format : FileFormat | str | None, optional
134
+ File format to parse. If ``None``, infer from the filename
135
+ extension. Defaults to `'json'` for backward compatibility when
136
+ explicitly provided.
118
137
 
119
138
  Returns
120
139
  -------
121
140
  JSONData
122
- Parsed JSON payload, or a fallback object with raw text.
123
-
124
- Raises
125
- ------
126
- TypeError
127
- If a provided ``session`` does not expose the required HTTP
128
- method (for example, ``get``).
141
+ Parsed data as a mapping or a list of mappings.
129
142
  """
130
- http_method = HttpMethod.coerce(method)
131
-
132
- # Apply a conservative timeout to guard against hanging requests.
133
- timeout = kwargs.pop('timeout', 10.0)
134
- session = kwargs.pop('session', None)
135
- requester = session or requests
136
-
137
- request_callable = getattr(requester, http_method.value, None)
138
- if not callable(request_callable):
139
- raise TypeError(
140
- 'Session object must supply a callable'
141
- f'"{http_method.value}" method',
142
- )
143
-
144
- response = request_callable(url, timeout=timeout, **kwargs)
145
- response.raise_for_status()
143
+ path = Path(file_path)
146
144
 
147
- content_type = response.headers.get('content-type', '').lower()
148
- if 'application/json' in content_type:
149
- try:
150
- payload: Any = response.json()
151
- except ValueError:
152
- # Malformed JSON despite content-type; fall back to text
153
- return {
154
- 'content': response.text,
155
- 'content_type': content_type,
156
- }
157
- if isinstance(payload, dict):
158
- return cast(JSONDict, payload)
159
- if isinstance(payload, list):
160
- if all(isinstance(x, dict) for x in payload):
161
- return cast(JSONList, payload)
162
- # Coerce non-dict array items into objects for consistency
163
- return [{'value': x} for x in payload]
164
- # Fallback: wrap scalar JSON
165
- return {'value': payload}
145
+ # If no explicit format is provided, let File infer from extension.
146
+ if file_format is None:
147
+ return File(path, None).read()
148
+ fmt = FileFormat.coerce(file_format)
166
149
 
167
- return {'content': response.text, 'content_type': content_type}
150
+ # Let file module perform existence and format validation.
151
+ return File(path, fmt).read()
168
152
 
169
153
 
170
154
  # -- Orchestration -- #
@@ -1,5 +1,5 @@
1
1
  """
2
- :mod:`etlplus.load` module.
2
+ :mod:`etlplus.ops.load` module.
3
3
 
4
4
  Helpers to load data into files, databases, and REST APIs.
5
5
  """
@@ -12,17 +12,16 @@ from pathlib import Path
12
12
  from typing import Any
13
13
  from typing import cast
14
14
 
15
- import requests # type: ignore[import]
16
-
17
- from .enums import DataConnectorType
18
- from .enums import HttpMethod
19
- from .file import File
20
- from .file import FileFormat
21
- from .types import JSONData
22
- from .types import JSONDict
23
- from .types import JSONList
24
- from .types import StrPath
25
- from .utils import count_records
15
+ from ..api.utils import resolve_request
16
+ from ..enums import DataConnectorType
17
+ from ..enums import HttpMethod
18
+ from ..file import File
19
+ from ..file import FileFormat
20
+ from ..types import JSONData
21
+ from ..types import JSONDict
22
+ from ..types import JSONList
23
+ from ..types import StrPath
24
+ from ..utils import count_records
26
25
 
27
26
  # SECTION: INTERNAL FUNCTIONS ============================================== #
28
27
 
@@ -69,7 +68,7 @@ def _parse_json_string(
69
68
  # SECTION: FUNCTIONS ======================================================== #
70
69
 
71
70
 
72
- # -- Data Loading -- #
71
+ # -- Helpers -- #
73
72
 
74
73
 
75
74
  def load_data(
@@ -119,58 +118,59 @@ def load_data(
119
118
  )
120
119
 
121
120
 
122
- # -- File Loading -- #
123
-
124
-
125
- def load_to_file(
121
+ def load_to_api(
126
122
  data: JSONData,
127
- file_path: StrPath,
128
- file_format: FileFormat | str | None = None,
123
+ url: str,
124
+ method: HttpMethod | str,
125
+ **kwargs: Any,
129
126
  ) -> JSONDict:
130
127
  """
131
- Persist data to a local file.
128
+ Load data to a REST API.
132
129
 
133
130
  Parameters
134
131
  ----------
135
132
  data : JSONData
136
- Data to write.
137
- file_path : StrPath
138
- Target file path.
139
- file_format : FileFormat | str | None, optional
140
- Output format. If omitted (None), the format is inferred from the
141
- filename extension.
133
+ Data to send as JSON.
134
+ url : str
135
+ API endpoint URL.
136
+ method : HttpMethod | str
137
+ HTTP method to use.
138
+ **kwargs : Any
139
+ Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
140
+ When omitted, ``timeout`` defaults to 10 seconds.
142
141
 
143
142
  Returns
144
143
  -------
145
144
  JSONDict
146
- Result dictionary with status and record count.
145
+ Result dictionary including response payload or text.
147
146
  """
148
- path = Path(file_path)
149
- path.parent.mkdir(parents=True, exist_ok=True)
147
+ # Apply a conservative timeout to guard against hanging requests.
148
+ timeout = kwargs.pop('timeout', 10.0)
149
+ session = kwargs.pop('session', None)
150
+ request_callable, timeout, http_method = resolve_request(
151
+ method,
152
+ session=session,
153
+ timeout=timeout,
154
+ )
155
+ response = request_callable(url, json=data, timeout=timeout, **kwargs)
156
+ response.raise_for_status()
150
157
 
151
- # If no explicit format is provided, let File infer from extension.
152
- if file_format is None:
153
- records = File(path).write(data)
154
- ext = path.suffix.lstrip('.').lower()
155
- fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
156
- else:
157
- fmt = FileFormat.coerce(file_format)
158
- records = File(path, fmt).write(data)
159
- if fmt is FileFormat.CSV and records == 0:
160
- message = 'No data to write'
161
- else:
162
- message = f'Data loaded to {path}'
158
+ # Try JSON first, fall back to text.
159
+ try:
160
+ payload: Any = response.json()
161
+ except ValueError:
162
+ payload = response.text
163
163
 
164
164
  return {
165
165
  'status': 'success',
166
- 'message': message,
167
- 'records': records,
166
+ 'status_code': response.status_code,
167
+ 'message': f'Data loaded to {url}',
168
+ 'response': payload,
169
+ 'records': count_records(data),
170
+ 'method': http_method.value.upper(),
168
171
  }
169
172
 
170
173
 
171
- # -- Database Loading (Placeholder) -- #
172
-
173
-
174
174
  def load_to_database(
175
175
  data: JSONData,
176
176
  connection_string: str,
@@ -206,69 +206,49 @@ def load_to_database(
206
206
  }
207
207
 
208
208
 
209
- # -- REST API Loading -- #
210
-
211
-
212
- def load_to_api(
209
+ def load_to_file(
213
210
  data: JSONData,
214
- url: str,
215
- method: HttpMethod | str,
216
- **kwargs: Any,
211
+ file_path: StrPath,
212
+ file_format: FileFormat | str | None = None,
217
213
  ) -> JSONDict:
218
214
  """
219
- Load data to a REST API.
215
+ Persist data to a local file.
220
216
 
221
217
  Parameters
222
218
  ----------
223
219
  data : JSONData
224
- Data to send as JSON.
225
- url : str
226
- API endpoint URL.
227
- method : HttpMethod | str
228
- HTTP method to use.
229
- **kwargs : Any
230
- Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
220
+ Data to write.
221
+ file_path : StrPath
222
+ Target file path.
223
+ file_format : FileFormat | str | None, optional
224
+ Output format. If omitted (None), the format is inferred from the
225
+ filename extension.
231
226
 
232
227
  Returns
233
228
  -------
234
229
  JSONDict
235
- Result dictionary including response payload or text.
236
-
237
- Raises
238
- ------
239
- TypeError
240
- If the session object is not valid.
230
+ Result dictionary with status and record count.
241
231
  """
242
- http_method = HttpMethod.coerce(method)
243
-
244
- # Apply a conservative timeout to guard against hanging requests.
245
- timeout = kwargs.pop('timeout', 10.0)
246
- session = kwargs.pop('session', None)
247
- requester = session or requests
248
-
249
- request_callable = getattr(requester, http_method.value, None)
250
- if not callable(request_callable):
251
- raise TypeError(
252
- 'Session object must supply a '
253
- f'callable "{http_method.value}" method',
254
- )
255
-
256
- response = request_callable(url, json=data, timeout=timeout, **kwargs)
257
- response.raise_for_status()
232
+ path = Path(file_path)
233
+ path.parent.mkdir(parents=True, exist_ok=True)
258
234
 
259
- # Try JSON first, fall back to text.
260
- try:
261
- payload: Any = response.json()
262
- except ValueError:
263
- payload = response.text
235
+ # If no explicit format is provided, let File infer from extension.
236
+ if file_format is None:
237
+ records = File(path).write(data)
238
+ ext = path.suffix.lstrip('.').lower()
239
+ fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
240
+ else:
241
+ fmt = FileFormat.coerce(file_format)
242
+ records = File(path, fmt).write(data)
243
+ if fmt is FileFormat.CSV and records == 0:
244
+ message = 'No data to write'
245
+ else:
246
+ message = f'Data loaded to {path}'
264
247
 
265
248
  return {
266
249
  'status': 'success',
267
- 'status_code': response.status_code,
268
- 'message': f'Data loaded to {url}',
269
- 'response': payload,
270
- 'records': count_records(data),
271
- 'method': http_method.value.upper(),
250
+ 'message': message,
251
+ 'records': records,
272
252
  }
273
253
 
274
254
 
@@ -1,5 +1,5 @@
1
1
  """
2
- :mod:`etlplus.run` module.
2
+ :mod:`etlplus.ops.run` module.
3
3
 
4
4
  A module for running ETL jobs defined in YAML configurations.
5
5
  """
@@ -9,31 +9,26 @@ from __future__ import annotations
9
9
  from collections.abc import Mapping
10
10
  from typing import Any
11
11
  from typing import Final
12
- from typing import TypedDict
13
12
  from typing import cast
14
13
  from urllib.parse import urlsplit
15
14
  from urllib.parse import urlunsplit
16
15
 
17
- import requests # type: ignore[import]
18
-
19
- from .api import EndpointClient # noqa: F401 (re-exported for tests)
20
- from .api import PaginationConfigMap
21
- from .api import RequestOptions
22
- from .api import RetryPolicy
23
- from .api import Url
24
- from .config import load_pipeline_config
25
- from .enums import DataConnectorType
16
+ from ..api import EndpointClient # noqa: F401 (re-exported for tests)
17
+ from ..api import PaginationConfigMap
18
+ from ..api import RequestOptions
19
+ from ..api import compose_api_request_env
20
+ from ..api import compose_api_target_env
21
+ from ..api import paginate_with_client
22
+ from ..config import load_pipeline_config
23
+ from ..enums import DataConnectorType
24
+ from ..types import JSONDict
25
+ from ..types import Timeout
26
+ from ..utils import print_json
26
27
  from .extract import extract
27
28
  from .load import load
28
- from .run_helpers import compose_api_request_env
29
- from .run_helpers import compose_api_target_env
30
- from .run_helpers import paginate_with_client
31
29
  from .transform import transform
32
- from .types import JSONDict
33
- from .types import Timeout
34
- from .utils import print_json
30
+ from .utils import maybe_validate
35
31
  from .validate import validate
36
- from .validation.utils import maybe_validate
37
32
 
38
33
  # SECTION: EXPORTS ========================================================== #
39
34
 
@@ -41,90 +36,6 @@ from .validation.utils import maybe_validate
41
36
  __all__ = ['run']
42
37
 
43
38
 
44
- # SECTION: TYPED DICTS ====================================================== #
45
-
46
-
47
- class BaseApiHttpEnv(TypedDict, total=False):
48
- """
49
- Common HTTP request environment for API interactions.
50
-
51
- Fields shared by both source-side and target-side API operations.
52
- """
53
-
54
- # Request details
55
- url: Url | None
56
- headers: dict[str, str]
57
- timeout: Timeout
58
-
59
- # Session
60
- session: requests.Session | None
61
-
62
-
63
- class ApiRequestEnv(BaseApiHttpEnv, total=False):
64
- """
65
- Composed request environment for API sources.
66
-
67
- Returned by ``compose_api_request_env`` (run_helpers) and consumed by the
68
- API extract branch. Values are fully merged with endpoint/API defaults and
69
- job-level overrides, preserving the original precedence and behavior.
70
- """
71
-
72
- # Client
73
- use_endpoints: bool
74
- base_url: str | None
75
- base_path: str | None
76
- endpoints_map: dict[str, str] | None
77
- endpoint_key: str | None
78
-
79
- # Request
80
- params: dict[str, Any]
81
- pagination: PaginationConfigMap | None
82
- sleep_seconds: float
83
-
84
- # Reliability
85
- retry: RetryPolicy | None
86
- retry_network_errors: bool
87
-
88
-
89
- class ApiTargetEnv(BaseApiHttpEnv, total=False):
90
- """
91
- Composed request environment for API targets.
92
-
93
- Returned by ``compose_api_target_env`` (run_helpers) and consumed by the
94
- API load branch. Values are merged from the target object, optional
95
- API/endpoint reference, and job-level overrides, preserving original
96
- precedence and behavior.
97
-
98
- Notes
99
- -----
100
- - Precedence for inherited values matches original logic:
101
- overrides -> target -> API profile defaults.
102
- - Target composition does not include pagination/rate-limit/retry since
103
- loads are single-request operations; only headers/timeout/session
104
- apply.
105
- """
106
-
107
- # Request
108
- method: str | None
109
-
110
-
111
- class SessionConfig(TypedDict, total=False):
112
- """
113
- Minimal session configuration schema accepted by this runner.
114
-
115
- Keys mirror common requests.Session options; all are optional.
116
- """
117
-
118
- headers: Mapping[str, Any]
119
- params: Mapping[str, Any]
120
- auth: Any # (user, pass) tuple or requests-compatible auth object
121
- verify: bool | str
122
- cert: Any # str or (cert, key)
123
- proxies: Mapping[str, Any]
124
- cookies: Mapping[str, Any]
125
- trust_env: bool
126
-
127
-
128
39
  # SECTION: CONSTANTS ======================================================== #
129
40
 
130
41
 
@@ -207,7 +118,7 @@ def run(
207
118
  and env.get('endpoint_key')
208
119
  ):
209
120
  # Construct client using module-level EndpointClient so tests
210
- # can monkeypatch this class on etlplus.run.
121
+ # can monkeypatch this class on etlplus.ops.run.
211
122
  ClientClass = EndpointClient # noqa: N806
212
123
  client = ClientClass(
213
124
  base_url=cast(str, env['base_url']),