etlplus 0.13.0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  """
2
- :mod:`etlplus.extract` module.
2
+ :mod:`etlplus.ops.extract` module.
3
3
 
4
4
  Helpers to extract data from files, databases, and REST APIs.
5
5
  """
@@ -10,56 +10,81 @@ from pathlib import Path
10
10
  from typing import Any
11
11
  from typing import cast
12
12
 
13
- import requests # type: ignore[import]
14
-
15
- from .enums import DataConnectorType
16
- from .enums import HttpMethod
17
- from .file import File
18
- from .file import FileFormat
19
- from .types import JSONData
20
- from .types import JSONDict
21
- from .types import JSONList
22
- from .types import StrPath
13
+ from ..api.utils import resolve_request
14
+ from ..enums import DataConnectorType
15
+ from ..enums import HttpMethod
16
+ from ..file import File
17
+ from ..file import FileFormat
18
+ from ..types import JSONData
19
+ from ..types import JSONDict
20
+ from ..types import JSONList
21
+ from ..types import StrPath
23
22
 
24
23
  # SECTION: FUNCTIONS ======================================================== #
25
24
 
26
25
 
27
- # -- File Extraction -- #
28
-
29
-
30
- def extract_from_file(
31
- file_path: StrPath,
32
- file_format: FileFormat | str | None = FileFormat.JSON,
26
+ def extract_from_api(
27
+ url: str,
28
+ method: HttpMethod | str = HttpMethod.GET,
29
+ **kwargs: Any,
33
30
  ) -> JSONData:
34
31
  """
35
- Extract (semi-)structured data from a local file.
32
+ Extract data from a REST API.
36
33
 
37
34
  Parameters
38
35
  ----------
39
- file_path : StrPath
40
- Source file path.
41
- file_format : FileFormat | str | None, optional
42
- File format to parse. If ``None``, infer from the filename
43
- extension. Defaults to `'json'` for backward compatibility when
44
- explicitly provided.
36
+ url : str
37
+ API endpoint URL.
38
+ method : HttpMethod | str, optional
39
+ HTTP method to use. Defaults to ``GET``.
40
+ **kwargs : Any
41
+ Extra arguments forwarded to the underlying ``requests`` call
42
+ (for example, ``timeout``). To use a pre-configured
43
+ :class:`requests.Session`, provide it via ``session``.
44
+ When omitted, ``timeout`` defaults to 10 seconds.
45
45
 
46
46
  Returns
47
47
  -------
48
48
  JSONData
49
- Parsed data as a mapping or a list of mappings.
50
- """
51
- path = Path(file_path)
52
-
53
- # If no explicit format is provided, let File infer from extension.
54
- if file_format is None:
55
- return File(path, None).read()
56
- fmt = FileFormat.coerce(file_format)
49
+ Parsed JSON payload, or a fallback object with raw text.
57
50
 
58
- # Let file module perform existence and format validation.
59
- return File(path, fmt).read()
51
+ Raises
52
+ ------
53
+ TypeError
54
+ If a provided ``session`` does not expose the required HTTP
55
+ method (for example, ``get``).
56
+ """
57
+ timeout = kwargs.pop('timeout', None)
58
+ session = kwargs.pop('session', None)
59
+ request_callable, timeout, _ = resolve_request(
60
+ method,
61
+ session=session,
62
+ timeout=timeout,
63
+ )
64
+ response = request_callable(url, timeout=timeout, **kwargs)
65
+ response.raise_for_status()
60
66
 
67
+ content_type = response.headers.get('content-type', '').lower()
68
+ if 'application/json' in content_type:
69
+ try:
70
+ payload: Any = response.json()
71
+ except ValueError:
72
+ # Malformed JSON despite content-type; fall back to text
73
+ return {
74
+ 'content': response.text,
75
+ 'content_type': content_type,
76
+ }
77
+ if isinstance(payload, dict):
78
+ return cast(JSONDict, payload)
79
+ if isinstance(payload, list):
80
+ if all(isinstance(x, dict) for x in payload):
81
+ return cast(JSONList, payload)
82
+ # Coerce non-dict array items into objects for consistency
83
+ return [{'value': x} for x in payload]
84
+ # Fallback: wrap scalar JSON
85
+ return {'value': payload}
61
86
 
62
- # -- Database Extraction (Placeholder) -- #
87
+ return {'content': response.text, 'content_type': content_type}
63
88
 
64
89
 
65
90
  def extract_from_database(
@@ -94,77 +119,36 @@ def extract_from_database(
94
119
  ]
95
120
 
96
121
 
97
- # -- REST API Extraction -- #
98
-
99
-
100
- def extract_from_api(
101
- url: str,
102
- method: HttpMethod | str = HttpMethod.GET,
103
- **kwargs: Any,
122
+ def extract_from_file(
123
+ file_path: StrPath,
124
+ file_format: FileFormat | str | None = FileFormat.JSON,
104
125
  ) -> JSONData:
105
126
  """
106
- Extract data from a REST API.
127
+ Extract (semi-)structured data from a local file.
107
128
 
108
129
  Parameters
109
130
  ----------
110
- url : str
111
- API endpoint URL.
112
- method : HttpMethod | str, optional
113
- HTTP method to use. Defaults to ``GET``.
114
- **kwargs : Any
115
- Extra arguments forwarded to the underlying ``requests`` call
116
- (for example, ``timeout``). To use a pre-configured
117
- :class:`requests.Session`, provide it via ``session``.
131
+ file_path : StrPath
132
+ Source file path.
133
+ file_format : FileFormat | str | None, optional
134
+ File format to parse. If ``None``, infer from the filename
135
+ extension. Defaults to `'json'` for backward compatibility when
136
+ explicitly provided.
118
137
 
119
138
  Returns
120
139
  -------
121
140
  JSONData
122
- Parsed JSON payload, or a fallback object with raw text.
123
-
124
- Raises
125
- ------
126
- TypeError
127
- If a provided ``session`` does not expose the required HTTP
128
- method (for example, ``get``).
141
+ Parsed data as a mapping or a list of mappings.
129
142
  """
130
- http_method = HttpMethod.coerce(method)
131
-
132
- # Apply a conservative timeout to guard against hanging requests.
133
- timeout = kwargs.pop('timeout', 10.0)
134
- session = kwargs.pop('session', None)
135
- requester = session or requests
136
-
137
- request_callable = getattr(requester, http_method.value, None)
138
- if not callable(request_callable):
139
- raise TypeError(
140
- 'Session object must supply a callable'
141
- f'"{http_method.value}" method',
142
- )
143
-
144
- response = request_callable(url, timeout=timeout, **kwargs)
145
- response.raise_for_status()
143
+ path = Path(file_path)
146
144
 
147
- content_type = response.headers.get('content-type', '').lower()
148
- if 'application/json' in content_type:
149
- try:
150
- payload: Any = response.json()
151
- except ValueError:
152
- # Malformed JSON despite content-type; fall back to text
153
- return {
154
- 'content': response.text,
155
- 'content_type': content_type,
156
- }
157
- if isinstance(payload, dict):
158
- return cast(JSONDict, payload)
159
- if isinstance(payload, list):
160
- if all(isinstance(x, dict) for x in payload):
161
- return cast(JSONList, payload)
162
- # Coerce non-dict array items into objects for consistency
163
- return [{'value': x} for x in payload]
164
- # Fallback: wrap scalar JSON
165
- return {'value': payload}
145
+ # If no explicit format is provided, let File infer from extension.
146
+ if file_format is None:
147
+ return File(path, None).read()
148
+ fmt = FileFormat.coerce(file_format)
166
149
 
167
- return {'content': response.text, 'content_type': content_type}
150
+ # Let file module perform existence and format validation.
151
+ return File(path, fmt).read()
168
152
 
169
153
 
170
154
  # -- Orchestration -- #
@@ -1,5 +1,5 @@
1
1
  """
2
- :mod:`etlplus.load` module.
2
+ :mod:`etlplus.ops.load` module.
3
3
 
4
4
  Helpers to load data into files, databases, and REST APIs.
5
5
  """
@@ -12,17 +12,16 @@ from pathlib import Path
12
12
  from typing import Any
13
13
  from typing import cast
14
14
 
15
- import requests # type: ignore[import]
16
-
17
- from .enums import DataConnectorType
18
- from .enums import HttpMethod
19
- from .file import File
20
- from .file import FileFormat
21
- from .types import JSONData
22
- from .types import JSONDict
23
- from .types import JSONList
24
- from .types import StrPath
25
- from .utils import count_records
15
+ from ..api.utils import resolve_request
16
+ from ..enums import DataConnectorType
17
+ from ..enums import HttpMethod
18
+ from ..file import File
19
+ from ..file import FileFormat
20
+ from ..types import JSONData
21
+ from ..types import JSONDict
22
+ from ..types import JSONList
23
+ from ..types import StrPath
24
+ from ..utils import count_records
26
25
 
27
26
  # SECTION: INTERNAL FUNCTIONS ============================================== #
28
27
 
@@ -69,7 +68,7 @@ def _parse_json_string(
69
68
  # SECTION: FUNCTIONS ======================================================== #
70
69
 
71
70
 
72
- # -- Data Loading -- #
71
+ # -- Helpers -- #
73
72
 
74
73
 
75
74
  def load_data(
@@ -119,58 +118,59 @@ def load_data(
119
118
  )
120
119
 
121
120
 
122
- # -- File Loading -- #
123
-
124
-
125
- def load_to_file(
121
+ def load_to_api(
126
122
  data: JSONData,
127
- file_path: StrPath,
128
- file_format: FileFormat | str | None = None,
123
+ url: str,
124
+ method: HttpMethod | str,
125
+ **kwargs: Any,
129
126
  ) -> JSONDict:
130
127
  """
131
- Persist data to a local file.
128
+ Load data to a REST API.
132
129
 
133
130
  Parameters
134
131
  ----------
135
132
  data : JSONData
136
- Data to write.
137
- file_path : StrPath
138
- Target file path.
139
- file_format : FileFormat | str | None, optional
140
- Output format. If omitted (None), the format is inferred from the
141
- filename extension.
133
+ Data to send as JSON.
134
+ url : str
135
+ API endpoint URL.
136
+ method : HttpMethod | str
137
+ HTTP method to use.
138
+ **kwargs : Any
139
+ Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
140
+ When omitted, ``timeout`` defaults to 10 seconds.
142
141
 
143
142
  Returns
144
143
  -------
145
144
  JSONDict
146
- Result dictionary with status and record count.
145
+ Result dictionary including response payload or text.
147
146
  """
148
- path = Path(file_path)
149
- path.parent.mkdir(parents=True, exist_ok=True)
147
+ # Apply a conservative timeout to guard against hanging requests.
148
+ timeout = kwargs.pop('timeout', 10.0)
149
+ session = kwargs.pop('session', None)
150
+ request_callable, timeout, http_method = resolve_request(
151
+ method,
152
+ session=session,
153
+ timeout=timeout,
154
+ )
155
+ response = request_callable(url, json=data, timeout=timeout, **kwargs)
156
+ response.raise_for_status()
150
157
 
151
- # If no explicit format is provided, let File infer from extension.
152
- if file_format is None:
153
- records = File(path).write(data)
154
- ext = path.suffix.lstrip('.').lower()
155
- fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
156
- else:
157
- fmt = FileFormat.coerce(file_format)
158
- records = File(path, fmt).write(data)
159
- if fmt is FileFormat.CSV and records == 0:
160
- message = 'No data to write'
161
- else:
162
- message = f'Data loaded to {path}'
158
+ # Try JSON first, fall back to text.
159
+ try:
160
+ payload: Any = response.json()
161
+ except ValueError:
162
+ payload = response.text
163
163
 
164
164
  return {
165
165
  'status': 'success',
166
- 'message': message,
167
- 'records': records,
166
+ 'status_code': response.status_code,
167
+ 'message': f'Data loaded to {url}',
168
+ 'response': payload,
169
+ 'records': count_records(data),
170
+ 'method': http_method.value.upper(),
168
171
  }
169
172
 
170
173
 
171
- # -- Database Loading (Placeholder) -- #
172
-
173
-
174
174
  def load_to_database(
175
175
  data: JSONData,
176
176
  connection_string: str,
@@ -206,69 +206,49 @@ def load_to_database(
206
206
  }
207
207
 
208
208
 
209
- # -- REST API Loading -- #
210
-
211
-
212
- def load_to_api(
209
+ def load_to_file(
213
210
  data: JSONData,
214
- url: str,
215
- method: HttpMethod | str,
216
- **kwargs: Any,
211
+ file_path: StrPath,
212
+ file_format: FileFormat | str | None = None,
217
213
  ) -> JSONDict:
218
214
  """
219
- Load data to a REST API.
215
+ Persist data to a local file.
220
216
 
221
217
  Parameters
222
218
  ----------
223
219
  data : JSONData
224
- Data to send as JSON.
225
- url : str
226
- API endpoint URL.
227
- method : HttpMethod | str
228
- HTTP method to use.
229
- **kwargs : Any
230
- Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
220
+ Data to write.
221
+ file_path : StrPath
222
+ Target file path.
223
+ file_format : FileFormat | str | None, optional
224
+ Output format. If omitted (None), the format is inferred from the
225
+ filename extension.
231
226
 
232
227
  Returns
233
228
  -------
234
229
  JSONDict
235
- Result dictionary including response payload or text.
236
-
237
- Raises
238
- ------
239
- TypeError
240
- If the session object is not valid.
230
+ Result dictionary with status and record count.
241
231
  """
242
- http_method = HttpMethod.coerce(method)
243
-
244
- # Apply a conservative timeout to guard against hanging requests.
245
- timeout = kwargs.pop('timeout', 10.0)
246
- session = kwargs.pop('session', None)
247
- requester = session or requests
248
-
249
- request_callable = getattr(requester, http_method.value, None)
250
- if not callable(request_callable):
251
- raise TypeError(
252
- 'Session object must supply a '
253
- f'callable "{http_method.value}" method',
254
- )
255
-
256
- response = request_callable(url, json=data, timeout=timeout, **kwargs)
257
- response.raise_for_status()
232
+ path = Path(file_path)
233
+ path.parent.mkdir(parents=True, exist_ok=True)
258
234
 
259
- # Try JSON first, fall back to text.
260
- try:
261
- payload: Any = response.json()
262
- except ValueError:
263
- payload = response.text
235
+ # If no explicit format is provided, let File infer from extension.
236
+ if file_format is None:
237
+ records = File(path).write(data)
238
+ ext = path.suffix.lstrip('.').lower()
239
+ fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
240
+ else:
241
+ fmt = FileFormat.coerce(file_format)
242
+ records = File(path, fmt).write(data)
243
+ if fmt is FileFormat.CSV and records == 0:
244
+ message = 'No data to write'
245
+ else:
246
+ message = f'Data loaded to {path}'
264
247
 
265
248
  return {
266
249
  'status': 'success',
267
- 'status_code': response.status_code,
268
- 'message': f'Data loaded to {url}',
269
- 'response': payload,
270
- 'records': count_records(data),
271
- 'method': http_method.value.upper(),
250
+ 'message': message,
251
+ 'records': records,
272
252
  }
273
253
 
274
254