etlplus 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. etlplus/README.md +37 -0
  2. etlplus/__init__.py +1 -26
  3. etlplus/api/README.md +51 -3
  4. etlplus/api/__init__.py +10 -0
  5. etlplus/api/config.py +39 -28
  6. etlplus/api/endpoint_client.py +3 -3
  7. etlplus/api/enums.py +51 -0
  8. etlplus/api/pagination/client.py +1 -1
  9. etlplus/api/rate_limiting/config.py +13 -1
  10. etlplus/api/rate_limiting/rate_limiter.py +8 -11
  11. etlplus/api/request_manager.py +11 -6
  12. etlplus/api/transport.py +14 -2
  13. etlplus/api/types.py +96 -6
  14. etlplus/{run_helpers.py → api/utils.py} +209 -153
  15. etlplus/cli/README.md +40 -0
  16. etlplus/cli/commands.py +94 -61
  17. etlplus/cli/constants.py +1 -1
  18. etlplus/cli/handlers.py +40 -12
  19. etlplus/cli/io.py +2 -2
  20. etlplus/cli/main.py +1 -1
  21. etlplus/cli/state.py +4 -7
  22. etlplus/database/README.md +48 -0
  23. etlplus/database/ddl.py +1 -1
  24. etlplus/database/engine.py +19 -3
  25. etlplus/database/orm.py +2 -0
  26. etlplus/database/schema.py +1 -1
  27. etlplus/enums.py +1 -107
  28. etlplus/file/README.md +105 -0
  29. etlplus/file/__init__.py +25 -0
  30. etlplus/file/_imports.py +141 -0
  31. etlplus/file/_io.py +160 -0
  32. etlplus/file/accdb.py +78 -0
  33. etlplus/file/arrow.py +78 -0
  34. etlplus/file/avro.py +176 -0
  35. etlplus/file/bson.py +77 -0
  36. etlplus/file/cbor.py +78 -0
  37. etlplus/file/cfg.py +79 -0
  38. etlplus/file/conf.py +80 -0
  39. etlplus/file/core.py +322 -0
  40. etlplus/file/csv.py +79 -0
  41. etlplus/file/dat.py +78 -0
  42. etlplus/file/dta.py +77 -0
  43. etlplus/file/duckdb.py +78 -0
  44. etlplus/file/enums.py +343 -0
  45. etlplus/file/feather.py +111 -0
  46. etlplus/file/fwf.py +77 -0
  47. etlplus/file/gz.py +123 -0
  48. etlplus/file/hbs.py +78 -0
  49. etlplus/file/hdf5.py +78 -0
  50. etlplus/file/ini.py +79 -0
  51. etlplus/file/ion.py +78 -0
  52. etlplus/file/jinja2.py +78 -0
  53. etlplus/file/json.py +98 -0
  54. etlplus/file/log.py +78 -0
  55. etlplus/file/mat.py +78 -0
  56. etlplus/file/mdb.py +78 -0
  57. etlplus/file/msgpack.py +78 -0
  58. etlplus/file/mustache.py +78 -0
  59. etlplus/file/nc.py +78 -0
  60. etlplus/file/ndjson.py +108 -0
  61. etlplus/file/numbers.py +75 -0
  62. etlplus/file/ods.py +79 -0
  63. etlplus/file/orc.py +111 -0
  64. etlplus/file/parquet.py +113 -0
  65. etlplus/file/pb.py +78 -0
  66. etlplus/file/pbf.py +77 -0
  67. etlplus/file/properties.py +78 -0
  68. etlplus/file/proto.py +77 -0
  69. etlplus/file/psv.py +79 -0
  70. etlplus/file/rda.py +78 -0
  71. etlplus/file/rds.py +78 -0
  72. etlplus/file/sas7bdat.py +78 -0
  73. etlplus/file/sav.py +77 -0
  74. etlplus/file/sqlite.py +78 -0
  75. etlplus/file/stub.py +84 -0
  76. etlplus/file/sylk.py +77 -0
  77. etlplus/file/tab.py +81 -0
  78. etlplus/file/toml.py +78 -0
  79. etlplus/file/tsv.py +80 -0
  80. etlplus/file/txt.py +102 -0
  81. etlplus/file/vm.py +78 -0
  82. etlplus/file/wks.py +77 -0
  83. etlplus/file/xls.py +88 -0
  84. etlplus/file/xlsm.py +79 -0
  85. etlplus/file/xlsx.py +99 -0
  86. etlplus/file/xml.py +185 -0
  87. etlplus/file/xpt.py +78 -0
  88. etlplus/file/yaml.py +95 -0
  89. etlplus/file/zip.py +175 -0
  90. etlplus/file/zsav.py +77 -0
  91. etlplus/ops/README.md +50 -0
  92. etlplus/ops/__init__.py +61 -0
  93. etlplus/{extract.py → ops/extract.py} +81 -99
  94. etlplus/{load.py → ops/load.py} +78 -101
  95. etlplus/{run.py → ops/run.py} +159 -127
  96. etlplus/{transform.py → ops/transform.py} +75 -68
  97. etlplus/{validation → ops}/utils.py +53 -17
  98. etlplus/{validate.py → ops/validate.py} +22 -12
  99. etlplus/templates/README.md +46 -0
  100. etlplus/types.py +5 -4
  101. etlplus/utils.py +136 -2
  102. etlplus/workflow/README.md +52 -0
  103. etlplus/{config → workflow}/__init__.py +10 -23
  104. etlplus/{config → workflow}/connector.py +58 -44
  105. etlplus/workflow/dag.py +105 -0
  106. etlplus/{config → workflow}/jobs.py +105 -32
  107. etlplus/{config → workflow}/pipeline.py +59 -51
  108. etlplus/{config → workflow}/profile.py +8 -5
  109. etlplus/workflow/types.py +115 -0
  110. {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
  111. etlplus-0.9.2.dist-info/RECORD +134 -0
  112. {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
  113. etlplus/config/types.py +0 -204
  114. etlplus/config/utils.py +0 -120
  115. etlplus/file.py +0 -657
  116. etlplus/validation/__init__.py +0 -44
  117. etlplus-0.9.0.dist-info/RECORD +0 -65
  118. {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
  119. {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
  120. {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
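Most of the churn in this release is a package reorganization: the top-level `extract`, `load`, `run`, `transform`, and `validate` modules move under `etlplus.ops`, `etlplus/config/` becomes `etlplus/workflow/`, and the monolithic `etlplus/file.py` is replaced by an `etlplus/file/` package with one module per format. A minimal migration sketch for downstream imports, assuming the new paths listed above and that the old locations are not re-exported (this diff does not show whether `etlplus/__init__.py` keeps compatibility shims):

```python
# 0.9.0 module layout (removed in 0.9.2):
#   from etlplus.extract import extract_from_api
#   from etlplus.load import load_to_file
#   from etlplus.config import pipeline

# 0.9.2 module layout, per the file list above:
from etlplus.ops.extract import extract_from_api
from etlplus.ops.load import load_to_file
from etlplus.workflow import pipeline  # formerly etlplus.config.pipeline
```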
etlplus/{extract.py → ops/extract.py}
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.extract` module.
+:mod:`etlplus.ops.extract` module.

 Helpers to extract data from files, databases, and REST APIs.
 """
@@ -10,58 +10,81 @@ from pathlib import Path
 from typing import Any
 from typing import cast

-import requests  # type: ignore[import]
-
-from .enums import DataConnectorType
-from .enums import FileFormat
-from .enums import HttpMethod
-from .enums import coerce_data_connector_type
-from .enums import coerce_file_format
-from .file import File
-from .types import JSONData
-from .types import JSONDict
-from .types import JSONList
-from .types import StrPath
+from ..api import HttpMethod
+from ..api.utils import resolve_request
+from ..enums import DataConnectorType
+from ..file import File
+from ..file import FileFormat
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import JSONList
+from ..types import StrPath

 # SECTION: FUNCTIONS ======================================================== #


-# -- File Extraction -- #
-
-
-def extract_from_file(
-    file_path: StrPath,
-    file_format: FileFormat | str | None = FileFormat.JSON,
+def extract_from_api(
+    url: str,
+    method: HttpMethod | str = HttpMethod.GET,
+    **kwargs: Any,
 ) -> JSONData:
     """
-    Extract (semi-)structured data from a local file.
+    Extract data from a REST API.

     Parameters
     ----------
-    file_path : StrPath
-        Source file path.
-    file_format : FileFormat | str | None, optional
-        File format to parse. If ``None``, infer from the filename
-        extension. Defaults to `'json'` for backward compatibility when
-        explicitly provided.
+    url : str
+        API endpoint URL.
+    method : HttpMethod | str, optional
+        HTTP method to use. Defaults to ``GET``.
+    **kwargs : Any
+        Extra arguments forwarded to the underlying ``requests`` call
+        (for example, ``timeout``). To use a pre-configured
+        :class:`requests.Session`, provide it via ``session``.
+        When omitted, ``timeout`` defaults to 10 seconds.

     Returns
     -------
     JSONData
-        Parsed data as a mapping or a list of mappings.
-    """
-    path = Path(file_path)
-
-    # If no explicit format is provided, let File infer from extension.
-    if file_format is None:
-        return File(path, None).read()
-    fmt = coerce_file_format(file_format)
+        Parsed JSON payload, or a fallback object with raw text.

-    # Let file module perform existence and format validation.
-    return File(path, fmt).read()
+    Raises
+    ------
+    TypeError
+        If a provided ``session`` does not expose the required HTTP
+        method (for example, ``get``).
+    """
+    timeout = kwargs.pop('timeout', None)
+    session = kwargs.pop('session', None)
+    request_callable, timeout, _ = resolve_request(
+        method,
+        session=session,
+        timeout=timeout,
+    )
+    response = request_callable(url, timeout=timeout, **kwargs)
+    response.raise_for_status()

+    content_type = response.headers.get('content-type', '').lower()
+    if 'application/json' in content_type:
+        try:
+            payload: Any = response.json()
+        except ValueError:
+            # Malformed JSON despite content-type; fall back to text
+            return {
+                'content': response.text,
+                'content_type': content_type,
+            }
+        if isinstance(payload, dict):
+            return cast(JSONDict, payload)
+        if isinstance(payload, list):
+            if all(isinstance(x, dict) for x in payload):
+                return cast(JSONList, payload)
+            # Coerce non-dict array items into objects for consistency
+            return [{'value': x} for x in payload]
+        # Fallback: wrap scalar JSON
+        return {'value': payload}

-# -- Database Extraction (Placeholder) -- #
+    return {'content': response.text, 'content_type': content_type}


 def extract_from_database(
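As a caller-side reference, here is a hedged sketch of invoking the relocated `extract_from_api`, using only the signature and keyword handling visible in this hunk; the endpoint URL is a placeholder:

```python
import requests

from etlplus.api import HttpMethod
from etlplus.ops.extract import extract_from_api

# No timeout passed: per the docstring above it falls back to 10 seconds.
payload = extract_from_api('https://api.example.com/items')

# Reusing a pre-configured session plus an explicit timeout; both are popped
# from **kwargs before resolve_request() builds the request callable.
with requests.Session() as session:
    session.headers['Accept'] = 'application/json'
    rows = extract_from_api(
        'https://api.example.com/items',
        method=HttpMethod.GET,
        session=session,
        timeout=5.0,
    )
```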
@@ -96,77 +119,36 @@ def extract_from_database(
     ]


-# -- REST API Extraction -- #
-
-
-def extract_from_api(
-    url: str,
-    method: HttpMethod | str = HttpMethod.GET,
-    **kwargs: Any,
+def extract_from_file(
+    file_path: StrPath,
+    file_format: FileFormat | str | None = FileFormat.JSON,
 ) -> JSONData:
     """
-    Extract data from a REST API.
+    Extract (semi-)structured data from a local file.

     Parameters
     ----------
-    url : str
-        API endpoint URL.
-    method : HttpMethod | str, optional
-        HTTP method to use. Defaults to ``GET``.
-    **kwargs : Any
-        Extra arguments forwarded to the underlying ``requests`` call
-        (for example, ``timeout``). To use a pre-configured
-        :class:`requests.Session`, provide it via ``session``.
+    file_path : StrPath
+        Source file path.
+    file_format : FileFormat | str | None, optional
+        File format to parse. If ``None``, infer from the filename
+        extension. Defaults to `'json'` for backward compatibility when
+        explicitly provided.

     Returns
     -------
     JSONData
-        Parsed JSON payload, or a fallback object with raw text.
-
-    Raises
-    ------
-    TypeError
-        If a provided ``session`` does not expose the required HTTP
-        method (for example, ``get``).
+        Parsed data as a mapping or a list of mappings.
     """
-    http_method = HttpMethod.coerce(method)
-
-    # Apply a conservative timeout to guard against hanging requests.
-    timeout = kwargs.pop('timeout', 10.0)
-    session = kwargs.pop('session', None)
-    requester = session or requests
-
-    request_callable = getattr(requester, http_method.value, None)
-    if not callable(request_callable):
-        raise TypeError(
-            'Session object must supply a callable'
-            f'"{http_method.value}" method',
-        )
-
-    response = request_callable(url, timeout=timeout, **kwargs)
-    response.raise_for_status()
+    path = Path(file_path)

-    content_type = response.headers.get('content-type', '').lower()
-    if 'application/json' in content_type:
-        try:
-            payload: Any = response.json()
-        except ValueError:
-            # Malformed JSON despite content-type; fall back to text
-            return {
-                'content': response.text,
-                'content_type': content_type,
-            }
-        if isinstance(payload, dict):
-            return cast(JSONDict, payload)
-        if isinstance(payload, list):
-            if all(isinstance(x, dict) for x in payload):
-                return cast(JSONList, payload)
-            # Coerce non-dict array items into objects for consistency
-            return [{'value': x} for x in payload]
-        # Fallback: wrap scalar JSON
-        return {'value': payload}
+    # If no explicit format is provided, let File infer from extension.
+    if file_format is None:
+        return File(path, None).read()
+    fmt = FileFormat.coerce(file_format)

-    return {'content': response.text, 'content_type': content_type}
+    # Let file module perform existence and format validation.
+    return File(path, fmt).read()


 # -- Orchestration -- #
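The file-based extractor keeps its old behavior but now normalizes formats through `FileFormat.coerce`. A hedged usage sketch based on the signature above; the paths are placeholders and the `'csv'` string is assumed to be a value `FileFormat.coerce` accepts:

```python
from etlplus.ops.extract import extract_from_file

# Explicit format string, normalized via FileFormat.coerce() as shown above.
rows = extract_from_file('data/input.csv', file_format='csv')

# file_format=None lets the File class infer the format from the extension.
payload = extract_from_file('data/input.json', file_format=None)
```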
@@ -202,7 +184,7 @@ def extract(
     ValueError
         If `source_type` is not one of the supported values.
     """
-    match coerce_data_connector_type(source_type):
+    match DataConnectorType.coerce(source_type):
         case DataConnectorType.FILE:
             # Prefer explicit format if provided, else infer from filename.
             return extract_from_file(source, file_format)
@@ -213,6 +195,6 @@
             # ``file_format`` is ignored for APIs.
             return extract_from_api(str(source), **kwargs)
         case _:
-            # ``coerce_data_connector_type`` covers invalid entries, but keep
-            # explicit guard for defensive programming.
+            # :meth:`coerce` already raises for invalid connector types, but
+            # keep explicit guard for defensive programming.
             raise ValueError(f'Invalid source type: {source_type}')
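The dispatch change above replaces the removed module-level helpers (`coerce_data_connector_type`, `coerce_file_format`, `coerce_http_method`) with classmethods on the enums themselves. A hedged sketch of the new pattern; the `'file'` literal is illustrative and assumed to be a value the enum accepts:

```python
from etlplus.enums import DataConnectorType

# 0.9.0: connector = coerce_data_connector_type('file')
# 0.9.2: coercion lives on the enum and raises for unknown values.
connector = DataConnectorType.coerce('file')

if connector is DataConnectorType.FILE:
    ...  # dispatch to extract_from_file / load_to_file
```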
etlplus/{load.py → ops/load.py}
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.load` module.
+:mod:`etlplus.ops.load` module.

 Helpers to load data into files, databases, and REST APIs.
 """
@@ -12,20 +12,16 @@ from pathlib import Path
 from typing import Any
 from typing import cast

-import requests  # type: ignore[import]
-
-from .enums import DataConnectorType
-from .enums import FileFormat
-from .enums import HttpMethod
-from .enums import coerce_data_connector_type
-from .enums import coerce_file_format
-from .enums import coerce_http_method
-from .file import File
-from .types import JSONData
-from .types import JSONDict
-from .types import JSONList
-from .types import StrPath
-from .utils import count_records
+from ..api import HttpMethod
+from ..api.utils import resolve_request
+from ..enums import DataConnectorType
+from ..file import File
+from ..file import FileFormat
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import JSONList
+from ..types import StrPath
+from ..utils import count_records

 # SECTION: INTERNAL FUNCTIONS ============================================== #

@@ -72,7 +68,7 @@ def _parse_json_string(
 # SECTION: FUNCTIONS ======================================================== #


-# -- Data Loading -- #
+# -- Helpers -- #


 def load_data(
@@ -101,7 +97,7 @@ def load_data(
         return cast(JSONData, source)

     if isinstance(source, Path):
-        return File(source, FileFormat.JSON).read_json()
+        return File(source, FileFormat.JSON).read()

     if isinstance(source, str):
         # Special case: '-' means read JSON from STDIN (Unix convention).
@@ -111,7 +107,7 @@ def load_data(
         candidate = Path(source)
         if candidate.exists():
             try:
-                return File(candidate, FileFormat.JSON).read_json()
+                return File(candidate, FileFormat.JSON).read()
             except (OSError, json.JSONDecodeError, ValueError):
                 # Fall back to treating the string as raw JSON content.
                 pass
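Both branches of `load_data` switch from the old `File.read_json()` to the format-aware `File.read()` provided by the new `etlplus.file` package. A hedged sketch of the call, mirroring the constructor usage in this hunk; the path is a placeholder:

```python
from pathlib import Path

from etlplus.file import File, FileFormat

# 0.9.0: File(path, FileFormat.JSON).read_json()
# 0.9.2: a single read() entry point keyed off the declared format.
data = File(Path('payload.json'), FileFormat.JSON).read()
```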
@@ -122,58 +118,59 @@ def load_data(
     )


-# -- File Loading -- #
-
-
-def load_to_file(
+def load_to_api(
     data: JSONData,
-    file_path: StrPath,
-    file_format: FileFormat | str | None = None,
+    url: str,
+    method: HttpMethod | str,
+    **kwargs: Any,
 ) -> JSONDict:
     """
-    Persist data to a local file.
+    Load data to a REST API.

     Parameters
     ----------
     data : JSONData
-        Data to write.
-    file_path : StrPath
-        Target file path.
-    file_format : FileFormat | str | None, optional
-        Output format. If omitted (None), the format is inferred from the
-        filename extension.
+        Data to send as JSON.
+    url : str
+        API endpoint URL.
+    method : HttpMethod | str
+        HTTP method to use.
+    **kwargs : Any
+        Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
+        When omitted, ``timeout`` defaults to 10 seconds.

     Returns
     -------
     JSONDict
-        Result dictionary with status and record count.
+        Result dictionary including response payload or text.
     """
-    path = Path(file_path)
-    path.parent.mkdir(parents=True, exist_ok=True)
+    # Apply a conservative timeout to guard against hanging requests.
+    timeout = kwargs.pop('timeout', 10.0)
+    session = kwargs.pop('session', None)
+    request_callable, timeout, http_method = resolve_request(
+        method,
+        session=session,
+        timeout=timeout,
+    )
+    response = request_callable(url, json=data, timeout=timeout, **kwargs)
+    response.raise_for_status()

-    # If no explicit format is provided, let File infer from extension.
-    if file_format is None:
-        records = File(path).write(data)
-        ext = path.suffix.lstrip('.').lower()
-        fmt = coerce_file_format(ext) if ext else FileFormat.JSON
-    else:
-        fmt = coerce_file_format(file_format)
-        records = File(path, fmt).write(data)
-    if fmt is FileFormat.CSV and records == 0:
-        message = 'No data to write'
-    else:
-        message = f'Data loaded to {path}'
+    # Try JSON first, fall back to text.
+    try:
+        payload: Any = response.json()
+    except ValueError:
+        payload = response.text

     return {
         'status': 'success',
-        'message': message,
-        'records': records,
+        'status_code': response.status_code,
+        'message': f'Data loaded to {url}',
+        'response': payload,
+        'records': count_records(data),
+        'method': http_method.value.upper(),
     }


-# -- Database Loading (Placeholder) -- #
-
-
 def load_to_database(
     data: JSONData,
     connection_string: str,
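A hedged caller-side sketch of the reworked `load_to_api`, based on the signature and the result keys returned above; the URL and records are placeholders, and passing `'post'` as the method string is an assumption about what `HttpMethod` coercion accepts:

```python
from etlplus.ops.load import load_to_api

result = load_to_api(
    [{'id': 1}, {'id': 2}],           # JSONData payload, sent as the JSON body
    'https://api.example.com/items',  # placeholder endpoint
    'post',                           # HttpMethod | str; assumed to coerce to POST
    timeout=5.0,
)

# Per the return block above, the result now also carries the HTTP status code,
# the parsed (or raw-text) response body, and the record count.
print(result['status_code'], result['records'], result['method'])
```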
@@ -209,69 +206,49 @@ def load_to_database(
     }


-# -- REST API Loading -- #
-
-
-def load_to_api(
+def load_to_file(
     data: JSONData,
-    url: str,
-    method: HttpMethod | str,
-    **kwargs: Any,
+    file_path: StrPath,
+    file_format: FileFormat | str | None = None,
 ) -> JSONDict:
     """
-    Load data to a REST API.
+    Persist data to a local file.

     Parameters
     ----------
     data : JSONData
-        Data to send as JSON.
-    url : str
-        API endpoint URL.
-    method : HttpMethod | str
-        HTTP method to use.
-    **kwargs : Any
-        Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
+        Data to write.
+    file_path : StrPath
+        Target file path.
+    file_format : FileFormat | str | None, optional
+        Output format. If omitted (None), the format is inferred from the
+        filename extension.

     Returns
     -------
     JSONDict
-        Result dictionary including response payload or text.
-
-    Raises
-    ------
-    TypeError
-        If the session object is not valid.
+        Result dictionary with status and record count.
     """
-    http_method = coerce_http_method(method)
-
-    # Apply a conservative timeout to guard against hanging requests.
-    timeout = kwargs.pop('timeout', 10.0)
-    session = kwargs.pop('session', None)
-    requester = session or requests
-
-    request_callable = getattr(requester, http_method.value, None)
-    if not callable(request_callable):
-        raise TypeError(
-            'Session object must supply a '
-            f'callable "{http_method.value}" method',
-        )
-
-    response = request_callable(url, json=data, timeout=timeout, **kwargs)
-    response.raise_for_status()
+    path = Path(file_path)
+    path.parent.mkdir(parents=True, exist_ok=True)

-    # Try JSON first, fall back to text.
-    try:
-        payload: Any = response.json()
-    except ValueError:
-        payload = response.text
+    # If no explicit format is provided, let File infer from extension.
+    if file_format is None:
+        records = File(path).write(data)
+        ext = path.suffix.lstrip('.').lower()
+        fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
+    else:
+        fmt = FileFormat.coerce(file_format)
+        records = File(path, fmt).write(data)
+    if fmt is FileFormat.CSV and records == 0:
+        message = 'No data to write'
+    else:
+        message = f'Data loaded to {path}'

     return {
         'status': 'success',
-        'status_code': response.status_code,
-        'message': f'Data loaded to {url}',
-        'response': payload,
-        'records': count_records(data),
-        'method': http_method.value.upper(),
+        'message': message,
+        'records': records,
     }

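For completeness, a hedged sketch of the relocated `load_to_file`, following the signature and format-inference behavior above; the output paths are placeholders and the `'csv'` string is assumed to coerce to `FileFormat.CSV`:

```python
from etlplus.ops.load import load_to_file

rows = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]

# Explicit format.
report = load_to_file(rows, 'out/rows.csv', file_format='csv')

# No format given: File infers it from the '.json' extension.
report = load_to_file(rows, 'out/rows.json')
print(report['message'], report['records'])
```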
 
@@ -316,7 +293,7 @@ def load(
     """
     data = load_data(source)

-    match coerce_data_connector_type(target_type):
+    match DataConnectorType.coerce(target_type):
         case DataConnectorType.FILE:
             # Prefer explicit format if provided, else infer from filename.
             return load_to_file(data, target, file_format)
@@ -331,6 +308,6 @@ def load(
                 **kwargs,
             )
         case _:
-            # `coerce_data_connector_type` covers invalid entries, but keep
-            # explicit guard.
+            # :meth:`coerce` already raises for invalid connector types, but
+            # keep explicit guard for defensive programming.
             raise ValueError(f'Invalid target type: {target_type}')
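Taken together, a hedged end-to-end sketch chaining the relocated helpers directly (it deliberately skips the `extract()` and `load()` orchestrators, whose full signatures are not shown in these hunks); the endpoint and output path are placeholders:

```python
from etlplus.ops.extract import extract_from_api
from etlplus.ops.load import load_to_file

# Pull JSON from an API and persist it: extract_from_api normalizes the payload
# to a dict or a list of dicts, which load_to_file then writes out.
data = extract_from_api('https://api.example.com/items', timeout=5.0)
report = load_to_file(data, 'out/items.json')
print(report['status'], report['records'])
```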