etlplus-0.9.2-py3-none-any.whl → etlplus-0.10.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. etlplus/__init__.py +26 -1
  2. etlplus/api/README.md +3 -51
  3. etlplus/api/__init__.py +0 -10
  4. etlplus/api/config.py +28 -39
  5. etlplus/api/endpoint_client.py +3 -3
  6. etlplus/api/pagination/client.py +1 -1
  7. etlplus/api/rate_limiting/config.py +1 -13
  8. etlplus/api/rate_limiting/rate_limiter.py +11 -8
  9. etlplus/api/request_manager.py +6 -11
  10. etlplus/api/transport.py +2 -14
  11. etlplus/api/types.py +6 -96
  12. etlplus/cli/commands.py +43 -76
  13. etlplus/cli/constants.py +1 -1
  14. etlplus/cli/handlers.py +12 -40
  15. etlplus/cli/io.py +2 -2
  16. etlplus/cli/main.py +1 -1
  17. etlplus/cli/state.py +7 -4
  18. etlplus/{workflow → config}/__init__.py +23 -10
  19. etlplus/{workflow → config}/connector.py +44 -58
  20. etlplus/{workflow → config}/jobs.py +32 -105
  21. etlplus/{workflow → config}/pipeline.py +51 -59
  22. etlplus/{workflow → config}/profile.py +5 -8
  23. etlplus/config/types.py +204 -0
  24. etlplus/config/utils.py +120 -0
  25. etlplus/database/ddl.py +1 -1
  26. etlplus/database/engine.py +3 -19
  27. etlplus/database/orm.py +0 -2
  28. etlplus/database/schema.py +1 -1
  29. etlplus/enums.py +266 -0
  30. etlplus/{ops/extract.py → extract.py} +99 -81
  31. etlplus/file.py +652 -0
  32. etlplus/{ops/load.py → load.py} +101 -78
  33. etlplus/{ops/run.py → run.py} +127 -159
  34. etlplus/{api/utils.py → run_helpers.py} +153 -209
  35. etlplus/{ops/transform.py → transform.py} +68 -75
  36. etlplus/types.py +4 -5
  37. etlplus/utils.py +2 -136
  38. etlplus/{ops/validate.py → validate.py} +12 -22
  39. etlplus/validation/__init__.py +44 -0
  40. etlplus/{ops → validation}/utils.py +17 -53
  41. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/METADATA +17 -210
  42. etlplus-0.10.1.dist-info/RECORD +65 -0
  43. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/WHEEL +1 -1
  44. etlplus/README.md +0 -37
  45. etlplus/api/enums.py +0 -51
  46. etlplus/cli/README.md +0 -40
  47. etlplus/database/README.md +0 -48
  48. etlplus/file/README.md +0 -105
  49. etlplus/file/__init__.py +0 -25
  50. etlplus/file/_imports.py +0 -141
  51. etlplus/file/_io.py +0 -160
  52. etlplus/file/accdb.py +0 -78
  53. etlplus/file/arrow.py +0 -78
  54. etlplus/file/avro.py +0 -176
  55. etlplus/file/bson.py +0 -77
  56. etlplus/file/cbor.py +0 -78
  57. etlplus/file/cfg.py +0 -79
  58. etlplus/file/conf.py +0 -80
  59. etlplus/file/core.py +0 -322
  60. etlplus/file/csv.py +0 -79
  61. etlplus/file/dat.py +0 -78
  62. etlplus/file/dta.py +0 -77
  63. etlplus/file/duckdb.py +0 -78
  64. etlplus/file/enums.py +0 -343
  65. etlplus/file/feather.py +0 -111
  66. etlplus/file/fwf.py +0 -77
  67. etlplus/file/gz.py +0 -123
  68. etlplus/file/hbs.py +0 -78
  69. etlplus/file/hdf5.py +0 -78
  70. etlplus/file/ini.py +0 -79
  71. etlplus/file/ion.py +0 -78
  72. etlplus/file/jinja2.py +0 -78
  73. etlplus/file/json.py +0 -98
  74. etlplus/file/log.py +0 -78
  75. etlplus/file/mat.py +0 -78
  76. etlplus/file/mdb.py +0 -78
  77. etlplus/file/msgpack.py +0 -78
  78. etlplus/file/mustache.py +0 -78
  79. etlplus/file/nc.py +0 -78
  80. etlplus/file/ndjson.py +0 -108
  81. etlplus/file/numbers.py +0 -75
  82. etlplus/file/ods.py +0 -79
  83. etlplus/file/orc.py +0 -111
  84. etlplus/file/parquet.py +0 -113
  85. etlplus/file/pb.py +0 -78
  86. etlplus/file/pbf.py +0 -77
  87. etlplus/file/properties.py +0 -78
  88. etlplus/file/proto.py +0 -77
  89. etlplus/file/psv.py +0 -79
  90. etlplus/file/rda.py +0 -78
  91. etlplus/file/rds.py +0 -78
  92. etlplus/file/sas7bdat.py +0 -78
  93. etlplus/file/sav.py +0 -77
  94. etlplus/file/sqlite.py +0 -78
  95. etlplus/file/stub.py +0 -84
  96. etlplus/file/sylk.py +0 -77
  97. etlplus/file/tab.py +0 -81
  98. etlplus/file/toml.py +0 -78
  99. etlplus/file/tsv.py +0 -80
  100. etlplus/file/txt.py +0 -102
  101. etlplus/file/vm.py +0 -78
  102. etlplus/file/wks.py +0 -77
  103. etlplus/file/xls.py +0 -88
  104. etlplus/file/xlsm.py +0 -79
  105. etlplus/file/xlsx.py +0 -99
  106. etlplus/file/xml.py +0 -185
  107. etlplus/file/xpt.py +0 -78
  108. etlplus/file/yaml.py +0 -95
  109. etlplus/file/zip.py +0 -175
  110. etlplus/file/zsav.py +0 -77
  111. etlplus/ops/README.md +0 -50
  112. etlplus/ops/__init__.py +0 -61
  113. etlplus/templates/README.md +0 -46
  114. etlplus/workflow/README.md +0 -52
  115. etlplus/workflow/dag.py +0 -105
  116. etlplus/workflow/types.py +0 -115
  117. etlplus-0.9.2.dist-info/RECORD +0 -134
  118. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/entry_points.txt +0 -0
  119. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/licenses/LICENSE +0 -0
  120. {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  """
2
- :mod:`etlplus.ops.extract` module.
2
+ :mod:`etlplus.extract` module.
3
3
 
4
4
  Helpers to extract data from files, databases, and REST APIs.
5
5
  """
@@ -10,81 +10,58 @@ from pathlib import Path
10
10
  from typing import Any
11
11
  from typing import cast
12
12
 
13
- from ..api import HttpMethod
14
- from ..api.utils import resolve_request
15
- from ..enums import DataConnectorType
16
- from ..file import File
17
- from ..file import FileFormat
18
- from ..types import JSONData
19
- from ..types import JSONDict
20
- from ..types import JSONList
21
- from ..types import StrPath
13
+ import requests # type: ignore[import]
14
+
15
+ from .enums import DataConnectorType
16
+ from .enums import FileFormat
17
+ from .enums import HttpMethod
18
+ from .enums import coerce_data_connector_type
19
+ from .enums import coerce_file_format
20
+ from .file import File
21
+ from .types import JSONData
22
+ from .types import JSONDict
23
+ from .types import JSONList
24
+ from .types import StrPath
22
25
 
23
26
  # SECTION: FUNCTIONS ======================================================== #
24
27
 
25
28
 
26
- def extract_from_api(
27
- url: str,
28
- method: HttpMethod | str = HttpMethod.GET,
29
- **kwargs: Any,
29
+ # -- File Extraction -- #
30
+
31
+
32
+ def extract_from_file(
33
+ file_path: StrPath,
34
+ file_format: FileFormat | str | None = FileFormat.JSON,
30
35
  ) -> JSONData:
31
36
  """
32
- Extract data from a REST API.
37
+ Extract (semi-)structured data from a local file.
33
38
 
34
39
  Parameters
35
40
  ----------
36
- url : str
37
- API endpoint URL.
38
- method : HttpMethod | str, optional
39
- HTTP method to use. Defaults to ``GET``.
40
- **kwargs : Any
41
- Extra arguments forwarded to the underlying ``requests`` call
42
- (for example, ``timeout``). To use a pre-configured
43
- :class:`requests.Session`, provide it via ``session``.
44
- When omitted, ``timeout`` defaults to 10 seconds.
41
+ file_path : StrPath
42
+ Source file path.
43
+ file_format : FileFormat | str | None, optional
44
+ File format to parse. If ``None``, infer from the filename
45
+ extension. Defaults to `'json'` for backward compatibility when
46
+ explicitly provided.
45
47
 
46
48
  Returns
47
49
  -------
48
50
  JSONData
49
- Parsed JSON payload, or a fallback object with raw text.
50
-
51
- Raises
52
- ------
53
- TypeError
54
- If a provided ``session`` does not expose the required HTTP
55
- method (for example, ``get``).
51
+ Parsed data as a mapping or a list of mappings.
56
52
  """
57
- timeout = kwargs.pop('timeout', None)
58
- session = kwargs.pop('session', None)
59
- request_callable, timeout, _ = resolve_request(
60
- method,
61
- session=session,
62
- timeout=timeout,
63
- )
64
- response = request_callable(url, timeout=timeout, **kwargs)
65
- response.raise_for_status()
53
+ path = Path(file_path)
66
54
 
67
- content_type = response.headers.get('content-type', '').lower()
68
- if 'application/json' in content_type:
69
- try:
70
- payload: Any = response.json()
71
- except ValueError:
72
- # Malformed JSON despite content-type; fall back to text
73
- return {
74
- 'content': response.text,
75
- 'content_type': content_type,
76
- }
77
- if isinstance(payload, dict):
78
- return cast(JSONDict, payload)
79
- if isinstance(payload, list):
80
- if all(isinstance(x, dict) for x in payload):
81
- return cast(JSONList, payload)
82
- # Coerce non-dict array items into objects for consistency
83
- return [{'value': x} for x in payload]
84
- # Fallback: wrap scalar JSON
85
- return {'value': payload}
55
+ # If no explicit format is provided, let File infer from extension.
56
+ if file_format is None:
57
+ return File(path, None).read()
58
+ fmt = coerce_file_format(file_format)
86
59
 
87
- return {'content': response.text, 'content_type': content_type}
60
+ # Let file module perform existence and format validation.
61
+ return File(path, fmt).read()
62
+
63
+
64
+ # -- Database Extraction (Placeholder) -- #
88
65
 
89
66
 
90
67
  def extract_from_database(
@@ -119,36 +96,77 @@ def extract_from_database(
119
96
  ]
120
97
 
121
98
 
122
- def extract_from_file(
123
- file_path: StrPath,
124
- file_format: FileFormat | str | None = FileFormat.JSON,
99
+ # -- REST API Extraction -- #
100
+
101
+
102
+ def extract_from_api(
103
+ url: str,
104
+ method: HttpMethod | str = HttpMethod.GET,
105
+ **kwargs: Any,
125
106
  ) -> JSONData:
126
107
  """
127
- Extract (semi-)structured data from a local file.
108
+ Extract data from a REST API.
128
109
 
129
110
  Parameters
130
111
  ----------
131
- file_path : StrPath
132
- Source file path.
133
- file_format : FileFormat | str | None, optional
134
- File format to parse. If ``None``, infer from the filename
135
- extension. Defaults to `'json'` for backward compatibility when
136
- explicitly provided.
112
+ url : str
113
+ API endpoint URL.
114
+ method : HttpMethod | str, optional
115
+ HTTP method to use. Defaults to ``GET``.
116
+ **kwargs : Any
117
+ Extra arguments forwarded to the underlying ``requests`` call
118
+ (for example, ``timeout``). To use a pre-configured
119
+ :class:`requests.Session`, provide it via ``session``.
137
120
 
138
121
  Returns
139
122
  -------
140
123
  JSONData
141
- Parsed data as a mapping or a list of mappings.
124
+ Parsed JSON payload, or a fallback object with raw text.
125
+
126
+ Raises
127
+ ------
128
+ TypeError
129
+ If a provided ``session`` does not expose the required HTTP
130
+ method (for example, ``get``).
142
131
  """
143
- path = Path(file_path)
132
+ http_method = HttpMethod.coerce(method)
144
133
 
145
- # If no explicit format is provided, let File infer from extension.
146
- if file_format is None:
147
- return File(path, None).read()
148
- fmt = FileFormat.coerce(file_format)
134
+ # Apply a conservative timeout to guard against hanging requests.
135
+ timeout = kwargs.pop('timeout', 10.0)
136
+ session = kwargs.pop('session', None)
137
+ requester = session or requests
149
138
 
150
- # Let file module perform existence and format validation.
151
- return File(path, fmt).read()
139
+ request_callable = getattr(requester, http_method.value, None)
140
+ if not callable(request_callable):
141
+ raise TypeError(
142
+ 'Session object must supply a callable'
143
+ f'"{http_method.value}" method',
144
+ )
145
+
146
+ response = request_callable(url, timeout=timeout, **kwargs)
147
+ response.raise_for_status()
148
+
149
+ content_type = response.headers.get('content-type', '').lower()
150
+ if 'application/json' in content_type:
151
+ try:
152
+ payload: Any = response.json()
153
+ except ValueError:
154
+ # Malformed JSON despite content-type; fall back to text
155
+ return {
156
+ 'content': response.text,
157
+ 'content_type': content_type,
158
+ }
159
+ if isinstance(payload, dict):
160
+ return cast(JSONDict, payload)
161
+ if isinstance(payload, list):
162
+ if all(isinstance(x, dict) for x in payload):
163
+ return cast(JSONList, payload)
164
+ # Coerce non-dict array items into objects for consistency
165
+ return [{'value': x} for x in payload]
166
+ # Fallback: wrap scalar JSON
167
+ return {'value': payload}
168
+
169
+ return {'content': response.text, 'content_type': content_type}
152
170
 
153
171
 
154
172
  # -- Orchestration -- #
@@ -184,7 +202,7 @@ def extract(
184
202
  ValueError
185
203
  If `source_type` is not one of the supported values.
186
204
  """
187
- match DataConnectorType.coerce(source_type):
205
+ match coerce_data_connector_type(source_type):
188
206
  case DataConnectorType.FILE:
189
207
  # Prefer explicit format if provided, else infer from filename.
190
208
  return extract_from_file(source, file_format)
@@ -195,6 +213,6 @@ def extract(
195
213
  # ``file_format`` is ignored for APIs.
196
214
  return extract_from_api(str(source), **kwargs)
197
215
  case _:
198
- # :meth:`coerce` already raises for invalid connector types, but
199
- # keep explicit guard for defensive programming.
216
+ # ``coerce_data_connector_type`` covers invalid entries, but keep
217
+ # explicit guard for defensive programming.
200
218
  raise ValueError(f'Invalid source type: {source_type}')