etlplus 0.16.0__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. etlplus/README.md +24 -2
  2. etlplus/__init__.py +2 -0
  3. etlplus/api/__init__.py +14 -14
  4. etlplus/api/auth.py +9 -6
  5. etlplus/api/config.py +6 -6
  6. etlplus/api/endpoint_client.py +16 -16
  7. etlplus/api/enums.py +2 -2
  8. etlplus/api/errors.py +4 -4
  9. etlplus/api/pagination/__init__.py +6 -6
  10. etlplus/api/pagination/config.py +11 -9
  11. etlplus/api/rate_limiting/__init__.py +2 -2
  12. etlplus/api/rate_limiting/config.py +10 -10
  13. etlplus/api/rate_limiting/rate_limiter.py +2 -2
  14. etlplus/api/request_manager.py +4 -4
  15. etlplus/api/retry_manager.py +6 -6
  16. etlplus/api/transport.py +10 -10
  17. etlplus/api/types.py +47 -26
  18. etlplus/api/utils.py +49 -49
  19. etlplus/cli/README.md +9 -7
  20. etlplus/cli/commands.py +22 -22
  21. etlplus/cli/handlers.py +12 -13
  22. etlplus/cli/main.py +1 -1
  23. etlplus/{workflow/pipeline.py → config.py} +54 -91
  24. etlplus/connector/__init__.py +6 -6
  25. etlplus/connector/api.py +7 -7
  26. etlplus/connector/database.py +3 -3
  27. etlplus/connector/file.py +3 -3
  28. etlplus/connector/types.py +2 -2
  29. etlplus/database/README.md +7 -7
  30. etlplus/enums.py +35 -167
  31. etlplus/file/README.md +7 -5
  32. etlplus/file/accdb.py +2 -1
  33. etlplus/file/arrow.py +2 -1
  34. etlplus/file/bson.py +2 -1
  35. etlplus/file/cbor.py +2 -1
  36. etlplus/file/cfg.py +1 -1
  37. etlplus/file/conf.py +1 -1
  38. etlplus/file/dat.py +1 -1
  39. etlplus/file/dta.py +1 -1
  40. etlplus/file/duckdb.py +2 -1
  41. etlplus/file/enums.py +1 -1
  42. etlplus/file/fwf.py +2 -1
  43. etlplus/file/hbs.py +2 -1
  44. etlplus/file/hdf5.py +2 -1
  45. etlplus/file/ini.py +2 -1
  46. etlplus/file/ion.py +1 -1
  47. etlplus/file/jinja2.py +2 -1
  48. etlplus/file/log.py +1 -1
  49. etlplus/file/mat.py +1 -1
  50. etlplus/file/mdb.py +2 -1
  51. etlplus/file/msgpack.py +2 -1
  52. etlplus/file/mustache.py +2 -1
  53. etlplus/file/nc.py +1 -1
  54. etlplus/file/numbers.py +2 -1
  55. etlplus/file/ods.py +2 -1
  56. etlplus/file/pb.py +2 -1
  57. etlplus/file/pbf.py +2 -1
  58. etlplus/file/properties.py +2 -1
  59. etlplus/file/proto.py +2 -1
  60. etlplus/file/psv.py +2 -1
  61. etlplus/file/rda.py +2 -1
  62. etlplus/file/rds.py +1 -1
  63. etlplus/file/sas7bdat.py +2 -1
  64. etlplus/file/sav.py +1 -1
  65. etlplus/file/sqlite.py +2 -1
  66. etlplus/file/sylk.py +2 -1
  67. etlplus/file/tab.py +2 -1
  68. etlplus/file/toml.py +2 -1
  69. etlplus/file/vm.py +2 -1
  70. etlplus/file/wks.py +2 -1
  71. etlplus/file/xls.py +1 -1
  72. etlplus/file/xlsm.py +2 -2
  73. etlplus/file/xpt.py +2 -1
  74. etlplus/file/zsav.py +2 -1
  75. etlplus/ops/README.md +10 -9
  76. etlplus/ops/__init__.py +1 -0
  77. etlplus/ops/enums.py +173 -0
  78. etlplus/ops/extract.py +209 -22
  79. etlplus/ops/load.py +140 -34
  80. etlplus/ops/run.py +88 -103
  81. etlplus/ops/transform.py +46 -27
  82. etlplus/ops/types.py +147 -0
  83. etlplus/ops/utils.py +5 -5
  84. etlplus/ops/validate.py +13 -13
  85. etlplus/templates/README.md +11 -9
  86. etlplus/types.py +5 -102
  87. etlplus/workflow/README.md +0 -24
  88. etlplus/workflow/__init__.py +2 -4
  89. etlplus/workflow/dag.py +23 -1
  90. etlplus/workflow/jobs.py +15 -28
  91. etlplus/workflow/profile.py +4 -2
  92. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/METADATA +32 -28
  93. etlplus-0.16.7.dist-info/RECORD +143 -0
  94. etlplus-0.16.0.dist-info/RECORD +0 -141
  95. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/WHEEL +0 -0
  96. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/entry_points.txt +0 -0
  97. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/licenses/LICENSE +0 -0
  98. {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/top_level.txt +0 -0
etlplus/file/sqlite.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.sqlite` module.
3
3
 
4
- Helpers for reading/writing SQLite database (SQLITE) files.
4
+ Stub helpers for reading/writing SQLite database (SQLITE) files (not
5
+ implemented yet).
5
6
 
6
7
  Notes
7
8
  -----
etlplus/file/sylk.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.sylk` module.
3
3
 
4
- Helpers for reading/writing Symbolic Link (SYLK) data files.
4
+ Stub helpers for reading/writing Symbolic Link (SYLK) data files (not
5
+ implemented yet).
5
6
 
6
7
  Notes
7
8
  -----
etlplus/file/tab.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.tab` module.
3
3
 
4
- Helpers for reading/writing "tab"-formatted (TAB) files.
4
+ Stub helpers for reading/writing "tab"-formatted (TAB) files (not implemented
5
+ yet).
5
6
 
6
7
  Notes
7
8
  -----
etlplus/file/toml.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.toml` module.
3
3
 
4
- Helpers for reading/writing Tom's Obvious Minimal Language (TOML) files.
4
+ Stub helpers for reading/writing Tom's Obvious Minimal Language (TOML) files
5
+ (not implemented yet).
5
6
 
6
7
  Notes
7
8
  -----
etlplus/file/vm.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.vm` module.
3
3
 
4
- Helpers for reading/writing Apache Velocity (VM) template files.
4
+ Stub helpers for reading/writing Apache Velocity (VM) template files (not
5
+ implemented yet).
5
6
 
6
7
  Notes
7
8
  -----
etlplus/file/wks.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.wks` module.
3
3
 
4
- Helpers for reading/writing Lotus 1-2-3 (WKS) spreadsheet files.
4
+ Stub helpers for reading/writing Lotus 1-2-3 (WKS) spreadsheet files (not
5
+ implemented yet).
5
6
 
6
7
  Notes
7
8
  -----
etlplus/file/xls.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.xls` module.
3
3
 
4
- Helpers for reading/writing Excel XLS files.
4
+ Helpers for reading Excel XLS files (write is not supported).
5
5
  """
6
6
 
7
7
  from __future__ import annotations
etlplus/file/xlsm.py CHANGED
@@ -1,8 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.xlsm` module.
3
3
 
4
- Helpers for reading/writing Microsoft Excel Macro-Enabled (XLSM) spreadsheet
5
- files.
4
+ Stub helpers for reading/writing Microsoft Excel Macro-Enabled (XLSM)
5
+ spreadsheet files (not implemented yet).
6
6
 
7
7
  Notes
8
8
  -----
etlplus/file/xpt.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.xpt` module.
3
3
 
4
- Helpers for reading/writing SAS Transport (XPT) files.
4
+ Stub helpers for reading/writing SAS Transport (XPT) files (not implemented
5
+ yet).
5
6
 
6
7
  Notes
7
8
  -----
etlplus/file/zsav.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """
2
2
  :mod:`etlplus.file.zsav` module.
3
3
 
4
- Helpers for reading/writing compressed SPSS (ZSAV) data files.
4
+ Stub helpers for reading/writing compressed SPSS (ZSAV) data files (not
5
+ implemented yet).
5
6
 
6
7
  Notes
7
8
  -----
etlplus/ops/README.md CHANGED
@@ -1,14 +1,16 @@
1
- # etlplus.ops subpackage
1
+ # `etlplus.ops` Subpackage
2
2
 
3
- Documentation for the `etlplus.validation` subpackage: data validation utilities and helpers.
3
+ Documentation for the `etlplus.ops` subpackage: core ETL primitives used by the CLI and pipeline
4
+ runner.
4
5
 
5
- - Provides flexible data validation for ETL pipelines
6
- - Supports type checking, required fields, and custom rules
7
- - Includes utilities for rule definition and validation logic
6
+ - Extract data from files, APIs, and databases (database extract is a placeholder today)
7
+ - Validate JSON-like data with schema-style rules
8
+ - Transform records (filter, map, select, sort, aggregate)
9
+ - Load data into files and APIs (database load is a placeholder today)
8
10
 
9
11
  Back to project overview: see the top-level [README](../../README.md).
10
12
 
11
- - [etlplus.ops subpackage](#etlplusops-subpackage)
13
+ - [`etlplus.ops` Subpackage](#etlplusops-subpackage)
12
14
  - [Validation Features](#validation-features)
13
15
  - [Defining Validation Rules](#defining-validation-rules)
14
16
  - [Example: Validating Data](#example-validating-data)
@@ -19,7 +21,6 @@ Back to project overview: see the top-level [README](../../README.md).
19
21
  - Type checking (string, number, boolean, etc.)
20
22
  - Required/optional fields
21
23
  - Enum and pattern validation
22
- - Custom rule support
23
24
 
24
25
  ## Defining Validation Rules
25
26
 
@@ -35,7 +36,7 @@ rules = {
35
36
  ## Example: Validating Data
36
37
 
37
38
  ```python
38
- from etlplus.validation import validate
39
+ from etlplus.ops import validate
39
40
 
40
41
  result = validate({"name": "Alice", "age": 30}, rules)
41
42
  if result["valid"]:
@@ -47,4 +48,4 @@ else:
47
48
  ## See Also
48
49
 
49
50
  - Top-level CLI and library usage in the main [README](../../README.md)
50
- - Validation utilities in [utils.py](utils.py)
51
+ - Validation utilities in [validate.py](validate.py)
etlplus/ops/__init__.py CHANGED
@@ -52,6 +52,7 @@ from .validate import validate
52
52
 
53
53
 
54
54
  __all__ = [
55
+ # Functions
55
56
  'extract',
56
57
  'load',
57
58
  'run',
etlplus/ops/enums.py ADDED
@@ -0,0 +1,173 @@
1
+ """
2
+ :mod:`etlplus.ops.enums` module.
3
+
4
+ Operation-specific enums and helpers.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import operator as _op
10
+ from statistics import fmean
11
+
12
+ from ..enums import CoercibleStrEnum
13
+ from ..types import StrStrMap
14
+ from .types import AggregateFunc
15
+ from .types import OperatorFunc
16
+
17
+ # SECTION: EXPORTS ========================================================= #
18
+
19
+
20
+ __all__ = [
21
+ # Enums
22
+ 'AggregateName',
23
+ 'OperatorName',
24
+ 'PipelineStep',
25
+ ]
26
+
27
+
28
+ # SECTION: ENUMS ============================================================ #
29
+
30
+
31
+ class AggregateName(CoercibleStrEnum):
32
+ """Supported aggregations with helpers."""
33
+
34
+ # -- Constants -- #
35
+
36
+ AVG = 'avg'
37
+ COUNT = 'count'
38
+ MAX = 'max'
39
+ MIN = 'min'
40
+ SUM = 'sum'
41
+
42
+ # -- Getters -- #
43
+
44
+ @property
45
+ def func(self) -> AggregateFunc:
46
+ """
47
+ Get the aggregation function for this aggregation type.
48
+
49
+ Returns
50
+ -------
51
+ AggregateFunc
52
+ The aggregation function corresponding to this aggregation type.
53
+ """
54
+ if self is AggregateName.COUNT:
55
+ return lambda xs, n: n
56
+ if self is AggregateName.MAX:
57
+ return lambda xs, n: (max(xs) if xs else None)
58
+ if self is AggregateName.MIN:
59
+ return lambda xs, n: (min(xs) if xs else None)
60
+ if self is AggregateName.SUM:
61
+ return lambda xs, n: sum(xs)
62
+
63
+ # AVG
64
+ return lambda xs, n: (fmean(xs) if xs else 0.0)
65
+
66
+
67
+ class OperatorName(CoercibleStrEnum):
68
+ """Supported comparison operators with helpers."""
69
+
70
+ # -- Constants -- #
71
+
72
+ EQ = 'eq'
73
+ NE = 'ne'
74
+ GT = 'gt'
75
+ GTE = 'gte'
76
+ LT = 'lt'
77
+ LTE = 'lte'
78
+ IN = 'in'
79
+ CONTAINS = 'contains'
80
+
81
+ # -- Getters -- #
82
+
83
+ @property
84
+ def func(self) -> OperatorFunc:
85
+ """
86
+ Get the comparison function for this operator.
87
+
88
+ Returns
89
+ -------
90
+ OperatorFunc
91
+ The comparison function corresponding to this operator.
92
+ """
93
+ match self:
94
+ case OperatorName.EQ:
95
+ return _op.eq
96
+ case OperatorName.NE:
97
+ return _op.ne
98
+ case OperatorName.GT:
99
+ return _op.gt
100
+ case OperatorName.GTE:
101
+ return _op.ge
102
+ case OperatorName.LT:
103
+ return _op.lt
104
+ case OperatorName.LTE:
105
+ return _op.le
106
+ case OperatorName.IN:
107
+ return lambda a, b: a in b
108
+ case OperatorName.CONTAINS:
109
+ return lambda a, b: b in a
110
+
111
+ # -- Class Methods -- #
112
+
113
+ @classmethod
114
+ def aliases(cls) -> StrStrMap:
115
+ """
116
+ Return a mapping of common aliases for each enum member.
117
+
118
+ Returns
119
+ -------
120
+ StrStrMap
121
+ A mapping of alias strings to their corresponding enum member values.
122
+ """
123
+ return {
124
+ '==': 'eq',
125
+ '=': 'eq',
126
+ '!=': 'ne',
127
+ '<>': 'ne',
128
+ '>=': 'gte',
129
+ '≥': 'gte',
130
+ '<=': 'lte',
131
+ '≤': 'lte',
132
+ '>': 'gt',
133
+ '<': 'lt',
134
+ }
135
+
136
+
137
+ class PipelineStep(CoercibleStrEnum):
138
+ """Pipeline step names as an enum for internal orchestration."""
139
+
140
+ # -- Constants -- #
141
+
142
+ FILTER = 'filter'
143
+ MAP = 'map'
144
+ SELECT = 'select'
145
+ SORT = 'sort'
146
+ AGGREGATE = 'aggregate'
147
+
148
+ # -- Getters -- #
149
+
150
+ @property
151
+ def order(self) -> int:
152
+ """
153
+ Get the execution order of this pipeline step.
154
+
155
+ Returns
156
+ -------
157
+ int
158
+ The execution order of this pipeline step.
159
+ """
160
+ return _PIPELINE_ORDER_INDEX[self]
161
+
162
+
163
+ # SECTION: INTERNAL CONSTANTS ============================================== #
164
+
165
+
166
+ # Precomputed order index for PipelineStep; avoids recomputing on each access.
167
+ _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
168
+ PipelineStep.FILTER: 0,
169
+ PipelineStep.MAP: 1,
170
+ PipelineStep.SELECT: 2,
171
+ PipelineStep.SORT: 3,
172
+ PipelineStep.AGGREGATE: 4,
173
+ }
etlplus/ops/extract.py CHANGED
@@ -6,11 +6,19 @@ Helpers to extract data from files, databases, and REST APIs.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
+ from collections.abc import Mapping
9
10
  from pathlib import Path
10
11
  from typing import Any
11
12
  from typing import cast
13
+ from urllib.parse import urlsplit
14
+ from urllib.parse import urlunsplit
12
15
 
16
+ from ..api import EndpointClient
13
17
  from ..api import HttpMethod
18
+ from ..api import PaginationConfigDict
19
+ from ..api import RequestOptions
20
+ from ..api import compose_api_request_env
21
+ from ..api import paginate_with_client
14
22
  from ..api.utils import resolve_request
15
23
  from ..connector import DataConnectorType
16
24
  from ..file import File
@@ -19,6 +27,7 @@ from ..types import JSONData
19
27
  from ..types import JSONDict
20
28
  from ..types import JSONList
21
29
  from ..types import StrPath
30
+ from ..types import Timeout
22
31
 
23
32
  # SECTION: EXPORTS ========================================================== #
24
33
 
@@ -32,50 +41,164 @@ __all__ = [
32
41
  ]
33
42
 
34
43
 
35
- # SECTION: FUNCTIONS ======================================================== #
44
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
36
45
 
37
46
 
38
- def extract_from_api(
39
- url: str,
40
- method: HttpMethod | str = HttpMethod.GET,
41
- **kwargs: Any,
47
+ def _build_client(
48
+ *,
49
+ base_url: str,
50
+ base_path: str | None,
51
+ endpoints: dict[str, str],
52
+ retry: Any,
53
+ retry_network_errors: bool,
54
+ session: Any,
55
+ ) -> EndpointClient:
56
+ """
57
+ Construct an API client with shared defaults.
58
+
59
+ Parameters
60
+ ----------
61
+ base_url : str
62
+ API base URL.
63
+ base_path : str | None
64
+ Base path to prepend for endpoints.
65
+ endpoints : dict[str, str]
66
+ Endpoint name to path mappings.
67
+ retry : Any
68
+ Retry policy configuration.
69
+ retry_network_errors : bool
70
+ Whether to retry on network errors.
71
+ session : Any
72
+ Optional requests session.
73
+
74
+ Returns
75
+ -------
76
+ EndpointClient
77
+ Configured endpoint client instance.
78
+ """
79
+ ClientClass = EndpointClient # noqa: N806
80
+ return ClientClass(
81
+ base_url=base_url,
82
+ base_path=base_path,
83
+ endpoints=endpoints,
84
+ retry=retry,
85
+ retry_network_errors=retry_network_errors,
86
+ session=session,
87
+ )
88
+
89
+
90
+ def _extract_from_api_env(
91
+ env: Mapping[str, Any],
92
+ *,
93
+ use_client: bool,
42
94
  ) -> JSONData:
43
95
  """
44
- Extract data from a REST API.
96
+ Extract API data from a normalized request environment.
45
97
 
46
98
  Parameters
47
99
  ----------
48
- url : str
49
- API endpoint URL.
50
- method : HttpMethod | str, optional
51
- HTTP method to use. Defaults to ``GET``.
52
- **kwargs : Any
53
- Extra arguments forwarded to the underlying ``requests`` call
54
- (for example, ``timeout``). To use a pre-configured
55
- :class:`requests.Session`, provide it via ``session``.
56
- When omitted, ``timeout`` defaults to 10 seconds.
100
+ env : Mapping[str, Any]
101
+ Normalized environment describing API request parameters.
102
+ use_client : bool
103
+ Whether to use the endpoint client/pagination machinery.
57
104
 
58
105
  Returns
59
106
  -------
60
107
  JSONData
61
- Parsed JSON payload, or a fallback object with raw text.
108
+ Extracted payload.
62
109
 
63
110
  Raises
64
111
  ------
65
- TypeError
66
- If a provided ``session`` does not expose the required HTTP
67
- method (for example, ``get``).
112
+ ValueError
113
+ If ``env`` has no ``url`` and the endpoint-client path is not used.
68
114
  """
69
- timeout = kwargs.pop('timeout', None)
70
- session = kwargs.pop('session', None)
115
+ if (
116
+ use_client
117
+ and env.get('use_endpoints')
118
+ and env.get('base_url')
119
+ and env.get('endpoints_map')
120
+ and env.get('endpoint_key')
121
+ ):
122
+ client = _build_client(
123
+ base_url=cast(str, env.get('base_url')),
124
+ base_path=cast(str | None, env.get('base_path')),
125
+ endpoints=cast(dict[str, str], env.get('endpoints_map', {})),
126
+ retry=env.get('retry'),
127
+ retry_network_errors=bool(env.get('retry_network_errors', False)),
128
+ session=env.get('session'),
129
+ )
130
+ return paginate_with_client(
131
+ client,
132
+ cast(str, env.get('endpoint_key')),
133
+ env.get('params'),
134
+ env.get('headers'),
135
+ env.get('timeout'),
136
+ env.get('pagination'),
137
+ cast(float | None, env.get('sleep_seconds')),
138
+ )
139
+
140
+ url = env.get('url')
141
+ if not url:
142
+ raise ValueError('API source missing URL')
143
+
144
+ if use_client:
145
+ parts = urlsplit(cast(str, url))
146
+ base = urlunsplit((parts.scheme, parts.netloc, '', '', ''))
147
+ client = _build_client(
148
+ base_url=base,
149
+ base_path=None,
150
+ endpoints={},
151
+ retry=env.get('retry'),
152
+ retry_network_errors=bool(env.get('retry_network_errors', False)),
153
+ session=env.get('session'),
154
+ )
155
+ request_options = RequestOptions(
156
+ params=cast(Mapping[str, Any] | None, env.get('params')),
157
+ headers=cast(Mapping[str, str] | None, env.get('headers')),
158
+ timeout=cast(Timeout | None, env.get('timeout')),
159
+ )
160
+
161
+ return client.paginate_url(
162
+ cast(str, url),
163
+ cast(PaginationConfigDict | None, env.get('pagination')),
164
+ request=request_options,
165
+ sleep_seconds=cast(float, env.get('sleep_seconds', 0.0)),
166
+ )
167
+
168
+ method = env.get('method', HttpMethod.GET)
169
+ timeout = env.get('timeout', None)
170
+ session = env.get('session', None)
171
+ request_kwargs = dict(env.get('request_kwargs') or {})
71
172
  request_callable, timeout, _ = resolve_request(
72
173
  method,
73
174
  session=session,
74
175
  timeout=timeout,
75
176
  )
76
- response = request_callable(url, timeout=timeout, **kwargs)
177
+ response = request_callable(
178
+ cast(str, url),
179
+ timeout=timeout,
180
+ **request_kwargs,
181
+ )
77
182
  response.raise_for_status()
183
+ return _parse_api_response(response)
184
+
78
185
 
186
+ def _parse_api_response(
187
+ response: Any,
188
+ ) -> JSONData:
189
+ """
190
+ Parse API responses into a consistent JSON payload.
191
+
192
+ Parameters
193
+ ----------
194
+ response : Any
195
+ HTTP response object exposing ``headers``, ``json()``, and ``text``.
196
+
197
+ Returns
198
+ -------
199
+ JSONData
200
+ Parsed JSON payload, or a fallback object with raw text.
201
+ """
79
202
  content_type = response.headers.get('content-type', '').lower()
80
203
  if 'application/json' in content_type:
81
204
  try:
@@ -99,6 +222,70 @@ def extract_from_api(
99
222
  return {'content': response.text, 'content_type': content_type}
100
223
 
101
224
 
225
+ # SECTION: FUNCTIONS ======================================================== #
226
+
227
+
228
+ def extract_from_api(
229
+ url: str,
230
+ method: HttpMethod | str = HttpMethod.GET,
231
+ **kwargs: Any,
232
+ ) -> JSONData:
233
+ """
234
+ Extract data from a REST API.
235
+
236
+ Parameters
237
+ ----------
238
+ url : str
239
+ API endpoint URL.
240
+ method : HttpMethod | str, optional
241
+ HTTP method to use. Defaults to ``GET``.
242
+ **kwargs : Any
243
+ Extra arguments forwarded to the underlying ``requests`` call
244
+ (for example, ``timeout``). To use a pre-configured
245
+ :class:`requests.Session`, provide it via ``session``.
246
+ When omitted, ``timeout`` defaults to 10 seconds.
247
+
248
+ Returns
249
+ -------
250
+ JSONData
251
+ Parsed JSON payload, or a fallback object with raw text.
252
+ """
253
+ env = {
254
+ 'url': url,
255
+ 'method': method,
256
+ 'timeout': kwargs.pop('timeout', None),
257
+ 'session': kwargs.pop('session', None),
258
+ 'request_kwargs': kwargs,
259
+ }
260
+ return _extract_from_api_env(env, use_client=False)
261
+
262
+
263
+ def extract_from_api_source(
264
+ cfg: Any,
265
+ source_obj: Any,
266
+ overrides: dict[str, Any],
267
+ ) -> JSONData:
268
+ """
269
+ Extract data from a REST API source connector.
270
+
271
+ Parameters
272
+ ----------
273
+ cfg : Any
274
+ Pipeline configuration.
275
+ source_obj : Any
276
+ Connector configuration.
277
+ overrides : dict[str, Any]
278
+ Extract-time overrides.
279
+
280
+ Returns
281
+ -------
282
+ JSONData
283
+ Extracted payload.
284
+ """
285
+ env = compose_api_request_env(cfg, source_obj, overrides)
286
+ return _extract_from_api_env(env, use_client=True)
287
+
288
+
102
289
  def extract_from_database(
103
290
  connection_string: str,
104
291
  ) -> JSONList: