etlplus 0.9.2__py3-none-any.whl → 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. etlplus/__init__.py +26 -1
  2. etlplus/api/README.md +3 -51
  3. etlplus/api/__init__.py +0 -10
  4. etlplus/api/config.py +28 -39
  5. etlplus/api/endpoint_client.py +3 -3
  6. etlplus/api/pagination/client.py +1 -1
  7. etlplus/api/rate_limiting/config.py +1 -13
  8. etlplus/api/rate_limiting/rate_limiter.py +11 -8
  9. etlplus/api/request_manager.py +6 -11
  10. etlplus/api/transport.py +2 -14
  11. etlplus/api/types.py +6 -96
  12. etlplus/cli/commands.py +43 -76
  13. etlplus/cli/constants.py +1 -1
  14. etlplus/cli/handlers.py +12 -40
  15. etlplus/cli/io.py +2 -2
  16. etlplus/cli/main.py +1 -1
  17. etlplus/cli/state.py +7 -4
  18. etlplus/{workflow → config}/__init__.py +23 -10
  19. etlplus/{workflow → config}/connector.py +44 -58
  20. etlplus/{workflow → config}/jobs.py +32 -105
  21. etlplus/{workflow → config}/pipeline.py +51 -59
  22. etlplus/{workflow → config}/profile.py +5 -8
  23. etlplus/config/types.py +204 -0
  24. etlplus/config/utils.py +120 -0
  25. etlplus/database/ddl.py +1 -1
  26. etlplus/database/engine.py +3 -19
  27. etlplus/database/orm.py +0 -2
  28. etlplus/database/schema.py +1 -1
  29. etlplus/enums.py +288 -0
  30. etlplus/{ops/extract.py → extract.py} +99 -81
  31. etlplus/file.py +652 -0
  32. etlplus/{ops/load.py → load.py} +101 -78
  33. etlplus/{ops/run.py → run.py} +127 -159
  34. etlplus/{api/utils.py → run_helpers.py} +153 -209
  35. etlplus/{ops/transform.py → transform.py} +68 -75
  36. etlplus/types.py +4 -5
  37. etlplus/utils.py +2 -136
  38. etlplus/{ops/validate.py → validate.py} +12 -22
  39. etlplus/validation/__init__.py +44 -0
  40. etlplus/{ops → validation}/utils.py +17 -53
  41. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/METADATA +17 -210
  42. etlplus-0.10.2.dist-info/RECORD +65 -0
  43. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/WHEEL +1 -1
  44. etlplus/README.md +0 -37
  45. etlplus/api/enums.py +0 -51
  46. etlplus/cli/README.md +0 -40
  47. etlplus/database/README.md +0 -48
  48. etlplus/file/README.md +0 -105
  49. etlplus/file/__init__.py +0 -25
  50. etlplus/file/_imports.py +0 -141
  51. etlplus/file/_io.py +0 -160
  52. etlplus/file/accdb.py +0 -78
  53. etlplus/file/arrow.py +0 -78
  54. etlplus/file/avro.py +0 -176
  55. etlplus/file/bson.py +0 -77
  56. etlplus/file/cbor.py +0 -78
  57. etlplus/file/cfg.py +0 -79
  58. etlplus/file/conf.py +0 -80
  59. etlplus/file/core.py +0 -322
  60. etlplus/file/csv.py +0 -79
  61. etlplus/file/dat.py +0 -78
  62. etlplus/file/dta.py +0 -77
  63. etlplus/file/duckdb.py +0 -78
  64. etlplus/file/enums.py +0 -343
  65. etlplus/file/feather.py +0 -111
  66. etlplus/file/fwf.py +0 -77
  67. etlplus/file/gz.py +0 -123
  68. etlplus/file/hbs.py +0 -78
  69. etlplus/file/hdf5.py +0 -78
  70. etlplus/file/ini.py +0 -79
  71. etlplus/file/ion.py +0 -78
  72. etlplus/file/jinja2.py +0 -78
  73. etlplus/file/json.py +0 -98
  74. etlplus/file/log.py +0 -78
  75. etlplus/file/mat.py +0 -78
  76. etlplus/file/mdb.py +0 -78
  77. etlplus/file/msgpack.py +0 -78
  78. etlplus/file/mustache.py +0 -78
  79. etlplus/file/nc.py +0 -78
  80. etlplus/file/ndjson.py +0 -108
  81. etlplus/file/numbers.py +0 -75
  82. etlplus/file/ods.py +0 -79
  83. etlplus/file/orc.py +0 -111
  84. etlplus/file/parquet.py +0 -113
  85. etlplus/file/pb.py +0 -78
  86. etlplus/file/pbf.py +0 -77
  87. etlplus/file/properties.py +0 -78
  88. etlplus/file/proto.py +0 -77
  89. etlplus/file/psv.py +0 -79
  90. etlplus/file/rda.py +0 -78
  91. etlplus/file/rds.py +0 -78
  92. etlplus/file/sas7bdat.py +0 -78
  93. etlplus/file/sav.py +0 -77
  94. etlplus/file/sqlite.py +0 -78
  95. etlplus/file/stub.py +0 -84
  96. etlplus/file/sylk.py +0 -77
  97. etlplus/file/tab.py +0 -81
  98. etlplus/file/toml.py +0 -78
  99. etlplus/file/tsv.py +0 -80
  100. etlplus/file/txt.py +0 -102
  101. etlplus/file/vm.py +0 -78
  102. etlplus/file/wks.py +0 -77
  103. etlplus/file/xls.py +0 -88
  104. etlplus/file/xlsm.py +0 -79
  105. etlplus/file/xlsx.py +0 -99
  106. etlplus/file/xml.py +0 -185
  107. etlplus/file/xpt.py +0 -78
  108. etlplus/file/yaml.py +0 -95
  109. etlplus/file/zip.py +0 -175
  110. etlplus/file/zsav.py +0 -77
  111. etlplus/ops/README.md +0 -50
  112. etlplus/ops/__init__.py +0 -61
  113. etlplus/templates/README.md +0 -46
  114. etlplus/workflow/README.md +0 -52
  115. etlplus/workflow/dag.py +0 -105
  116. etlplus/workflow/types.py +0 -115
  117. etlplus-0.9.2.dist-info/RECORD +0 -134
  118. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/entry_points.txt +0 -0
  119. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/licenses/LICENSE +0 -0
  120. {etlplus-0.9.2.dist-info → etlplus-0.10.2.dist-info}/top_level.txt +0 -0
etlplus/enums.py CHANGED
@@ -8,6 +8,7 @@ from __future__ import annotations
 
 import enum
 import operator as _op
+from pathlib import PurePath
 from statistics import fmean
 from typing import Self
 
@@ -22,9 +23,18 @@ __all__ = [
     # Enums
     'AggregateName',
     'CoercibleStrEnum',
+    'CompressionFormat',
     'DataConnectorType',
+    'FileFormat',
+    'HttpMethod',
     'OperatorName',
     'PipelineStep',
+    # Functions
+    'coerce_compression_format',
+    'coerce_data_connector_type',
+    'coerce_file_format',
+    'coerce_http_method',
+    'infer_file_format_and_compression',
 ]
 
 
@@ -168,6 +178,39 @@ class AggregateName(CoercibleStrEnum):
         return lambda xs, n: (fmean(xs) if xs else 0.0)
 
 
+class CompressionFormat(CoercibleStrEnum):
+    """Supported compression formats for data files."""
+
+    # -- Constants -- #
+
+    GZ = 'gz'
+    ZIP = 'zip'
+
+    # -- Class Methods -- #
+
+    @classmethod
+    def aliases(cls) -> StrStrMap:
+        """
+        Return a mapping of common aliases for each enum member.
+
+        Returns
+        -------
+        StrStrMap
+            A mapping of alias names to their corresponding enum member names.
+        """
+        return {
+            # File extensions
+            '.gz': 'gz',
+            '.gzip': 'gz',
+            '.zip': 'zip',
+            # MIME types
+            'application/gzip': 'gz',
+            'application/x-gzip': 'gz',
+            'application/zip': 'zip',
+            'application/x-zip-compressed': 'zip',
+        }
+
+
 class DataConnectorType(CoercibleStrEnum):
     """Supported data connector types."""
 
@@ -199,6 +242,119 @@ class DataConnectorType(CoercibleStrEnum):
         }
 
 
+class FileFormat(CoercibleStrEnum):
+    """Supported file formats for extraction."""
+
+    # -- Constants -- #
+
+    AVRO = 'avro'
+    CSV = 'csv'
+    FEATHER = 'feather'
+    GZ = 'gz'
+    JSON = 'json'
+    NDJSON = 'ndjson'
+    ORC = 'orc'
+    PARQUET = 'parquet'
+    TSV = 'tsv'
+    TXT = 'txt'
+    XLS = 'xls'
+    XLSX = 'xlsx'
+    ZIP = 'zip'
+    XML = 'xml'
+    YAML = 'yaml'
+
+    # -- Class Methods -- #
+
+    @classmethod
+    def aliases(cls) -> StrStrMap:
+        """
+        Return a mapping of common aliases for each enum member.
+
+        Returns
+        -------
+        StrStrMap
+            A mapping of alias names to their corresponding enum member names.
+        """
+        return {
+            # Common shorthand
+            'parq': 'parquet',
+            'yml': 'yaml',
+            # File extensions
+            '.avro': 'avro',
+            '.csv': 'csv',
+            '.feather': 'feather',
+            '.gz': 'gz',
+            '.json': 'json',
+            '.jsonl': 'ndjson',
+            '.ndjson': 'ndjson',
+            '.orc': 'orc',
+            '.parquet': 'parquet',
+            '.pq': 'parquet',
+            '.tsv': 'tsv',
+            '.txt': 'txt',
+            '.xls': 'xls',
+            '.xlsx': 'xlsx',
+            '.zip': 'zip',
+            '.xml': 'xml',
+            '.yaml': 'yaml',
+            '.yml': 'yaml',
+            # MIME types
+            'application/avro': 'avro',
+            'application/feather': 'feather',
+            'application/gzip': 'gz',
+            'application/json': 'json',
+            'application/jsonlines': 'ndjson',
+            'application/ndjson': 'ndjson',
+            'application/orc': 'orc',
+            'application/vnd.apache.arrow.file': 'feather',
+            'application/vnd.apache.orc': 'orc',
+            'application/vnd.ms-excel': 'xls',
+            (
+                'application/vnd.openxmlformats-'
+                'officedocument.spreadsheetml.sheet'
+            ): 'xlsx',
+            'application/x-avro': 'avro',
+            'application/x-ndjson': 'ndjson',
+            'application/x-parquet': 'parquet',
+            'application/xml': 'xml',
+            'application/zip': 'zip',
+            'text/csv': 'csv',
+            'text/plain': 'txt',
+            'text/tab-separated-values': 'tsv',
+        }
+
+
+class HttpMethod(CoercibleStrEnum):
+    """Supported HTTP verbs that accept JSON payloads."""
+
+    # -- Constants -- #
+
+    CONNECT = 'connect'
+    DELETE = 'delete'
+    GET = 'get'
+    HEAD = 'head'
+    OPTIONS = 'options'
+    PATCH = 'patch'
+    POST = 'post'
+    PUT = 'put'
+    TRACE = 'trace'
+
+    # -- Getters -- #
+
+    @property
+    def allows_body(self) -> bool:
+        """
+        Whether the method typically allows a request body.
+
+        Notes
+        -----
+        - RFCs do not strictly forbid bodies on some other methods (e.g.,
+          ``DELETE``), but many servers/clients do not expect them. We mark
+          ``POST``, ``PUT``, and ``PATCH`` as True.
+        """
+        return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
+
+
 class OperatorName(CoercibleStrEnum):
     """Supported comparison operators with helpers."""
 
@@ -298,6 +454,13 @@ class PipelineStep(CoercibleStrEnum):
 # SECTION: INTERNAL CONSTANTS ============================================== #
 
 
+# Compression formats that are also file formats.
+_COMPRESSION_FILE_FORMATS: set[FileFormat] = {
+    FileFormat.GZ,
+    FileFormat.ZIP,
+}
+
+
 # Precomputed order index for PipelineStep; avoids recomputing on each access.
 _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
     PipelineStep.FILTER: 0,
@@ -306,3 +469,128 @@ _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
     PipelineStep.SORT: 3,
     PipelineStep.AGGREGATE: 4,
 }
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def coerce_data_connector_type(
+    connector: DataConnectorType | str,
+) -> DataConnectorType:
+    """
+    Normalize textual data connector values to :class:`DataConnectorType`.
+
+    This thin wrapper is kept for backward compatibility; prefer
+    :meth:`DataConnectorType.coerce` going forward.
+    """
+    return DataConnectorType.coerce(connector)
+
+
+def coerce_file_format(
+    file_format: FileFormat | str,
+) -> FileFormat:
+    """
+    Normalize textual file format values to :class:`FileFormat`.
+
+    This thin wrapper is kept for backward compatibility; prefer
+    :meth:`FileFormat.coerce` going forward.
+    """
+    return FileFormat.coerce(file_format)
+
+
+def coerce_compression_format(
+    compression_format: CompressionFormat | str,
+) -> CompressionFormat:
+    """
+    Normalize textual compression format values to :class:`CompressionFormat`.
+
+    This thin wrapper is kept for backward compatibility; prefer
+    :meth:`CompressionFormat.coerce` going forward.
+    """
+    return CompressionFormat.coerce(compression_format)
+
+
+def coerce_http_method(
+    http_method: HttpMethod | str,
+) -> HttpMethod:
+    """
+    Normalize textual HTTP method values to :class:`HttpMethod`.
+
+    This thin wrapper is kept for backward compatibility; prefer
+    :meth:`HttpMethod.coerce` going forward.
+    """
+    return HttpMethod.coerce(http_method)
+
+
+def infer_file_format_and_compression(
+    value: object,
+    filename: object | None = None,
+) -> tuple[FileFormat | None, CompressionFormat | None]:
+    """
+    Infer data format and compression from a filename, extension, or MIME type.
+
+    Parameters
+    ----------
+    value : object
+        A filename, extension, MIME type, or existing enum member.
+    filename : object | None, optional
+        A filename to consult for extension-based inference (e.g. when
+        ``value`` is ``application/octet-stream``).
+
+    Returns
+    -------
+    tuple[FileFormat | None, CompressionFormat | None]
+        The inferred data format and compression, if any.
+    """
+    if isinstance(value, FileFormat):
+        if value in _COMPRESSION_FILE_FORMATS:
+            return None, CompressionFormat.coerce(value.value)
+        return value, None
+    if isinstance(value, CompressionFormat):
+        return None, value
+
+    text = str(value).strip()
+    if not text:
+        return None, None
+
+    normalized = text.casefold()
+    mime = normalized.split(';', 1)[0].strip()
+
+    compression = CompressionFormat.try_coerce(mime)
+    fmt = FileFormat.try_coerce(mime)
+
+    is_mime = mime.startswith(
+        (
+            'application/',
+            'text/',
+            'audio/',
+            'image/',
+            'video/',
+            'multipart/',
+        ),
+    )
+    suffix_source: object | None = filename if filename is not None else text
+    if is_mime and filename is None:
+        suffix_source = None
+
+    suffixes = (
+        PurePath(str(suffix_source)).suffixes
+        if suffix_source is not None
+        else []
+    )
+    if suffixes:
+        normalized_suffixes = [suffix.casefold() for suffix in suffixes]
+        compression = (
+            CompressionFormat.try_coerce(normalized_suffixes[-1])
+            or compression
+        )
+        if compression is not None:
+            normalized_suffixes = normalized_suffixes[:-1]
+        if normalized_suffixes:
+            fmt = FileFormat.try_coerce(normalized_suffixes[-1]) or fmt
+
+    if fmt in _COMPRESSION_FILE_FORMATS:
+        compression = compression or CompressionFormat.coerce(fmt.value)
+        fmt = None
+
+    return fmt, compression
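
The enum additions above centralize format and HTTP-verb normalization. A minimal usage sketch, assuming the inherited CoercibleStrEnum.coerce/try_coerce helpers resolve member values via the aliases listed in the diff; the expected results are inferred from this diff, not verified against the released wheel:

from etlplus.enums import FileFormat, HttpMethod, infer_file_format_and_compression

# Extension and MIME aliases normalize to the same member.
FileFormat.coerce('.pq')         # FileFormat.PARQUET
FileFormat.coerce('text/csv')    # FileFormat.CSV

# Compound suffixes split into (format, compression).
infer_file_format_and_compression('events.ndjson.gz')
# -> (FileFormat.NDJSON, CompressionFormat.GZ)

# Bare archives report compression only.
infer_file_format_and_compression('archive.zip')
# -> (None, CompressionFormat.ZIP)

# Only POST, PUT, and PATCH report an allowed request body.
HttpMethod.coerce('post').allows_body    # True
HttpMethod.coerce('get').allows_body     # False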
etlplus/{ops/extract.py → extract.py} CHANGED
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.ops.extract` module.
+:mod:`etlplus.extract` module.
 
 Helpers to extract data from files, databases, and REST APIs.
 """
@@ -10,81 +10,58 @@ from pathlib import Path
 from typing import Any
 from typing import cast
 
-from ..api import HttpMethod
-from ..api.utils import resolve_request
-from ..enums import DataConnectorType
-from ..file import File
-from ..file import FileFormat
-from ..types import JSONData
-from ..types import JSONDict
-from ..types import JSONList
-from ..types import StrPath
+import requests  # type: ignore[import]
+
+from .enums import DataConnectorType
+from .enums import FileFormat
+from .enums import HttpMethod
+from .enums import coerce_data_connector_type
+from .enums import coerce_file_format
+from .file import File
+from .types import JSONData
+from .types import JSONDict
+from .types import JSONList
+from .types import StrPath
 
 # SECTION: FUNCTIONS ======================================================== #
 
 
-def extract_from_api(
-    url: str,
-    method: HttpMethod | str = HttpMethod.GET,
-    **kwargs: Any,
+# -- File Extraction -- #
+
+
+def extract_from_file(
+    file_path: StrPath,
+    file_format: FileFormat | str | None = FileFormat.JSON,
 ) -> JSONData:
     """
-    Extract data from a REST API.
+    Extract (semi-)structured data from a local file.
 
     Parameters
     ----------
-    url : str
-        API endpoint URL.
-    method : HttpMethod | str, optional
-        HTTP method to use. Defaults to ``GET``.
-    **kwargs : Any
-        Extra arguments forwarded to the underlying ``requests`` call
-        (for example, ``timeout``). To use a pre-configured
-        :class:`requests.Session`, provide it via ``session``.
-        When omitted, ``timeout`` defaults to 10 seconds.
+    file_path : StrPath
+        Source file path.
+    file_format : FileFormat | str | None, optional
+        File format to parse. If ``None``, infer from the filename
+        extension. Defaults to `'json'` for backward compatibility when
+        explicitly provided.
 
     Returns
     -------
     JSONData
-        Parsed JSON payload, or a fallback object with raw text.
-
-    Raises
-    ------
-    TypeError
-        If a provided ``session`` does not expose the required HTTP
-        method (for example, ``get``).
+        Parsed data as a mapping or a list of mappings.
     """
-    timeout = kwargs.pop('timeout', None)
-    session = kwargs.pop('session', None)
-    request_callable, timeout, _ = resolve_request(
-        method,
-        session=session,
-        timeout=timeout,
-    )
-    response = request_callable(url, timeout=timeout, **kwargs)
-    response.raise_for_status()
+    path = Path(file_path)
 
-    content_type = response.headers.get('content-type', '').lower()
-    if 'application/json' in content_type:
-        try:
-            payload: Any = response.json()
-        except ValueError:
-            # Malformed JSON despite content-type; fall back to text
-            return {
-                'content': response.text,
-                'content_type': content_type,
-            }
-        if isinstance(payload, dict):
-            return cast(JSONDict, payload)
-        if isinstance(payload, list):
-            if all(isinstance(x, dict) for x in payload):
-                return cast(JSONList, payload)
-            # Coerce non-dict array items into objects for consistency
-            return [{'value': x} for x in payload]
-        # Fallback: wrap scalar JSON
-        return {'value': payload}
+    # If no explicit format is provided, let File infer from extension.
+    if file_format is None:
+        return File(path, None).read()
+    fmt = coerce_file_format(file_format)
 
-    return {'content': response.text, 'content_type': content_type}
+    # Let file module perform existence and format validation.
+    return File(path, fmt).read()
+
+
+# -- Database Extraction (Placeholder) -- #
 
 
 def extract_from_database(
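
A hedged usage sketch for the relocated file-extraction path shown in the hunk above; the file names are illustrative, and the format string is normalized by coerce_file_format:

from etlplus.extract import extract_from_file

# Explicit format string, coerced to FileFormat.CSV.
users = extract_from_file('data/users.csv', file_format='csv')

# Pass None to let File infer the format from the '.json' extension.
events = extract_from_file('data/events.json', file_format=None)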
@@ -119,36 +96,77 @@ def extract_from_database(
     ]
 
 
-def extract_from_file(
-    file_path: StrPath,
-    file_format: FileFormat | str | None = FileFormat.JSON,
+# -- REST API Extraction -- #
+
+
+def extract_from_api(
+    url: str,
+    method: HttpMethod | str = HttpMethod.GET,
+    **kwargs: Any,
 ) -> JSONData:
     """
-    Extract (semi-)structured data from a local file.
+    Extract data from a REST API.
 
     Parameters
     ----------
-    file_path : StrPath
-        Source file path.
-    file_format : FileFormat | str | None, optional
-        File format to parse. If ``None``, infer from the filename
-        extension. Defaults to `'json'` for backward compatibility when
-        explicitly provided.
+    url : str
+        API endpoint URL.
+    method : HttpMethod | str, optional
+        HTTP method to use. Defaults to ``GET``.
+    **kwargs : Any
+        Extra arguments forwarded to the underlying ``requests`` call
+        (for example, ``timeout``). To use a pre-configured
+        :class:`requests.Session`, provide it via ``session``.
 
     Returns
     -------
     JSONData
-        Parsed data as a mapping or a list of mappings.
+        Parsed JSON payload, or a fallback object with raw text.
+
+    Raises
+    ------
+    TypeError
+        If a provided ``session`` does not expose the required HTTP
+        method (for example, ``get``).
     """
-    path = Path(file_path)
+    http_method = HttpMethod.coerce(method)
 
-    # If no explicit format is provided, let File infer from extension.
-    if file_format is None:
-        return File(path, None).read()
-    fmt = FileFormat.coerce(file_format)
+    # Apply a conservative timeout to guard against hanging requests.
+    timeout = kwargs.pop('timeout', 10.0)
+    session = kwargs.pop('session', None)
+    requester = session or requests
 
-    # Let file module perform existence and format validation.
-    return File(path, fmt).read()
+    request_callable = getattr(requester, http_method.value, None)
+    if not callable(request_callable):
+        raise TypeError(
+            'Session object must supply a callable'
+            f'"{http_method.value}" method',
+        )
+
+    response = request_callable(url, timeout=timeout, **kwargs)
+    response.raise_for_status()
+
+    content_type = response.headers.get('content-type', '').lower()
+    if 'application/json' in content_type:
+        try:
+            payload: Any = response.json()
+        except ValueError:
+            # Malformed JSON despite content-type; fall back to text
+            return {
+                'content': response.text,
+                'content_type': content_type,
+            }
+        if isinstance(payload, dict):
+            return cast(JSONDict, payload)
+        if isinstance(payload, list):
+            if all(isinstance(x, dict) for x in payload):
+                return cast(JSONList, payload)
+            # Coerce non-dict array items into objects for consistency
+            return [{'value': x} for x in payload]
+        # Fallback: wrap scalar JSON
+        return {'value': payload}
+
+    return {'content': response.text, 'content_type': content_type}
 
 
 # -- Orchestration -- #
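
A hedged sketch of the reworked extract_from_api; the URL and session are illustrative, any remaining kwargs pass straight to the underlying requests call, and timeout now defaults to 10 seconds:

import requests

from etlplus.extract import extract_from_api

# Plain GET with an explicit timeout override.
items = extract_from_api('https://api.example.com/items', timeout=5)

# Reuse a pre-configured session; it must expose a callable method
# matching the verb (here 'get'), otherwise TypeError is raised.
with requests.Session() as session:
    items = extract_from_api(
        'https://api.example.com/items',
        method='get',
        session=session,
    )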
@@ -184,7 +202,7 @@ def extract(
     ValueError
         If `source_type` is not one of the supported values.
     """
-    match DataConnectorType.coerce(source_type):
+    match coerce_data_connector_type(source_type):
         case DataConnectorType.FILE:
             # Prefer explicit format if provided, else infer from filename.
             return extract_from_file(source, file_format)
@@ -195,6 +213,6 @@
             # ``file_format`` is ignored for APIs.
             return extract_from_api(str(source), **kwargs)
         case _:
-            # :meth:`coerce` already raises for invalid connector types, but
-            # keep explicit guard for defensive programming.
+            # ``coerce_data_connector_type`` covers invalid entries, but keep
+            # explicit guard for defensive programming.
             raise ValueError(f'Invalid source type: {source_type}')
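
A hedged sketch of the orchestration entry point after the dispatch change. The full extract() signature is not shown in this diff, so the keyword spellings and the 'file'/'api' connector strings are assumptions based on the identifiers used in the hunk above:

from etlplus.extract import extract

# File connector: format may be given explicitly or inferred from the name.
rows = extract('data/users.csv', source_type='file', file_format='csv')

# API connector: file_format is ignored and extra kwargs flow to extract_from_api.
payload = extract('https://api.example.com/items', source_type='api', timeout=5)

# Unknown connector strings are rejected by coerce_data_connector_type;
# the `case _` branch is only a defensive guard.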