etlplus 0.14.3__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. etlplus/README.md +4 -4
  2. etlplus/api/README.md +33 -2
  3. etlplus/api/config.py +3 -8
  4. etlplus/api/types.py +89 -0
  5. etlplus/api/utils.py +5 -1
  6. etlplus/cli/README.md +2 -2
  7. etlplus/cli/commands.py +75 -42
  8. etlplus/cli/handlers.py +32 -14
  9. etlplus/cli/main.py +1 -1
  10. etlplus/cli/state.py +4 -7
  11. etlplus/database/README.md +2 -2
  12. etlplus/database/engine.py +18 -2
  13. etlplus/database/orm.py +2 -0
  14. etlplus/file/README.md +2 -2
  15. etlplus/file/_io.py +39 -0
  16. etlplus/file/json.py +2 -14
  17. etlplus/file/yaml.py +2 -14
  18. etlplus/ops/run.py +14 -9
  19. etlplus/ops/utils.py +4 -33
  20. etlplus/ops/validate.py +3 -3
  21. etlplus/templates/README.md +2 -2
  22. etlplus/types.py +3 -2
  23. etlplus/utils.py +136 -2
  24. etlplus/{config → workflow}/README.md +6 -6
  25. etlplus/{config → workflow}/__init__.py +10 -23
  26. etlplus/{config → workflow}/connector.py +58 -44
  27. etlplus/{dag.py → workflow/dag.py} +6 -4
  28. etlplus/{config → workflow}/jobs.py +101 -38
  29. etlplus/{config → workflow}/pipeline.py +57 -49
  30. etlplus/{config → workflow}/profile.py +8 -5
  31. etlplus/workflow/types.py +115 -0
  32. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/METADATA +4 -4
  33. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/RECORD +37 -38
  34. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/WHEEL +1 -1
  35. etlplus/config/types.py +0 -204
  36. etlplus/config/utils.py +0 -120
  37. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/entry_points.txt +0 -0
  38. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/licenses/LICENSE +0 -0
  39. {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/top_level.txt +0 -0
etlplus/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # etlplus package
1
+ # `etlplus` Package
2
2
 
3
3
  The `etlplus` package provides a unified Python API and CLI for ETL operations: extraction,
4
4
  validation, transformation, and loading of data from files, APIs, and databases.
@@ -13,12 +13,12 @@ Back to project overview: see the top-level [README](../README.md).
13
13
 
14
14
  - [etlplus.api](api/README.md): Lightweight HTTP client and paginated REST helpers
15
15
  - [etlplus.file](file/README.md): Unified file format support and helpers
16
- - [etlplus.config](config/README.md): Configuration helpers for connectors, pipelines, jobs, and
17
- profiles
18
- - [etlplus.cli](cli/README.md): Command-line interface for ETLPlus workflows
16
+ - [etlplus.cli](cli/README.md): Command-line interface definitions for `etlplus`
19
17
  - [etlplus.database](database/README.md): Database engine, schema, and ORM helpers
20
18
  - [etlplus.templates](templates/README.md): SQL and DDL template helpers
21
19
  - [etlplus.validation](validation/README.md): Data validation utilities and helpers
20
+ - [etlplus.workflow](workflow/README.md): Helpers for data connectors, pipelines, jobs, and
21
+ profiles
22
22
 
23
23
  ## Quickstart
24
24
 
etlplus/api/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # etlplus.api subpackage
1
+ # `etlplus.api` Subpackage
2
2
 
3
3
  Documentation for the `etlplus.api` subpackage: a lightweight HTTP client and helpers for paginated
4
4
  REST endpoints.
@@ -12,7 +12,7 @@ REST endpoints.
12
12
 
13
13
  Back to project overview: see the top-level [README](../../README.md).
14
14
 
15
- - [etlplus.api subpackage](#etlplusapi-subpackage)
15
+ - [`etlplus.api` Subpackage](#etlplusapi-subpackage)
16
16
  - [Installation](#installation)
17
17
  - [Quickstart](#quickstart)
18
18
  - [Overriding Rate Limits Per Call](#overriding-rate-limits-per-call)
@@ -22,6 +22,7 @@ Back to project overview: see the top-level [README](../../README.md).
22
22
  - [Authentication](#authentication)
23
23
  - [Errors and Rate Limiting](#errors-and-rate-limiting)
24
24
  - [Types and Transport](#types-and-transport)
25
+ - [Config Schemas](#config-schemas)
25
26
  - [Supporting Modules](#supporting-modules)
26
27
  - [Minimal Contract](#minimal-contract)
27
28
  - [See also](#see-also)
@@ -225,6 +226,36 @@ providers can fall back to their own defaults. If you already possess a static t
225
226
  `etlplus/api/request_manager.py` wraps `requests` sessions plus retry orchestration. Advanced
226
227
  users may consult those modules to adapt behavior.
227
228
 
229
+ ## Config Schemas
230
+
231
+ `etlplus.api.types` defines TypedDict-based configuration shapes for API profiles and endpoints.
232
+ Runtime parsing remains permissive in `etlplus.api.config`, but these types improve IDE
233
+ autocomplete and static analysis.
234
+
235
+ Exported types:
236
+
237
+ - `ApiConfigMap`: top-level API config shape
238
+ - `ApiProfileConfigMap`: per-profile API config shape
239
+ - `ApiProfileDefaultsMap`: defaults block within a profile
240
+ - `EndpointMap`: endpoint config shape
241
+
242
+ Example:
243
+
244
+ ```python
245
+ from etlplus.api import ApiConfigMap
246
+
247
+ api_cfg: ApiConfigMap = {
248
+ "base_url": "https://example.test",
249
+ "headers": {"Authorization": "Bearer token"},
250
+ "endpoints": {
251
+ "users": {
252
+ "path": "/users",
253
+ "method": "GET",
254
+ },
255
+ },
256
+ }
257
+ ```
258
+
228
259
  ## Supporting Modules
229
260
 
230
261
  - `etlplus.api.types` collects friendly aliases such as `Headers`, `Params`, `Url`, and
etlplus/api/config.py CHANGED
@@ -3,11 +3,6 @@
3
3
 
4
4
  Configuration dataclasses for REST API services, profiles, and endpoints.
5
5
 
6
- These models used to live under :mod:`etlplus.config`, but they belong in the
7
- API layer because they compose runtime types such as
8
- :class:`etlplus.api.EndpointClient`, :class:`etlplus.api.PaginationConfig`, and
9
- :class:`etlplus.api.RateLimitConfig`.
10
-
11
6
  Notes
12
7
  -----
13
8
  - TypedDict references remain editor hints only; :meth:`from_obj` accepts
@@ -41,9 +36,9 @@ from .pagination import PaginationConfig
41
36
  from .rate_limiting import RateLimitConfig
42
37
 
43
38
  if TYPE_CHECKING:
44
- from ..config.types import ApiConfigMap
45
- from ..config.types import ApiProfileConfigMap
46
- from ..config.types import EndpointMap
39
+ from .types import ApiConfigMap
40
+ from .types import ApiProfileConfigMap
41
+ from .types import EndpointMap
47
42
 
48
43
 
49
44
  # SECTION: EXPORTS ========================================================== #
etlplus/api/types.py CHANGED
@@ -20,9 +20,11 @@ Examples
20
20
  from __future__ import annotations
21
21
 
22
22
  from collections.abc import Callable
23
+ from collections.abc import Mapping
23
24
  from dataclasses import dataclass
24
25
  from typing import Any
25
26
  from typing import Self
27
+ from typing import TypedDict
26
28
  from typing import cast
27
29
 
28
30
  from ..types import JSONData
@@ -40,6 +42,11 @@ __all__ = [
40
42
  'Headers',
41
43
  'Params',
42
44
  'Url',
45
+ # Typed Dicts
46
+ 'ApiConfigMap',
47
+ 'ApiProfileConfigMap',
48
+ 'ApiProfileDefaultsMap',
49
+ 'EndpointMap',
43
50
  ]
44
51
 
45
52
 
@@ -49,6 +56,88 @@ __all__ = [
49
56
  _UNSET = object()
50
57
 
51
58
 
59
+ # SECTION: TYPED DICTS ====================================================== #
60
+
61
+
62
+ class ApiConfigMap(TypedDict, total=False):
63
+ """
64
+ Top-level API config shape parsed by ApiConfig.from_obj.
65
+
66
+ Either provide a ``base_url`` with optional ``headers`` and ``endpoints``,
67
+ or provide ``profiles`` with at least one profile having a ``base_url``.
68
+
69
+ See Also
70
+ --------
71
+ - :class:`etlplus.api.config.ApiConfig`
72
+ """
73
+
74
+ base_url: str
75
+ headers: StrAnyMap
76
+ endpoints: Mapping[str, EndpointMap | str]
77
+ profiles: Mapping[str, ApiProfileConfigMap]
78
+
79
+
80
+ class ApiProfileConfigMap(TypedDict, total=False):
81
+ """
82
+ Shape accepted for a profile entry under ApiConfigMap.profiles.
83
+
84
+ Notes
85
+ -----
86
+ ``base_url`` is required at runtime when profiles are provided.
87
+
88
+ See Also
89
+ --------
90
+ - :class:`etlplus.api.config.ApiProfileConfig`
91
+ """
92
+
93
+ base_url: str
94
+ headers: StrAnyMap
95
+ base_path: str
96
+ auth: StrAnyMap
97
+ defaults: ApiProfileDefaultsMap
98
+
99
+
100
+ class ApiProfileDefaultsMap(TypedDict, total=False):
101
+ """
102
+ Defaults block available under a profile (all keys optional).
103
+
104
+ Notes
105
+ -----
106
+ Runtime expects header values to be str; typing remains permissive.
107
+
108
+ See Also
109
+ --------
110
+ - :class:`etlplus.api.config.ApiProfileConfig`
111
+ - :class:`etlplus.api.pagination.PaginationConfig`
112
+ - :class:`etlplus.api.rate_limiting.RateLimitConfig`
113
+ """
114
+
115
+ headers: StrAnyMap
116
+ pagination: Any
117
+ rate_limit: Any
118
+
119
+
120
+ class EndpointMap(TypedDict, total=False):
121
+ """
122
+ Shape accepted by EndpointConfig.from_obj.
123
+
124
+ One of ``path`` or ``url`` should be provided.
125
+
126
+ See Also
127
+ --------
128
+ - :class:`etlplus.api.config.EndpointConfig`
129
+ """
130
+
131
+ path: str
132
+ url: str
133
+ method: str
134
+ path_params: StrAnyMap
135
+ query_params: StrAnyMap
136
+ body: Any
137
+ pagination: Any
138
+ rate_limit: Any
139
+
140
+
52
141
  # SECTION: DATA CLASSES ===================================================== #
53
142
 
54
143
 
etlplus/api/utils.py CHANGED
@@ -892,4 +892,8 @@ def resolve_request(
892
892
  'Session object must supply a callable '
893
893
  f'"{http_method.value}" method',
894
894
  )
895
- return request_callable, request_timeout, http_method
895
+ typed_request_callable = cast(
896
+ Callable[..., requests.Response],
897
+ request_callable,
898
+ )
899
+ return typed_request_callable, request_timeout, http_method
etlplus/cli/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # etlplus.cli subpackage
1
+ # `etlplus.cli` Subpackage
2
2
 
3
3
  Documentation for the `etlplus.cli` subpackage: command-line interface for ETLPlus workflows.
4
4
 
@@ -9,7 +9,7 @@ Documentation for the `etlplus.cli` subpackage: command-line interface for ETLPl
9
9
 
10
10
  Back to project overview: see the top-level [README](../../README.md).
11
11
 
12
- - [etlplus.cli subpackage](#etlpluscli-subpackage)
12
+ - [`etlplus.cli` Subpackage](#etlpluscli-subpackage)
13
13
  - [Available Commands](#available-commands)
14
14
  - [Command Options](#command-options)
15
15
  - [Example: Running a Pipeline](#example-running-a-pipeline)
etlplus/cli/commands.py CHANGED
@@ -61,6 +61,24 @@ __all__ = ['app']
61
61
 
62
62
  # SECTION: TYPE ALIASES ==================================================== #
63
63
 
64
+
65
+ JobOption = Annotated[
66
+ str | None,
67
+ typer.Option(
68
+ '-j',
69
+ '--job',
70
+ help='Name of the job to run',
71
+ ),
72
+ ]
73
+
74
+ JobsOption = Annotated[
75
+ bool,
76
+ typer.Option(
77
+ '--jobs',
78
+ help='List available job names and exit',
79
+ ),
80
+ ]
81
+
64
82
  OperationsOption = Annotated[
65
83
  str,
66
84
  typer.Option(
@@ -89,6 +107,23 @@ PipelineConfigOption = Annotated[
89
107
  ),
90
108
  ]
91
109
 
110
+ PipelineOption = Annotated[
111
+ str | None,
112
+ typer.Option(
113
+ '-p',
114
+ '--pipeline',
115
+ help='Name of the pipeline to run',
116
+ ),
117
+ ]
118
+
119
+ PipelinesOption = Annotated[
120
+ bool,
121
+ typer.Option(
122
+ '--pipelines',
123
+ help='List ETL pipelines',
124
+ ),
125
+ ]
126
+
92
127
  RenderConfigOption = Annotated[
93
128
  str | None,
94
129
  typer.Option(
@@ -193,6 +228,22 @@ SourceTypeOption = Annotated[
193
228
  ),
194
229
  ]
195
230
 
231
+ SourcesOption = Annotated[
232
+ bool,
233
+ typer.Option(
234
+ '--sources',
235
+ help='List data sources',
236
+ ),
237
+ ]
238
+
239
+ SummaryOption = Annotated[
240
+ bool,
241
+ typer.Option(
242
+ '--summary',
243
+ help='Show pipeline summary (name, version, sources, targets, jobs)',
244
+ ),
245
+ ]
246
+
196
247
  TargetArg = Annotated[
197
248
  str,
198
249
  typer.Argument(
@@ -227,6 +278,22 @@ TargetTypeOption = Annotated[
227
278
  ),
228
279
  ]
229
280
 
281
+ TargetsOption = Annotated[
282
+ bool,
283
+ typer.Option(
284
+ '--targets',
285
+ help='List data targets',
286
+ ),
287
+ ]
288
+
289
+ TransformsOption = Annotated[
290
+ bool,
291
+ typer.Option(
292
+ '--transforms',
293
+ help='List data transforms',
294
+ ),
295
+ ]
296
+
230
297
 
231
298
  # SECTION: INTERNAL FUNCTIONS =============================================== #
232
299
 
@@ -341,36 +408,12 @@ def _root(
341
408
  def check_cmd(
342
409
  ctx: typer.Context,
343
410
  config: PipelineConfigOption,
344
- jobs: bool = typer.Option(
345
- False,
346
- '--jobs',
347
- help='List available job names and exit',
348
- ),
349
- pipelines: bool = typer.Option(
350
- False,
351
- '--pipelines',
352
- help='List ETL pipelines',
353
- ),
354
- sources: bool = typer.Option(
355
- False,
356
- '--sources',
357
- help='List data sources',
358
- ),
359
- summary: bool = typer.Option(
360
- False,
361
- '--summary',
362
- help='Show pipeline summary (name, version, sources, targets, jobs)',
363
- ),
364
- targets: bool = typer.Option(
365
- False,
366
- '--targets',
367
- help='List data targets',
368
- ),
369
- transforms: bool = typer.Option(
370
- False,
371
- '--transforms',
372
- help='List data transforms',
373
- ),
411
+ jobs: JobsOption = False,
412
+ pipelines: PipelinesOption = False,
413
+ sources: SourcesOption = False,
414
+ summary: SummaryOption = False,
415
+ targets: TargetsOption = False,
416
+ transforms: TransformsOption = False,
374
417
  ) -> int:
375
418
  """
376
419
  Inspect a pipeline configuration.
@@ -683,18 +726,8 @@ def render_cmd(
683
726
  def run_cmd(
684
727
  ctx: typer.Context,
685
728
  config: PipelineConfigOption,
686
- job: str | None = typer.Option(
687
- None,
688
- '-j',
689
- '--job',
690
- help='Name of the job to run',
691
- ),
692
- pipeline: str | None = typer.Option(
693
- None,
694
- '-p',
695
- '--pipeline',
696
- help='Name of the pipeline to run',
697
- ),
729
+ job: JobOption = None,
730
+ pipeline: PipelineOption = None,
698
731
  ) -> int:
699
732
  """
700
733
  Execute an ETL job or pipeline from a YAML configuration.
etlplus/cli/handlers.py CHANGED
@@ -14,8 +14,6 @@ from typing import Any
14
14
  from typing import Literal
15
15
  from typing import cast
16
16
 
17
- from ..config import PipelineConfig
18
- from ..config import load_pipeline_config
19
17
  from ..database import load_table_spec
20
18
  from ..database import render_tables
21
19
  from ..file import File
@@ -28,6 +26,8 @@ from ..ops import validate
28
26
  from ..ops.validate import FieldRules
29
27
  from ..types import JSONData
30
28
  from ..types import TemplateKey
29
+ from ..workflow import PipelineConfig
30
+ from ..workflow import load_pipeline_config
31
31
  from . import io as cli_io
32
32
 
33
33
  # SECTION: EXPORTS ========================================================== #
@@ -121,9 +121,12 @@ def _check_sections(
121
121
  if targets:
122
122
  sections['targets'] = [tgt.name for tgt in cfg.targets]
123
123
  if transforms:
124
- sections['transforms'] = [
125
- getattr(trf, 'name', None) for trf in cfg.transforms
126
- ]
124
+ if isinstance(cfg.transforms, Mapping):
125
+ sections['transforms'] = list(cfg.transforms)
126
+ else:
127
+ sections['transforms'] = [
128
+ getattr(trf, 'name', None) for trf in cfg.transforms
129
+ ]
127
130
  if not sections:
128
131
  sections['jobs'] = _pipeline_summary(cfg)['jobs']
129
132
  return sections
@@ -157,6 +160,29 @@ def _pipeline_summary(
157
160
  }
158
161
 
159
162
 
163
+ def _write_file_payload(
164
+ payload: JSONData,
165
+ target: str,
166
+ *,
167
+ format_hint: str | None,
168
+ ) -> None:
169
+ """
170
+ Write a JSON-like payload to a file path using an optional format hint.
171
+
172
+ Parameters
173
+ ----------
174
+ payload : JSONData
175
+ The structured data to write.
176
+ target : str
177
+ File path to write to.
178
+ format_hint : str | None
179
+ Optional format hint for :class:`FileFormat`.
180
+ """
181
+ file_path = Path(target)
182
+ file_format = FileFormat.coerce(format_hint) if format_hint else None
183
+ File(file_path, file_format=file_format).write(payload)
184
+
185
+
160
186
  # SECTION: FUNCTIONS ======================================================== #
161
187
 
162
188
 
@@ -572,15 +598,7 @@ def transform_handler(
572
598
 
573
599
  # TODO: Generalize to handle non-file targets.
574
600
  if target and target != '-':
575
- # Convert target to Path and target_format to FileFormat if needed
576
- file_path = Path(target)
577
- file_format = None
578
- if target_format is not None:
579
- try:
580
- file_format = FileFormat(target_format)
581
- except ValueError:
582
- file_format = None # or handle error as appropriate
583
- File(file_path, file_format=file_format).write(data)
601
+ _write_file_payload(data, target, format_hint=target_format)
584
602
  print(f'Data transformed and saved to {target}')
585
603
  return 0
586
604
 
etlplus/cli/main.py CHANGED
@@ -50,7 +50,7 @@ def _emit_context_help(
50
50
  return False
51
51
 
52
52
  with contextlib.redirect_stdout(sys.stderr):
53
- ctx.get_help()
53
+ print(ctx.get_help())
54
54
  return True
55
55
 
56
56
 
etlplus/cli/state.py CHANGED
@@ -15,6 +15,7 @@ from typing import Final
15
15
 
16
16
  import typer
17
17
 
18
+ from ..utils import normalize_str
18
19
  from .constants import DATA_CONNECTORS
19
20
 
20
21
  # SECTION: EXPORTS ========================================================== #
@@ -322,14 +323,10 @@ def validate_choice(
322
323
  typer.BadParameter
323
324
  If the input value is not in the set of valid choices.
324
325
  """
325
- v = str(value or '').strip().lower()
326
- normalized_choices = {c.lower() for c in choices}
326
+ v = normalize_str(str(value or ''))
327
+ normalized_choices = {normalize_str(c): c for c in choices}
327
328
  if v in normalized_choices:
328
- # Preserve original casing from choices when possible for messages
329
- for choice in choices:
330
- if choice.lower() == v:
331
- return choice
332
- return v
329
+ return normalized_choices[v]
333
330
  allowed = ', '.join(sorted(choices))
334
331
  raise typer.BadParameter(
335
332
  f"Invalid {label} '{value}'. Choose from: {allowed}",
etlplus/database/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # etlplus.database subpackage
1
+ # `etlplus.database` Subpackage
2
2
 
3
3
  Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
4
4
 
@@ -9,7 +9,7 @@ Documentation for the `etlplus.database` subpackage: database engine, schema, an
9
9
 
10
10
  Back to project overview: see the top-level [README](../../README.md).
11
11
 
12
- - [etlplus.database subpackage](#etlplusdatabase-subpackage)
12
+ - [`etlplus.database` Subpackage](#etlplusdatabase-subpackage)
13
13
  - [Database Engine and Connections](#database-engine-and-connections)
14
14
  - [Schema and DDL Helpers](#schema-and-ddl-helpers)
15
15
  - [ORM Utilities](#orm-utilities)
etlplus/database/engine.py CHANGED
@@ -136,9 +136,25 @@ def load_database_url_from_config(
136
136
  return url
137
137
 
138
138
 
139
- def make_engine(url: str | None = None, **engine_kwargs: Any) -> Engine:
140
- """Create a SQLAlchemy Engine, defaulting to env config if no URL given."""
139
+ def make_engine(
140
+ url: str | None = None,
141
+ **engine_kwargs: Any,
142
+ ) -> Engine:
143
+ """
144
+ Create a SQLAlchemy Engine, defaulting to env config if no URL given.
145
+
146
+ Parameters
147
+ ----------
148
+ url : str | None, optional
149
+ Database URL/DSN string. When omitted, ``DATABASE_URL`` is used.
150
+ **engine_kwargs : Any
151
+ Extra keyword arguments forwarded to ``create_engine``.
141
152
 
153
+ Returns
154
+ -------
155
+ Engine
156
+ Configured SQLAlchemy engine instance.
157
+ """
142
158
  resolved_url = url or DATABASE_URL
143
159
  return create_engine(resolved_url, pool_pre_ping=True, **engine_kwargs)
144
160
 
etlplus/database/orm.py CHANGED
@@ -201,12 +201,14 @@ def build_models(
201
201
  ) -> ModelRegistry:
202
202
  """
203
203
  Build SQLAlchemy ORM models from table specifications.
204
+
204
205
  Parameters
205
206
  ----------
206
207
  specs : list[TableSpec]
207
208
  List of table specifications.
208
209
  base : type[DeclarativeBase], optional
209
210
  Base class for the ORM models (default: :class:`Base`).
211
+
210
212
  Returns
211
213
  -------
212
214
  ModelRegistry
etlplus/file/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # etlplus.file subpackage
1
+ # `etlplus.file` Subpackage
2
2
 
3
3
  Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
4
4
  and writing data files.
@@ -11,7 +11,7 @@ and writing data files.
11
11
 
12
12
  Back to project overview: see the top-level [README](../../README.md).
13
13
 
14
- - [etlplus.file subpackage](#etlplusfile-subpackage)
14
+ - [`etlplus.file` Subpackage](#etlplusfile-subpackage)
15
15
  - [Supported File Formats](#supported-file-formats)
16
16
  - [Inferring File Format and Compression](#inferring-file-format-and-compression)
17
17
  - [Reading and Writing Files](#reading-and-writing-files)
etlplus/file/_io.py CHANGED
@@ -8,6 +8,7 @@ from __future__ import annotations
8
8
 
9
9
  import csv
10
10
  from pathlib import Path
11
+ from typing import Any
11
12
  from typing import cast
12
13
 
13
14
  from ..types import JSONData
@@ -17,6 +18,44 @@ from ..types import JSONList
17
18
  # SECTION: FUNCTIONS ======================================================== #
18
19
 
19
20
 
21
+ def coerce_record_payload(
22
+ payload: Any,
23
+ *,
24
+ format_name: str,
25
+ ) -> JSONData:
26
+ """
27
+ Validate that ``payload`` is an object or list of objects.
28
+
29
+ Parameters
30
+ ----------
31
+ payload : Any
32
+ Parsed payload to validate.
33
+ format_name : str
34
+ Human-readable format name for error messages.
35
+
36
+ Returns
37
+ -------
38
+ JSONData
39
+ ``payload`` when it is a dict or a list of dicts.
40
+
41
+ Raises
42
+ ------
43
+ TypeError
44
+ If the payload is not a dict or list of dicts.
45
+ """
46
+ if isinstance(payload, dict):
47
+ return cast(JSONDict, payload)
48
+ if isinstance(payload, list):
49
+ if all(isinstance(item, dict) for item in payload):
50
+ return cast(JSONList, payload)
51
+ raise TypeError(
52
+ f'{format_name} array must contain only objects (dicts)',
53
+ )
54
+ raise TypeError(
55
+ f'{format_name} root must be an object or an array of objects',
56
+ )
57
+
58
+
20
59
  def normalize_records(
21
60
  data: JSONData,
22
61
  format_name: str,
etlplus/file/json.py CHANGED
@@ -20,12 +20,10 @@ from __future__ import annotations
20
20
 
21
21
  import json
22
22
  from pathlib import Path
23
- from typing import cast
24
23
 
25
24
  from ..types import JSONData
26
- from ..types import JSONDict
27
- from ..types import JSONList
28
25
  from ..utils import count_records
26
+ from ._io import coerce_record_payload
29
27
 
30
28
  # SECTION: EXPORTS ========================================================== #
31
29
 
@@ -65,17 +63,7 @@ def read(
65
63
  with path.open('r', encoding='utf-8') as handle:
66
64
  loaded = json.load(handle)
67
65
 
68
- if isinstance(loaded, dict):
69
- return cast(JSONDict, loaded)
70
- if isinstance(loaded, list):
71
- if all(isinstance(item, dict) for item in loaded):
72
- return cast(JSONList, loaded)
73
- raise TypeError(
74
- 'JSON array must contain only objects (dicts) when loading file',
75
- )
76
- raise TypeError(
77
- 'JSON root must be an object or an array of objects when loading file',
78
- )
66
+ return coerce_record_payload(loaded, format_name='JSON')
79
67
 
80
68
 
81
69
  def write(