etlplus 0.14.3__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +4 -4
- etlplus/api/README.md +33 -2
- etlplus/api/auth.py +1 -1
- etlplus/api/config.py +5 -10
- etlplus/api/endpoint_client.py +4 -4
- etlplus/api/pagination/config.py +1 -1
- etlplus/api/pagination/paginator.py +6 -7
- etlplus/api/rate_limiting/config.py +4 -4
- etlplus/api/rate_limiting/rate_limiter.py +1 -1
- etlplus/api/retry_manager.py +2 -2
- etlplus/api/transport.py +1 -1
- etlplus/api/types.py +99 -0
- etlplus/api/utils.py +6 -2
- etlplus/cli/README.md +2 -2
- etlplus/cli/commands.py +75 -42
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +33 -15
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +2 -2
- etlplus/cli/state.py +4 -7
- etlplus/connector/__init__.py +43 -0
- etlplus/connector/api.py +161 -0
- etlplus/connector/connector.py +26 -0
- etlplus/connector/core.py +132 -0
- etlplus/connector/database.py +122 -0
- etlplus/connector/enums.py +52 -0
- etlplus/connector/file.py +120 -0
- etlplus/connector/types.py +40 -0
- etlplus/connector/utils.py +122 -0
- etlplus/database/README.md +2 -2
- etlplus/database/ddl.py +2 -2
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/enums.py +1 -33
- etlplus/file/README.md +2 -2
- etlplus/file/_imports.py +1 -0
- etlplus/file/_io.py +52 -4
- etlplus/file/accdb.py +3 -2
- etlplus/file/arrow.py +3 -2
- etlplus/file/avro.py +3 -2
- etlplus/file/bson.py +3 -2
- etlplus/file/cbor.py +3 -2
- etlplus/file/cfg.py +3 -2
- etlplus/file/conf.py +3 -2
- etlplus/file/core.py +11 -8
- etlplus/file/csv.py +3 -2
- etlplus/file/dat.py +3 -2
- etlplus/file/dta.py +3 -2
- etlplus/file/duckdb.py +3 -2
- etlplus/file/enums.py +1 -1
- etlplus/file/feather.py +3 -2
- etlplus/file/fwf.py +3 -2
- etlplus/file/gz.py +3 -2
- etlplus/file/hbs.py +3 -2
- etlplus/file/hdf5.py +3 -2
- etlplus/file/ini.py +3 -2
- etlplus/file/ion.py +3 -2
- etlplus/file/jinja2.py +3 -2
- etlplus/file/json.py +5 -16
- etlplus/file/log.py +3 -2
- etlplus/file/mat.py +3 -2
- etlplus/file/mdb.py +3 -2
- etlplus/file/msgpack.py +3 -2
- etlplus/file/mustache.py +3 -2
- etlplus/file/nc.py +3 -2
- etlplus/file/ndjson.py +3 -2
- etlplus/file/numbers.py +3 -2
- etlplus/file/ods.py +3 -2
- etlplus/file/orc.py +3 -2
- etlplus/file/parquet.py +3 -2
- etlplus/file/pb.py +3 -2
- etlplus/file/pbf.py +3 -2
- etlplus/file/properties.py +3 -2
- etlplus/file/proto.py +3 -2
- etlplus/file/psv.py +3 -2
- etlplus/file/rda.py +3 -2
- etlplus/file/rds.py +3 -2
- etlplus/file/sas7bdat.py +3 -2
- etlplus/file/sav.py +3 -2
- etlplus/file/sqlite.py +3 -2
- etlplus/file/stub.py +1 -0
- etlplus/file/sylk.py +3 -2
- etlplus/file/tab.py +3 -2
- etlplus/file/toml.py +3 -2
- etlplus/file/tsv.py +3 -2
- etlplus/file/txt.py +4 -3
- etlplus/file/vm.py +3 -2
- etlplus/file/wks.py +3 -2
- etlplus/file/xls.py +3 -2
- etlplus/file/xlsm.py +3 -2
- etlplus/file/xlsx.py +3 -2
- etlplus/file/xml.py +9 -3
- etlplus/file/xpt.py +3 -2
- etlplus/file/yaml.py +5 -16
- etlplus/file/zip.py +3 -2
- etlplus/file/zsav.py +3 -2
- etlplus/ops/extract.py +13 -1
- etlplus/ops/load.py +15 -2
- etlplus/ops/run.py +18 -13
- etlplus/ops/transform.py +2 -2
- etlplus/ops/utils.py +6 -35
- etlplus/ops/validate.py +3 -3
- etlplus/templates/README.md +2 -2
- etlplus/types.py +3 -2
- etlplus/utils.py +163 -29
- etlplus/{config → workflow}/README.md +6 -6
- etlplus/workflow/__init__.py +32 -0
- etlplus/{dag.py → workflow/dag.py} +6 -4
- etlplus/{config → workflow}/jobs.py +101 -38
- etlplus/{config → workflow}/pipeline.py +59 -51
- etlplus/{config → workflow}/profile.py +8 -5
- {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/METADATA +4 -4
- etlplus-0.16.0.dist-info/RECORD +141 -0
- {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/WHEEL +1 -1
- etlplus/config/__init__.py +0 -56
- etlplus/config/connector.py +0 -372
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- etlplus-0.14.3.dist-info/RECORD +0 -135
- {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/entry_points.txt +0 -0
- {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.14.3.dist-info → etlplus-0.16.0.dist-info}/top_level.txt +0 -0
etlplus/connector/file.py
ADDED
@@ -0,0 +1,120 @@
+"""
+:mod:`etlplus.connector.file` module.
+
+File connector configuration dataclass.
+
+Notes
+-----
+- TypedDicts in this module are intentionally ``total=False`` and are not
+  enforced at runtime.
+- :meth:`*.from_obj` constructors accept :class:`Mapping[str, Any]` and perform
+  tolerant parsing and light casting. This keeps the runtime permissive while
+  improving autocomplete and static analysis for contributors.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from dataclasses import field
+from typing import Any
+from typing import Self
+from typing import TypedDict
+from typing import overload
+
+from ..types import StrAnyMap
+from ..utils import coerce_dict
+from .core import ConnectorBase
+from .enums import DataConnectorType
+from .types import ConnectorType
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    'ConnectorFile',
+    'ConnectorFileConfigMap',
+]
+
+
+# SECTION: TYPED DICTS ====================================================== #
+
+
+class ConnectorFileConfigMap(TypedDict, total=False):
+    """
+    Shape accepted by :meth:`ConnectorFile.from_obj` (all keys optional).
+
+    See Also
+    --------
+    - :meth:`etlplus.connector.file.ConnectorFile.from_obj`
+    """
+
+    name: str
+    type: ConnectorType
+    format: str
+    path: str
+    options: StrAnyMap
+
+
+# SECTION: DATA CLASSES ===================================================== #
+
+
+@dataclass(kw_only=True, slots=True)
+class ConnectorFile(ConnectorBase):
+    """
+    Configuration for a file-based data connector.
+
+    Attributes
+    ----------
+    type : ConnectorType
+        Connector kind, always ``'file'``.
+    format : str | None
+        File format (e.g., ``'json'``, ``'csv'``).
+    path : str | None
+        File path or URI.
+    options : dict[str, Any]
+        Reader/writer format options.
+    """
+
+    # -- Attributes -- #
+
+    type: ConnectorType = DataConnectorType.FILE
+    format: str | None = None
+    path: str | None = None
+    options: dict[str, Any] = field(default_factory=dict)
+
+    # -- Class Methods -- #
+
+    @classmethod
+    @overload
+    def from_obj(cls, obj: ConnectorFileConfigMap) -> Self: ...
+
+    @classmethod
+    @overload
+    def from_obj(cls, obj: StrAnyMap) -> Self: ...
+
+    @classmethod
+    def from_obj(
+        cls,
+        obj: StrAnyMap,
+    ) -> Self:
+        """
+        Parse a mapping into a ``ConnectorFile`` instance.
+
+        Parameters
+        ----------
+        obj : StrAnyMap
+            Mapping with at least ``name``.
+
+        Returns
+        -------
+        Self
+            Parsed connector instance.
+        """
+        name = cls._require_name(obj, kind='File')
+
+        return cls(
+            name=name,
+            format=obj.get('format'),
+            path=obj.get('path'),
+            options=coerce_dict(obj.get('options')),
+        )
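Read with the module's ``Notes`` section, `from_obj` turns a loosely-typed mapping into a typed config object. A minimal usage sketch of the class added above (the `'input'` name and option values are illustrative only; the mapping keys mirror `ConnectorFileConfigMap`):

    from etlplus.connector.file import ConnectorFile

    conn = ConnectorFile.from_obj({
        'name': 'input',            # required; _require_name rejects mappings without it
        'format': 'csv',
        'path': '/data/input.csv',
        'options': {'delimiter': ','},
    })
    assert conn.type == 'file'      # DataConnectorType.FILE is a StrEnum member
    # assuming coerce_dict passes plain dicts through unchanged:
    assert conn.options == {'delimiter': ','}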
etlplus/connector/types.py
ADDED
@@ -0,0 +1,40 @@
+"""
+:mod:`etlplus.connector.types` module.
+
+Connector type aliases for :mod:`etlplus.connector`.
+
+Examples
+--------
+>>> from etlplus.connector import Connector
+>>> src: Connector = {
+...     "type": "file",
+...     "path": "/data/input.csv",
+... }
+>>> tgt: Connector = {
+...     "type": "database",
+...     "connection_string": "postgresql://user:pass@localhost/db",
+... }
+>>> from etlplus.api import RetryPolicy
+>>> rp: RetryPolicy = {"max_attempts": 3, "backoff": 0.5}
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from .enums import DataConnectorType
+
+# SECTION: EXPORTS ========================================================= #
+
+
+__all__ = [
+    # Type Aliases
+    'ConnectorType',
+]
+
+
+# SECTION: TYPE ALIASES ===================================================== #
+
+
+# Literal type for supported connector kinds (strings or enum members)
+type ConnectorType = DataConnectorType | Literal['api', 'database', 'file']
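Because `ConnectorType` is a PEP 695 union of the enum and three string literals, annotated call sites may pass either form. A small sketch (the union matters to the type checker; at runtime `StrEnum` members already compare equal to their string values):

    from etlplus.connector.enums import DataConnectorType
    from etlplus.connector.types import ConnectorType

    kind_str: ConnectorType = 'database'                # Literal branch
    kind_enum: ConnectorType = DataConnectorType.FILE   # enum branch
    assert DataConnectorType.FILE == 'file'             # StrEnum equality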
etlplus/connector/utils.py
ADDED
@@ -0,0 +1,122 @@
+"""
+:mod:`etlplus.connector.utils` module.
+
+Shared connector parsing helpers.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+from .api import ConnectorApi
+from .connector import Connector
+from .database import ConnectorDb
+from .enums import DataConnectorType
+from .file import ConnectorFile
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Functions
+    'parse_connector',
+]
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _coerce_connector_type(
+    obj: Mapping[str, Any],
+) -> DataConnectorType:
+    """
+    Normalize and validate the connector ``type`` field.
+
+    Parameters
+    ----------
+    obj : Mapping[str, Any]
+        Mapping with a ``type`` entry.
+
+    Returns
+    -------
+    DataConnectorType
+        Normalized connector type enum.
+
+    Raises
+    ------
+    TypeError
+        If ``type`` is missing or unsupported.
+    """
+    if 'type' not in obj:
+        raise TypeError('Connector requires a "type"')
+    try:
+        return DataConnectorType.coerce(obj.get('type'))
+    except ValueError as exc:
+        allowed = ', '.join(DataConnectorType.choices())
+        raise TypeError(
+            f'Unsupported connector type: {obj.get("type")!r}. '
+            f'Expected one of {allowed}.',
+        ) from exc
+
+
+def _load_connector(
+    kind: DataConnectorType,
+) -> type[Connector]:
+    """
+    Resolve the connector class for the requested kind.
+
+    Parameters
+    ----------
+    kind : DataConnectorType
+        Connector kind enum.
+
+    Returns
+    -------
+    type[Connector]
+        Connector class corresponding to *kind*.
+    """
+    match kind:
+        case DataConnectorType.API:
+            return ConnectorApi
+        case DataConnectorType.DATABASE:
+            return ConnectorDb
+        case DataConnectorType.FILE:
+            return ConnectorFile
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def parse_connector(
+    obj: Mapping[str, Any],
+) -> Connector:
+    """
+    Dispatch to a concrete connector constructor based on ``type``.
+
+    Parameters
+    ----------
+    obj : Mapping[str, Any]
+        Mapping with at least ``name`` and ``type``.
+
+    Returns
+    -------
+    Connector
+        Concrete connector instance.
+
+    Raises
+    ------
+    TypeError
+        If the mapping is invalid or the connector type is unsupported.
+
+    Notes
+    -----
+    Delegates to the tolerant ``from_obj`` constructors for each connector
+    kind. Connector types are normalized via
+    :class:`etlplus.connector.enums.DataConnectorType`, so common aliases
+    (e.g., ``'db'`` or ``'http'``) are accepted.
+    """
+    if not isinstance(obj, Mapping):
+        raise TypeError('Connector configuration must be a mapping.')
+    connector_cls = _load_connector(_coerce_connector_type(obj))
+    return connector_cls.from_obj(obj)
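A sketch of the dispatch and alias handling documented above, importing straight from the module added in this release:

    from etlplus.connector.utils import parse_connector

    conn = parse_connector({'name': 'warehouse', 'type': 'db'})  # 'db' normalizes to DATABASE
    try:
        parse_connector({'name': 'x', 'type': 'ftp'})
    except TypeError as exc:                                     # note: TypeError, not ValueError
        print(exc)  # Unsupported connector type: 'ftp'. Expected one of ...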
etlplus/database/README.md
CHANGED
@@ -1,4 +1,4 @@
-# etlplus.database
+# `etlplus.database` Subpackage
 
 Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
 
@@ -9,7 +9,7 @@ Documentation for the `etlplus.database` subpackage: database engine, schema, an
 
 Back to project overview: see the top-level [README](../../README.md).
 
-- [etlplus.database
+- [`etlplus.database` Subpackage](#etlplusdatabase-subpackage)
   - [Database Engine and Connections](#database-engine-and-connections)
   - [Schema and DDL Helpers](#schema-and-ddl-helpers)
   - [ORM Utilities](#orm-utilities)
etlplus/database/ddl.py
CHANGED
@@ -233,7 +233,7 @@ def render_table_sql(
     template : TemplateKey | None, optional
         Template key to use (default: 'ddl').
     template_path : str | None, optional
-        Path to a custom template file (overrides
+        Path to a custom template file (overrides *template*).
 
     Returns
     -------
@@ -264,7 +264,7 @@ def render_tables(
     template : TemplateKey | None, optional
         Template key to use (default: 'ddl').
     template_path : str | None, optional
-        Path to a custom template file (overrides
+        Path to a custom template file (overrides *template*).
 
     Returns
     -------
etlplus/database/engine.py
CHANGED
@@ -87,7 +87,7 @@ def load_database_url_from_config(
     Extract a database URL/DSN from a YAML/JSON config file.
 
     The loader is schema-tolerant: it looks for a top-level "databases" map
-    and then for a named entry (
+    and then for a named entry (*name*). Each entry may contain either a
     ``connection_string``/``url``/``dsn`` or a nested ``default`` block with
     those fields.
 
@@ -136,9 +136,25 @@ def load_database_url_from_config(
     return url
 
 
-def make_engine(
-
+def make_engine(
+    url: str | None = None,
+    **engine_kwargs: Any,
+) -> Engine:
+    """
+    Create a SQLAlchemy Engine, defaulting to env config if no URL given.
+
+    Parameters
+    ----------
+    url : str | None, optional
+        Database URL/DSN string. When omitted, ``DATABASE_URL`` is used.
+    **engine_kwargs : Any
+        Extra keyword arguments forwarded to ``create_engine``.
 
+    Returns
+    -------
+    Engine
+        Configured SQLAlchemy engine instance.
+    """
     resolved_url = url or DATABASE_URL
     return create_engine(resolved_url, pool_pre_ping=True, **engine_kwargs)
 
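The rewritten `make_engine` now documents what the body already did: fall back to the module-level `DATABASE_URL` and forward extra keyword arguments to SQLAlchemy. A usage sketch (the SQLite URL is illustrative):

    from etlplus.database.engine import make_engine

    engine = make_engine('sqlite:///example.db', echo=True)  # echo reaches create_engine
    default_engine = make_engine()                           # uses DATABASE_URL from env config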
etlplus/database/orm.py
CHANGED
@@ -201,12 +201,14 @@ def build_models(
 ) -> ModelRegistry:
     """
     Build SQLAlchemy ORM models from table specifications.
+
     Parameters
     ----------
     specs : list[TableSpec]
         List of table specifications.
     base : type[DeclarativeBase], optional
         Base class for the ORM models (default: :class:`Base`).
+
     Returns
     -------
     ModelRegistry
etlplus/enums.py
CHANGED
@@ -22,7 +22,6 @@ __all__ = [
     # Enums
     'AggregateName',
     'CoercibleStrEnum',
-    'DataConnectorType',
     'OperatorName',
     'PipelineStep',
 ]
@@ -76,7 +75,7 @@ class CoercibleStrEnum(enum.StrEnum):
     @classmethod
     def coerce(cls, value: Self | str | object) -> Self:
         """
-        Convert an enum member or string-like input to a member of
+        Convert an enum member or string-like input to a member of *cls*.
 
         Parameters
         ----------
@@ -168,37 +167,6 @@ class AggregateName(CoercibleStrEnum):
         return lambda xs, n: (fmean(xs) if xs else 0.0)
 
 
-class DataConnectorType(CoercibleStrEnum):
-    """Supported data connector types."""
-
-    # -- Constants -- #
-
-    API = 'api'
-    DATABASE = 'database'
-    FILE = 'file'
-
-    # -- Class Methods -- #
-
-    @classmethod
-    def aliases(cls) -> StrStrMap:
-        """
-        Return a mapping of common aliases for each enum member.
-
-        Returns
-        -------
-        StrStrMap
-            A mapping of alias names to their corresponding enum member names.
-        """
-        return {
-            'http': 'api',
-            'https': 'api',
-            'rest': 'api',
-            'db': 'database',
-            'filesystem': 'file',
-            'fs': 'file',
-        }
-
-
 class OperatorName(CoercibleStrEnum):
     """Supported comparison operators with helpers."""
 
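`DataConnectorType` leaves `etlplus.enums` here and reappears in `etlplus.connector.enums` (the +52-line file in the list above, not shown in this section). Assuming the relocated enum keeps the alias table visible in the removed lines, alias-aware coercion still resolves the common spellings:

    from etlplus.connector.enums import DataConnectorType

    assert DataConnectorType.coerce('db') is DataConnectorType.DATABASE
    assert DataConnectorType.coerce('https') is DataConnectorType.API  # alias per removed table
    assert DataConnectorType.coerce(DataConnectorType.FILE) is DataConnectorType.FILE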
etlplus/file/README.md
CHANGED
@@ -1,4 +1,4 @@
-# etlplus.file
+# `etlplus.file` Subpackage
 
 Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
 and writing data files.
@@ -11,7 +11,7 @@ and writing data files.
 
 Back to project overview: see the top-level [README](../../README.md).
 
-- [etlplus.file
+- [`etlplus.file` Subpackage](#etlplusfile-subpackage)
   - [Supported File Formats](#supported-file-formats)
   - [Inferring File Format and Compression](#inferring-file-format-and-compression)
   - [Reading and Writing Files](#reading-and-writing-files)
etlplus/file/_imports.py
CHANGED
etlplus/file/_io.py
CHANGED
@@ -8,6 +8,7 @@ from __future__ import annotations
 
 import csv
 from pathlib import Path
+from typing import Any
 from typing import cast
 
 from ..types import JSONData
@@ -17,6 +18,44 @@ from ..types import JSONList
 # SECTION: FUNCTIONS ======================================================== #
 
 
+def coerce_record_payload(
+    payload: Any,
+    *,
+    format_name: str,
+) -> JSONData:
+    """
+    Validate that *payload* is an object or list of objects.
+
+    Parameters
+    ----------
+    payload : Any
+        Parsed payload to validate.
+    format_name : str
+        Human-readable format name for error messages.
+
+    Returns
+    -------
+    JSONData
+        *payload* when it is a dict or a list of dicts.
+
+    Raises
+    ------
+    TypeError
+        If the payload is not a dict or list of dicts.
+    """
+    if isinstance(payload, dict):
+        return cast(JSONDict, payload)
+    if isinstance(payload, list):
+        if all(isinstance(item, dict) for item in payload):
+            return cast(JSONList, payload)
+        raise TypeError(
+            f'{format_name} array must contain only objects (dicts)',
+        )
+    raise TypeError(
+        f'{format_name} root must be an object or an array of objects',
+    )
+
+
 def normalize_records(
     data: JSONData,
     format_name: str,
@@ -50,9 +89,13 @@ def normalize_records(
     return [cast(JSONDict, data)]
 
 
-def read_delimited(path: Path, *, delimiter: str) -> JSONList:
+def read_delimited(
+    path: Path,
+    *,
+    delimiter: str,
+) -> JSONList:
     """
-    Read delimited content from
+    Read delimited content from *path*.
 
     Parameters
     ----------
@@ -79,9 +122,14 @@ def read_delimited(path: Path, *, delimiter: str) -> JSONList:
     return rows
 
 
-def write_delimited(
+def write_delimited(
+    path: Path,
+    data: JSONData,
+    *,
+    delimiter: str,
+) -> int:
     """
-    Write
+    Write *data* to a delimited file and return record count.
 
     Parameters
     ----------
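The new `coerce_record_payload` plus the reworked `read_delimited`/`write_delimited` give `_io` a small validate-then-serialize pipeline. A sketch consistent with the signatures above (whether values round-trip as strings depends on the csv-based bodies, which these hunks only partially show):

    from pathlib import Path
    from etlplus.file._io import coerce_record_payload, read_delimited, write_delimited

    records = coerce_record_payload([{'id': '1', 'name': 'a'}], format_name='JSON')
    count = write_delimited(Path('out.psv'), records, delimiter='|')  # returns record count
    assert count == len(read_delimited(Path('out.psv'), delimiter='|'))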
etlplus/file/accdb.py
CHANGED
@@ -28,6 +28,7 @@ from . import stub
 
 
 __all__ = [
+    # Functions
     'read',
     'write',
 ]
@@ -40,7 +41,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Read ACCDB content from
+    Read ACCDB content from *path*.
 
     Parameters
     ----------
@@ -60,7 +61,7 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write
+    Write *data* to ACCDB at *path* and return record count.
 
     Parameters
     ----------
etlplus/file/arrow.py
CHANGED
@@ -28,6 +28,7 @@ from . import stub
 
 
 __all__ = [
+    # Functions
     'read',
     'write',
 ]
@@ -40,7 +41,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Read ARROW content from
+    Read ARROW content from *path*.
 
     Parameters
     ----------
@@ -60,7 +61,7 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write
+    Write *data* to ARROW at *path* and return record count.
 
     Parameters
     ----------
etlplus/file/avro.py
CHANGED
@@ -33,6 +33,7 @@ from ._io import normalize_records
 
 
 __all__ = [
+    # Functions
     'read',
     'write',
 ]
@@ -124,7 +125,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Read AVRO content from
+    Read AVRO content from *path*.
 
     Parameters
     ----------
@@ -147,7 +148,7 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write
+    Write *data* to AVRO at *path* and return record count.
 
     Parameters
     ----------
etlplus/file/bson.py
CHANGED
@@ -27,6 +27,7 @@ from . import stub
 
 
 __all__ = [
+    # Functions
     'read',
     'write',
 ]
@@ -39,7 +40,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Read BSON content from
+    Read BSON content from *path*.
 
     Parameters
     ----------
@@ -59,7 +60,7 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write
+    Write *data* to BSON at *path* and return record count.
 
     Parameters
     ----------
etlplus/file/cbor.py
CHANGED
@@ -28,6 +28,7 @@ from . import stub
 
 
 __all__ = [
+    # Functions
     'read',
     'write',
 ]
@@ -40,7 +41,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Read CBOR content from
+    Read CBOR content from *path*.
 
     Parameters
    ----------
@@ -60,7 +61,7 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write
+    Write *data* to CBOR at *path* and return record count.
 
     Parameters
     ----------
etlplus/file/cfg.py
CHANGED
@@ -29,6 +29,7 @@ from . import stub
 
 
 __all__ = [
+    # Functions
     'read',
     'write',
 ]
@@ -41,7 +42,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Read CFG content from
+    Read CFG content from *path*.
 
     Parameters
     ----------
@@ -61,7 +62,7 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write
+    Write *data* to CFG file at *path* and return record count.
 
     Parameters
     ----------