etlplus 0.15.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +3 -3
- etlplus/api/README.md +31 -0
- etlplus/api/auth.py +1 -1
- etlplus/api/config.py +5 -10
- etlplus/api/endpoint_client.py +4 -4
- etlplus/api/pagination/config.py +1 -1
- etlplus/api/pagination/paginator.py +6 -7
- etlplus/api/rate_limiting/config.py +4 -4
- etlplus/api/rate_limiting/rate_limiter.py +1 -1
- etlplus/api/retry_manager.py +2 -2
- etlplus/api/transport.py +1 -1
- etlplus/api/types.py +99 -0
- etlplus/api/utils.py +1 -1
- etlplus/cli/commands.py +75 -42
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +31 -13
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +2 -2
- etlplus/cli/state.py +4 -7
- etlplus/connector/__init__.py +43 -0
- etlplus/connector/api.py +161 -0
- etlplus/connector/connector.py +26 -0
- etlplus/connector/core.py +132 -0
- etlplus/connector/database.py +122 -0
- etlplus/connector/enums.py +52 -0
- etlplus/connector/file.py +120 -0
- etlplus/connector/types.py +40 -0
- etlplus/connector/utils.py +122 -0
- etlplus/database/ddl.py +2 -2
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/enums.py +1 -33
- etlplus/file/_imports.py +1 -0
- etlplus/file/_io.py +52 -4
- etlplus/file/accdb.py +3 -2
- etlplus/file/arrow.py +3 -2
- etlplus/file/avro.py +3 -2
- etlplus/file/bson.py +3 -2
- etlplus/file/cbor.py +3 -2
- etlplus/file/cfg.py +3 -2
- etlplus/file/conf.py +3 -2
- etlplus/file/core.py +11 -8
- etlplus/file/csv.py +3 -2
- etlplus/file/dat.py +3 -2
- etlplus/file/dta.py +3 -2
- etlplus/file/duckdb.py +3 -2
- etlplus/file/enums.py +1 -1
- etlplus/file/feather.py +3 -2
- etlplus/file/fwf.py +3 -2
- etlplus/file/gz.py +3 -2
- etlplus/file/hbs.py +3 -2
- etlplus/file/hdf5.py +3 -2
- etlplus/file/ini.py +3 -2
- etlplus/file/ion.py +3 -2
- etlplus/file/jinja2.py +3 -2
- etlplus/file/json.py +5 -16
- etlplus/file/log.py +3 -2
- etlplus/file/mat.py +3 -2
- etlplus/file/mdb.py +3 -2
- etlplus/file/msgpack.py +3 -2
- etlplus/file/mustache.py +3 -2
- etlplus/file/nc.py +3 -2
- etlplus/file/ndjson.py +3 -2
- etlplus/file/numbers.py +3 -2
- etlplus/file/ods.py +3 -2
- etlplus/file/orc.py +3 -2
- etlplus/file/parquet.py +3 -2
- etlplus/file/pb.py +3 -2
- etlplus/file/pbf.py +3 -2
- etlplus/file/properties.py +3 -2
- etlplus/file/proto.py +3 -2
- etlplus/file/psv.py +3 -2
- etlplus/file/rda.py +3 -2
- etlplus/file/rds.py +3 -2
- etlplus/file/sas7bdat.py +3 -2
- etlplus/file/sav.py +3 -2
- etlplus/file/sqlite.py +3 -2
- etlplus/file/stub.py +1 -0
- etlplus/file/sylk.py +3 -2
- etlplus/file/tab.py +3 -2
- etlplus/file/toml.py +3 -2
- etlplus/file/tsv.py +3 -2
- etlplus/file/txt.py +4 -3
- etlplus/file/vm.py +3 -2
- etlplus/file/wks.py +3 -2
- etlplus/file/xls.py +3 -2
- etlplus/file/xlsm.py +3 -2
- etlplus/file/xlsx.py +3 -2
- etlplus/file/xml.py +9 -3
- etlplus/file/xpt.py +3 -2
- etlplus/file/yaml.py +5 -16
- etlplus/file/zip.py +3 -2
- etlplus/file/zsav.py +3 -2
- etlplus/ops/extract.py +13 -1
- etlplus/ops/load.py +15 -2
- etlplus/ops/run.py +4 -4
- etlplus/ops/transform.py +2 -2
- etlplus/ops/utils.py +6 -35
- etlplus/ops/validate.py +3 -3
- etlplus/types.py +3 -2
- etlplus/utils.py +163 -29
- etlplus/workflow/__init__.py +0 -11
- etlplus/workflow/jobs.py +84 -27
- etlplus/workflow/pipeline.py +48 -48
- {etlplus-0.15.0.dist-info → etlplus-0.16.0.dist-info}/METADATA +4 -4
- etlplus-0.16.0.dist-info/RECORD +141 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.0.dist-info}/WHEEL +1 -1
- etlplus/config/README.md +0 -50
- etlplus/config/__init__.py +0 -33
- etlplus/config/types.py +0 -140
- etlplus/dag.py +0 -103
- etlplus/workflow/connector.py +0 -373
- etlplus/workflow/types.py +0 -115
- etlplus/workflow/utils.py +0 -120
- etlplus-0.15.0.dist-info/RECORD +0 -139
- {etlplus-0.15.0.dist-info → etlplus-0.16.0.dist-info}/entry_points.txt +0 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.0.dist-info}/top_level.txt +0 -0
etlplus/config/types.py
DELETED
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
:mod:`etlplus.config.types` module.
|
|
3
|
-
|
|
4
|
-
Type aliases and editor-only TypedDicts for :mod:`etlplus.config`.
|
|
5
|
-
|
|
6
|
-
These types improve IDE autocomplete and static analysis while the runtime
|
|
7
|
-
parsers remain permissive.
|
|
8
|
-
|
|
9
|
-
Notes
|
|
10
|
-
-----
|
|
11
|
-
- TypedDicts in this module are intentionally ``total=False`` and are not
|
|
12
|
-
enforced at runtime.
|
|
13
|
-
- ``*.from_obj`` constructors accept ``Mapping[str, Any]`` and perform
|
|
14
|
-
tolerant parsing and light casting. This keeps the runtime permissive while
|
|
15
|
-
improving autocomplete and static analysis for contributors.
|
|
16
|
-
|
|
17
|
-
Examples
|
|
18
|
-
--------
|
|
19
|
-
>>> from etlplus.config import Connector
|
|
20
|
-
>>> src: Connector = {
|
|
21
|
-
...     "type": "file",
|
|
22
|
-
...     "path": "/data/input.csv",
|
|
23
|
-
... }
|
|
24
|
-
>>> tgt: Connector = {
|
|
25
|
-
...     "type": "database",
|
|
26
|
-
...     "connection_string": "postgresql://user:pass@localhost/db",
|
|
27
|
-
... }
|
|
28
|
-
>>> from etlplus.api import RetryPolicy
|
|
29
|
-
>>> rp: RetryPolicy = {"max_attempts": 3, "backoff": 0.5}
|
|
30
|
-
"""
|
|
31
|
-
|
|
32
|
-
from __future__ import annotations
|
|
33
|
-
|
|
34
|
-
from collections.abc import Mapping
|
|
35
|
-
from typing import Any
|
|
36
|
-
from typing import TypedDict
|
|
37
|
-
|
|
38
|
-
from ..api import PaginationConfigMap
|
|
39
|
-
from ..api import RateLimitConfigMap
|
|
40
|
-
from ..types import StrAnyMap
|
|
41
|
-
|
|
42
|
-
# SECTION: EXPORTS ========================================================= #
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
__all__ = [
|
|
46
|
-
# TypedDicts
|
|
47
|
-
'ApiProfileDefaultsMap',
|
|
48
|
-
'ApiProfileConfigMap',
|
|
49
|
-
'ApiConfigMap',
|
|
50
|
-
'EndpointMap',
|
|
51
|
-
]
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
# SECTION: TYPE ALIASES ===================================================== #
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
# Literal type for supported pagination kinds
|
|
58
|
-
# type PaginationType = Literal['page', 'offset', 'cursor']
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
# SECTION: TYPED DICTS ====================================================== #
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
class ApiConfigMap(TypedDict, total=False):
    """
    Editor-facing shape of the top-level API configuration mapping.

    This is the mapping parsed by ``ApiConfig.from_obj``. Supply either a
    ``base_url`` (optionally accompanied by ``headers`` and ``endpoints``),
    or a ``profiles`` mapping in which at least one profile carries a
    ``base_url``. All keys are optional as far as typing is concerned
    (``total=False``).

    See Also
    --------
    - etlplus.config.api.ApiConfig.from_obj: parses this mapping
    """

    # Flat (non-profile) form.
    base_url: str
    headers: StrAnyMap
    # Each endpoint entry is either a full ``EndpointMap`` or a plain string.
    endpoints: Mapping[str, EndpointMap | str]

    # Profile form: one ``ApiProfileConfigMap`` per ``str`` key.
    profiles: Mapping[str, ApiProfileConfigMap]
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
class ApiProfileConfigMap(TypedDict, total=False):
    """
    Editor-facing shape of one entry under ``ApiConfigMap.profiles``.

    Notes
    -----
    Although every key is optional for typing purposes (``total=False``),
    `base_url` is required at runtime when profiles are provided.

    See Also
    --------
    - etlplus.config.api.ApiProfileConfig.from_obj: parses this mapping
    """

    base_url: str
    headers: StrAnyMap
    base_path: str
    auth: StrAnyMap
    # Nested defaults block; see ``ApiProfileDefaultsMap`` for its keys.
    defaults: ApiProfileDefaultsMap
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
class ApiProfileDefaultsMap(TypedDict, total=False):
    """
    Editor-facing shape of the ``defaults`` block of an API profile.

    Every key is optional.

    Notes
    -----
    Runtime expects header values to be str; typing remains permissive.

    See Also
    --------
    - etlplus.config.api.ApiProfileConfig.from_obj: consumes this block
    - etlplus.config.pagination.PaginationConfig.from_obj: parses pagination
    - etlplus.api.rate_limiting.RateLimitConfig.from_obj: parses rate_limit
    """

    headers: StrAnyMap
    # Both blocks accept either the dedicated typed map or any generic
    # string-keyed mapping.
    pagination: PaginationConfigMap | StrAnyMap
    rate_limit: RateLimitConfigMap | StrAnyMap
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
class EndpointMap(TypedDict, total=False):
    """
    Editor-facing shape of the mapping accepted by
    ``EndpointConfig.from_obj``.

    One of 'path' or 'url' should be provided. All keys are optional for
    typing purposes (``total=False``).

    See Also
    --------
    - etlplus.config.api.EndpointConfig.from_obj: parses this mapping
    """

    # Location of the endpoint: supply one of these two.
    path: str
    url: str

    # Request details.
    method: str
    path_params: StrAnyMap
    query_params: StrAnyMap
    body: Any

    # Per-endpoint pagination and rate-limit blocks.
    pagination: PaginationConfigMap
    rate_limit: RateLimitConfigMap
|
etlplus/dag.py
DELETED
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
:mod:`etlplus.dag` module.
|
|
3
|
-
|
|
4
|
-
Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
|
|
5
|
-
``depends_on``.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from __future__ import annotations
|
|
9
|
-
|
|
10
|
-
from collections import deque
|
|
11
|
-
from dataclasses import dataclass
|
|
12
|
-
|
|
13
|
-
from .config.jobs import JobConfig
|
|
14
|
-
|
|
15
|
-
# SECTION: EXPORTS ========================================================== #
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
__all__ = [
|
|
19
|
-
'DagError',
|
|
20
|
-
'topological_sort_jobs',
|
|
21
|
-
]
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
# SECTION: ERRORS =========================================================== #
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
@dataclass(slots=True)
|
|
28
|
-
class DagError(ValueError):
|
|
29
|
-
"""
|
|
30
|
-
Raised when the job dependency graph is invalid.
|
|
31
|
-
|
|
32
|
-
Attributes
|
|
33
|
-
----------
|
|
34
|
-
message : str
|
|
35
|
-
Error message.
|
|
36
|
-
"""
|
|
37
|
-
|
|
38
|
-
# -- Attributes -- #
|
|
39
|
-
|
|
40
|
-
message: str
|
|
41
|
-
|
|
42
|
-
# -- Magic Methods (Object Representation) -- #
|
|
43
|
-
|
|
44
|
-
def __str__(self) -> str:
|
|
45
|
-
return self.message
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
# SECTION: FUNCTIONS ======================================================== #
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def topological_sort_jobs(
|
|
52
|
-
jobs: list[JobConfig],
|
|
53
|
-
) -> list[JobConfig]:
|
|
54
|
-
"""
|
|
55
|
-
Return jobs in topological order based on ``depends_on``.
|
|
56
|
-
|
|
57
|
-
Parameters
|
|
58
|
-
----------
|
|
59
|
-
jobs : list[JobConfig]
|
|
60
|
-
List of job configurations to sort.
|
|
61
|
-
|
|
62
|
-
Returns
|
|
63
|
-
-------
|
|
64
|
-
list[JobConfig]
|
|
65
|
-
Jobs sorted in topological order.
|
|
66
|
-
|
|
67
|
-
Raises
|
|
68
|
-
------
|
|
69
|
-
DagError
|
|
70
|
-
If a dependency is missing, self-referential, or when a cycle is
|
|
71
|
-
detected.
|
|
72
|
-
"""
|
|
73
|
-
index = {job.name: job for job in jobs}
|
|
74
|
-
edges: dict[str, set[str]] = {name: set() for name in index}
|
|
75
|
-
indegree: dict[str, int] = {name: 0 for name in index}
|
|
76
|
-
|
|
77
|
-
for job in jobs:
|
|
78
|
-
for dep in job.depends_on:
|
|
79
|
-
if dep not in index:
|
|
80
|
-
raise DagError(
|
|
81
|
-
f'Unknown dependency "{dep}" in job "{job.name}"',
|
|
82
|
-
)
|
|
83
|
-
if dep == job.name:
|
|
84
|
-
raise DagError(f'Job "{job.name}" depends on itself')
|
|
85
|
-
if job.name not in edges[dep]:
|
|
86
|
-
edges[dep].add(job.name)
|
|
87
|
-
indegree[job.name] += 1
|
|
88
|
-
|
|
89
|
-
queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
|
|
90
|
-
ordered: list[str] = []
|
|
91
|
-
|
|
92
|
-
while queue:
|
|
93
|
-
name = queue.popleft()
|
|
94
|
-
ordered.append(name)
|
|
95
|
-
for child in sorted(edges[name]):
|
|
96
|
-
indegree[child] -= 1
|
|
97
|
-
if indegree[child] == 0:
|
|
98
|
-
queue.append(child)
|
|
99
|
-
|
|
100
|
-
if len(ordered) != len(jobs):
|
|
101
|
-
raise DagError('Dependency cycle detected')
|
|
102
|
-
|
|
103
|
-
return [index[name] for name in ordered]
|
etlplus/workflow/connector.py
DELETED
|
@@ -1,373 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
:mod:`etlplus.workflow.connector` module.
|
|
3
|
-
|
|
4
|
-
A module defining configuration types for data source/target connectors in ETL
|
|
5
|
-
pipelines. A "connector" is any I/O endpoint:
|
|
6
|
-
|
|
7
|
-
- file (local/remote file systems)
|
|
8
|
-
- database
|
|
9
|
-
- REST API service/endpoint
|
|
10
|
-
- (future) queues, streams, etc.
|
|
11
|
-
|
|
12
|
-
Examples
|
|
13
|
-
--------
|
|
14
|
-
- Use :class:`ConnectorApi`/:class:`ConnectorFile`/:class:`ConnectorDb` when
|
|
15
|
-
you want the concrete dataclasses.
|
|
16
|
-
- Use the :class:`Connector` union for typing a value that can be any
|
|
17
|
-
connector.
|
|
18
|
-
- Use :func:`parse_connector(obj)` to construct a connector instance from a
|
|
19
|
-
generic mapping that includes a *type* key.
|
|
20
|
-
|
|
21
|
-
Notes
|
|
22
|
-
-----
|
|
23
|
-
- TypedDict shapes are editor hints; runtime parsing remains permissive
|
|
24
|
-
(from_obj accepts Mapping[str, Any]).
|
|
25
|
-
- TypedDicts referenced in :mod:`etlplus.config.types` remain editor hints.
|
|
26
|
-
Runtime parsing stays permissive and tolerant.
|
|
27
|
-
|
|
28
|
-
See Also
|
|
29
|
-
--------
|
|
30
|
-
- TypedDict shapes for editor hints (not enforced at runtime):
|
|
31
|
-
:class:`etlplus.workflow.types.ConnectorApiConfigMap`,
|
|
32
|
-
:class:`etlplus.workflow.types.ConnectorDbConfigMap`,
|
|
33
|
-
:class:`etlplus.workflow.types.ConnectorFileConfigMap`.
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
from __future__ import annotations
|
|
37
|
-
|
|
38
|
-
from collections.abc import Mapping
|
|
39
|
-
from dataclasses import dataclass
|
|
40
|
-
from dataclasses import field
|
|
41
|
-
from typing import TYPE_CHECKING
|
|
42
|
-
from typing import Any
|
|
43
|
-
from typing import Self
|
|
44
|
-
from typing import overload
|
|
45
|
-
|
|
46
|
-
from ..api import PaginationConfig
|
|
47
|
-
from ..api import RateLimitConfig
|
|
48
|
-
from ..types import StrAnyMap
|
|
49
|
-
from ..utils import cast_str_dict
|
|
50
|
-
from ..utils import coerce_dict
|
|
51
|
-
|
|
52
|
-
if TYPE_CHECKING: # Editor-only typing hints to avoid runtime imports
|
|
53
|
-
from .types import ConnectorApiConfigMap
|
|
54
|
-
from .types import ConnectorDbConfigMap
|
|
55
|
-
from .types import ConnectorFileConfigMap
|
|
56
|
-
from .types import ConnectorType
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
# SECTION: EXPORTS ========================================================== #
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
__all__ = [
|
|
63
|
-
# Data Classes
|
|
64
|
-
'ConnectorApi',
|
|
65
|
-
'ConnectorDb',
|
|
66
|
-
'ConnectorFile',
|
|
67
|
-
# Functions
|
|
68
|
-
'parse_connector',
|
|
69
|
-
# Type aliases
|
|
70
|
-
'Connector',
|
|
71
|
-
]
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
# SECTION: DATA CLASSES ===================================================== #
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
@dataclass(kw_only=True, slots=True)
|
|
78
|
-
class ConnectorApi:
|
|
79
|
-
"""
|
|
80
|
-
Configuration for an API-based data connector.
|
|
81
|
-
|
|
82
|
-
Attributes
|
|
83
|
-
----------
|
|
84
|
-
name : str
|
|
85
|
-
Unique connector name.
|
|
86
|
-
type : ConnectorType
|
|
87
|
-
Connector kind literal, always ``'api'``.
|
|
88
|
-
url : str | None
|
|
89
|
-
Direct absolute URL (when not using ``service``/``endpoint`` refs).
|
|
90
|
-
method : str | None
|
|
91
|
-
Optional HTTP method; typically omitted for sources (defaults to
|
|
92
|
-
GET) and used for targets (e.g., ``'post'``).
|
|
93
|
-
headers : dict[str, str]
|
|
94
|
-
Additional request headers.
|
|
95
|
-
query_params : dict[str, Any]
|
|
96
|
-
Default query parameters.
|
|
97
|
-
pagination : PaginationConfig | None
|
|
98
|
-
Pagination settings (optional).
|
|
99
|
-
rate_limit : RateLimitConfig | None
|
|
100
|
-
Rate limiting settings (optional).
|
|
101
|
-
api : str | None
|
|
102
|
-
Service reference into the pipeline ``apis`` block (a.k.a.
|
|
103
|
-
``service``).
|
|
104
|
-
endpoint : str | None
|
|
105
|
-
Endpoint name within the referenced service.
|
|
106
|
-
"""
|
|
107
|
-
|
|
108
|
-
# -- Attributes -- #
|
|
109
|
-
|
|
110
|
-
name: str
|
|
111
|
-
type: ConnectorType = 'api'
|
|
112
|
-
|
|
113
|
-
# Direct form
|
|
114
|
-
url: str | None = None
|
|
115
|
-
# Optional HTTP method; typically omitted for sources (defaults to GET)
|
|
116
|
-
# at runtime) and used for targets (e.g., 'post', 'put').
|
|
117
|
-
method: str | None = None
|
|
118
|
-
headers: dict[str, str] = field(default_factory=dict)
|
|
119
|
-
query_params: dict[str, Any] = field(default_factory=dict)
|
|
120
|
-
pagination: PaginationConfig | None = None
|
|
121
|
-
rate_limit: RateLimitConfig | None = None
|
|
122
|
-
|
|
123
|
-
# Reference form (to top-level APIs/endpoints)
|
|
124
|
-
api: str | None = None
|
|
125
|
-
endpoint: str | None = None
|
|
126
|
-
|
|
127
|
-
# -- Class Methods -- #
|
|
128
|
-
|
|
129
|
-
@classmethod
|
|
130
|
-
@overload
|
|
131
|
-
def from_obj(cls, obj: ConnectorApiConfigMap) -> Self: ...
|
|
132
|
-
|
|
133
|
-
@classmethod
|
|
134
|
-
@overload
|
|
135
|
-
def from_obj(cls, obj: StrAnyMap) -> Self: ...
|
|
136
|
-
|
|
137
|
-
@classmethod
|
|
138
|
-
def from_obj(
|
|
139
|
-
cls,
|
|
140
|
-
obj: StrAnyMap,
|
|
141
|
-
) -> Self:
|
|
142
|
-
"""
|
|
143
|
-
Parse a mapping into a ``ConnectorApi`` instance.
|
|
144
|
-
|
|
145
|
-
Parameters
|
|
146
|
-
----------
|
|
147
|
-
obj : StrAnyMap
|
|
148
|
-
Mapping with at least ``name``.
|
|
149
|
-
|
|
150
|
-
Returns
|
|
151
|
-
-------
|
|
152
|
-
Self
|
|
153
|
-
Parsed connector instance.
|
|
154
|
-
|
|
155
|
-
Raises
|
|
156
|
-
------
|
|
157
|
-
TypeError
|
|
158
|
-
If ``name`` is missing or invalid.
|
|
159
|
-
"""
|
|
160
|
-
name = obj.get('name')
|
|
161
|
-
if not isinstance(name, str):
|
|
162
|
-
raise TypeError('ConnectorApi requires a "name" (str)')
|
|
163
|
-
headers = cast_str_dict(obj.get('headers'))
|
|
164
|
-
|
|
165
|
-
return cls(
|
|
166
|
-
name=name,
|
|
167
|
-
type='api',
|
|
168
|
-
url=obj.get('url'),
|
|
169
|
-
method=obj.get('method'),
|
|
170
|
-
headers=headers,
|
|
171
|
-
query_params=coerce_dict(obj.get('query_params')),
|
|
172
|
-
pagination=PaginationConfig.from_obj(obj.get('pagination')),
|
|
173
|
-
rate_limit=RateLimitConfig.from_obj(obj.get('rate_limit')),
|
|
174
|
-
api=obj.get('api') or obj.get('service'),
|
|
175
|
-
endpoint=obj.get('endpoint'),
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
@dataclass(kw_only=True, slots=True)
|
|
180
|
-
class ConnectorDb:
|
|
181
|
-
"""
|
|
182
|
-
Configuration for a database-based data connector.
|
|
183
|
-
|
|
184
|
-
Attributes
|
|
185
|
-
----------
|
|
186
|
-
name : str
|
|
187
|
-
Unique connector name.
|
|
188
|
-
type : ConnectorType
|
|
189
|
-
Connector kind literal, always ``'database'``.
|
|
190
|
-
connection_string : str | None
|
|
191
|
-
Connection string/DSN for the database.
|
|
192
|
-
query : str | None
|
|
193
|
-
Query to execute for extraction (optional).
|
|
194
|
-
table : str | None
|
|
195
|
-
Target/source table name (optional).
|
|
196
|
-
mode : str | None
|
|
197
|
-
Load mode hint (e.g., ``'append'``, ``'replace'``) — future use.
|
|
198
|
-
"""
|
|
199
|
-
|
|
200
|
-
# -- Attributes -- #
|
|
201
|
-
|
|
202
|
-
name: str
|
|
203
|
-
type: ConnectorType = 'database'
|
|
204
|
-
connection_string: str | None = None
|
|
205
|
-
query: str | None = None
|
|
206
|
-
table: str | None = None
|
|
207
|
-
mode: str | None = None # append|replace|upsert (future)
|
|
208
|
-
|
|
209
|
-
# -- Class Methods -- #
|
|
210
|
-
|
|
211
|
-
@classmethod
|
|
212
|
-
@overload
|
|
213
|
-
def from_obj(cls, obj: ConnectorDbConfigMap) -> Self: ...
|
|
214
|
-
|
|
215
|
-
@classmethod
|
|
216
|
-
@overload
|
|
217
|
-
def from_obj(cls, obj: StrAnyMap) -> Self: ...
|
|
218
|
-
|
|
219
|
-
@classmethod
|
|
220
|
-
def from_obj(
|
|
221
|
-
cls,
|
|
222
|
-
obj: StrAnyMap,
|
|
223
|
-
) -> Self:
|
|
224
|
-
"""
|
|
225
|
-
Parse a mapping into a ``ConnectorDb`` instance.
|
|
226
|
-
|
|
227
|
-
Parameters
|
|
228
|
-
----------
|
|
229
|
-
obj : StrAnyMap
|
|
230
|
-
Mapping with at least ``name``.
|
|
231
|
-
|
|
232
|
-
Returns
|
|
233
|
-
-------
|
|
234
|
-
Self
|
|
235
|
-
Parsed connector instance.
|
|
236
|
-
|
|
237
|
-
Raises
|
|
238
|
-
------
|
|
239
|
-
TypeError
|
|
240
|
-
If ``name`` is missing or invalid.
|
|
241
|
-
"""
|
|
242
|
-
name = obj.get('name')
|
|
243
|
-
if not isinstance(name, str):
|
|
244
|
-
raise TypeError('ConnectorDb requires a "name" (str)')
|
|
245
|
-
|
|
246
|
-
return cls(
|
|
247
|
-
name=name,
|
|
248
|
-
type='database',
|
|
249
|
-
connection_string=obj.get('connection_string'),
|
|
250
|
-
query=obj.get('query'),
|
|
251
|
-
table=obj.get('table'),
|
|
252
|
-
mode=obj.get('mode'),
|
|
253
|
-
)
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
@dataclass(kw_only=True, slots=True)
|
|
257
|
-
class ConnectorFile:
|
|
258
|
-
"""
|
|
259
|
-
Configuration for a file-based data connector.
|
|
260
|
-
|
|
261
|
-
Attributes
|
|
262
|
-
----------
|
|
263
|
-
name : str
|
|
264
|
-
Unique connector name.
|
|
265
|
-
type : ConnectorType
|
|
266
|
-
Connector kind literal, always ``'file'``.
|
|
267
|
-
format : str | None
|
|
268
|
-
File format (e.g., ``'json'``, ``'csv'``).
|
|
269
|
-
path : str | None
|
|
270
|
-
File path or URI.
|
|
271
|
-
options : dict[str, Any]
|
|
272
|
-
Reader/writer format options.
|
|
273
|
-
"""
|
|
274
|
-
|
|
275
|
-
# -- Attributes -- #
|
|
276
|
-
|
|
277
|
-
name: str
|
|
278
|
-
type: ConnectorType = 'file'
|
|
279
|
-
format: str | None = None
|
|
280
|
-
path: str | None = None
|
|
281
|
-
options: dict[str, Any] = field(default_factory=dict)
|
|
282
|
-
|
|
283
|
-
# -- Class Methods -- #
|
|
284
|
-
|
|
285
|
-
@classmethod
|
|
286
|
-
@overload
|
|
287
|
-
def from_obj(cls, obj: ConnectorFileConfigMap) -> Self: ...
|
|
288
|
-
|
|
289
|
-
@classmethod
|
|
290
|
-
@overload
|
|
291
|
-
def from_obj(cls, obj: StrAnyMap) -> Self: ...
|
|
292
|
-
|
|
293
|
-
@classmethod
|
|
294
|
-
def from_obj(
|
|
295
|
-
cls,
|
|
296
|
-
obj: StrAnyMap,
|
|
297
|
-
) -> Self:
|
|
298
|
-
"""
|
|
299
|
-
Parse a mapping into a ``ConnectorFile`` instance.
|
|
300
|
-
|
|
301
|
-
Parameters
|
|
302
|
-
----------
|
|
303
|
-
obj : StrAnyMap
|
|
304
|
-
Mapping with at least ``name``.
|
|
305
|
-
|
|
306
|
-
Returns
|
|
307
|
-
-------
|
|
308
|
-
Self
|
|
309
|
-
Parsed connector instance.
|
|
310
|
-
|
|
311
|
-
Raises
|
|
312
|
-
------
|
|
313
|
-
TypeError
|
|
314
|
-
If ``name`` is missing or invalid.
|
|
315
|
-
"""
|
|
316
|
-
name = obj.get('name')
|
|
317
|
-
if not isinstance(name, str):
|
|
318
|
-
raise TypeError('ConnectorFile requires a "name" (str)')
|
|
319
|
-
|
|
320
|
-
return cls(
|
|
321
|
-
name=name,
|
|
322
|
-
type='file',
|
|
323
|
-
format=obj.get('format'),
|
|
324
|
-
path=obj.get('path'),
|
|
325
|
-
options=coerce_dict(obj.get('options')),
|
|
326
|
-
)
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
# SECTION: FUNCTIONS ======================================================== #
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
def parse_connector(obj: Mapping[str, Any]) -> Connector:
    """
    Build the concrete connector selected by the ``type`` entry of *obj*.

    The ``type`` value is stringified and case-folded before comparison, so
    matching is case-insensitive.

    Parameters
    ----------
    obj : Mapping[str, Any]
        Mapping with at least ``name`` and ``type``.

    Returns
    -------
    Connector
        Concrete connector instance.

    Raises
    ------
    TypeError
        If ``type`` is unsupported or missing.

    Notes
    -----
    Construction is delegated to the ``from_obj`` class method of the
    matching connector class.
    """
    kind = str(obj.get('type', '')).casefold()

    if kind == 'file':
        return ConnectorFile.from_obj(obj)
    if kind == 'database':
        return ConnectorDb.from_obj(obj)
    if kind == 'api':
        return ConnectorApi.from_obj(obj)

    raise TypeError(
        'Unsupported connector type; '
        'expected one of {file, database, api}',
    )
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
# SECTION: TYPED ALIASES (post-class definitions) ========================= #
|
|
371
|
-
|
|
372
|
-
# Type alias representing any supported connector: the union of the API,
# database, and file connector dataclasses.
type Connector = ConnectorApi | ConnectorDb | ConnectorFile
|