etlplus 0.12.4__py3-none-any.whl → 0.12.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/_imports.py +141 -0
- etlplus/file/_io.py +1 -0
- etlplus/file/avro.py +4 -26
- etlplus/file/core.py +119 -84
- etlplus/file/dat.py +66 -0
- etlplus/file/enums.py +114 -15
- etlplus/file/feather.py +1 -1
- etlplus/file/fwf.py +66 -0
- etlplus/file/ndjson.py +2 -9
- etlplus/file/orc.py +1 -1
- etlplus/file/parquet.py +1 -1
- etlplus/file/psv.py +66 -0
- etlplus/file/stub.py +84 -0
- etlplus/file/tab.py +82 -0
- etlplus/file/txt.py +2 -9
- etlplus/file/xls.py +1 -1
- etlplus/file/xlsx.py +1 -1
- etlplus/file/yaml.py +3 -43
- {etlplus-0.12.4.dist-info → etlplus-0.12.9.dist-info}/METADATA +89 -1
- {etlplus-0.12.4.dist-info → etlplus-0.12.9.dist-info}/RECORD +24 -19
- etlplus/file/_pandas.py +0 -58
- {etlplus-0.12.4.dist-info → etlplus-0.12.9.dist-info}/WHEEL +0 -0
- {etlplus-0.12.4.dist-info → etlplus-0.12.9.dist-info}/entry_points.txt +0 -0
- {etlplus-0.12.4.dist-info → etlplus-0.12.9.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.12.4.dist-info → etlplus-0.12.9.dist-info}/top_level.txt +0 -0
etlplus/file/fwf.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.fwf` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing FWF (fixed-width fields) files.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..types import JSONData
|
|
12
|
+
from ..types import JSONList
|
|
13
|
+
from . import stub
|
|
14
|
+
|
|
15
|
+
# SECTION: EXPORTS ========================================================== #
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'read',
|
|
20
|
+
'write',
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def read(
|
|
28
|
+
path: Path,
|
|
29
|
+
) -> JSONList:
|
|
30
|
+
"""
|
|
31
|
+
Read FWF content from ``path``.
|
|
32
|
+
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
path : Path
|
|
36
|
+
Path to the FWF file on disk.
|
|
37
|
+
|
|
38
|
+
Returns
|
|
39
|
+
-------
|
|
40
|
+
JSONList
|
|
41
|
+
The list of dictionaries read from the FWF file.
|
|
42
|
+
"""
|
|
43
|
+
return stub.read(path, format_name='FWF')
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def write(
|
|
47
|
+
path: Path,
|
|
48
|
+
data: JSONData,
|
|
49
|
+
) -> int:
|
|
50
|
+
"""
|
|
51
|
+
Write ``data`` to FWF file at ``path`` and return record count.
|
|
52
|
+
|
|
53
|
+
Parameters
|
|
54
|
+
----------
|
|
55
|
+
path : Path
|
|
56
|
+
Path to the FWF file on disk.
|
|
57
|
+
data : JSONData
|
|
58
|
+
Data to write as FWF file. Should be a list of dictionaries or a
|
|
59
|
+
single dictionary.
|
|
60
|
+
|
|
61
|
+
Returns
|
|
62
|
+
-------
|
|
63
|
+
int
|
|
64
|
+
The number of rows written to the FWF file.
|
|
65
|
+
"""
|
|
66
|
+
return stub.write(path, data, format_name='FWF')
|
etlplus/file/ndjson.py
CHANGED
|
@@ -14,6 +14,7 @@ from ..types import JSONData
|
|
|
14
14
|
from ..types import JSONDict
|
|
15
15
|
from ..types import JSONList
|
|
16
16
|
from ..utils import count_records
|
|
17
|
+
from ._io import normalize_records
|
|
17
18
|
|
|
18
19
|
# SECTION: EXPORTS ========================================================== #
|
|
19
20
|
|
|
@@ -87,15 +88,7 @@ def write(
|
|
|
87
88
|
TypeError
|
|
88
89
|
If ``data`` is a list containing non-dict items.
|
|
89
90
|
"""
|
|
90
|
-
rows: JSONList
|
|
91
|
-
if isinstance(data, list):
|
|
92
|
-
if not all(isinstance(item, dict) for item in data):
|
|
93
|
-
raise TypeError(
|
|
94
|
-
'NDJSON payloads must contain only objects (dicts)',
|
|
95
|
-
)
|
|
96
|
-
rows = cast(JSONList, data)
|
|
97
|
-
else:
|
|
98
|
-
rows = [cast(JSONDict, data)]
|
|
91
|
+
rows = normalize_records(data, 'NDJSON')
|
|
99
92
|
|
|
100
93
|
if not rows:
|
|
101
94
|
return 0
|
etlplus/file/orc.py
CHANGED
|
@@ -11,8 +11,8 @@ from typing import cast
|
|
|
11
11
|
|
|
12
12
|
from ..types import JSONData
|
|
13
13
|
from ..types import JSONList
|
|
14
|
+
from ._imports import get_pandas
|
|
14
15
|
from ._io import normalize_records
|
|
15
|
-
from ._pandas import get_pandas
|
|
16
16
|
|
|
17
17
|
# SECTION: EXPORTS ========================================================== #
|
|
18
18
|
|
etlplus/file/parquet.py
CHANGED
|
@@ -11,8 +11,8 @@ from typing import cast
|
|
|
11
11
|
|
|
12
12
|
from ..types import JSONData
|
|
13
13
|
from ..types import JSONList
|
|
14
|
+
from ._imports import get_pandas
|
|
14
15
|
from ._io import normalize_records
|
|
15
|
-
from ._pandas import get_pandas
|
|
16
16
|
|
|
17
17
|
# SECTION: EXPORTS ========================================================== #
|
|
18
18
|
|
etlplus/file/psv.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.psv` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing PSV (pipe-separated values) files.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..types import JSONData
|
|
12
|
+
from ..types import JSONList
|
|
13
|
+
from . import stub
|
|
14
|
+
|
|
15
|
+
# SECTION: EXPORTS ========================================================== #
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'read',
|
|
20
|
+
'write',
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def read(
|
|
28
|
+
path: Path,
|
|
29
|
+
) -> JSONList:
|
|
30
|
+
"""
|
|
31
|
+
Read PSV content from ``path``.
|
|
32
|
+
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
path : Path
|
|
36
|
+
Path to the PSV file on disk.
|
|
37
|
+
|
|
38
|
+
Returns
|
|
39
|
+
-------
|
|
40
|
+
JSONList
|
|
41
|
+
The list of dictionaries read from the PSV file.
|
|
42
|
+
"""
|
|
43
|
+
return stub.read(path, format_name='PSV')
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def write(
|
|
47
|
+
path: Path,
|
|
48
|
+
data: JSONData,
|
|
49
|
+
) -> int:
|
|
50
|
+
"""
|
|
51
|
+
Write ``data`` to PSV file at ``path`` and return record count.
|
|
52
|
+
|
|
53
|
+
Parameters
|
|
54
|
+
----------
|
|
55
|
+
path : Path
|
|
56
|
+
Path to the PSV file on disk.
|
|
57
|
+
data : JSONData
|
|
58
|
+
Data to write as PSV file. Should be a list of dictionaries or a
|
|
59
|
+
single dictionary.
|
|
60
|
+
|
|
61
|
+
Returns
|
|
62
|
+
-------
|
|
63
|
+
int
|
|
64
|
+
The number of rows written to the PSV file.
|
|
65
|
+
"""
|
|
66
|
+
return stub.write(path, data, format_name='PSV')
|
etlplus/file/stub.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.stub` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing stubbed files.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from ..types import JSONData
|
|
12
|
+
from ..types import JSONList
|
|
13
|
+
|
|
14
|
+
# SECTION: EXPORTS ========================================================== #
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
'read',
|
|
19
|
+
'write',
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def read(
|
|
27
|
+
path: Path,
|
|
28
|
+
format_name: str = 'Stubbed',
|
|
29
|
+
) -> JSONList:
|
|
30
|
+
"""
|
|
31
|
+
Raises a :class:`NotImplementedError` for stubbed reads.
|
|
32
|
+
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
path : Path
|
|
36
|
+
Path to the stubbed file on disk.
|
|
37
|
+
format_name : str
|
|
38
|
+
Human-readable format name.
|
|
39
|
+
|
|
40
|
+
Returns
|
|
41
|
+
-------
|
|
42
|
+
JSONList
|
|
43
|
+
The list of dictionaries read from the stubbed file.
|
|
44
|
+
|
|
45
|
+
Raises
|
|
46
|
+
------
|
|
47
|
+
NotImplementedError
|
|
48
|
+
Always, since this is a stub implementation.
|
|
49
|
+
"""
|
|
50
|
+
_ = path
|
|
51
|
+
raise NotImplementedError(f'{format_name} read is not implemented yet')
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def write(
|
|
55
|
+
path: Path,
|
|
56
|
+
data: JSONData,
|
|
57
|
+
format_name: str = 'Stubbed',
|
|
58
|
+
) -> int:
|
|
59
|
+
"""
|
|
60
|
+
Raises a :class:`NotImplementedError` for stubbed writes.
|
|
61
|
+
|
|
62
|
+
Parameters
|
|
63
|
+
----------
|
|
64
|
+
path : Path
|
|
65
|
+
Path to the stubbed file on disk.
|
|
66
|
+
data : JSONData
|
|
67
|
+
Data to write as stubbed file. Should be a list of dictionaries or a
|
|
68
|
+
single dictionary.
|
|
69
|
+
format_name : str
|
|
70
|
+
Human-readable format name.
|
|
71
|
+
|
|
72
|
+
Returns
|
|
73
|
+
-------
|
|
74
|
+
int
|
|
75
|
+
The number of rows written to the stubbed file.
|
|
76
|
+
|
|
77
|
+
Raises
|
|
78
|
+
------
|
|
79
|
+
NotImplementedError
|
|
80
|
+
Always, since this is a stub implementation.
|
|
81
|
+
"""
|
|
82
|
+
_ = path
|
|
83
|
+
_ = data
|
|
84
|
+
raise NotImplementedError(f'{format_name} write is not implemented yet')
|
etlplus/file/tab.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.tab` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing TAB (tab-separated values) files.
|
|
5
|
+
|
|
6
|
+
Notes
|
|
7
|
+
-----
|
|
8
|
+
- A “TAB-formatted” file is not necessarily a TSV file when tabs aren’t
|
|
9
|
+
actually the delimiter that defines the fields, even if the text looks
|
|
10
|
+
column-aligned.
|
|
11
|
+
- Common cases:
|
|
12
|
+
- Fixed-width text (FWF) that uses tabs for alignment.
|
|
13
|
+
- Mixed whitespace (tabs + spaces) as “pretty printing”.
|
|
14
|
+
- Tabs embedded inside quoted fields (or unescaped tabs in free text).
|
|
15
|
+
- Header/metadata lines or multi-line records that break TSV assumptions.
|
|
16
|
+
- Not actually tab-delimited despite the name.
|
|
17
|
+
- Rule of thumb:
|
|
18
|
+
- If the file is truly tab-delimited, use :mod:`etlplus.file.tsv`.
|
|
19
|
+
- If the file has fixed-width fields, use :mod:`etlplus.file.fwf`.
|
|
20
|
+
- Otherwise, use :mod:`etlplus.file.tab` (i.e., this module).
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
from ..types import JSONData
|
|
28
|
+
from ..types import JSONList
|
|
29
|
+
from . import stub
|
|
30
|
+
|
|
31
|
+
# SECTION: EXPORTS ========================================================== #
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
'read',
|
|
36
|
+
'write',
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def read(
|
|
44
|
+
path: Path,
|
|
45
|
+
) -> JSONList:
|
|
46
|
+
"""
|
|
47
|
+
Read TAB content from ``path``.
|
|
48
|
+
|
|
49
|
+
Parameters
|
|
50
|
+
----------
|
|
51
|
+
path : Path
|
|
52
|
+
Path to the TAB file on disk.
|
|
53
|
+
|
|
54
|
+
Returns
|
|
55
|
+
-------
|
|
56
|
+
JSONList
|
|
57
|
+
The list of dictionaries read from the TAB file.
|
|
58
|
+
"""
|
|
59
|
+
return stub.read(path, format_name='TAB')
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def write(
|
|
63
|
+
path: Path,
|
|
64
|
+
data: JSONData,
|
|
65
|
+
) -> int:
|
|
66
|
+
"""
|
|
67
|
+
Write ``data`` to TAB file at ``path`` and return record count.
|
|
68
|
+
|
|
69
|
+
Parameters
|
|
70
|
+
----------
|
|
71
|
+
path : Path
|
|
72
|
+
Path to the TAB file on disk.
|
|
73
|
+
data : JSONData
|
|
74
|
+
Data to write as TAB file. Should be a list of dictionaries or a
|
|
75
|
+
single dictionary.
|
|
76
|
+
|
|
77
|
+
Returns
|
|
78
|
+
-------
|
|
79
|
+
int
|
|
80
|
+
The number of rows written to the TAB file.
|
|
81
|
+
"""
|
|
82
|
+
return stub.write(path, data, format_name='TAB')
|
etlplus/file/txt.py
CHANGED
|
@@ -7,12 +7,11 @@ Helpers for reading/writing text files.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import cast
|
|
11
10
|
|
|
12
11
|
from ..types import JSONData
|
|
13
|
-
from ..types import JSONDict
|
|
14
12
|
from ..types import JSONList
|
|
15
13
|
from ..utils import count_records
|
|
14
|
+
from ._io import normalize_records
|
|
16
15
|
|
|
17
16
|
# SECTION: EXPORTS ========================================================== #
|
|
18
17
|
|
|
@@ -77,13 +76,7 @@ def write(
|
|
|
77
76
|
If any item in ``data`` is not a dictionary or if any dictionary
|
|
78
77
|
does not contain a ``'text'`` key.
|
|
79
78
|
"""
|
|
80
|
-
rows: JSONList
|
|
81
|
-
if isinstance(data, list):
|
|
82
|
-
if not all(isinstance(item, dict) for item in data):
|
|
83
|
-
raise TypeError('TXT payloads must contain only objects (dicts)')
|
|
84
|
-
rows = cast(JSONList, data)
|
|
85
|
-
else:
|
|
86
|
-
rows = [cast(JSONDict, data)]
|
|
79
|
+
rows = normalize_records(data, 'TXT')
|
|
87
80
|
|
|
88
81
|
if not rows:
|
|
89
82
|
return 0
|
etlplus/file/xls.py
CHANGED
etlplus/file/xlsx.py
CHANGED
|
@@ -11,8 +11,8 @@ from typing import cast
|
|
|
11
11
|
|
|
12
12
|
from ..types import JSONData
|
|
13
13
|
from ..types import JSONList
|
|
14
|
+
from ._imports import get_pandas
|
|
14
15
|
from ._io import normalize_records
|
|
15
|
-
from ._pandas import get_pandas
|
|
16
16
|
|
|
17
17
|
# SECTION: EXPORTS ========================================================== #
|
|
18
18
|
|
etlplus/file/yaml.py
CHANGED
|
@@ -7,13 +7,13 @@ Helpers for reading/writing YAML files.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import Any
|
|
11
10
|
from typing import cast
|
|
12
11
|
|
|
13
12
|
from ..types import JSONData
|
|
14
13
|
from ..types import JSONDict
|
|
15
14
|
from ..types import JSONList
|
|
16
15
|
from ..utils import count_records
|
|
16
|
+
from ._imports import get_yaml
|
|
17
17
|
|
|
18
18
|
# SECTION: EXPORTS ========================================================== #
|
|
19
19
|
|
|
@@ -24,43 +24,6 @@ __all__ = [
|
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
# Optional YAML support (lazy-loaded to avoid hard dependency)
|
|
31
|
-
# Cached access function to avoid global statements.
|
|
32
|
-
_YAML_CACHE: dict[str, Any] = {}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _get_yaml() -> Any:
|
|
39
|
-
"""
|
|
40
|
-
Return the PyYAML module, importing it on first use.
|
|
41
|
-
|
|
42
|
-
Raises an informative ImportError if the optional dependency is missing.
|
|
43
|
-
"""
|
|
44
|
-
mod = _YAML_CACHE.get('mod')
|
|
45
|
-
if mod is not None: # pragma: no cover - tiny branch
|
|
46
|
-
return mod
|
|
47
|
-
try:
|
|
48
|
-
_yaml_mod = __import__('yaml') # type: ignore[assignment]
|
|
49
|
-
except ImportError as e: # pragma: no cover
|
|
50
|
-
raise ImportError(
|
|
51
|
-
'YAML support requires optional dependency "PyYAML".\n'
|
|
52
|
-
'Install with: pip install PyYAML',
|
|
53
|
-
) from e
|
|
54
|
-
_YAML_CACHE['mod'] = _yaml_mod
|
|
55
|
-
|
|
56
|
-
return _yaml_mod
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def _require_yaml() -> None:
|
|
60
|
-
"""Ensure PyYAML is available or raise an informative error."""
|
|
61
|
-
_get_yaml()
|
|
62
|
-
|
|
63
|
-
|
|
64
27
|
# SECTION: FUNCTIONS ======================================================== #
|
|
65
28
|
|
|
66
29
|
|
|
@@ -87,10 +50,8 @@ def read(
|
|
|
87
50
|
TypeError
|
|
88
51
|
If the YAML root is not an object or an array of objects.
|
|
89
52
|
"""
|
|
90
|
-
_require_yaml()
|
|
91
|
-
|
|
92
53
|
with path.open('r', encoding='utf-8') as handle:
|
|
93
|
-
loaded = _get_yaml().safe_load(handle)
|
|
54
|
+
loaded = get_yaml().safe_load(handle)
|
|
94
55
|
|
|
95
56
|
if isinstance(loaded, dict):
|
|
96
57
|
return cast(JSONDict, loaded)
|
|
@@ -124,9 +85,8 @@ def write(
|
|
|
124
85
|
int
|
|
125
86
|
The number of records written.
|
|
126
87
|
"""
|
|
127
|
-
_require_yaml()
|
|
128
88
|
with path.open('w', encoding='utf-8') as handle:
|
|
129
|
-
|
|
89
|
+
get_yaml().safe_dump(
|
|
130
90
|
data,
|
|
131
91
|
handle,
|
|
132
92
|
sort_keys=False,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.12.4
|
|
3
|
+
Version: 0.12.9
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -68,6 +68,17 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
68
68
|
- [Features](#features)
|
|
69
69
|
- [Installation](#installation)
|
|
70
70
|
- [Quickstart](#quickstart)
|
|
71
|
+
- [Data Connectors](#data-connectors)
|
|
72
|
+
- [REST APIs (`api`)](#rest-apis-api)
|
|
73
|
+
- [Databases (`database`)](#databases-database)
|
|
74
|
+
- [Files (`file`)](#files-file)
|
|
75
|
+
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
76
|
+
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
77
|
+
- [Semi-Structured Text](#semi-structured-text)
|
|
78
|
+
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
79
|
+
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
80
|
+
- [Spreadsheets](#spreadsheets)
|
|
81
|
+
- [Data Archives](#data-archives)
|
|
71
82
|
- [Usage](#usage)
|
|
72
83
|
- [Command Line Interface](#command-line-interface)
|
|
73
84
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -191,6 +202,83 @@ assert validate(filtered, rules)["valid"]
|
|
|
191
202
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
192
203
|
```
|
|
193
204
|
|
|
205
|
+
## Data Connectors
|
|
206
|
+
|
|
207
|
+
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
208
|
+
are differentiated by their types, each of which is represented in the subsections below.
|
|
209
|
+
|
|
210
|
+
### REST APIs (`api`)
|
|
211
|
+
|
|
212
|
+
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
213
|
+
include GET for extract and PATCH/POST/PUT for load.
|
|
214
|
+
|
|
215
|
+
### Databases (`database`)
|
|
216
|
+
|
|
217
|
+
Database connectors use connection strings for extraction and loading, and
|
|
218
|
+
DDL can be rendered from table specs for migrations or schema checks.
|
|
219
|
+
|
|
220
|
+
### Files (`file`)
|
|
221
|
+
|
|
222
|
+
File formats are grouped as in `FileFormat`. Support is marked as:
|
|
223
|
+
|
|
224
|
+
- **Y**: implemented (may require optional dependencies)
|
|
225
|
+
- **N**: stubbed or not yet implemented
|
|
226
|
+
|
|
227
|
+
#### Stubbed / Placeholder
|
|
228
|
+
|
|
229
|
+
| Format | Supported | Description |
|
|
230
|
+
| --- | --- | --- |
|
|
231
|
+
| `stub` | N | Placeholder format for tests and future connectors. |
|
|
232
|
+
|
|
233
|
+
#### Tabular & Delimited Text
|
|
234
|
+
|
|
235
|
+
| Format | Supported | Description |
|
|
236
|
+
| --- | --- | --- |
|
|
237
|
+
| `csv` | Y | Comma-Separated Values |
|
|
238
|
+
| `fwf` | N | Fixed-Width Fields |
|
|
239
|
+
| `dat` | N | Generic data file, often delimited or fixed-width |
|
|
240
|
+
| `psv` | N | Pipe-Separated Values |
|
|
241
|
+
| `tab` | N | Often synonymous with TSV |
|
|
242
|
+
| `tsv` | Y | Tab-Separated Values |
|
|
243
|
+
| `txt` | Y | Plain text, often delimited or fixed-width |
|
|
244
|
+
|
|
245
|
+
#### Semi-Structured Text
|
|
246
|
+
|
|
247
|
+
| Format | Supported | Description |
|
|
248
|
+
| --- | --- | --- |
|
|
249
|
+
| `json` | Y | JavaScript Object Notation |
|
|
250
|
+
| `ndjson` | Y | Newline-Delimited JSON |
|
|
251
|
+
| `xml` | Y | Extensible Markup Language |
|
|
252
|
+
| `yaml` | Y | YAML Ain't Markup Language |
|
|
253
|
+
|
|
254
|
+
#### Columnar / Analytics-Friendly
|
|
255
|
+
|
|
256
|
+
| Format | Supported | Description |
|
|
257
|
+
| --- | --- | --- |
|
|
258
|
+
| `feather` | Y | Apache Arrow Feather |
|
|
259
|
+
| `orc` | Y | Optimized Row Columnar; common in Hadoop |
|
|
260
|
+
| `parquet` | Y | Apache Parquet; common in Big Data |
|
|
261
|
+
|
|
262
|
+
#### Binary Serialization and Interchange
|
|
263
|
+
|
|
264
|
+
| Format | Supported | Description |
|
|
265
|
+
| --- | --- | --- |
|
|
266
|
+
| `avro` | Y | Apache Avro |
|
|
267
|
+
|
|
268
|
+
#### Spreadsheets
|
|
269
|
+
|
|
270
|
+
| Format | Supported | Description |
|
|
271
|
+
| --- | --- | --- |
|
|
272
|
+
| `xls` | Y | Microsoft Excel (BIFF); read-only |
|
|
273
|
+
| `xlsx` | Y | Microsoft Excel (Open XML) |
|
|
274
|
+
|
|
275
|
+
#### Data Archives
|
|
276
|
+
|
|
277
|
+
| Format | Supported | Description |
|
|
278
|
+
| --- | --- | --- |
|
|
279
|
+
| `gz` | Y | Gzip-compressed file |
|
|
280
|
+
| `zip` | Y | ZIP archive |
|
|
281
|
+
|
|
194
282
|
## Usage
|
|
195
283
|
|
|
196
284
|
### Command Line Interface
|
|
@@ -57,24 +57,29 @@ etlplus/database/schema.py,sha256=813C0Dd3WE53KTYot4dgjAxctgKXLXx-8_Rk_4r2e28,70
|
|
|
57
57
|
etlplus/database/types.py,sha256=_pkQyC14TzAlgyeIqZG4F5LWYknZbHw3TW68Auk7Ya0,795
|
|
58
58
|
etlplus/file/README.md,sha256=avWnyeKfs3uP3qa-DVBJ6t05jS2oFUPeQ3xf1Ph0eC0,3626
|
|
59
59
|
etlplus/file/__init__.py,sha256=X03bosSM-uSd6dh3ur0un6_ozFRw2Tm4PE6kVUjtXK8,475
|
|
60
|
-
etlplus/file/
|
|
61
|
-
etlplus/file/
|
|
62
|
-
etlplus/file/avro.py,sha256=
|
|
63
|
-
etlplus/file/core.py,sha256=
|
|
60
|
+
etlplus/file/_imports.py,sha256=9e8CWjyNIRcmiULEPuwtnJELUOXd4EvVv_vDnDYiB9c,3121
|
|
61
|
+
etlplus/file/_io.py,sha256=GXTcvjfXQX4rSdyu1JNhFmqQJlirDqd8dEGCN3dHvNg,2968
|
|
62
|
+
etlplus/file/avro.py,sha256=Fsg0Ml5iLFYr37CEQfjodcf8efc19xN57Q8fRrpCbSw,3977
|
|
63
|
+
etlplus/file/core.py,sha256=71AFsVwvAP6crsAphMkAmhqTwk_-TkEw9eByG6gwzpA,8862
|
|
64
64
|
etlplus/file/csv.py,sha256=gtEUWJO54veEtgaLB_QnmR8yOpeToq78nrtAPVTTl44,1269
|
|
65
|
-
etlplus/file/
|
|
66
|
-
etlplus/file/
|
|
65
|
+
etlplus/file/dat.py,sha256=O9Y7q9qkPoQSkFoX_LT8MrEdKCJ_JMPKKhGnfZ2sh0g,1238
|
|
66
|
+
etlplus/file/enums.py,sha256=RFK6vd_QI1XZx7erRLhD4Y0pQw6_z41M9p7MU8WVrA8,10924
|
|
67
|
+
etlplus/file/feather.py,sha256=YYIsIsn7e479usU422EQzLkBUv3h-9wcOvMOr4b4mPk,2183
|
|
68
|
+
etlplus/file/fwf.py,sha256=nNv2jACzOmu3AOaLxDx9Eg8BC4I05PoPSHDSZXMIBew,1252
|
|
67
69
|
etlplus/file/gz.py,sha256=NKsvIV7TIWn8USbvuZmRH9hr6OrXh4TzTfDykHD41Kk,2631
|
|
68
70
|
etlplus/file/json.py,sha256=_KAXb4rZ1C8xnaV10IkihuFh1lhbWvajFOlMrBCNVjQ,2099
|
|
69
|
-
etlplus/file/ndjson.py,sha256=
|
|
70
|
-
etlplus/file/orc.py,sha256=
|
|
71
|
-
etlplus/file/parquet.py,sha256=
|
|
71
|
+
etlplus/file/ndjson.py,sha256=zzfeELrGJGZOL8b2U035T2kidJta4xZJSSuhiRAMP9U,2166
|
|
72
|
+
etlplus/file/orc.py,sha256=VK0x5GiGaOrxKgYShYjg-Ay71k3hanoMFnZWKwUjQvw,2140
|
|
73
|
+
etlplus/file/parquet.py,sha256=v4IXdB5klpeNnFYG1tWZbJsQ4QvSQGVPWF8-xFqDWMY,2282
|
|
74
|
+
etlplus/file/psv.py,sha256=AFrbRXWemuHGfELnZ1yydVuPKu3dNyeklJ9pOVIkyyo,1255
|
|
75
|
+
etlplus/file/stub.py,sha256=UPiKD_CYkz66X6bZtSqWX3ShBFrMlf-Se83z4qYnIR4,1733
|
|
76
|
+
etlplus/file/tab.py,sha256=0bAlFGaVXNeR_VLBJtypvZIT4OSvRd0RIgSYvrrTJm0,2009
|
|
72
77
|
etlplus/file/tsv.py,sha256=NiqF84Ck8e_DinaiO8yKRR6fVUTnUhpThzo4E1QUD8k,1271
|
|
73
|
-
etlplus/file/txt.py,sha256=
|
|
74
|
-
etlplus/file/xls.py,sha256=
|
|
75
|
-
etlplus/file/xlsx.py,sha256=
|
|
78
|
+
etlplus/file/txt.py,sha256=kf7FrgExe827ZCjGChduLuDl1kehjirBhbX2UdsRumg,1957
|
|
79
|
+
etlplus/file/xls.py,sha256=P94lEH8ZMex_G5x1hotidcj-dvGHAnUY7ouzvSYaV7o,1772
|
|
80
|
+
etlplus/file/xlsx.py,sha256=vtiAS8Ng9FV1vCWYTd1YO2ORKIJG3HDfmqy3NkVpt0A,2182
|
|
76
81
|
etlplus/file/xml.py,sha256=rYtCPvyLn9djClN2xKeqRCPsMXnvCH4R8zj94NJRdQc,4018
|
|
77
|
-
etlplus/file/yaml.py,sha256=
|
|
82
|
+
etlplus/file/yaml.py,sha256=gm0fP5x5g12NoWkx13VUIPk4kpHdqSc_Gecsx_nt_AA,2089
|
|
78
83
|
etlplus/file/zip.py,sha256=nd26V3S0edklriKnKOGDTLlO8RBXTda_zLLEQrJgKL4,4185
|
|
79
84
|
etlplus/templates/README.md,sha256=kHSZ8FWcrlrcWz0hBIbho-k1Bi-PL-DQ7g02o-g70c8,1355
|
|
80
85
|
etlplus/templates/__init__.py,sha256=tsniN7XJYs3NwYxJ6c2HD5upHP3CDkLx-bQCMt97UOM,106
|
|
@@ -83,9 +88,9 @@ etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk
|
|
|
83
88
|
etlplus/validation/README.md,sha256=qusyiyJu2DsaK80jlwfXVZ0iDgeuTPOX2EL3a_fcFiw,1401
|
|
84
89
|
etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
|
|
85
90
|
etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
|
|
86
|
-
etlplus-0.12.
|
|
87
|
-
etlplus-0.12.
|
|
88
|
-
etlplus-0.12.
|
|
89
|
-
etlplus-0.12.
|
|
90
|
-
etlplus-0.12.
|
|
91
|
-
etlplus-0.12.
|
|
91
|
+
etlplus-0.12.9.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
|
|
92
|
+
etlplus-0.12.9.dist-info/METADATA,sha256=TLnRtOHKV6WimzxzUrgA39j0LUOZI0fAB8OUaGuqo4E,25708
|
|
93
|
+
etlplus-0.12.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
94
|
+
etlplus-0.12.9.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
|
|
95
|
+
etlplus-0.12.9.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
|
|
96
|
+
etlplus-0.12.9.dist-info/RECORD,,
|