etlplus 0.9.1__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/enums.py +110 -0
- etlplus/file.py +11 -16
- {etlplus-0.9.1.dist-info → etlplus-0.10.1.dist-info}/METADATA +1 -1
- {etlplus-0.9.1.dist-info → etlplus-0.10.1.dist-info}/RECORD +8 -8
- {etlplus-0.9.1.dist-info → etlplus-0.10.1.dist-info}/WHEEL +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.10.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.10.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.10.1.dist-info}/top_level.txt +0 -0
etlplus/enums.py
CHANGED
|
@@ -8,6 +8,7 @@ from __future__ import annotations
|
|
|
8
8
|
|
|
9
9
|
import enum
|
|
10
10
|
import operator as _op
|
|
11
|
+
from pathlib import PurePath
|
|
11
12
|
from statistics import fmean
|
|
12
13
|
from typing import Self
|
|
13
14
|
|
|
@@ -19,16 +20,21 @@ from .types import StrStrMap
|
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
__all__ = [
|
|
23
|
+
# Enums
|
|
22
24
|
'AggregateName',
|
|
23
25
|
'CoercibleStrEnum',
|
|
26
|
+
'CompressionFormat',
|
|
24
27
|
'DataConnectorType',
|
|
25
28
|
'FileFormat',
|
|
26
29
|
'HttpMethod',
|
|
27
30
|
'OperatorName',
|
|
28
31
|
'PipelineStep',
|
|
32
|
+
# Functions
|
|
33
|
+
'coerce_compression_format',
|
|
29
34
|
'coerce_data_connector_type',
|
|
30
35
|
'coerce_file_format',
|
|
31
36
|
'coerce_http_method',
|
|
37
|
+
'infer_file_format_and_compression',
|
|
32
38
|
]
|
|
33
39
|
|
|
34
40
|
|
|
@@ -172,6 +178,39 @@ class AggregateName(CoercibleStrEnum):
|
|
|
172
178
|
return lambda xs, n: (fmean(xs) if xs else 0.0)
|
|
173
179
|
|
|
174
180
|
|
|
181
|
+
class CompressionFormat(CoercibleStrEnum):
|
|
182
|
+
"""Supported compression formats for data files."""
|
|
183
|
+
|
|
184
|
+
# -- Constants -- #
|
|
185
|
+
|
|
186
|
+
GZ = 'gz'
|
|
187
|
+
ZIP = 'zip'
|
|
188
|
+
|
|
189
|
+
# -- Class Methods -- #
|
|
190
|
+
|
|
191
|
+
@classmethod
|
|
192
|
+
def aliases(cls) -> StrStrMap:
|
|
193
|
+
"""
|
|
194
|
+
Return a mapping of common aliases for each enum member.
|
|
195
|
+
|
|
196
|
+
Returns
|
|
197
|
+
-------
|
|
198
|
+
StrStrMap
|
|
199
|
+
A mapping of alias names to their corresponding enum member names.
|
|
200
|
+
"""
|
|
201
|
+
return {
|
|
202
|
+
# File extensions
|
|
203
|
+
'.gz': 'gz',
|
|
204
|
+
'.gzip': 'gz',
|
|
205
|
+
'.zip': 'zip',
|
|
206
|
+
# MIME types
|
|
207
|
+
'application/gzip': 'gz',
|
|
208
|
+
'application/x-gzip': 'gz',
|
|
209
|
+
'application/zip': 'zip',
|
|
210
|
+
'application/x-zip-compressed': 'zip',
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
|
|
175
214
|
class DataConnectorType(CoercibleStrEnum):
|
|
176
215
|
"""Supported data connector types."""
|
|
177
216
|
|
|
@@ -415,6 +454,13 @@ class PipelineStep(CoercibleStrEnum):
|
|
|
415
454
|
# SECTION: INTERNAL CONSTANTS ============================================== #
|
|
416
455
|
|
|
417
456
|
|
|
457
|
+
# Compression formats that are also file formats.
|
|
458
|
+
_COMPRESSION_FILE_FORMATS: set[FileFormat] = {
|
|
459
|
+
FileFormat.GZ,
|
|
460
|
+
FileFormat.ZIP,
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
|
|
418
464
|
# Precomputed order index for PipelineStep; avoids recomputing on each access.
|
|
419
465
|
_PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
|
|
420
466
|
PipelineStep.FILTER: 0,
|
|
@@ -452,6 +498,18 @@ def coerce_file_format(
|
|
|
452
498
|
return FileFormat.coerce(file_format)
|
|
453
499
|
|
|
454
500
|
|
|
501
|
+
def coerce_compression_format(
|
|
502
|
+
compression_format: CompressionFormat | str,
|
|
503
|
+
) -> CompressionFormat:
|
|
504
|
+
"""
|
|
505
|
+
Normalize textual compression format values to :class:`CompressionFormat`.
|
|
506
|
+
|
|
507
|
+
This thin wrapper is kept for backward compatibility; prefer
|
|
508
|
+
:meth:`CompressionFormat.coerce` going forward.
|
|
509
|
+
"""
|
|
510
|
+
return CompressionFormat.coerce(compression_format)
|
|
511
|
+
|
|
512
|
+
|
|
455
513
|
def coerce_http_method(
|
|
456
514
|
http_method: HttpMethod | str,
|
|
457
515
|
) -> HttpMethod:
|
|
@@ -462,3 +520,55 @@ def coerce_http_method(
|
|
|
462
520
|
:meth:`HttpMethod.coerce` going forward.
|
|
463
521
|
"""
|
|
464
522
|
return HttpMethod.coerce(http_method)
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def infer_file_format_and_compression(
|
|
526
|
+
value: object,
|
|
527
|
+
) -> tuple[FileFormat | None, CompressionFormat | None]:
|
|
528
|
+
"""
|
|
529
|
+
Infer data format and compression from a filename, extension, or MIME type.
|
|
530
|
+
|
|
531
|
+
Parameters
|
|
532
|
+
----------
|
|
533
|
+
value : object
|
|
534
|
+
A filename, extension, MIME type, or existing enum member.
|
|
535
|
+
|
|
536
|
+
Returns
|
|
537
|
+
-------
|
|
538
|
+
tuple[FileFormat | None, CompressionFormat | None]
|
|
539
|
+
The inferred data format and compression, if any.
|
|
540
|
+
"""
|
|
541
|
+
if isinstance(value, FileFormat):
|
|
542
|
+
if value in _COMPRESSION_FILE_FORMATS:
|
|
543
|
+
return None, CompressionFormat.coerce(value.value)
|
|
544
|
+
return value, None
|
|
545
|
+
if isinstance(value, CompressionFormat):
|
|
546
|
+
return None, value
|
|
547
|
+
|
|
548
|
+
text = str(value).strip()
|
|
549
|
+
if not text:
|
|
550
|
+
return None, None
|
|
551
|
+
|
|
552
|
+
normalized = text.casefold()
|
|
553
|
+
mime = normalized.split(';', 1)[0].strip()
|
|
554
|
+
|
|
555
|
+
compression = CompressionFormat.try_coerce(mime)
|
|
556
|
+
fmt = FileFormat.try_coerce(mime)
|
|
557
|
+
|
|
558
|
+
suffixes = PurePath(text).suffixes
|
|
559
|
+
if suffixes:
|
|
560
|
+
normalized_suffixes = [suffix.casefold() for suffix in suffixes]
|
|
561
|
+
compression = (
|
|
562
|
+
CompressionFormat.try_coerce(normalized_suffixes[-1])
|
|
563
|
+
or compression
|
|
564
|
+
)
|
|
565
|
+
if compression is not None:
|
|
566
|
+
normalized_suffixes = normalized_suffixes[:-1]
|
|
567
|
+
if normalized_suffixes:
|
|
568
|
+
fmt = FileFormat.try_coerce(normalized_suffixes[-1]) or fmt
|
|
569
|
+
|
|
570
|
+
if fmt in _COMPRESSION_FILE_FORMATS:
|
|
571
|
+
compression = compression or CompressionFormat.coerce(fmt.value)
|
|
572
|
+
fmt = None
|
|
573
|
+
|
|
574
|
+
return fmt, compression
|
etlplus/file.py
CHANGED
|
@@ -16,6 +16,7 @@ from typing import Any
|
|
|
16
16
|
from typing import cast
|
|
17
17
|
|
|
18
18
|
from .enums import FileFormat
|
|
19
|
+
from .enums import infer_file_format_and_compression
|
|
19
20
|
from .types import JSONData
|
|
20
21
|
from .types import JSONDict
|
|
21
22
|
from .types import JSONList
|
|
@@ -33,15 +34,6 @@ __all__ = ['File']
|
|
|
33
34
|
|
|
34
35
|
_DEFAULT_XML_ROOT = 'root'
|
|
35
36
|
|
|
36
|
-
# Map common filename extensions to FileFormat (used for inference)
|
|
37
|
-
_EXT_TO_FORMAT: dict[str, FileFormat] = {
|
|
38
|
-
'csv': FileFormat.CSV,
|
|
39
|
-
'json': FileFormat.JSON,
|
|
40
|
-
'xml': FileFormat.XML,
|
|
41
|
-
'yaml': FileFormat.YAML,
|
|
42
|
-
'yml': FileFormat.YAML,
|
|
43
|
-
}
|
|
44
|
-
|
|
45
37
|
# Optional YAML support (lazy-loaded to avoid hard dependency)
|
|
46
38
|
# Cached access function to avoid global statements.
|
|
47
39
|
_YAML_CACHE: dict[str, Any] = {}
|
|
@@ -246,14 +238,17 @@ class File:
|
|
|
246
238
|
ValueError
|
|
247
239
|
If the extension is unknown or unsupported.
|
|
248
240
|
"""
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
return
|
|
252
|
-
|
|
241
|
+
fmt, compression = infer_file_format_and_compression(self.path)
|
|
242
|
+
if fmt is not None:
|
|
243
|
+
return fmt
|
|
244
|
+
if compression is not None:
|
|
253
245
|
raise ValueError(
|
|
254
|
-
'Cannot infer file format from '
|
|
255
|
-
f'
|
|
256
|
-
)
|
|
246
|
+
'Cannot infer file format from compressed file '
|
|
247
|
+
f'{self.path!r} with compression {compression.value!r}',
|
|
248
|
+
)
|
|
249
|
+
raise ValueError(
|
|
250
|
+
f'Cannot infer file format from extension {self.path.suffix!r}',
|
|
251
|
+
)
|
|
257
252
|
|
|
258
253
|
# -- Instance Methods (Generic API) -- #
|
|
259
254
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
etlplus/__init__.py,sha256=M2gScnyir6WOMAh_EuoQIiAzdcTls0_5hbd_Q6of8I0,1021
|
|
2
2
|
etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
|
|
3
3
|
etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
|
|
4
|
-
etlplus/enums.py,sha256=
|
|
4
|
+
etlplus/enums.py,sha256=yW-Um1fDQbW9p70ooM8dDTH7D0XBgmT-l3bF66N0AQc,14801
|
|
5
5
|
etlplus/extract.py,sha256=f44JdHhNTACxgn44USx05paKTwq7LQY-V4wANCW9hVM,6173
|
|
6
|
-
etlplus/file.py,sha256=
|
|
6
|
+
etlplus/file.py,sha256=B-zebTrIFDKaaKzA9Fq5-L0JwDNYa2T--_6veR3N03s,17939
|
|
7
7
|
etlplus/load.py,sha256=R_y0_vtsEo1bwxWVQu2bfhB5ZIJoIoWu2ycCdvY4RnE,8737
|
|
8
8
|
etlplus/mixins.py,sha256=ifGpHwWv7U00yqGf-kN93vJax2IiK4jaGtTsPsO3Oak,1350
|
|
9
9
|
etlplus/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -57,9 +57,9 @@ etlplus/templates/ddl.sql.j2,sha256=s8fMWvcb4eaJVXkifuib1aQPljtZ8buuyB_uA-ZdU3Q,
|
|
|
57
57
|
etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk,2180
|
|
58
58
|
etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
|
|
59
59
|
etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
|
|
60
|
-
etlplus-0.
|
|
61
|
-
etlplus-0.
|
|
62
|
-
etlplus-0.
|
|
63
|
-
etlplus-0.
|
|
64
|
-
etlplus-0.
|
|
65
|
-
etlplus-0.
|
|
60
|
+
etlplus-0.10.1.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
|
|
61
|
+
etlplus-0.10.1.dist-info/METADATA,sha256=vjSqzhO6_KFW2lXvQgHGFOJDxOx-r4dSVef664jr1mU,21036
|
|
62
|
+
etlplus-0.10.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
63
|
+
etlplus-0.10.1.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
|
|
64
|
+
etlplus-0.10.1.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
|
|
65
|
+
etlplus-0.10.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|