etlplus-0.9.1-py3-none-any.whl → etlplus-0.10.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/enums.py CHANGED
@@ -8,6 +8,7 @@ from __future__ import annotations
8
8
 
9
9
  import enum
10
10
  import operator as _op
11
+ from pathlib import PurePath
11
12
  from statistics import fmean
12
13
  from typing import Self
13
14
 
@@ -19,16 +20,21 @@ from .types import StrStrMap
19
20
 
20
21
 
21
22
  __all__ = [
23
+ # Enums
22
24
  'AggregateName',
23
25
  'CoercibleStrEnum',
26
+ 'CompressionFormat',
24
27
  'DataConnectorType',
25
28
  'FileFormat',
26
29
  'HttpMethod',
27
30
  'OperatorName',
28
31
  'PipelineStep',
32
+ # Functions
33
+ 'coerce_compression_format',
29
34
  'coerce_data_connector_type',
30
35
  'coerce_file_format',
31
36
  'coerce_http_method',
37
+ 'infer_file_format_and_compression',
32
38
  ]
33
39
 
34
40
 
@@ -172,6 +178,39 @@ class AggregateName(CoercibleStrEnum):
172
178
  return lambda xs, n: (fmean(xs) if xs else 0.0)
173
179
 
174
180
 
181
+ class CompressionFormat(CoercibleStrEnum):
182
+ """Supported compression formats for data files."""
183
+
184
+ # -- Constants -- #
185
+
186
+ GZ = 'gz'
187
+ ZIP = 'zip'
188
+
189
+ # -- Class Methods -- #
190
+
191
+ @classmethod
192
+ def aliases(cls) -> StrStrMap:
193
+ """
194
+ Return a mapping of common aliases for each enum member.
195
+
196
+ Returns
197
+ -------
198
+ StrStrMap
199
+ A mapping of alias names to their corresponding enum member names.
200
+ """
201
+ return {
202
+ # File extensions
203
+ '.gz': 'gz',
204
+ '.gzip': 'gz',
205
+ '.zip': 'zip',
206
+ # MIME types
207
+ 'application/gzip': 'gz',
208
+ 'application/x-gzip': 'gz',
209
+ 'application/zip': 'zip',
210
+ 'application/x-zip-compressed': 'zip',
211
+ }
212
+
213
+
175
214
  class DataConnectorType(CoercibleStrEnum):
176
215
  """Supported data connector types."""
177
216
 
@@ -415,6 +454,13 @@ class PipelineStep(CoercibleStrEnum):
415
454
  # SECTION: INTERNAL CONSTANTS ============================================== #
416
455
 
417
456
 
457
+ # Compression formats that are also file formats.
458
+ _COMPRESSION_FILE_FORMATS: set[FileFormat] = {
459
+ FileFormat.GZ,
460
+ FileFormat.ZIP,
461
+ }
462
+
463
+
418
464
  # Precomputed order index for PipelineStep; avoids recomputing on each access.
419
465
  _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
420
466
  PipelineStep.FILTER: 0,
@@ -452,6 +498,18 @@ def coerce_file_format(
452
498
  return FileFormat.coerce(file_format)
453
499
 
454
500
 
501
+ def coerce_compression_format(
502
+ compression_format: CompressionFormat | str,
503
+ ) -> CompressionFormat:
504
+ """
505
+ Normalize textual compression format values to :class:`CompressionFormat`.
506
+
507
+ This thin wrapper is kept for backward compatibility; prefer
508
+ :meth:`CompressionFormat.coerce` going forward.
509
+ """
510
+ return CompressionFormat.coerce(compression_format)
511
+
512
+
455
513
  def coerce_http_method(
456
514
  http_method: HttpMethod | str,
457
515
  ) -> HttpMethod:
@@ -462,3 +520,55 @@ def coerce_http_method(
462
520
  :meth:`HttpMethod.coerce` going forward.
463
521
  """
464
522
  return HttpMethod.coerce(http_method)
523
+
524
+
525
+ def infer_file_format_and_compression(
526
+ value: object,
527
+ ) -> tuple[FileFormat | None, CompressionFormat | None]:
528
+ """
529
+ Infer data format and compression from a filename, extension, or MIME type.
530
+
531
+ Parameters
532
+ ----------
533
+ value : object
534
+ A filename, extension, MIME type, or existing enum member.
535
+
536
+ Returns
537
+ -------
538
+ tuple[FileFormat | None, CompressionFormat | None]
539
+ The inferred data format and compression, if any.
540
+ """
541
+ if isinstance(value, FileFormat):
542
+ if value in _COMPRESSION_FILE_FORMATS:
543
+ return None, CompressionFormat.coerce(value.value)
544
+ return value, None
545
+ if isinstance(value, CompressionFormat):
546
+ return None, value
547
+
548
+ text = str(value).strip()
549
+ if not text:
550
+ return None, None
551
+
552
+ normalized = text.casefold()
553
+ mime = normalized.split(';', 1)[0].strip()
554
+
555
+ compression = CompressionFormat.try_coerce(mime)
556
+ fmt = FileFormat.try_coerce(mime)
557
+
558
+ suffixes = PurePath(text).suffixes
559
+ if suffixes:
560
+ normalized_suffixes = [suffix.casefold() for suffix in suffixes]
561
+ compression = (
562
+ CompressionFormat.try_coerce(normalized_suffixes[-1])
563
+ or compression
564
+ )
565
+ if compression is not None:
566
+ normalized_suffixes = normalized_suffixes[:-1]
567
+ if normalized_suffixes:
568
+ fmt = FileFormat.try_coerce(normalized_suffixes[-1]) or fmt
569
+
570
+ if fmt in _COMPRESSION_FILE_FORMATS:
571
+ compression = compression or CompressionFormat.coerce(fmt.value)
572
+ fmt = None
573
+
574
+ return fmt, compression
etlplus/file.py CHANGED
@@ -16,6 +16,7 @@ from typing import Any
16
16
  from typing import cast
17
17
 
18
18
  from .enums import FileFormat
19
+ from .enums import infer_file_format_and_compression
19
20
  from .types import JSONData
20
21
  from .types import JSONDict
21
22
  from .types import JSONList
@@ -33,15 +34,6 @@ __all__ = ['File']
33
34
 
34
35
  _DEFAULT_XML_ROOT = 'root'
35
36
 
36
- # Map common filename extensions to FileFormat (used for inference)
37
- _EXT_TO_FORMAT: dict[str, FileFormat] = {
38
- 'csv': FileFormat.CSV,
39
- 'json': FileFormat.JSON,
40
- 'xml': FileFormat.XML,
41
- 'yaml': FileFormat.YAML,
42
- 'yml': FileFormat.YAML,
43
- }
44
-
45
37
  # Optional YAML support (lazy-loaded to avoid hard dependency)
46
38
  # Cached access function to avoid global statements.
47
39
  _YAML_CACHE: dict[str, Any] = {}
@@ -246,14 +238,17 @@ class File:
246
238
  ValueError
247
239
  If the extension is unknown or unsupported.
248
240
  """
249
- ext = self.path.suffix.lstrip('.').casefold()
250
- try:
251
- return _EXT_TO_FORMAT[ext]
252
- except KeyError as e:
241
+ fmt, compression = infer_file_format_and_compression(self.path)
242
+ if fmt is not None:
243
+ return fmt
244
+ if compression is not None:
253
245
  raise ValueError(
254
- 'Cannot infer file format from '
255
- f'extension {self.path.suffix!r}',
256
- ) from e
246
+ 'Cannot infer file format from compressed file '
247
+ f'{self.path!r} with compression {compression.value!r}',
248
+ )
249
+ raise ValueError(
250
+ f'Cannot infer file format from extension {self.path.suffix!r}',
251
+ )
257
252
 
258
253
  # -- Instance Methods (Generic API) -- #
259
254
 
etlplus-0.9.1.dist-info/METADATA → etlplus-0.10.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.9.1
3
+ Version: 0.10.1
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team
etlplus-0.9.1.dist-info/RECORD → etlplus-0.10.1.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
1
1
  etlplus/__init__.py,sha256=M2gScnyir6WOMAh_EuoQIiAzdcTls0_5hbd_Q6of8I0,1021
2
2
  etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
3
3
  etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
4
- etlplus/enums.py,sha256=kWDXOOhyYodhCxXDgQ_gP7000nO2i0kwpve8AUkr77k,11763
4
+ etlplus/enums.py,sha256=yW-Um1fDQbW9p70ooM8dDTH7D0XBgmT-l3bF66N0AQc,14801
5
5
  etlplus/extract.py,sha256=f44JdHhNTACxgn44USx05paKTwq7LQY-V4wANCW9hVM,6173
6
- etlplus/file.py,sha256=RxIAsGDN4f_vNA2B5-ct88JNd_ISAyYbooIRE5DstS8,17972
6
+ etlplus/file.py,sha256=B-zebTrIFDKaaKzA9Fq5-L0JwDNYa2T--_6veR3N03s,17939
7
7
  etlplus/load.py,sha256=R_y0_vtsEo1bwxWVQu2bfhB5ZIJoIoWu2ycCdvY4RnE,8737
8
8
  etlplus/mixins.py,sha256=ifGpHwWv7U00yqGf-kN93vJax2IiK4jaGtTsPsO3Oak,1350
9
9
  etlplus/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -57,9 +57,9 @@ etlplus/templates/ddl.sql.j2,sha256=s8fMWvcb4eaJVXkifuib1aQPljtZ8buuyB_uA-ZdU3Q,
57
57
  etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk,2180
58
58
  etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
59
59
  etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
60
- etlplus-0.9.1.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
61
- etlplus-0.9.1.dist-info/METADATA,sha256=FuV00vTR_UMB8DEjeUg9SWIxYU4EibsWmsCbra_1fmY,21035
62
- etlplus-0.9.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
63
- etlplus-0.9.1.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
64
- etlplus-0.9.1.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
65
- etlplus-0.9.1.dist-info/RECORD,,
60
+ etlplus-0.10.1.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
61
+ etlplus-0.10.1.dist-info/METADATA,sha256=vjSqzhO6_KFW2lXvQgHGFOJDxOx-r4dSVef664jr1mU,21036
62
+ etlplus-0.10.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
63
+ etlplus-0.10.1.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
64
+ etlplus-0.10.1.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
65
+ etlplus-0.10.1.dist-info/RECORD,,