PyPI - etlplus - Versions diffs - 0.9.0__py3-none-any.whl → 0.10.4__py3-none-any.whl - Mend

etlplus 0.9.0__py3-none-any.whl → 0.10.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

etlplus/cli/commands.py CHANGED Viewed

@@ -443,9 +443,9 @@ def extract_cmd(
         Source (JSON payload, file/folder path, URL/URI, or - for STDIN)
         from which to extract data. Default is ``-``.
     source_format : SourceFormatOption, optional
-        Source data format. Overrides the inferred format (``csv``, ``json``,
-        ``parquet``, ``xml``) based on filename extension or STDIN content.
-        Default is ``None``.
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     source_type : SourceTypeOption, optional
         Data source type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -523,15 +523,15 @@ def load_cmd(
     ctx : typer.Context
         The Typer context.
     source_format : SourceFormatOption, optional
-        Source data format. Overrides the inferred format (``csv``, ``json``,
-        ``parquet``, ``xml``) based on STDIN content. Default is ``None``.
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     target : TargetArg, optional
         Target (file/folder path, URL/URI, or - for STDOUT) into which to load
         data. Default is ``-``.
     target_format : TargetFormatOption, optional
-        Format of the target data. Overrides the inferred format (``csv``,
-        ``json``, ``parquet``, ``xml``) based on filename extension. Default is
-        ``None``.
+        Target data format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension. Default is ``None``.
     target_type : TargetTypeOption, optional
         Data target type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -760,9 +760,9 @@ def transform_cmd(
         Source (JSON payload, file/folder path, URL/URI, or - for STDIN) from
         which to extract data. Default is ``-``.
     source_format : SourceFormatOption, optional
-        Source data format. Overrides the inferred format (``csv``, ``json``,
-        ``parquet``, ``xml``) based on filename extension or STDIN content.
-        Default is ``None``.
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     source_type : SourceTypeOption, optional
         Data source type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -770,9 +770,8 @@ def transform_cmd(
         Target (file/folder path, URL/URI, or - for STDOUT) into which to load
         data. Default is ``-``.
     target_format : TargetFormatOption, optional
-        Format of the target data. Overrides the inferred format (``csv``,
-        ``json``, ``parquet``, ``xml``) based on filename extension. Default is
-        ``None``.
+        Target data format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension. Default is ``None``.
     target_type : TargetTypeOption, optional
         Data target type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -876,11 +875,12 @@ def validate_cmd(
     source : SourceArg
         Data source to validate (path, JSON payload, or - for STDIN).
     source_format : SourceFormatOption, optional
-        Format of the source. Overrides filename-based inference when provided.
-        Default is ``None``.
-    source_type : SourceTypeOption, optional
-        Override the inferred source type (file, database, api). Default is
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
         ``None``.
+    source_type : SourceTypeOption, optional
+        Data source type. Overrides the inferred type (``api``, ``database``,
+        ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
     output : OutputOption, optional
         Output file for validated output (- for STDOUT). Default is ``None``.

etlplus/enums.py CHANGED Viewed

@@ -8,6 +8,7 @@ from __future__ import annotations
 import enum
 import operator as _op
+from pathlib import PurePath
 from statistics import fmean
 from typing import Self
@@ -19,16 +20,21 @@ from .types import StrStrMap
 __all__ = [
+    # Enums
     'AggregateName',
     'CoercibleStrEnum',
+    'CompressionFormat',
     'DataConnectorType',
     'FileFormat',
     'HttpMethod',
     'OperatorName',
     'PipelineStep',
+    # Functions
+    'coerce_compression_format',
     'coerce_data_connector_type',
     'coerce_file_format',
     'coerce_http_method',
+    'infer_file_format_and_compression',
 ]
@@ -172,6 +178,39 @@ class AggregateName(CoercibleStrEnum):
         return lambda xs, n: (fmean(xs) if xs else 0.0)
+class CompressionFormat(CoercibleStrEnum):
+    """Supported compression formats for data files."""
+    # -- Constants -- #
+    GZ = 'gz'
+    ZIP = 'zip'
+    # -- Class Methods -- #
+    @classmethod
+    def aliases(cls) -> StrStrMap:
+        """
+        Return a mapping of common aliases for each enum member.
+        Returns
+        -------
+        StrStrMap
+            A mapping of alias names to their corresponding enum member names.
+        """
+        return {
+            # File extensions
+            '.gz': 'gz',
+            '.gzip': 'gz',
+            '.zip': 'zip',
+            # MIME types
+            'application/gzip': 'gz',
+            'application/x-gzip': 'gz',
+            'application/zip': 'zip',
+            'application/x-zip-compressed': 'zip',
+        }
 class DataConnectorType(CoercibleStrEnum):
     """Supported data connector types."""
@@ -208,8 +247,19 @@ class FileFormat(CoercibleStrEnum):
     # -- Constants -- #
+    AVRO = 'avro'
     CSV = 'csv'
+    FEATHER = 'feather'
+    GZ = 'gz'
     JSON = 'json'
+    NDJSON = 'ndjson'
+    ORC = 'orc'
+    PARQUET = 'parquet'
+    TSV = 'tsv'
+    TXT = 'txt'
+    XLS = 'xls'
+    XLSX = 'xlsx'
+    ZIP = 'zip'
     XML = 'xml'
     YAML = 'yaml'
@@ -227,11 +277,61 @@ class FileFormat(CoercibleStrEnum):
         """
         return {
             # Common shorthand
+            'parq': 'parquet',
             'yml': 'yaml',
+            # File extensions
+            '.avro': 'avro',
+            '.csv': 'csv',
+            '.feather': 'feather',
+            '.gz': 'gz',
+            '.json': 'json',
+            '.jsonl': 'ndjson',
+            '.ndjson': 'ndjson',
+            '.orc': 'orc',
+            '.parquet': 'parquet',
+            '.pq': 'parquet',
+            '.tsv': 'tsv',
+            '.txt': 'txt',
+            '.xls': 'xls',
+            '.xlsx': 'xlsx',
+            '.zip': 'zip',
+            '.xml': 'xml',
+            '.yaml': 'yaml',
+            '.yml': 'yaml',
             # MIME types
-            'text/csv': 'csv',
+            'application/avro': 'avro',
+            'application/csv': 'csv',
+            'application/feather': 'feather',
+            'application/gzip': 'gz',
             'application/json': 'json',
+            'application/jsonlines': 'ndjson',
+            'application/ndjson': 'ndjson',
+            'application/orc': 'orc',
+            'application/parquet': 'parquet',
+            'application/vnd.apache.avro': 'avro',
+            'application/vnd.apache.parquet': 'parquet',
+            'application/vnd.apache.arrow.file': 'feather',
+            'application/vnd.apache.orc': 'orc',
+            'application/vnd.ms-excel': 'xls',
+            (
+                'application/vnd.openxmlformats-'
+                'officedocument.spreadsheetml.sheet'
+            ): 'xlsx',
+            'application/x-avro': 'avro',
+            'application/x-csv': 'csv',
+            'application/x-feather': 'feather',
+            'application/x-orc': 'orc',
+            'application/x-ndjson': 'ndjson',
+            'application/x-parquet': 'parquet',
+            'application/x-yaml': 'yaml',
             'application/xml': 'xml',
+            'application/zip': 'zip',
+            'text/csv': 'csv',
+            'text/plain': 'txt',
+            'text/tab-separated-values': 'tsv',
+            'text/tsv': 'tsv',
+            'text/xml': 'xml',
+            'text/yaml': 'yaml',
         }
@@ -365,6 +465,13 @@ class PipelineStep(CoercibleStrEnum):
 # SECTION: INTERNAL CONSTANTS ============================================== #
+# Compression formats that are also file formats.
+_COMPRESSION_FILE_FORMATS: set[FileFormat] = {
+    FileFormat.GZ,
+    FileFormat.ZIP,
+}
 # Precomputed order index for PipelineStep; avoids recomputing on each access.
 _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
     PipelineStep.FILTER: 0,
@@ -402,6 +509,18 @@ def coerce_file_format(
     return FileFormat.coerce(file_format)
+def coerce_compression_format(
+    compression_format: CompressionFormat | str,
+) -> CompressionFormat:
+    """
+    Normalize textual compression format values to :class:`CompressionFormat`.
+    This thin wrapper is kept for backward compatibility; prefer
+    :meth:`CompressionFormat.coerce` going forward.
+    """
+    return CompressionFormat.coerce(compression_format)
 def coerce_http_method(
     http_method: HttpMethod | str,
 ) -> HttpMethod:
@@ -412,3 +531,78 @@ def coerce_http_method(
     :meth:`HttpMethod.coerce` going forward.
     """
     return HttpMethod.coerce(http_method)
+def infer_file_format_and_compression(
+    value: object,
+    filename: object | None = None,
+) -> tuple[FileFormat | None, CompressionFormat | None]:
+    """
+    Infer data format and compression from a filename, extension, or MIME type.
+    Parameters
+    ----------
+    value : object
+        A filename, extension, MIME type, or existing enum member.
+    filename : object | None, optional
+        A filename to consult for extension-based inference (e.g. when
+        ``value`` is ``application/octet-stream``).
+    Returns
+    -------
+    tuple[FileFormat | None, CompressionFormat | None]
+        The inferred data format and compression, if any.
+    """
+    if isinstance(value, FileFormat):
+        if value in _COMPRESSION_FILE_FORMATS:
+            return None, CompressionFormat.coerce(value.value)
+        return value, None
+    if isinstance(value, CompressionFormat):
+        return None, value
+    text = str(value).strip()
+    if not text:
+        return None, None
+    normalized = text.casefold()
+    mime = normalized.split(';', 1)[0].strip()
+    is_octet_stream = mime == 'application/octet-stream'
+    compression = CompressionFormat.try_coerce(mime)
+    fmt = None if is_octet_stream else FileFormat.try_coerce(mime)
+    is_mime = mime.startswith(
+        (
+            'application/',
+            'text/',
+            'audio/',
+            'image/',
+            'video/',
+            'multipart/',
+        ),
+    )
+    suffix_source: object | None = filename if filename is not None else text
+    if is_mime and filename is None:
+        suffix_source = None
+    suffixes = (
+        PurePath(str(suffix_source)).suffixes
+        if suffix_source is not None
+        else []
+    )
+    if suffixes:
+        normalized_suffixes = [suffix.casefold() for suffix in suffixes]
+        compression = (
+            CompressionFormat.try_coerce(normalized_suffixes[-1])
+            or compression
+        )
+        if compression is not None:
+            normalized_suffixes = normalized_suffixes[:-1]
+        if normalized_suffixes:
+            fmt = FileFormat.try_coerce(normalized_suffixes[-1]) or fmt
+    if fmt in _COMPRESSION_FILE_FORMATS:
+        compression = compression or CompressionFormat.coerce(fmt.value)
+        fmt = None
+    return fmt, compression

etlplus/file.py CHANGED Viewed

@@ -16,6 +16,7 @@ from typing import Any
 from typing import cast
 from .enums import FileFormat
+from .enums import infer_file_format_and_compression
 from .types import JSONData
 from .types import JSONDict
 from .types import JSONList
@@ -33,15 +34,6 @@ __all__ = ['File']
 _DEFAULT_XML_ROOT = 'root'
-# Map common filename extensions to FileFormat (used for inference)
-_EXT_TO_FORMAT: dict[str, FileFormat] = {
-    'csv': FileFormat.CSV,
-    'json': FileFormat.JSON,
-    'xml': FileFormat.XML,
-    'yaml': FileFormat.YAML,
-    'yml': FileFormat.YAML,
-}
 # Optional YAML support (lazy-loaded to avoid hard dependency)
 # Cached access function to avoid global statements.
 _YAML_CACHE: dict[str, Any] = {}
@@ -246,14 +238,17 @@ class File:
         ValueError
             If the extension is unknown or unsupported.
         """
-        ext = self.path.suffix.lstrip('.').casefold()
-        try:
-            return _EXT_TO_FORMAT[ext]
-        except KeyError as e:
+        fmt, compression = infer_file_format_and_compression(self.path)
+        if fmt is not None:
+            return fmt
+        if compression is not None:
             raise ValueError(
-                'Cannot infer file format from '
-                f'extension {self.path.suffix!r}',
-            ) from e
+                'Cannot infer file format from compressed file '
+                f'{self.path!r} with compression {compression.value!r}',
+            )
+        raise ValueError(
+            f'Cannot infer file format from extension {self.path.suffix!r}',
+        )
     # -- Instance Methods (Generic API) -- #

{etlplus-0.9.0.dist-info → etlplus-0.10.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: etlplus
-Version: 0.9.0
+Version: 0.10.4
 Summary: A Swiss Army knife for simple ETL operations
 Home-page: https://github.com/Dagitali/ETLPlus
 Author: ETLPlus Team

{etlplus-0.9.0.dist-info → etlplus-0.10.4.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
 etlplus/__init__.py,sha256=M2gScnyir6WOMAh_EuoQIiAzdcTls0_5hbd_Q6of8I0,1021
 etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
 etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
-etlplus/enums.py,sha256=V_j18Ud2BCXpFsBk2pZGrvCVrvAMJ7uja1z9fppFGso,10175
+etlplus/enums.py,sha256=8hzprOLyeCCzlHaXpG4VfgmxPSEdlZeOnHLFzBneKNs,15969
 etlplus/extract.py,sha256=f44JdHhNTACxgn44USx05paKTwq7LQY-V4wANCW9hVM,6173
-etlplus/file.py,sha256=RxIAsGDN4f_vNA2B5-ct88JNd_ISAyYbooIRE5DstS8,17972
+etlplus/file.py,sha256=B-zebTrIFDKaaKzA9Fq5-L0JwDNYa2T--_6veR3N03s,17939
 etlplus/load.py,sha256=R_y0_vtsEo1bwxWVQu2bfhB5ZIJoIoWu2ycCdvY4RnE,8737
 etlplus/mixins.py,sha256=ifGpHwWv7U00yqGf-kN93vJax2IiK4jaGtTsPsO3Oak,1350
 etlplus/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -31,7 +31,7 @@ etlplus/api/rate_limiting/__init__.py,sha256=ZySB1dZettEDnWvI1EHf_TZ9L08M_kKsNR-
 etlplus/api/rate_limiting/config.py,sha256=2b4wIynblN-1EyMqI4aXa71SljzSjXYh5N1Nngr3jOg,9406
 etlplus/api/rate_limiting/rate_limiter.py,sha256=Uxozqd_Ej5Lsj-M-mLT2WexChgWh7x35_YP10yqYPQA,7159
 etlplus/cli/__init__.py,sha256=J97-Rv931IL1_b4AXnB7Fbbd7HKnHBpx18NQfC_kE6c,299
-etlplus/cli/commands.py,sha256=_nias9eSMZoTBiicXDNEkWLYfzd4-CcO2j_xPPxghls,24632
+etlplus/cli/commands.py,sha256=BK2qmFsser6AXOgEvpiadrYMIiwviAzqkSxMlBhRXRw,24670
 etlplus/cli/constants.py,sha256=KIZj7J2tNf5mJbkqAdZmu5FXYW2FQmxwgeOKWc3-3Hg,1944
 etlplus/cli/handlers.py,sha256=K0GazvrPgocJ-63HZqF0xhyJk8TB1Gcj-eIbWltXKRU,17759
 etlplus/cli/io.py,sha256=7sldiZz4-Geomge5IO_XYykXPa6UiORfUWzLCdQePG8,7846
@@ -57,9 +57,9 @@ etlplus/templates/ddl.sql.j2,sha256=s8fMWvcb4eaJVXkifuib1aQPljtZ8buuyB_uA-ZdU3Q,
 etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk,2180
 etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
 etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
-etlplus-0.9.0.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
-etlplus-0.9.0.dist-info/METADATA,sha256=ynMgjG7Wv_xkP0fBaAOj3-rpUgJHwZ7UOzCxoU8CBeE,21035
-etlplus-0.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-etlplus-0.9.0.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
-etlplus-0.9.0.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
-etlplus-0.9.0.dist-info/RECORD,,
+etlplus-0.10.4.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
+etlplus-0.10.4.dist-info/METADATA,sha256=M_lQUZ5o-JaD1KuZk_t0LeHbaOj_SdqqaJQSbDCW-zY,21036
+etlplus-0.10.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+etlplus-0.10.4.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
+etlplus-0.10.4.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
+etlplus-0.10.4.dist-info/RECORD,,

{etlplus-0.9.0.dist-info → etlplus-0.10.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{etlplus-0.9.0.dist-info → etlplus-0.10.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{etlplus-0.9.0.dist-info → etlplus-0.10.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{etlplus-0.9.0.dist-info → etlplus-0.10.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

etlplus 0.9.0__py3-none-any.whl → 0.10.4__py3-none-any.whl

etlplus 0.9.0__py3-none-any.whl → 0.10.4__py3-none-any.whl