dlt-iceberg 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dlt_iceberg/__init__.py +6 -0
- dlt_iceberg/adapter.py +276 -0
- dlt_iceberg/destination.py +117 -16
- dlt_iceberg/destination_client.py +455 -38
- dlt_iceberg/partition_builder.py +12 -6
- dlt_iceberg/schema_converter.py +4 -1
- dlt_iceberg/sql_client.py +222 -0
- dlt_iceberg-0.2.0.dist-info/METADATA +442 -0
- dlt_iceberg-0.2.0.dist-info/RECORD +14 -0
- {dlt_iceberg-0.1.3.dist-info → dlt_iceberg-0.2.0.dist-info}/WHEEL +1 -1
- dlt_iceberg-0.1.3.dist-info/METADATA +0 -279
- dlt_iceberg-0.1.3.dist-info/RECORD +0 -12
- {dlt_iceberg-0.1.3.dist-info → dlt_iceberg-0.2.0.dist-info}/licenses/LICENSE +0 -0
dlt_iceberg/destination_client.py
CHANGED

@@ -10,7 +10,7 @@ import time
 import threading
 from collections import defaultdict
 from pathlib import Path
-from typing import Dict, List, Optional, Iterable, Tuple, Type
+from typing import Any, Dict, List, Optional, Iterable, Tuple, Type
 from types import TracebackType

 import pyarrow as pa
@@ -22,7 +22,10 @@ from dlt.common.destination.client import (
     LoadJob,
     RunnableLoadJob,
     DestinationClientConfiguration,
+    SupportsOpenTables,
 )
+from dlt.common.schema.typing import TTableFormat
+from dlt.destinations.sql_client import WithSqlClient, SqlClientBase
 from dlt.common.schema import Schema, TTableSchema
 from dlt.common.schema.typing import TTableSchema as PreparedTableSchema
 from pyiceberg.catalog import load_catalog
@@ -94,7 +97,19 @@ class IcebergRestConfiguration(DestinationClientConfiguration):
     strict_casting: bool = False

     # Merge batch size (for upsert operations to avoid memory issues)
-    merge_batch_size: int =
+    merge_batch_size: int = 500000
+
+    # Table location layout - controls directory structure for table files
+    # Supports patterns: {namespace}, {dataset_name}, {table_name}
+    # Example: "{namespace}/{table_name}" or "warehouse/{dataset_name}/{table_name}"
+    table_location_layout: Optional[str] = None
+
+    # Register tables found in storage but missing from catalog (backward compatibility)
+    register_new_tables: bool = False
+
+    # Hard delete column name - rows with this column set will be deleted during merge
+    # Set to None to disable hard delete
+    hard_delete_column: Optional[str] = "_dlt_deleted_at"


 class IcebergRestLoadJob(RunnableLoadJob):
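As an aside on the new configuration fields: a minimal sketch of how they might be passed to the destination factory. The import path and the kwargs-forwarding behavior are assumptions; only the field names and defaults come from IcebergRestConfiguration above.

    # hypothetical wiring; field names taken from the config class above
    from dlt_iceberg import iceberg_rest_class_based  # import path assumed

    dest = iceberg_rest_class_based(
        table_location_layout="{namespace}/{table_name}",  # relative layouts get the warehouse prepended
        register_new_tables=True,                          # pick up tables already present in storage
        hard_delete_column="_dlt_deleted_at",              # rows with this column set are deleted during merge
        merge_batch_size=500_000,
    )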
@@ -144,11 +159,12 @@ class IcebergRestLoadJob(RunnableLoadJob):
             raise


-class IcebergRestClient(JobClientBase):
+class IcebergRestClient(JobClientBase, WithSqlClient, SupportsOpenTables):
     """
     Class-based Iceberg REST destination with atomic multi-file commits.

     Accumulates files during load and commits them atomically in complete_load().
+    Implements WithSqlClient and SupportsOpenTables for pipeline.dataset() support.
     """

     def __init__(
@@ -162,6 +178,77 @@ class IcebergRestClient(JobClientBase):

         # Catalog instance (created lazily)
         self._catalog = None
+        # SQL client instance (created lazily)
+        self._sql_client = None
+
+    # ---- WithSqlClient interface ----
+
+    @property
+    def sql_client(self) -> SqlClientBase:
+        """Get or create the DuckDB SQL client for dataset access."""
+        if self._sql_client is None:
+            from .sql_client import IcebergSqlClient
+            self._sql_client = IcebergSqlClient(
+                remote_client=self,
+                dataset_name=self.config.namespace,
+            )
+        return self._sql_client
+
+    @property
+    def sql_client_class(self) -> Type[SqlClientBase]:
+        """Return the SQL client class."""
+        from .sql_client import IcebergSqlClient
+        return IcebergSqlClient
+
+    # ---- SupportsOpenTables interface ----
+
+    def get_open_table_catalog(self, table_format: TTableFormat, catalog_name: str = None) -> Any:
+        """Get the PyIceberg catalog for accessing table metadata."""
+        if table_format != "iceberg":
+            raise ValueError(f"Unsupported table format: {table_format}")
+        return self._get_catalog()
+
+    def get_open_table_location(self, table_format: TTableFormat, table_name: str) -> str:
+        """Get the storage location for an Iceberg table."""
+        if table_format != "iceberg":
+            raise ValueError(f"Unsupported table format: {table_format}")
+
+        # Try to get location from catalog
+        try:
+            catalog = self._get_catalog()
+            identifier = f"{self.config.namespace}.{table_name}"
+            iceberg_table = catalog.load_table(identifier)
+            return iceberg_table.location()
+        except NoSuchTableError:
+            # Table doesn't exist yet, compute expected location
+            location = self._get_table_location(table_name)
+            if location:
+                return location
+            # Fallback to default warehouse location
+            warehouse = self.config.warehouse or ""
+            if warehouse and not warehouse.endswith("/"):
+                warehouse += "/"
+            return f"{warehouse}{self.config.namespace}/{table_name}"
+
+    def load_open_table(self, table_format: TTableFormat, table_name: str, **kwargs: Any) -> Any:
+        """Load and return a PyIceberg Table object."""
+        if table_format != "iceberg":
+            raise ValueError(f"Unsupported table format: {table_format}")
+
+        from dlt.common.destination.exceptions import DestinationUndefinedEntity
+
+        catalog = self._get_catalog()
+        identifier = f"{self.config.namespace}.{table_name}"
+
+        try:
+            return catalog.load_table(identifier)
+        except NoSuchTableError as e:
+            raise DestinationUndefinedEntity(table_name) from e
+
+    def is_open_table(self, table_format: TTableFormat, table_name: str) -> bool:
+        """Check if a table uses the specified open table format."""
+        # All tables in this destination are Iceberg tables
+        return table_format == "iceberg"

     def _get_catalog(self):
         """Get or create catalog connection."""
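A sketch of what the two interfaces enable from the pipeline side, assuming a pipeline already configured against this destination; "pipeline" and "my_table" are placeholders and the accessor names reflect typical dlt usage rather than code from this package.

    dataset = pipeline.dataset()              # served by the DuckDB-backed sql_client
    df = dataset["my_table"].df()

    client = pipeline.destination_client()
    tbl = client.load_open_table("iceberg", "my_table")  # PyIceberg Table via SupportsOpenTables
    print(tbl.location())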
@@ -214,6 +301,123 @@ class IcebergRestClient(JobClientBase):
         self._catalog = load_catalog("dlt_catalog", **catalog_config)
         return self._catalog

+    def _get_table_location(self, table_name: str) -> Optional[str]:
+        """
+        Get the table location based on table_location_layout configuration.
+
+        Args:
+            table_name: Name of the table
+
+        Returns:
+            Table location string or None to use catalog default
+        """
+        if not self.config.table_location_layout:
+            return None
+
+        # Get warehouse base from config
+        warehouse = self.config.warehouse or ""
+
+        # Build location from layout pattern
+        location = self.config.table_location_layout.format(
+            namespace=self.config.namespace,
+            dataset_name=self.config.namespace,  # In dlt, dataset_name maps to namespace
+            table_name=table_name,
+        )
+
+        # If layout is relative (doesn't start with protocol), prepend warehouse
+        if not location.startswith(("s3://", "gs://", "az://", "file://", "hdfs://")):
+            # Ensure warehouse ends with / for proper joining
+            if warehouse and not warehouse.endswith("/"):
+                warehouse += "/"
+            location = f"{warehouse}{location}"
+
+        return location
+
+    def _register_tables_from_storage(self, catalog, namespace: str) -> None:
+        """
+        Register tables found in storage but missing from catalog.
+
+        Scans the warehouse directory for Iceberg metadata files and registers
+        any tables not already in the catalog. This provides backward compatibility
+        when tables exist in storage but haven't been registered.
+        """
+        if not self.config.register_new_tables:
+            return
+
+        if not self.config.warehouse:
+            logger.warning("Cannot register tables: no warehouse configured")
+            return
+
+        import os
+        from urllib.parse import urlparse
+
+        warehouse = self.config.warehouse
+
+        # Only support local filesystem for now
+        parsed = urlparse(warehouse)
+        if parsed.scheme and parsed.scheme != "file":
+            logger.info(
+                f"register_new_tables only supported for local filesystem, "
+                f"skipping for {parsed.scheme}"
+            )
+            return
+
+        # Get local path
+        local_path = parsed.path if parsed.scheme == "file" else warehouse
+        namespace_path = os.path.join(local_path, namespace)
+
+        if not os.path.exists(namespace_path):
+            logger.info(f"Namespace path {namespace_path} doesn't exist, nothing to register")
+            return
+
+        # Get existing tables in catalog
+        try:
+            existing_tables = {t[1] for t in catalog.list_tables(namespace)}
+        except NoSuchNamespaceError:
+            existing_tables = set()
+
+        # Scan for table directories with metadata
+        registered_count = 0
+        for item in os.listdir(namespace_path):
+            table_path = os.path.join(namespace_path, item)
+            if not os.path.isdir(table_path):
+                continue
+
+            # Check if it's an Iceberg table (has metadata directory)
+            metadata_path = os.path.join(table_path, "metadata")
+            if not os.path.exists(metadata_path):
+                continue
+
+            table_name = item
+            if table_name in existing_tables:
+                continue
+
+            # Find latest metadata file
+            metadata_files = [
+                f for f in os.listdir(metadata_path)
+                if f.endswith(".metadata.json")
+            ]
+            if not metadata_files:
+                continue
+
+            # Sort to get latest (by version number in filename)
+            metadata_files.sort(reverse=True)
+            latest_metadata = os.path.join(metadata_path, metadata_files[0])
+
+            try:
+                identifier = f"{namespace}.{table_name}"
+                catalog.register_table(
+                    identifier=identifier,
+                    metadata_location=f"file://{latest_metadata}",
+                )
+                logger.info(f"Registered table {identifier} from storage")
+                registered_count += 1
+            except Exception as e:
+                logger.warning(f"Failed to register table {table_name}: {e}")
+
+        if registered_count > 0:
+            logger.info(f"Registered {registered_count} tables from storage")
+
     def initialize_storage(self, truncate_tables: Optional[Iterable[str]] = None) -> None:
         """Create Iceberg namespace if it doesn't exist."""
         catalog = self._get_catalog()
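To illustrate the layout resolution in _get_table_location with made-up values (warehouse "s3://lake/wh", namespace "analytics"):

    # table_location_layout = "{namespace}/{table_name}"
    # _get_table_location("events") -> "analytics/events"               (after .format())
    #                               -> "s3://lake/wh/analytics/events"  (warehouse prepended, layout has no scheme)
    # a layout that already carries a scheme, e.g. "gs://other-bucket/{table_name}", is returned as-is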
@@ -230,6 +434,9 @@ class IcebergRestClient(JobClientBase):
             logger.error(f"Failed to initialize storage: {e}")
             raise

+        # Register tables from storage if enabled
+        self._register_tables_from_storage(catalog, namespace)
+
         # Handle truncation if requested
         if truncate_tables:
             for table_name in truncate_tables:
@@ -351,6 +558,168 @@ class IcebergRestClient(JobClientBase):

         logger.info(f"Load {load_id} completed successfully")

+    def _get_merge_strategy(self, table_schema: TTableSchema) -> str:
+        """Extract merge strategy from table schema.
+
+        write_disposition can be:
+        - "merge" (string) -> use upsert (backward compatible)
+        - {"disposition": "merge", "strategy": "delete-insert"} -> explicit strategy
+
+        Returns:
+            Merge strategy: "upsert" or "delete-insert"
+        """
+        write_disposition = table_schema.get("write_disposition", "append")
+
+        if isinstance(write_disposition, dict):
+            return write_disposition.get("strategy", "delete-insert")
+
+        # String "merge" - use upsert as our default (backward compatible)
+        return "upsert"
+
+    def _execute_delete_insert(
+        self,
+        iceberg_table,
+        combined_table: pa.Table,
+        primary_keys: List[str],
+        identifier: str,
+        hard_delete_filter=None,
+    ) -> Tuple[int, int, int]:
+        """Execute delete-insert merge strategy with optional hard deletes.
+
+        Deletes rows matching primary keys in incoming data, then appends new data.
+        Uses PyIceberg transaction for atomic hard-delete + delete + append.
+
+        Args:
+            iceberg_table: PyIceberg table object
+            combined_table: Arrow table with data to merge
+            primary_keys: List of primary key column names
+            identifier: Table identifier for logging
+            hard_delete_filter: Optional filter for hard deletes (rows to permanently remove)
+
+        Returns:
+            Tuple of (rows_deleted_estimate, rows_inserted, hard_deleted)
+        """
+        from pyiceberg.expressions import In, And, EqualTo, Or
+
+        # Build delete filter from primary key values in incoming data
+        if len(primary_keys) == 1:
+            pk_col = primary_keys[0]
+            pk_values = combined_table.column(pk_col).to_pylist()
+            # Deduplicate values
+            unique_pk_values = list(set(pk_values))
+            delete_filter = In(pk_col, unique_pk_values)
+            deleted_estimate = len(unique_pk_values)
+        else:
+            # Composite primary key - build OR of AND conditions
+            pk_tuples = set()
+            for i in range(len(combined_table)):
+                pk_tuple = tuple(
+                    combined_table.column(pk).to_pylist()[i] for pk in primary_keys
+                )
+                pk_tuples.add(pk_tuple)
+
+            conditions = []
+            for pk_tuple in pk_tuples:
+                and_conditions = [
+                    EqualTo(pk, val) for pk, val in zip(primary_keys, pk_tuple)
+                ]
+                if len(and_conditions) == 1:
+                    conditions.append(and_conditions[0])
+                else:
+                    conditions.append(And(*and_conditions))
+
+            if len(conditions) == 1:
+                delete_filter = conditions[0]
+            else:
+                delete_filter = Or(*conditions)
+            deleted_estimate = len(pk_tuples)
+
+        logger.info(
+            f"Delete-insert for {identifier}: deleting up to {deleted_estimate} "
+            f"matching rows, inserting {len(combined_table)} rows"
+        )
+
+        # Execute atomic hard-delete + delete + append using single transaction
+        with iceberg_table.transaction() as txn:
+            # Hard deletes first (permanent removal)
+            if hard_delete_filter is not None:
+                txn.delete(hard_delete_filter)
+            # Then delete-insert for merge
+            txn.delete(delete_filter)
+            txn.append(combined_table)
+
+        return (deleted_estimate, len(combined_table), 1 if hard_delete_filter else 0)
+
+    def _prepare_hard_deletes(
+        self,
+        combined_table: pa.Table,
+        primary_keys: List[str],
+    ) -> Tuple[pa.Table, Optional[Any], int]:
+        """
+        Prepare hard deletes from incoming data (does not execute).
+
+        Rows with the hard_delete_column set (non-null) will be deleted.
+        Returns the filter expression to use in a transaction.
+
+        Args:
+            combined_table: Arrow table with data including possible delete markers
+            primary_keys: List of primary key column names
+
+        Returns:
+            Tuple of (remaining_rows, delete_filter_or_none, num_to_delete)
+        """
+        hard_delete_col = self.config.hard_delete_column
+
+        # Check if hard delete column exists in data
+        if not hard_delete_col or hard_delete_col not in combined_table.column_names:
+            return combined_table, None, 0
+
+        from pyiceberg.expressions import In, And, EqualTo, Or
+        import pyarrow.compute as pc
+
+        # Get the delete marker column
+        delete_col = combined_table.column(hard_delete_col)
+
+        # Find rows marked for deletion (non-null values)
+        delete_mask = pc.is_valid(delete_col)
+        rows_to_delete = combined_table.filter(delete_mask)
+        rows_to_keep = combined_table.filter(pc.invert(delete_mask))
+
+        if len(rows_to_delete) == 0:
+            return rows_to_keep, None, 0
+
+        # Build delete filter from primary keys of rows to delete
+        if len(primary_keys) == 1:
+            pk_col = primary_keys[0]
+            pk_values = rows_to_delete.column(pk_col).to_pylist()
+            unique_pk_values = list(set(pk_values))
+            delete_filter = In(pk_col, unique_pk_values)
+        else:
+            # Composite primary key
+            pk_tuples = set()
+            for i in range(len(rows_to_delete)):
+                pk_tuple = tuple(
+                    rows_to_delete.column(pk).to_pylist()[i] for pk in primary_keys
+                )
+                pk_tuples.add(pk_tuple)
+
+            conditions = []
+            for pk_tuple in pk_tuples:
+                and_conditions = [
+                    EqualTo(pk, val) for pk, val in zip(primary_keys, pk_tuple)
+                ]
+                if len(and_conditions) == 1:
+                    conditions.append(and_conditions[0])
+                else:
+                    conditions.append(And(*and_conditions))
+
+            if len(conditions) == 1:
+                delete_filter = conditions[0]
+            else:
+                delete_filter = Or(*conditions)
+
+        return rows_to_keep, delete_filter, len(rows_to_delete)
+
     def _commit_table_files(
         self,
         catalog,
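For reference, the delete filters built above reduce to PyIceberg expressions like these (keys and values are made up for illustration):

    from pyiceberg.expressions import And, EqualTo, In, Or

    # single primary key "id" with incoming ids {1, 2, 3}
    In("id", [1, 2, 3])

    # composite key ("customer_id", "order_id") with two incoming rows
    Or(
        And(EqualTo("customer_id", 1), EqualTo("order_id", 10)),
        And(EqualTo("customer_id", 2), EqualTo("order_id", 20)),
    )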
@@ -401,11 +770,19 @@ class IcebergRestClient(JobClientBase):

             # Create table
             logger.info(f"Creating table {identifier}")
-
-
-
-
-
+
+            # Get custom location if configured
+            table_location = self._get_table_location(table_name)
+            create_kwargs = {
+                "identifier": identifier,
+                "schema": iceberg_schema,
+                "partition_spec": partition_spec,
+            }
+            if table_location:
+                create_kwargs["location"] = table_location
+                logger.info(f"Using custom location: {table_location}")
+
+            iceberg_table = catalog.create_table(**create_kwargs)
             logger.info(f"Created table {identifier} at {iceberg_table.location()}")
         else:
             # Table exists - check if schema evolution is needed
@@ -449,13 +826,18 @@ class IcebergRestClient(JobClientBase):
         )

         # ATOMIC COMMIT: Write all data in one transaction
-
+        # Handle both string and dict write_disposition
+        disposition_type = write_disposition
+        if isinstance(write_disposition, dict):
+            disposition_type = write_disposition.get("disposition", "append")
+
+        if disposition_type == "replace":
             logger.info(f"Overwriting table {identifier}")
             iceberg_table.overwrite(combined_table)
-        elif
+        elif disposition_type == "append":
             logger.info(f"Appending to table {identifier}")
             iceberg_table.append(combined_table)
-        elif
+        elif disposition_type == "merge":
             # Get primary keys
             primary_keys = table_schema.get("primary_key") or table_schema.get("x-merge-keys")

@@ -473,38 +855,73 @@ class IcebergRestClient(JobClientBase):
                 )
                 iceberg_table.append(combined_table)
             else:
-
-
-
-
-
-
-
-
-
-
+                # Prepare hard deletes (rows marked for deletion)
+                remaining_rows, hard_delete_filter, num_hard_deletes = self._prepare_hard_deletes(
+                    combined_table, primary_keys
+                )
+                if num_hard_deletes > 0:
+                    logger.info(f"Prepared {num_hard_deletes} rows for hard delete")
+
+                # If all rows were hard deletes, just execute the delete
+                if len(remaining_rows) == 0:
+                    if hard_delete_filter is not None:
+                        iceberg_table.delete(hard_delete_filter)
+                        logger.info(f"Executed {num_hard_deletes} hard deletes (no merge needed)")
+                    return
+
+                # Get merge strategy
+                merge_strategy = self._get_merge_strategy(table_schema)
+                logger.info(
+                    f"Merging into table {identifier} on keys {primary_keys} "
+                    f"using strategy: {merge_strategy}"
+                )

+                if merge_strategy == "delete-insert":
+                    # Atomic hard-delete + delete + insert in single transaction
+                    deleted, inserted, _ = self._execute_delete_insert(
+                        iceberg_table, remaining_rows, primary_keys, identifier,
+                        hard_delete_filter=hard_delete_filter
+                    )
                     logger.info(
-                        f"
-                        f"
+                        f"Delete-insert completed: ~{deleted} deleted, "
+                        f"{inserted} inserted"
                     )
+                else:
+                    # Default: upsert strategy
+                    # Execute hard deletes first (separate transaction since upsert is atomic)
+                    if hard_delete_filter is not None:
+                        iceberg_table.delete(hard_delete_filter)
+                        logger.info(f"Executed {num_hard_deletes} hard deletes before upsert")
+
+                    batch_size = self.config.merge_batch_size
+                    total_updated = 0
+                    total_inserted = 0
+
+                    for batch_start in range(0, len(remaining_rows), batch_size):
+                        batch_end = min(batch_start + batch_size, len(remaining_rows))
+                        batch = remaining_rows.slice(batch_start, batch_end - batch_start)
+
+                        logger.info(
+                            f"Upserting batch {batch_start//batch_size + 1}: "
+                            f"rows {batch_start} to {batch_end} ({len(batch)} rows)"
+                        )
+
+                        upsert_result = iceberg_table.upsert(
+                            df=batch,
+                            join_cols=primary_keys,
+                            when_matched_update_all=True,
+                            when_not_matched_insert_all=True,
+                        )
+
+                        total_updated += upsert_result.rows_updated
+                        total_inserted += upsert_result.rows_inserted

-
-
-
-                        when_matched_update_all=True,
-                        when_not_matched_insert_all=True,
+                    logger.info(
+                        f"Upsert completed: {total_updated} updated, "
+                        f"{total_inserted} inserted across {(len(remaining_rows) + batch_size - 1) // batch_size} batches"
                     )
-
-                    total_updated += upsert_result.rows_updated
-                    total_inserted += upsert_result.rows_inserted
-
-                    logger.info(
-                        f"Upsert completed: {total_updated} updated, "
-                        f"{total_inserted} inserted across {(total_rows + batch_size - 1) // batch_size} batches"
-                    )
         else:
-            raise ValueError(f"Unknown write disposition: {
+            raise ValueError(f"Unknown write disposition: {disposition_type}")

         logger.info(
             f"Successfully committed {len(file_data)} files "
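On the pipeline side, the dict form of write_disposition handled by this dispatch is declared on a dlt resource; a minimal sketch (resource name and rows are placeholders):

    import dlt

    @dlt.resource(
        primary_key="id",
        write_disposition={"disposition": "merge", "strategy": "delete-insert"},
    )
    def users():
        yield [{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}]

    # plain write_disposition="merge" keeps the backward-compatible upsert path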
@@ -600,7 +1017,7 @@ class iceberg_rest_class_based(Destination[IcebergRestConfiguration, "IcebergRes
         caps.supported_staging_file_formats = []

         # Merge strategies (we handle upsert ourselves)
-        caps.supported_merge_strategies = ["upsert"]
+        caps.supported_merge_strategies = ["delete-insert", "upsert"]

         # Replace strategies
         caps.supported_replace_strategies = ["truncate-and-insert", "insert-from-staging"]
dlt_iceberg/partition_builder.py
CHANGED

@@ -35,6 +35,7 @@ from pyiceberg.transforms import (
 )
 from pyiceberg.types import (
     TimestampType,
+    TimestamptzType,
     DateType,
     StringType,
     IntegerType,
@@ -92,14 +93,14 @@ def validate_transform_for_type(
     Raises:
         ValueError: If transform is invalid for the field type
     """
-    # Temporal transforms only for timestamp/date
+    # Temporal transforms only for timestamp/timestamptz/date
    temporal_transforms = {"year", "month", "day", "hour"}
    if transform_type in temporal_transforms:
-        if not isinstance(field_type, (TimestampType, DateType)):
+        if not isinstance(field_type, (TimestampType, TimestamptzType, DateType)):
            raise ValueError(
                f"Temporal transform '{transform_type}' cannot be applied to "
                f"column '{col_name}' with type {field_type}. "
-                f"Use timestamp or date types for temporal transforms."
+                f"Use timestamp, timestamptz, or date types for temporal transforms."
            )

    # Bucket transform validation
@@ -181,16 +182,21 @@ def build_partition_spec(
             continue

         # Choose transform based on data type
+        col_hints = dlt_columns.get(col_name, {})
         transform = choose_partition_transform(
-            iceberg_field.field_type, col_name,
+            iceberg_field.field_type, col_name, col_hints
         )

+        # Get custom partition field name or generate default
+        custom_name = col_hints.get("x-partition-name") or col_hints.get("partition_name")
+        partition_name = custom_name or f"{col_name}_{get_transform_name(transform)}"
+
         # Create partition field
         partition_field = PartitionField(
             source_id=iceberg_field.field_id,
             field_id=1000 + len(partition_fields),  # Start partition IDs at 1000
             transform=transform,
-            name=
+            name=partition_name,
         )
         partition_fields.append(partition_field)

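The custom partition name is read from dlt column hints; how those hints are attached is up to the adapter (see adapter.py in the file list), so the dict below is only an assumed shape of dlt_columns as consumed by build_partition_spec:

    # hypothetical column hints; only the "x-partition-name" key is taken from the code above
    dlt_columns = {
        "created_at": {
            "data_type": "timestamp",
            "x-partition-name": "created_month",  # overrides the default "<column>_<transform>" field name
        }
    }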
@@ -266,7 +272,7 @@ def choose_partition_transform(field_type, col_name: str, col_hints: dict):
         )

     # No hint specified - use defaults based on type
-    if isinstance(field_type, (TimestampType, DateType)):
+    if isinstance(field_type, (TimestampType, TimestamptzType, DateType)):
         # Default to month for timestamps/dates
         return MonthTransform()
     elif isinstance(field_type, (StringType, IntegerType, LongType)):
dlt_iceberg/schema_converter.py
CHANGED

@@ -18,6 +18,7 @@ from pyiceberg.types import (
     StringType,
     BinaryType,
     TimestampType,
+    TimestamptzType,
     DateType,
     TimeType,
     ListType,
@@ -125,6 +126,8 @@ def convert_arrow_to_iceberg_type(arrow_type: pa.DataType):

     # Temporal types
     elif pa.types.is_timestamp(arrow_type):
+        if arrow_type.tz is not None:
+            return TimestamptzType()
         return TimestampType()
     elif pa.types.is_date(arrow_type):
         return DateType()
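A quick check of the new timezone branch (assuming convert_arrow_to_iceberg_type is importable from dlt_iceberg.schema_converter):

    import pyarrow as pa
    from dlt_iceberg.schema_converter import convert_arrow_to_iceberg_type

    convert_arrow_to_iceberg_type(pa.timestamp("us", tz="UTC"))  # -> TimestamptzType()
    convert_arrow_to_iceberg_type(pa.timestamp("us"))            # -> TimestampType()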
@@ -197,7 +200,7 @@ def convert_dlt_type_to_iceberg_type(dlt_type: str):
         "bool": BooleanType(),
         "boolean": BooleanType(),
         "timestamp": TimestampType(),
-        "timestamptz":
+        "timestamptz": TimestamptzType(),
         "date": DateType(),
         "time": TimeType(),
         "binary": BinaryType(),