deltacat 2.0.0b11__py3-none-any.whl → 2.0.0b12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. deltacat/__init__.py +78 -3
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/conftest.py +0 -18
  6. deltacat/catalog/__init__.py +2 -0
  7. deltacat/catalog/delegate.py +445 -63
  8. deltacat/catalog/interface.py +188 -62
  9. deltacat/catalog/main/impl.py +2417 -271
  10. deltacat/catalog/model/catalog.py +49 -10
  11. deltacat/catalog/model/properties.py +38 -0
  12. deltacat/compute/compactor/compaction_session.py +97 -75
  13. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  14. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  15. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  16. deltacat/compute/compactor/repartition_session.py +8 -21
  17. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  18. deltacat/compute/compactor/steps/materialize.py +9 -7
  19. deltacat/compute/compactor/steps/repartition.py +12 -11
  20. deltacat/compute/compactor/utils/io.py +6 -5
  21. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  22. deltacat/compute/compactor/utils/system_columns.py +3 -1
  23. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  24. deltacat/compute/compactor_v2/constants.py +30 -1
  25. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  26. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  27. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  28. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  29. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  30. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  31. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  32. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  33. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  34. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  35. deltacat/compute/compactor_v2/utils/io.py +11 -4
  36. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  37. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  38. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  39. deltacat/compute/converter/converter_session.py +145 -32
  40. deltacat/compute/converter/model/convert_input.py +26 -19
  41. deltacat/compute/converter/model/convert_input_files.py +33 -16
  42. deltacat/compute/converter/model/convert_result.py +35 -16
  43. deltacat/compute/converter/model/converter_session_params.py +24 -21
  44. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  45. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  46. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  47. deltacat/compute/converter/steps/convert.py +157 -50
  48. deltacat/compute/converter/steps/dedupe.py +24 -11
  49. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  50. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  51. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  52. deltacat/compute/converter/utils/io.py +101 -12
  53. deltacat/compute/converter/utils/s3u.py +33 -27
  54. deltacat/compute/janitor.py +205 -0
  55. deltacat/compute/jobs/client.py +19 -8
  56. deltacat/compute/resource_estimation/delta.py +38 -6
  57. deltacat/compute/resource_estimation/model.py +8 -0
  58. deltacat/constants.py +44 -0
  59. deltacat/docs/autogen/schema/__init__.py +0 -0
  60. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/examples/compactor/__init__.py +0 -0
  64. deltacat/examples/compactor/aws/__init__.py +1 -0
  65. deltacat/examples/compactor/bootstrap.py +863 -0
  66. deltacat/examples/compactor/compactor.py +373 -0
  67. deltacat/examples/compactor/explorer.py +473 -0
  68. deltacat/examples/compactor/gcp/__init__.py +1 -0
  69. deltacat/examples/compactor/job_runner.py +439 -0
  70. deltacat/examples/compactor/utils/__init__.py +1 -0
  71. deltacat/examples/compactor/utils/common.py +261 -0
  72. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  80. deltacat/exceptions.py +66 -4
  81. deltacat/experimental/catalog/iceberg/impl.py +2 -2
  82. deltacat/experimental/compatibility/__init__.py +0 -0
  83. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  84. deltacat/experimental/converter_agent/__init__.py +0 -0
  85. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  86. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  87. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  88. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
  89. deltacat/experimental/storage/iceberg/impl.py +5 -3
  90. deltacat/experimental/storage/iceberg/model.py +7 -3
  91. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  92. deltacat/experimental/storage/rivulet/dataset.py +0 -3
  93. deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
  94. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
  95. deltacat/io/datasource/deltacat_datasource.py +0 -1
  96. deltacat/storage/__init__.py +20 -2
  97. deltacat/storage/interface.py +54 -32
  98. deltacat/storage/main/impl.py +1494 -541
  99. deltacat/storage/model/delta.py +27 -3
  100. deltacat/storage/model/locator.py +6 -12
  101. deltacat/storage/model/manifest.py +182 -6
  102. deltacat/storage/model/metafile.py +151 -78
  103. deltacat/storage/model/namespace.py +8 -1
  104. deltacat/storage/model/partition.py +117 -42
  105. deltacat/storage/model/schema.py +2427 -159
  106. deltacat/storage/model/sort_key.py +40 -0
  107. deltacat/storage/model/stream.py +9 -2
  108. deltacat/storage/model/table.py +12 -1
  109. deltacat/storage/model/table_version.py +11 -0
  110. deltacat/storage/model/transaction.py +1184 -208
  111. deltacat/storage/model/transform.py +81 -2
  112. deltacat/storage/model/types.py +48 -26
  113. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  114. deltacat/tests/aws/test_s3u.py +2 -31
  115. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
  116. deltacat/tests/catalog/test_catalogs.py +54 -11
  117. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
  118. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  119. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  120. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  121. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  122. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  123. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  124. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  125. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  126. deltacat/tests/compute/conftest.py +8 -44
  127. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  128. deltacat/tests/compute/converter/utils.py +15 -6
  129. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  130. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  131. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  132. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  133. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  134. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  135. deltacat/tests/compute/test_janitor.py +236 -0
  136. deltacat/tests/compute/test_util_common.py +716 -43
  137. deltacat/tests/compute/test_util_constant.py +0 -1
  138. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  139. deltacat/tests/experimental/__init__.py +1 -0
  140. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  141. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  142. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  143. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  144. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  145. deltacat/tests/storage/model/test_schema.py +171 -0
  146. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  147. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  148. deltacat/tests/storage/model/test_transaction.py +393 -48
  149. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  150. deltacat/tests/test_deltacat_api.py +988 -4
  151. deltacat/tests/test_exceptions.py +9 -5
  152. deltacat/tests/test_utils/pyarrow.py +52 -21
  153. deltacat/tests/test_utils/storage.py +23 -34
  154. deltacat/tests/types/__init__.py +0 -0
  155. deltacat/tests/types/test_tables.py +104 -0
  156. deltacat/tests/utils/exceptions.py +22 -0
  157. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  158. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  159. deltacat/tests/utils/test_daft.py +121 -31
  160. deltacat/tests/utils/test_numpy.py +1193 -0
  161. deltacat/tests/utils/test_pandas.py +1106 -0
  162. deltacat/tests/utils/test_polars.py +1040 -0
  163. deltacat/tests/utils/test_pyarrow.py +1370 -89
  164. deltacat/types/media.py +221 -11
  165. deltacat/types/tables.py +2329 -59
  166. deltacat/utils/arguments.py +33 -1
  167. deltacat/utils/daft.py +411 -150
  168. deltacat/utils/filesystem.py +100 -0
  169. deltacat/utils/metafile_locator.py +2 -1
  170. deltacat/utils/numpy.py +118 -26
  171. deltacat/utils/pandas.py +577 -48
  172. deltacat/utils/polars.py +658 -27
  173. deltacat/utils/pyarrow.py +1258 -213
  174. deltacat/utils/ray_utils/dataset.py +101 -10
  175. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  176. deltacat/utils/url.py +56 -15
  177. deltacat-2.0.0b12.dist-info/METADATA +1163 -0
  178. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/RECORD +183 -145
  179. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
  180. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  181. deltacat/compute/merge_on_read/__init__.py +0 -4
  182. deltacat/compute/merge_on_read/daft.py +0 -40
  183. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  184. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  185. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  186. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  187. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  188. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  189. deltacat/utils/s3fs.py +0 -21
  190. deltacat-2.0.0b11.dist-info/METADATA +0 -67
  191. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  192. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  193. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
  194. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,50 @@
1
- from typing import Optional
1
+ import logging
2
+ from typing import Optional, Any, Set
2
3
 
3
4
  from pyiceberg.catalog import Catalog
4
- from deltacat.storage.model.scan.push_down import Pushdown
5
+ from pyiceberg.table import Table
6
+ import deltacat.logs as logs
7
+
8
+ from deltacat.storage.model.scan.push_down import Pushdown, PartitionFilter
5
9
  from deltacat.storage.model.scan.scan_plan import ScanPlan
6
10
  from deltacat.storage.model.scan.scan_task import FileScanTask, DataFile
7
11
  from deltacat.storage.util.scan_planner import ScanPlanner
8
12
  from deltacat.experimental.storage.iceberg.impl import _try_load_iceberg_table
13
+ from deltacat.experimental.storage.iceberg.visitor import IcebergExpressionVisitor
14
+
15
+ # Initialize DeltaCAT logger
16
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
9
17
 
10
18
 
11
19
  class IcebergScanPlanner(ScanPlanner):
12
20
  def __init__(self, catalog: Catalog):
13
21
  self.catalog = catalog
22
+ self.expression_visitor = IcebergExpressionVisitor()
23
+
24
+ @classmethod
25
+ def _collect_filter_fields(cls, expr: Any) -> Set[str]:
26
+ """
27
+ Collects all field names referenced in the filter expression.
28
+
29
+ Args:
30
+ expr: The expression to analyze
31
+
32
+ Returns:
33
+ Set of field names referenced in the expression
34
+ """
35
+ fields = set()
36
+ if hasattr(expr, "field"):
37
+ fields.add(expr.field)
38
+ if hasattr(expr, "left"):
39
+ fields.update(cls._collect_filter_fields(expr.left))
40
+ if hasattr(expr, "right"):
41
+ fields.update(cls._collect_filter_fields(expr.right))
42
+ if hasattr(expr, "expr"):
43
+ fields.update(cls._collect_filter_fields(expr.expr))
44
+ if hasattr(expr, "values"):
45
+ for value in expr.values:
46
+ fields.update(cls._collect_filter_fields(value))
47
+ return fields
14
48
 
15
49
  def create_scan_plan(
16
50
  self,
@@ -21,8 +55,75 @@ class IcebergScanPlanner(ScanPlanner):
21
55
  iceberg_table = _try_load_iceberg_table(
22
56
  self.catalog, namespace=namespace, table_name=table_name
23
57
  )
58
+
59
+ # TODO: implement row, column predicate pushdown to Iceberg
60
+
61
+ # Get the partition spec
62
+ partition_spec = iceberg_table.spec()
63
+
64
+ # Check if the table is partitioned
65
+ is_partitioned = len(partition_spec.fields) > 0
66
+
67
+ scan = iceberg_table.scan()
68
+ if is_partitioned:
69
+ if pushdown and pushdown.partition_filter:
70
+ filter_fields = self._collect_filter_fields(pushdown.partition_filter)
71
+ logger.info(
72
+ f"Pushdown partition filter is enabled, converting to Iceberg. Fields discovered in filter: {', '.join(sorted(filter_fields))}"
73
+ )
74
+ # Handle partition filter if present, DeltaCAT only supports partition-level filters right now
75
+ iceberg_expression = self._convert_partition_filter(
76
+ iceberg_table, pushdown.partition_filter
77
+ )
78
+ scan = scan.filter(iceberg_expression)
79
+
24
80
  file_scan_tasks = []
25
- # TODO: implement predicate pushdown to Iceberg
26
- for scan_task in iceberg_table.scan().plan_files():
81
+ for scan_task in scan.plan_files():
27
82
  file_scan_tasks.append(FileScanTask([DataFile(scan_task.file.file_path)]))
28
83
  return ScanPlan(file_scan_tasks)
84
+
85
+ @classmethod
86
+ def _validate_partition_references(
87
+ cls, expr: Any, partition_cols: Set[str]
88
+ ) -> None:
89
+ """
90
+ Validates that the expression only references partition columns.
91
+
92
+ Args:
93
+ expr: The expression to validate
94
+ partition_cols: Set of valid partition column names
95
+
96
+ Raises:
97
+ ValueError: If the expression references a non-partition column
98
+ """
99
+ if hasattr(expr, "field"): # Reference type expression
100
+ if expr.field not in partition_cols:
101
+ raise ValueError(
102
+ f"Filter references non-partition column: {expr.field}. "
103
+ f"Partition columns are: {partition_cols}"
104
+ )
105
+ # Recursively validate nested expressions
106
+ if hasattr(expr, "left"):
107
+ cls._validate_partition_references(expr.left, partition_cols)
108
+ if hasattr(expr, "right"):
109
+ cls._validate_partition_references(expr.right, partition_cols)
110
+ if hasattr(expr, "expr"):
111
+ cls._validate_partition_references(expr.expr, partition_cols)
112
+ if hasattr(expr, "values"):
113
+ for value in expr.values:
114
+ cls._validate_partition_references(value, partition_cols)
115
+
116
+ def _convert_partition_filter(
117
+ self, table: Table, partition_filter: PartitionFilter
118
+ ):
119
+ """
120
+ Convert DeltaCAT partition filter to PyIceberg expression,
121
+ validating that only partition columns are referenced.
122
+ """
123
+ partition_cols = set(field.name for field in table.spec().fields)
124
+
125
+ # Validate before converting
126
+ self._validate_partition_references(partition_filter, partition_cols)
127
+
128
+ # Convert to PyIceberg expression
129
+ return self.expression_visitor.visit(partition_filter)
@@ -41,7 +41,7 @@ from deltacat.experimental.storage.iceberg.model import (
41
41
  NamespaceMapper,
42
42
  TableMapper,
43
43
  )
44
- from deltacat.types.media import ContentType, StorageType, TableType
44
+ from deltacat.types.media import ContentType, StorageType, DatasetType
45
45
  from deltacat.utils.common import ReadKwargsProvider
46
46
 
47
47
  from pyiceberg.catalog import Catalog
@@ -281,7 +281,7 @@ def get_latest_delta(
281
281
 
282
282
  def download_delta(
283
283
  delta_like: Union[Delta, DeltaLocator],
284
- table_type: TableType = TableType.PYARROW,
284
+ table_type: DatasetType = DatasetType.PYARROW,
285
285
  storage_type: StorageType = StorageType.DISTRIBUTED,
286
286
  max_parallelism: Optional[int] = None,
287
287
  columns: Optional[List[str]] = None,
@@ -303,7 +303,7 @@ def download_delta(
303
303
  def download_delta_manifest_entry(
304
304
  delta_like: Union[Delta, DeltaLocator],
305
305
  entry_index: int,
306
- table_type: TableType = TableType.PYARROW,
306
+ table_type: DatasetType = DatasetType.PYARROW,
307
307
  columns: Optional[List[str]] = None,
308
308
  file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
309
309
  *args,
@@ -603,6 +603,8 @@ def stage_delta(
603
603
  properties: Optional[DeltaProperties] = None,
604
604
  s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
605
605
  content_type: ContentType = ContentType.PARQUET,
606
+ schema: Optional[Schema] = None,
607
+ sort_scheme_id: Optional[str] = None,
606
608
  *args,
607
609
  **kwargs,
608
610
  ) -> Delta:
@@ -66,6 +66,7 @@ from deltacat.storage import (
66
66
  TableVersionLocator,
67
67
  Transform,
68
68
  TransformName,
69
+ TruncateStrategy,
69
70
  TruncateTransform,
70
71
  TruncateTransformParameters,
71
72
  UnknownTransform,
@@ -227,7 +228,10 @@ class TransformMapper(ModelMapper[IcebergTransform, Transform]):
227
228
  )
228
229
  if isinstance(obj, IcebergTruncateTransform):
229
230
  return TruncateTransform.of(
230
- TruncateTransformParameters.of(width=obj.width),
231
+ TruncateTransformParameters.of(
232
+ width=obj.width,
233
+ truncate_strategy=TruncateStrategy.ICEBERG,
234
+ ),
231
235
  )
232
236
  return UnknownTransform.of()
233
237
 
@@ -323,7 +327,7 @@ class PartitionSchemeMapper(ModelMapper[PartitionSpec, PartitionScheme]):
323
327
  elif not schema:
324
328
  err_msg = "Schema is required for Partition Spec conversion."
325
329
  raise ValueError(err_msg)
326
- keys = [PartitionKeyMapper.map(field, schema) for field in obj.fields]
330
+ keys = [PartitionKeyMapper.map(field, schema) for field in obj.fields] or None
327
331
  return PartitionScheme.of(
328
332
  keys=keys,
329
333
  name=name,
@@ -425,7 +429,7 @@ class SortSchemeMapper(ModelMapper[IcebergSortOrder, SortScheme]):
425
429
  elif not schema:
426
430
  err_msg = "Schema is required for Sort Order conversion."
427
431
  raise ValueError(err_msg)
428
- keys = [SortKeyMapper.map(field, schema) for field in obj.fields]
432
+ keys = [SortKeyMapper.map(field, schema) for field in obj.fields] or None
429
433
  return SortScheme.of(
430
434
  keys=keys,
431
435
  name=name,
@@ -0,0 +1,119 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import pyarrow
5
+ from deltacat.storage.model.scan.push_down import PartitionFilter
6
+
7
+ import deltacat.logs as logs
8
+ from deltacat.storage.model.expression import Reference, Literal
9
+ from deltacat.storage.model.expression.visitor import ExpressionVisitor
10
+ from pyiceberg.expressions import (
11
+ And,
12
+ Or,
13
+ Not,
14
+ EqualTo,
15
+ NotEqualTo,
16
+ GreaterThan,
17
+ GreaterThanOrEqual,
18
+ LessThan,
19
+ LessThanOrEqual,
20
+ IsNull,
21
+ In,
22
+ )
23
+
24
+ # Initialize DeltaCAT logger
25
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
26
+
27
+
28
class IcebergExpressionVisitor(ExpressionVisitor[None, Any]):
    """
    Visitor that translates DeltaCAT expressions to PyIceberg expressions.

    Each ``visit_*`` method visits the children of one DeltaCAT node and wraps
    the results in the matching PyIceberg expression class.
    """

    def visit(self, expr, context=None):
        """Visit ``expr``, unwrapping a PartitionFilter to its inner expression."""
        if not isinstance(expr, PartitionFilter):
            # Everything else is dispatched by the parent's visit method.
            return super().visit(expr, context)
        return self.visit(expr.expr, context)

    def visit_reference(self, expr: Reference, context=None) -> str:
        """Return the referenced field name."""
        return expr.field

    def visit_literal(self, expr: Literal, context=None) -> Any:
        """Return the literal value, converting a PyArrow scalar via ``as_py()``."""
        raw = expr.value
        if isinstance(raw, pyarrow.Scalar):
            return raw.as_py()
        return raw

    def visit_and(self, expr, context=None):
        """Translate to ``And`` over the visited left and right operands."""
        return And(self.visit(expr.left, context), self.visit(expr.right, context))

    def visit_or(self, expr, context=None):
        """Translate to ``Or`` over the visited left and right operands."""
        return Or(self.visit(expr.left, context), self.visit(expr.right, context))

    def visit_not(self, expr, context=None):
        """Translate to ``Not`` over the visited operand."""
        return Not(self.visit(expr.operand, context))

    def visit_equal(self, expr, context=None):
        """Translate to ``EqualTo`` over the visited left and right operands."""
        return EqualTo(
            self.visit(expr.left, context), self.visit(expr.right, context)
        )

    def visit_not_equal(self, expr, context=None):
        """Translate to ``NotEqualTo`` over the visited left and right operands."""
        return NotEqualTo(
            self.visit(expr.left, context), self.visit(expr.right, context)
        )

    def visit_greater_than(self, expr, context=None):
        """Translate to ``GreaterThan`` over the visited left and right operands."""
        return GreaterThan(
            self.visit(expr.left, context), self.visit(expr.right, context)
        )

    def visit_greater_than_equal(self, expr, context=None):
        """Translate to ``GreaterThanOrEqual`` over the visited operands."""
        return GreaterThanOrEqual(
            self.visit(expr.left, context), self.visit(expr.right, context)
        )

    def visit_less_than(self, expr, context=None):
        """Translate to ``LessThan`` over the visited left and right operands."""
        return LessThan(
            self.visit(expr.left, context), self.visit(expr.right, context)
        )

    def visit_less_than_equal(self, expr, context=None):
        """Translate to ``LessThanOrEqual`` over the visited operands."""
        return LessThanOrEqual(
            self.visit(expr.left, context), self.visit(expr.right, context)
        )

    def visit_is_null(self, expr, context=None):
        """Translate to ``IsNull`` over the visited operand."""
        return IsNull(self.visit(expr.operand, context))

    def visit_in(self, expr, context=None):
        """Translate to ``In`` over the visited value and list of candidates."""
        target = self.visit(expr.value, context)
        candidates = [self.visit(candidate, context) for candidate in expr.values]
        return In(target, candidates)

    def visit_between(self, expr, context=None):
        """Translate BETWEEN to ``And(value >= lower, value <= upper)``."""
        target = self.visit(expr.value, context)
        low = self.visit(expr.lower, context)
        high = self.visit(expr.upper, context)
        lower_bound = GreaterThanOrEqual(target, low)
        upper_bound = LessThanOrEqual(target, high)
        return And(lower_bound, upper_bound)

    # PyIceberg does not have a direct equivalent of LIKE
    def visit_like(self, expr, context=None):
        """Log a warning that the LIKE filter is ignored and return None."""
        target = self.visit(expr.value, context)
        like_pattern = self.visit(expr.pattern, context)
        logger.warning(
            f"LIKE operation is not supported in PyIceberg. Ignoring LIKE filter: {target} LIKE '{like_pattern}'. "
            "This may result in more data being returned than expected."
        )
        # NOTE(review): the None result is passed straight into And/Or/Not by
        # the other visit_* methods when LIKE is nested - confirm that every
        # consumer of this visitor tolerates a None operand.
        return None
@@ -52,7 +52,6 @@ from deltacat.storage import (
52
52
  TableVersion,
53
53
  TableVersionLocator,
54
54
  Transaction,
55
- TransactionType,
56
55
  TransactionOperation,
57
56
  TransactionOperationType,
58
57
  )
@@ -299,7 +298,6 @@ class Dataset:
299
298
  partition_values=DEFAULT_PARTITION_VALUES,
300
299
  partition_id=self._partition_id,
301
300
  ),
302
- schema=None,
303
301
  content_types=None,
304
302
  ),
305
303
  ]
@@ -312,7 +310,6 @@ class Dataset:
312
310
  ]
313
311
 
314
312
  transaction = Transaction.of(
315
- txn_type=TransactionType.APPEND,
316
313
  txn_operations=TransactionOperationList.of(txn_operations),
317
314
  )
318
315
 
@@ -10,7 +10,6 @@ from deltacat.storage import (
10
10
  Delta,
11
11
  DeltaType,
12
12
  Transaction,
13
- TransactionType,
14
13
  TransactionOperation,
15
14
  TransactionOperationType,
16
15
  )
@@ -169,7 +168,6 @@ class DeltacatManifestIO(ManifestIO):
169
168
  delta["level"] = level
170
169
 
171
170
  tx_results = Transaction.of(
172
- txn_type=TransactionType.APPEND,
173
171
  txn_operations=TransactionOperationList.of(
174
172
  [
175
173
  TransactionOperation.of(
@@ -5,6 +5,7 @@ from typing import Generator, Optional
5
5
  import pyarrow
6
6
  import pyarrow.fs
7
7
 
8
+ from deltacat.constants import REV_DIR_NAME
8
9
  from deltacat.storage import Delta
9
10
  from deltacat.storage.model.partition import PartitionLocator
10
11
  from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
@@ -83,7 +84,7 @@ class DatasetMetastore:
83
84
  param: filesystem: The filesystem to search for the revisions.
84
85
  returns: The latest revision as a RivuletDelta.
85
86
  """
86
- rev_directory = posixpath.join(delta_dir, "rev")
87
+ rev_directory = posixpath.join(delta_dir, REV_DIR_NAME)
87
88
  revisions = filesystem.get_file_info(
88
89
  pyarrow.fs.FileSelector(rev_directory, allow_not_found=True)
89
90
  )
@@ -128,7 +129,7 @@ class DatasetMetastore:
128
129
  return
129
130
 
130
131
  # Locate "rev" directory inside the partition
131
- rev_directory = posixpath.join(partition_path, "rev")
132
+ rev_directory = posixpath.join(partition_path, REV_DIR_NAME)
132
133
  rev_info = filesystem.get_file_info(rev_directory)
133
134
 
134
135
  if rev_info.type != pyarrow.fs.FileType.Directory:
@@ -350,7 +350,6 @@ class DeltaCatDatasource(Datasource):
350
350
  ]
351
351
  elif self._deltacat_read_type == DeltacatReadType.METADATA_LIST:
352
352
  # do a shallow read of the top-level DeltaCAT metadata
353
- print(f"listers: {self._reader.listers}")
354
353
  listers = copy.deepcopy(self._reader.listers)
355
354
  listers = [listers[0]]
356
355
  read_tasks = self._list_all_metafiles_read_tasks(
@@ -20,6 +20,9 @@ from deltacat.storage.model.metafile import (
20
20
  from deltacat.storage.model.transaction import (
21
21
  TransactionOperation,
22
22
  Transaction,
23
+ read_transaction,
24
+ transactions,
25
+ transaction,
23
26
  )
24
27
  from deltacat.storage.model.namespace import (
25
28
  Namespace,
@@ -31,6 +34,7 @@ from deltacat.storage.model.partition import (
31
34
  PartitionLocator,
32
35
  PartitionLocatorAlias,
33
36
  PartitionKey,
37
+ PartitionKeyList,
34
38
  PartitionScheme,
35
39
  PartitionSchemeList,
36
40
  PartitionValues,
@@ -43,6 +47,9 @@ from deltacat.storage.model.schema import (
43
47
  NestedFieldName,
44
48
  Schema,
45
49
  SchemaList,
50
+ SchemaUpdate,
51
+ SchemaUpdateOperation,
52
+ SchemaUpdateOperations,
46
53
  )
47
54
  from deltacat.storage.model.stream import (
48
55
  Stream,
@@ -75,6 +82,7 @@ from deltacat.storage.model.transform import (
75
82
  MonthTransform,
76
83
  YearTransform,
77
84
  TruncateTransform,
85
+ TruncateStrategy,
78
86
  )
79
87
  from deltacat.storage.model.types import (
80
88
  CommitState,
@@ -88,11 +96,12 @@ from deltacat.storage.model.types import (
88
96
  SchemaConsistencyType,
89
97
  StreamFormat,
90
98
  SortOrder,
91
- TransactionType,
92
99
  TransactionOperationType,
100
+ TransactionStatus,
93
101
  )
94
102
  from deltacat.storage.model.sort_key import (
95
103
  SortKey,
104
+ SortKeyList,
96
105
  SortScheme,
97
106
  SortSchemeList,
98
107
  )
@@ -138,6 +147,7 @@ __all__ = [
138
147
  "NullOrder",
139
148
  "Partition",
140
149
  "PartitionKey",
150
+ "PartitionKeyList",
141
151
  "PartitionLocator",
142
152
  "PartitionLocatorAlias",
143
153
  "PartitionScheme",
@@ -145,8 +155,12 @@ __all__ = [
145
155
  "PartitionValues",
146
156
  "Schema",
147
157
  "SchemaList",
158
+ "SchemaUpdate",
159
+ "SchemaUpdateOperation",
160
+ "SchemaUpdateOperations",
148
161
  "SchemaConsistencyType",
149
162
  "SortKey",
163
+ "SortKeyList",
150
164
  "SortOrder",
151
165
  "SortScheme",
152
166
  "SortSchemeList",
@@ -163,13 +177,17 @@ __all__ = [
163
177
  "Transaction",
164
178
  "TransactionOperation",
165
179
  "TransactionOperationType",
166
- "TransactionType",
180
+ "TransactionStatus",
167
181
  "Transform",
168
182
  "TransformName",
169
183
  "TransformParameters",
170
184
  "TruncateTransform",
171
185
  "TruncateTransformParameters",
186
+ "TruncateStrategy",
172
187
  "UnknownTransform",
173
188
  "VoidTransform",
174
189
  "YearTransform",
190
+ "read_transaction",
191
+ "transactions",
192
+ "transaction",
175
193
  ]