deltacat 2.0.0b10__py3-none-any.whl → 2.0.0b12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298) hide show
  1. deltacat/__init__.py +96 -17
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +0 -18
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2435 -279
  12. deltacat/catalog/model/catalog.py +154 -77
  13. deltacat/catalog/model/properties.py +63 -22
  14. deltacat/compute/compactor/compaction_session.py +97 -75
  15. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  16. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  17. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  18. deltacat/compute/compactor/repartition_session.py +8 -21
  19. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  20. deltacat/compute/compactor/steps/materialize.py +9 -7
  21. deltacat/compute/compactor/steps/repartition.py +12 -11
  22. deltacat/compute/compactor/utils/io.py +6 -5
  23. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  24. deltacat/compute/compactor/utils/system_columns.py +3 -1
  25. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  26. deltacat/compute/compactor_v2/constants.py +30 -1
  27. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  28. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  29. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  30. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  31. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  32. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  33. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  34. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  35. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  36. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  37. deltacat/compute/compactor_v2/utils/io.py +11 -4
  38. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  39. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  40. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  41. deltacat/compute/converter/converter_session.py +145 -32
  42. deltacat/compute/converter/model/convert_input.py +26 -19
  43. deltacat/compute/converter/model/convert_input_files.py +33 -16
  44. deltacat/compute/converter/model/convert_result.py +35 -16
  45. deltacat/compute/converter/model/converter_session_params.py +24 -21
  46. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  47. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  48. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  49. deltacat/compute/converter/steps/convert.py +157 -50
  50. deltacat/compute/converter/steps/dedupe.py +24 -11
  51. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  52. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  53. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  54. deltacat/compute/converter/utils/io.py +101 -12
  55. deltacat/compute/converter/utils/s3u.py +33 -27
  56. deltacat/compute/janitor.py +205 -0
  57. deltacat/compute/jobs/client.py +25 -12
  58. deltacat/compute/resource_estimation/delta.py +38 -6
  59. deltacat/compute/resource_estimation/model.py +8 -0
  60. deltacat/constants.py +45 -2
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/env.py +10 -0
  64. deltacat/examples/basic_logging.py +1 -3
  65. deltacat/examples/compactor/aws/__init__.py +1 -0
  66. deltacat/examples/compactor/bootstrap.py +863 -0
  67. deltacat/examples/compactor/compactor.py +373 -0
  68. deltacat/examples/compactor/explorer.py +473 -0
  69. deltacat/examples/compactor/gcp/__init__.py +1 -0
  70. deltacat/examples/compactor/job_runner.py +439 -0
  71. deltacat/examples/compactor/utils/__init__.py +1 -0
  72. deltacat/examples/compactor/utils/common.py +261 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  79. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
  80. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  81. deltacat/examples/indexer/indexer.py +2 -2
  82. deltacat/examples/indexer/job_runner.py +1 -2
  83. deltacat/exceptions.py +66 -4
  84. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  85. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  86. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +29 -11
  87. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  88. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  89. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  90. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  91. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  92. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  93. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  94. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  95. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  96. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  97. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  98. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  99. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  100. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  101. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  102. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  103. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  104. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  105. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  107. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  108. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  109. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  110. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  111. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  112. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  113. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  114. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  115. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  116. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  117. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  118. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  119. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  120. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  121. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  122. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  123. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  124. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  125. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  126. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  127. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  128. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  129. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  130. deltacat/io/datasource/deltacat_datasource.py +0 -1
  131. deltacat/io/reader/deltacat_read_api.py +1 -1
  132. deltacat/storage/__init__.py +20 -2
  133. deltacat/storage/interface.py +54 -32
  134. deltacat/storage/main/impl.py +1494 -541
  135. deltacat/storage/model/delta.py +27 -3
  136. deltacat/storage/model/locator.py +6 -12
  137. deltacat/storage/model/manifest.py +182 -6
  138. deltacat/storage/model/metafile.py +151 -78
  139. deltacat/storage/model/namespace.py +8 -1
  140. deltacat/storage/model/partition.py +117 -42
  141. deltacat/storage/model/schema.py +2427 -159
  142. deltacat/storage/model/shard.py +6 -2
  143. deltacat/storage/model/sort_key.py +40 -0
  144. deltacat/storage/model/stream.py +9 -2
  145. deltacat/storage/model/table.py +12 -1
  146. deltacat/storage/model/table_version.py +11 -0
  147. deltacat/storage/model/transaction.py +1184 -208
  148. deltacat/storage/model/transform.py +81 -2
  149. deltacat/storage/model/types.py +48 -26
  150. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  151. deltacat/tests/aws/test_s3u.py +2 -31
  152. deltacat/tests/catalog/data/__init__.py +0 -0
  153. deltacat/tests/catalog/main/__init__.py +0 -0
  154. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  155. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  156. deltacat/tests/catalog/model/__init__.py +0 -0
  157. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  158. deltacat/tests/catalog/test_catalogs.py +103 -106
  159. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  160. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  161. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  162. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  163. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  164. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  165. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  166. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  167. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  168. deltacat/tests/compute/conftest.py +8 -44
  169. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  170. deltacat/tests/compute/converter/utils.py +15 -6
  171. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  172. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  173. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  174. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  175. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  176. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  177. deltacat/tests/compute/test_janitor.py +236 -0
  178. deltacat/tests/compute/test_util_common.py +716 -43
  179. deltacat/tests/compute/test_util_constant.py +0 -1
  180. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  181. deltacat/tests/daft/__init__.py +0 -0
  182. deltacat/tests/daft/test_model.py +97 -0
  183. deltacat/tests/experimental/__init__.py +1 -0
  184. deltacat/tests/experimental/catalog/__init__.py +0 -0
  185. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  186. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  187. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  188. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  189. deltacat/tests/experimental/daft/__init__.py +0 -0
  190. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  191. deltacat/tests/experimental/storage/__init__.py +0 -0
  192. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  193. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  194. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  195. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -3
  196. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  197. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  198. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  199. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  200. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  201. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  202. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  203. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  204. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +5 -3
  205. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  206. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  207. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  208. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  209. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  210. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  211. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  212. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  213. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  214. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  215. deltacat/tests/storage/model/test_schema.py +171 -0
  216. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  217. deltacat/tests/storage/model/test_shard.py +3 -1
  218. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  219. deltacat/tests/storage/model/test_transaction.py +393 -48
  220. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  221. deltacat/tests/test_deltacat_api.py +988 -4
  222. deltacat/tests/test_exceptions.py +9 -5
  223. deltacat/tests/test_utils/pyarrow.py +52 -21
  224. deltacat/tests/test_utils/storage.py +23 -34
  225. deltacat/tests/types/__init__.py +0 -0
  226. deltacat/tests/types/test_tables.py +104 -0
  227. deltacat/tests/utils/exceptions.py +22 -0
  228. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  229. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  230. deltacat/tests/utils/test_daft.py +121 -31
  231. deltacat/tests/utils/test_numpy.py +1193 -0
  232. deltacat/tests/utils/test_pandas.py +1106 -0
  233. deltacat/tests/utils/test_polars.py +1040 -0
  234. deltacat/tests/utils/test_pyarrow.py +1370 -89
  235. deltacat/types/media.py +224 -14
  236. deltacat/types/tables.py +2329 -59
  237. deltacat/utils/arguments.py +33 -1
  238. deltacat/utils/daft.py +823 -36
  239. deltacat/utils/export.py +3 -1
  240. deltacat/utils/filesystem.py +100 -0
  241. deltacat/utils/metafile_locator.py +2 -1
  242. deltacat/utils/numpy.py +118 -26
  243. deltacat/utils/pandas.py +577 -48
  244. deltacat/utils/polars.py +658 -27
  245. deltacat/utils/pyarrow.py +1258 -213
  246. deltacat/utils/ray_utils/dataset.py +101 -10
  247. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  248. deltacat/utils/url.py +57 -16
  249. deltacat-2.0.0b12.dist-info/METADATA +1163 -0
  250. deltacat-2.0.0b12.dist-info/RECORD +439 -0
  251. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
  252. deltacat/catalog/iceberg/__init__.py +0 -4
  253. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  254. deltacat/compute/merge_on_read/__init__.py +0 -4
  255. deltacat/compute/merge_on_read/daft.py +0 -40
  256. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  257. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  258. deltacat/daft/daft_scan.py +0 -115
  259. deltacat/daft/model.py +0 -258
  260. deltacat/daft/translator.py +0 -126
  261. deltacat/examples/common/fixtures.py +0 -15
  262. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  263. deltacat/storage/rivulet/__init__.py +0 -11
  264. deltacat/storage/rivulet/feather/__init__.py +0 -5
  265. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  266. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  267. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  268. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  269. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  270. deltacat/utils/s3fs.py +0 -21
  271. deltacat-2.0.0b10.dist-info/METADATA +0 -68
  272. deltacat-2.0.0b10.dist-info/RECORD +0 -381
  273. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  274. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  275. /deltacat/{daft → docs/autogen/schema}/__init__.py +0 -0
  276. /deltacat/{examples/common → docs/autogen/schema/inference}/__init__.py +0 -0
  277. /deltacat/examples/{iceberg → compactor}/__init__.py +0 -0
  278. /deltacat/{storage/iceberg → examples/experimental}/__init__.py +0 -0
  279. /deltacat/{storage/rivulet/arrow → examples/experimental/iceberg}/__init__.py +0 -0
  280. /deltacat/{storage/rivulet/fs → examples/experimental/iceberg/converter}/__init__.py +0 -0
  281. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  282. /deltacat/{storage/rivulet/reader → experimental/catalog}/__init__.py +0 -0
  283. /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
  284. /deltacat/{storage/rivulet/schema → experimental/compatibility}/__init__.py +0 -0
  285. /deltacat/{storage/rivulet/writer → experimental/converter_agent}/__init__.py +0 -0
  286. /deltacat/{tests/storage/rivulet → experimental/converter_agent/beam}/__init__.py +0 -0
  287. /deltacat/{tests/storage/rivulet/fs → experimental/storage}/__init__.py +0 -0
  288. /deltacat/{tests/storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
  289. /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
  290. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/storage/rivulet/fs/__init__.py} +0 -0
  291. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  292. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  293. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  294. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  295. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  296. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  297. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
  298. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,119 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import pyarrow
5
+ from deltacat.storage.model.scan.push_down import PartitionFilter
6
+
7
+ import deltacat.logs as logs
8
+ from deltacat.storage.model.expression import Reference, Literal
9
+ from deltacat.storage.model.expression.visitor import ExpressionVisitor
10
+ from pyiceberg.expressions import (
11
+ And,
12
+ Or,
13
+ Not,
14
+ EqualTo,
15
+ NotEqualTo,
16
+ GreaterThan,
17
+ GreaterThanOrEqual,
18
+ LessThan,
19
+ LessThanOrEqual,
20
+ IsNull,
21
+ In,
22
+ )
23
+
24
+ # Initialize DeltaCAT logger
25
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
26
+
27
+
28
class IcebergExpressionVisitor(ExpressionVisitor[None, Any]):
    """
    Translates a DeltaCAT expression tree into PyIceberg expression objects.

    Every ``visit_*`` method first converts its operands (in left-to-right
    order) by calling ``visit`` on them and then wraps the converted operands
    in the matching PyIceberg expression class.
    """

    def visit(self, expr, context=None):
        """
        Convert ``expr``, unwrapping a ``PartitionFilter`` first.

        A ``PartitionFilter`` is replaced by its inner ``expr`` attribute and
        visited again; anything else is handed to the parent class ``visit``.
        """
        if not isinstance(expr, PartitionFilter):
            # Regular expressions go straight through the parent's visit method
            return super().visit(expr, context)
        # Partition filters only wrap the expression we actually care about
        return self.visit(expr.expr, context)

    def visit_reference(self, expr: Reference, context=None) -> str:
        """Return the ``field`` attribute of the reference unchanged."""
        field_name = expr.field
        return field_name

    def visit_literal(self, expr: Literal, context=None) -> Any:
        """
        Return the literal's value, converting a PyArrow scalar via ``as_py()``.

        Values that are not ``pyarrow.Scalar`` instances are returned as-is.
        """
        raw = expr.value
        if isinstance(raw, pyarrow.Scalar):
            return raw.as_py()
        return raw

    def visit_and(self, expr, context=None):
        """Build a PyIceberg ``And`` from the converted left and right operands."""
        return And(self.visit(expr.left, context), self.visit(expr.right, context))

    def visit_or(self, expr, context=None):
        """Build a PyIceberg ``Or`` from the converted left and right operands."""
        return Or(self.visit(expr.left, context), self.visit(expr.right, context))

    def visit_not(self, expr, context=None):
        """Build a PyIceberg ``Not`` around the converted operand."""
        return Not(self.visit(expr.operand, context))

    def visit_equal(self, expr, context=None):
        """Build a PyIceberg ``EqualTo`` from the converted operands."""
        return EqualTo(
            self.visit(expr.left, context),
            self.visit(expr.right, context),
        )

    def visit_not_equal(self, expr, context=None):
        """Build a PyIceberg ``NotEqualTo`` from the converted operands."""
        return NotEqualTo(
            self.visit(expr.left, context),
            self.visit(expr.right, context),
        )

    def visit_greater_than(self, expr, context=None):
        """Build a PyIceberg ``GreaterThan`` from the converted operands."""
        return GreaterThan(
            self.visit(expr.left, context),
            self.visit(expr.right, context),
        )

    def visit_greater_than_equal(self, expr, context=None):
        """Build a PyIceberg ``GreaterThanOrEqual`` from the converted operands."""
        return GreaterThanOrEqual(
            self.visit(expr.left, context),
            self.visit(expr.right, context),
        )

    def visit_less_than(self, expr, context=None):
        """Build a PyIceberg ``LessThan`` from the converted operands."""
        return LessThan(
            self.visit(expr.left, context),
            self.visit(expr.right, context),
        )

    def visit_less_than_equal(self, expr, context=None):
        """Build a PyIceberg ``LessThanOrEqual`` from the converted operands."""
        return LessThanOrEqual(
            self.visit(expr.left, context),
            self.visit(expr.right, context),
        )

    def visit_is_null(self, expr, context=None):
        """Build a PyIceberg ``IsNull`` around the converted operand."""
        return IsNull(self.visit(expr.operand, context))

    def visit_in(self, expr, context=None):
        """
        Build a PyIceberg ``In`` from the converted value and candidate list.

        The tested value is converted first, then each entry of
        ``expr.values`` in order.
        """
        return In(
            self.visit(expr.value, context),
            [self.visit(candidate, context) for candidate in expr.values],
        )

    def visit_between(self, expr, context=None):
        """
        Express BETWEEN as ``value >= lower AND value <= upper``.

        The value is converted once and shared by both comparisons.
        """
        term = self.visit(expr.value, context)
        low = self.visit(expr.lower, context)
        high = self.visit(expr.upper, context)
        at_least_low = GreaterThanOrEqual(term, low)
        at_most_high = LessThanOrEqual(term, high)
        return And(at_least_low, at_most_high)

    # PyIceberg does not have a direct equivalent of LIKE
    def visit_like(self, expr, context=None):
        """
        Log a warning that the LIKE filter is ignored and return ``None``.

        The value and pattern are still converted so that they can be shown
        in the warning message; no PyIceberg expression is produced.
        NOTE(review): a ``None`` result nested under AND/OR/NOT is passed
        straight into the PyIceberg constructors - confirm callers handle it.
        """
        value = self.visit(expr.value, context)
        pattern = self.visit(expr.pattern, context)
        logger.warning(
            f"LIKE operation is not supported in PyIceberg. Ignoring LIKE filter: {value} LIKE '{pattern}'. "
            "This may result in more data being returned than expected."
        )
        # No filter expression is produced for LIKE
        return None
@@ -0,0 +1,11 @@
1
+ from deltacat.experimental.storage.rivulet.schema.schema import Schema
2
+ from deltacat.experimental.storage.rivulet.schema.schema import Field
3
+ from deltacat.experimental.storage.rivulet.dataset import Dataset
4
+ from deltacat.experimental.storage.rivulet.schema.schema import Datatype
5
+
6
+ __all__ = [
7
+ "Schema",
8
+ "Field",
9
+ "Dataset",
10
+ "Datatype",
11
+ ]
@@ -2,10 +2,13 @@ from abc import ABC, abstractmethod
2
2
  from typing import Iterator, List, Any
3
3
  import pyarrow as pa
4
4
 
5
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
6
- from deltacat.storage.rivulet import Schema
7
- from deltacat.storage.rivulet.serializer import DataSerializer, MEMTABLE_DATA
8
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
5
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
6
+ from deltacat.experimental.storage.rivulet import Schema
7
+ from deltacat.experimental.storage.rivulet.serializer import (
8
+ DataSerializer,
9
+ MEMTABLE_DATA,
10
+ )
11
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
9
12
 
10
13
 
11
14
  class ArrowSerializer(DataSerializer, ABC):
@@ -24,19 +24,23 @@ from deltacat.storage.model.shard import Shard, ShardingStrategy
24
24
  from deltacat.storage.model.stream import Stream, StreamLocator
25
25
  from deltacat.storage.model.transaction import TransactionOperationList
26
26
  from deltacat.storage.model.types import CommitState, StreamFormat
27
- from deltacat.storage.rivulet.fs.file_store import FileStore
28
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
29
- from deltacat.storage.rivulet.reader.dataset_metastore import DatasetMetastore
30
- from deltacat.storage.rivulet import Schema, Field
27
+ from deltacat.experimental.storage.rivulet.fs.file_store import FileStore
28
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
29
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
30
+ DatasetMetastore,
31
+ )
32
+ from deltacat.experimental.storage.rivulet import Schema, Field
31
33
  from deltacat.utils.export import export_dataset
32
34
  from .schema.schema import Datatype
33
35
 
34
- from deltacat.storage.rivulet.reader.data_scan import DataScan
35
- from deltacat.storage.rivulet.reader.dataset_reader import DatasetReader
36
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
36
+ from deltacat.experimental.storage.rivulet.reader.data_scan import DataScan
37
+ from deltacat.experimental.storage.rivulet.reader.dataset_reader import DatasetReader
38
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
39
+ QueryExpression,
40
+ )
37
41
 
38
- from deltacat.storage.rivulet.writer.dataset_writer import DatasetWriter
39
- from deltacat.storage.rivulet.writer.memtable_dataset_writer import (
42
+ from deltacat.experimental.storage.rivulet.writer.dataset_writer import DatasetWriter
43
+ from deltacat.experimental.storage.rivulet.writer.memtable_dataset_writer import (
40
44
  MemtableDatasetWriter,
41
45
  )
42
46
 
@@ -48,7 +52,6 @@ from deltacat.storage import (
48
52
  TableVersion,
49
53
  TableVersionLocator,
50
54
  Transaction,
51
- TransactionType,
52
55
  TransactionOperation,
53
56
  TransactionOperationType,
54
57
  )
@@ -295,7 +298,6 @@ class Dataset:
295
298
  partition_values=DEFAULT_PARTITION_VALUES,
296
299
  partition_id=self._partition_id,
297
300
  ),
298
- schema=None,
299
301
  content_types=None,
300
302
  ),
301
303
  ]
@@ -308,7 +310,6 @@ class Dataset:
308
310
  ]
309
311
 
310
312
  transaction = Transaction.of(
311
- txn_type=TransactionType.APPEND,
312
313
  txn_operations=TransactionOperationList.of(txn_operations),
313
314
  )
314
315
 
@@ -2,13 +2,16 @@ from __future__ import annotations
2
2
 
3
3
  from typing import List, Callable, Any
4
4
 
5
- from deltacat.storage.rivulet.field_group import FieldGroup
6
- from deltacat.storage.rivulet.mvp.Table import MvpTable
7
- from deltacat.storage.rivulet import Schema
8
- from deltacat.storage.rivulet.reader.data_scan import DataScan
9
- from deltacat.storage.rivulet.reader.dataset_metastore import DatasetMetastore
10
- from deltacat.storage.rivulet.reader.dataset_reader import DatasetReader
11
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
5
+ from deltacat.experimental.storage.rivulet.mvp.Table import MvpTable
6
+ from deltacat.experimental.storage.rivulet import Schema
7
+ from deltacat.experimental.storage.rivulet.reader.data_scan import DataScan
8
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
9
+ DatasetMetastore,
10
+ )
11
+ from deltacat.experimental.storage.rivulet.reader.dataset_reader import DatasetReader
12
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
13
+ QueryExpression,
14
+ )
12
15
 
13
16
 
14
17
  class DatasetExecutor:
@@ -22,12 +25,10 @@ class DatasetExecutor:
22
25
 
23
26
  def __init__(
24
27
  self,
25
- field_groups: List[FieldGroup],
26
28
  schema: Schema,
27
29
  metastore: DatasetMetastore,
28
30
  ):
29
31
  self.effective_schema: Schema = schema.__deepcopy__()
30
- self.field_groups = field_groups
31
32
  self.output: MvpTable | None = None
32
33
  self._metastore = metastore
33
34
 
@@ -64,18 +65,9 @@ class DatasetExecutor:
64
65
 
65
66
  TODO for now this is doing dumb in-memory implementation and later this is going to be replaced by rust library
66
67
  """
67
- if len(self.field_groups) == 1:
68
- return self._read_as_mvp_table(schema, self.field_groups[0])
69
- else:
70
- ds1 = self._read_as_mvp_table(schema, self.field_groups[0])
71
- ds2 = self._read_as_mvp_table(schema, self.field_groups[1])
72
- merged = MvpTable.merge(ds1, ds2, schema.primary_key.name)
73
- for i in range(2, len(self.field_groups)):
74
- ds_i = self._read_as_mvp_table(schema, self.field_groups[i])
75
- merged = MvpTable.merge(merged, ds_i, schema.primary_key.name)
76
- return merged
68
+ return self._read_as_mvp_table(schema)
77
69
 
78
- def _read_as_mvp_table(self, schema: Schema, field_group: FieldGroup):
70
+ def _read_as_mvp_table(self, schema: Schema):
79
71
  data = list(
80
72
  DataScan(
81
73
  schema, QueryExpression(), DatasetReader(self._metastore)
@@ -0,0 +1,7 @@
1
+ # TODO later on this will be moved to a dedicated package
2
+ from deltacat.experimental.storage.rivulet.feather.file_reader import FeatherFileReader
3
+ from deltacat.experimental.storage.rivulet.reader.reader_type_registrar import (
4
+ FileReaderRegistrar,
5
+ )
6
+
7
+ FileReaderRegistrar.register_reader("feather", FeatherFileReader)
@@ -5,15 +5,17 @@ from typing import Optional
5
5
  import pyarrow.ipc
6
6
  from pyarrow import RecordBatch, RecordBatchFileReader
7
7
 
8
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
9
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
10
- from deltacat.storage.rivulet.reader.data_reader import (
8
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
9
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
10
+ from deltacat.experimental.storage.rivulet.reader.data_reader import (
11
11
  RowAndKey,
12
12
  FileReader,
13
13
  FILE_FORMAT,
14
14
  )
15
- from deltacat.storage.rivulet.reader.pyarrow_data_reader import RecordBatchRowIndex
16
- from deltacat.storage.rivulet.schema.schema import Schema
15
+ from deltacat.experimental.storage.rivulet.reader.pyarrow_data_reader import (
16
+ RecordBatchRowIndex,
17
+ )
18
+ from deltacat.experimental.storage.rivulet.schema.schema import Schema
17
19
 
18
20
 
19
21
  class FeatherFileReader(FileReader[RecordBatchRowIndex]):
@@ -3,10 +3,10 @@ from typing import List
3
3
  import pyarrow as pa
4
4
  from pyarrow import feather
5
5
 
6
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
7
- from deltacat.storage.rivulet import Schema
8
- from deltacat.storage.rivulet.arrow.serializer import ArrowSerializer
9
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
6
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
7
+ from deltacat.experimental.storage.rivulet import Schema
8
+ from deltacat.experimental.storage.rivulet.arrow.serializer import ArrowSerializer
9
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
10
10
 
11
11
 
12
12
  class FeatherDataSerializer(ArrowSerializer):
@@ -3,9 +3,9 @@ import time
3
3
  from typing import List, Generator
4
4
 
5
5
  from deltacat.storage.model.partition import PartitionLocator
6
- from deltacat.storage.rivulet.fs.file_store import FileStore
7
- from deltacat.storage.rivulet.fs.input_file import InputFile
8
- from deltacat.storage.rivulet.fs.output_file import OutputFile
6
+ from deltacat.experimental.storage.rivulet.fs.file_store import FileStore
7
+ from deltacat.experimental.storage.rivulet.fs.input_file import InputFile
8
+ from deltacat.experimental.storage.rivulet.fs.output_file import OutputFile
9
9
  from deltacat.utils.metafile_locator import _find_partition_path
10
10
 
11
11
 
@@ -4,8 +4,8 @@ from pyarrow.fs import FileSystem, FileType, FileSelector
4
4
  # TODO(deltacat): Rely on deltacat implementation to resolve path and filesystem.
5
5
  from ray.data.datasource.path_util import _resolve_paths_and_filesystem
6
6
 
7
- from deltacat.storage.rivulet.fs.input_file import FSInputFile
8
- from deltacat.storage.rivulet.fs.output_file import FSOutputFile
7
+ from deltacat.experimental.storage.rivulet.fs.input_file import FSInputFile
8
+ from deltacat.experimental.storage.rivulet.fs.output_file import FSOutputFile
9
9
 
10
10
 
11
11
  class FileStore:
@@ -5,7 +5,7 @@ from typing import Protocol
5
5
 
6
6
  from pyarrow.fs import FileSystem, FileType
7
7
 
8
- from deltacat.storage.rivulet.fs.input_file import FSInputFile, InputFile
8
+ from deltacat.experimental.storage.rivulet.fs.input_file import FSInputFile, InputFile
9
9
 
10
10
 
11
11
  class OutputStream(Protocol): # pragma: no cover
@@ -1,9 +1,9 @@
1
1
  from dataclasses import dataclass
2
2
  from typing import List, Callable, Any, Protocol
3
3
 
4
- from deltacat.storage.rivulet.dataset_executor import DatasetExecutor
5
- from deltacat.storage.rivulet.mvp.Table import MvpTable
6
- from deltacat.storage.rivulet import Schema
4
+ from deltacat.experimental.storage.rivulet.dataset_executor import DatasetExecutor
5
+ from deltacat.experimental.storage.rivulet import Schema
6
+ from deltacat.experimental.storage.rivulet.mvp.Table import MvpTable
7
7
 
8
8
 
9
9
  class DatasetOperation(Protocol):
@@ -99,7 +99,7 @@ class LogicalPlan:
99
99
  self.operations.append(CollectOperation())
100
100
  return self
101
101
 
102
- def execute(self, executor: DatasetExecutor) -> "MvpTable":
102
+ def execute(self, executor: DatasetExecutor) -> MvpTable:
103
103
  for operation in self.operations:
104
104
  operation.visit(executor)
105
105
  return executor.output
@@ -10,7 +10,6 @@ from deltacat.storage import (
10
10
  Delta,
11
11
  DeltaType,
12
12
  Transaction,
13
- TransactionType,
14
13
  TransactionOperation,
15
14
  TransactionOperationType,
16
15
  )
@@ -19,7 +18,7 @@ from deltacat.storage.model.partition import PartitionLocator
19
18
  from deltacat.storage.model.transaction import TransactionOperationList
20
19
 
21
20
  from deltacat.storage.model.types import StreamFormat
22
- from deltacat.storage.rivulet import Schema
21
+ from deltacat.experimental.storage.rivulet import Schema
23
22
 
24
23
  StreamPosition = int
25
24
  """The stream position for creating a consistent ordering of manifests."""
@@ -169,7 +168,6 @@ class DeltacatManifestIO(ManifestIO):
169
168
  delta["level"] = level
170
169
 
171
170
  tx_results = Transaction.of(
172
- txn_type=TransactionType.APPEND,
173
171
  txn_operations=TransactionOperationList.of(
174
172
  [
175
173
  TransactionOperation.of(
@@ -4,9 +4,9 @@ import json
4
4
  from itertools import zip_longest
5
5
  from typing import List
6
6
 
7
- from deltacat.storage.rivulet.fs.input_file import InputFile
8
- from deltacat.storage.rivulet.fs.output_file import OutputFile
9
- from deltacat.storage.rivulet.metastore.sst import (
7
+ from deltacat.experimental.storage.rivulet.fs.input_file import InputFile
8
+ from deltacat.experimental.storage.rivulet.fs.output_file import OutputFile
9
+ from deltacat.experimental.storage.rivulet.metastore.sst import (
10
10
  SSTWriter,
11
11
  SSTableRow,
12
12
  SSTReader,
@@ -1,8 +1,8 @@
1
1
  from dataclasses import dataclass
2
2
  from typing import Protocol, Any, List
3
3
 
4
- from deltacat.storage.rivulet.fs.input_file import InputFile
5
- from deltacat.storage.rivulet.fs.output_file import OutputFile
4
+ from deltacat.experimental.storage.rivulet.fs.input_file import InputFile
5
+ from deltacat.experimental.storage.rivulet.fs.output_file import OutputFile
6
6
 
7
7
 
8
8
  @dataclass(frozen=True)
@@ -8,9 +8,9 @@ from typing import Any, Dict, Set, List, FrozenSet, Iterable, TypeVar, NamedTupl
8
8
 
9
9
  from intervaltree import Interval, IntervalTree
10
10
 
11
- from deltacat.storage.rivulet.metastore.delta import DeltaContext
12
- from deltacat.storage.rivulet.metastore.sst import SSTable, SSTableRow
13
- from deltacat.storage.rivulet import Schema
11
+ from deltacat.experimental.storage.rivulet.metastore.delta import DeltaContext
12
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTable, SSTableRow
13
+ from deltacat.experimental.storage.rivulet import Schema
14
14
 
15
15
  T = TypeVar("T")
16
16
 
@@ -0,0 +1,7 @@
1
+ # TODO later on this will be moved to a dedicated package
2
+ from deltacat.experimental.storage.rivulet.parquet.file_reader import ParquetFileReader
3
+ from deltacat.experimental.storage.rivulet.reader.reader_type_registrar import (
4
+ FileReaderRegistrar,
5
+ )
6
+
7
+ FileReaderRegistrar.register_reader("parquet", ParquetFileReader)
@@ -4,15 +4,17 @@ from typing import Optional
4
4
 
5
5
  from pyarrow import RecordBatch
6
6
 
7
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
8
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
9
- from deltacat.storage.rivulet.reader.data_reader import (
7
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
8
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
9
+ from deltacat.experimental.storage.rivulet.reader.data_reader import (
10
10
  RowAndKey,
11
11
  FileReader,
12
12
  FILE_FORMAT,
13
13
  )
14
- from deltacat.storage.rivulet.reader.pyarrow_data_reader import RecordBatchRowIndex
15
- from deltacat.storage.rivulet.schema.schema import Schema
14
+ from deltacat.experimental.storage.rivulet.reader.pyarrow_data_reader import (
15
+ RecordBatchRowIndex,
16
+ )
17
+ from deltacat.experimental.storage.rivulet.schema.schema import Schema
16
18
  import pyarrow.parquet as pq
17
19
  import pyarrow as pa
18
20
 
@@ -3,11 +3,11 @@ from typing import List, Any
3
3
  import pyarrow as pa
4
4
  from pyarrow.parquet import FileMetaData
5
5
 
6
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
7
- from deltacat.storage.rivulet import Schema
8
- from deltacat.storage.rivulet.arrow.serializer import ArrowSerializer
6
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
7
+ from deltacat.experimental.storage.rivulet import Schema
8
+ from deltacat.experimental.storage.rivulet.arrow.serializer import ArrowSerializer
9
9
 
10
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
10
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
11
11
 
12
12
 
13
13
  class ParquetDataSerializer(ArrowSerializer):
@@ -15,19 +15,30 @@ from typing import (
15
15
  AbstractSet,
16
16
  )
17
17
 
18
- from deltacat.storage.rivulet.metastore.delta import DeltaContext
19
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
20
- from deltacat.storage.rivulet.metastore.sst_interval_tree import (
18
+ from deltacat.experimental.storage.rivulet.metastore.delta import DeltaContext
19
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
20
+ from deltacat.experimental.storage.rivulet.metastore.sst_interval_tree import (
21
21
  OrderedBlockGroups,
22
22
  BlockGroup,
23
23
  Block,
24
24
  )
25
- from deltacat.storage.rivulet.reader.data_reader import RowAndKey, FileReader
26
- from deltacat.storage.rivulet.reader.dataset_metastore import DatasetMetastore
27
- from deltacat.storage.rivulet.reader.pyarrow_data_reader import ArrowDataReader
28
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
29
- from deltacat.storage.rivulet.reader.reader_type_registrar import FileReaderRegistrar
30
- from deltacat.storage.rivulet import Schema
25
+ from deltacat.experimental.storage.rivulet.reader.data_reader import (
26
+ RowAndKey,
27
+ FileReader,
28
+ )
29
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
30
+ DatasetMetastore,
31
+ )
32
+ from deltacat.experimental.storage.rivulet.reader.pyarrow_data_reader import (
33
+ ArrowDataReader,
34
+ )
35
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
36
+ QueryExpression,
37
+ )
38
+ from deltacat.experimental.storage.rivulet.reader.reader_type_registrar import (
39
+ FileReaderRegistrar,
40
+ )
41
+ from deltacat.experimental.storage.rivulet import Schema
31
42
  from deltacat import logs
32
43
 
33
44
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
@@ -13,9 +13,9 @@ from typing import (
13
13
  Optional,
14
14
  )
15
15
 
16
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
17
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
18
- from deltacat.storage.rivulet.schema.schema import Schema
16
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
17
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
18
+ from deltacat.experimental.storage.rivulet.schema.schema import Schema
19
19
 
20
20
  FILE_FORMAT = TypeVar("FILE_FORMAT")
21
21
  MEMORY_FORMAT = TypeVar("MEMORY_FORMAT")
@@ -3,9 +3,11 @@ from typing import Generator, Dict, Optional
3
3
  import pyarrow as pa
4
4
 
5
5
  from deltacat.storage.model.shard import Shard
6
- from deltacat.storage.rivulet.reader.dataset_reader import DatasetReader
7
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
8
- from deltacat.storage.rivulet import Schema
6
+ from deltacat.experimental.storage.rivulet.reader.dataset_reader import DatasetReader
7
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
8
+ QueryExpression,
9
+ )
10
+ from deltacat.experimental.storage.rivulet import Schema
9
11
 
10
12
 
11
13
  class DataScan:
@@ -5,18 +5,19 @@ from typing import Generator, Optional
5
5
  import pyarrow
6
6
  import pyarrow.fs
7
7
 
8
+ from deltacat.constants import REV_DIR_NAME
8
9
  from deltacat.storage import Delta
9
10
  from deltacat.storage.model.partition import PartitionLocator
10
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
11
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
11
12
  from deltacat.utils.filesystem import resolve_path_and_filesystem
12
- from deltacat.storage.rivulet.metastore.json_sst import JsonSstReader
13
- from deltacat.storage.rivulet.metastore.delta import (
13
+ from deltacat.experimental.storage.rivulet.metastore.json_sst import JsonSstReader
14
+ from deltacat.experimental.storage.rivulet.metastore.delta import (
14
15
  ManifestIO,
15
16
  DeltaContext,
16
17
  RivuletDelta,
17
18
  DeltacatManifestIO,
18
19
  )
19
- from deltacat.storage.rivulet.metastore.sst import SSTReader, SSTable
20
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTReader, SSTable
20
21
  from deltacat.utils.metafile_locator import _find_table_path
21
22
  from deltacat import logs
22
23
 
@@ -83,7 +84,7 @@ class DatasetMetastore:
83
84
  param: filesystem: The filesystem to search for the revisions.
84
85
  returns: The latest revision as a RivuletDelta.
85
86
  """
86
- rev_directory = posixpath.join(delta_dir, "rev")
87
+ rev_directory = posixpath.join(delta_dir, REV_DIR_NAME)
87
88
  revisions = filesystem.get_file_info(
88
89
  pyarrow.fs.FileSelector(rev_directory, allow_not_found=True)
89
90
  )
@@ -128,7 +129,7 @@ class DatasetMetastore:
128
129
  return
129
130
 
130
131
  # Locate "rev" directory inside the partition
131
- rev_directory = posixpath.join(partition_path, "rev")
132
+ rev_directory = posixpath.join(partition_path, REV_DIR_NAME)
132
133
  rev_info = filesystem.get_file_info(rev_directory)
133
134
 
134
135
  if rev_info.type != pyarrow.fs.FileType.Directory:
@@ -2,18 +2,20 @@ import logging
2
2
  from typing import Generator, Optional, Set, Type, TypeVar, Any
3
3
 
4
4
  from deltacat.storage.model.shard import Shard
5
- from deltacat.storage.rivulet.metastore.sst import SSTableRow, SSTable
6
- from deltacat.storage.rivulet.metastore.sst_interval_tree import (
5
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow, SSTable
6
+ from deltacat.experimental.storage.rivulet.metastore.sst_interval_tree import (
7
7
  BlockIntervalTree,
8
8
  OrderedBlockGroups,
9
9
  )
10
- from deltacat.storage.rivulet.reader.block_scanner import BlockScanner
11
- from deltacat.storage.rivulet.reader.dataset_metastore import (
10
+ from deltacat.experimental.storage.rivulet.reader.block_scanner import BlockScanner
11
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
12
12
  DatasetMetastore,
13
13
  ManifestAccessor,
14
14
  )
15
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
16
- from deltacat.storage.rivulet import Schema
15
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
16
+ QueryExpression,
17
+ )
18
+ from deltacat.experimental.storage.rivulet import Schema
17
19
 
18
20
  # The type of data returned to reader
19
21
  T = TypeVar("T")
@@ -4,7 +4,10 @@ from typing import Generator, Dict, Type, NamedTuple, List
4
4
 
5
5
  from pyarrow import RecordBatch
6
6
 
7
- from deltacat.storage.rivulet.reader.data_reader import DataReader, MEMORY_FORMAT
7
+ from deltacat.experimental.storage.rivulet.reader.data_reader import (
8
+ DataReader,
9
+ MEMORY_FORMAT,
10
+ )
8
11
  import pyarrow as pa
9
12
 
10
13