deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,229 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Tuple, Optional
4
+
5
+ from deltacat.catalog.model.catalog import Catalog as DCCatalog
6
+ from deltacat.catalog.model.table_definition import TableDefinition
7
+
8
+ from daft.catalog import Catalog, Identifier, Table
9
+ from daft.dataframe import DataFrame
10
+ from daft.logical.schema import Schema
11
+ from deltacat.constants import DEFAULT_NAMESPACE
12
+
13
+
14
class DaftCatalog(Catalog):
    """
    Wrapper class to create a Daft catalog from a DeltaCAT catalog.

    The initialization of DeltaCAT and Daft catalogs is managed in `deltacat.catalog.catalog.py`. The user
    is just expected to initialize catalogs through the DeltaCAT public interface (init / put_catalog).

    TODO (mccember) in follow up PR we need to consider how to keep the DeltaCAT Catalogs class and Daft session in sync,
     and the user-facing entrypoint to get a Daft catalog

    This class itself expects a `Catalog` and will invoke the underlying implementation
    similar to `deltacat.catalog.delegate.py`, like:
        catalog.impl.create_namespace(namespace, inner=catalog.inner)

    We cannot route calls through the higher level catalog registry / delegate since this wrapper class is at a lower
    layer and does not manage registering catalogs.
    """

    def __init__(self, catalog: DCCatalog, name: str):
        """
        Wrap an existing DeltaCAT catalog.

        This constructor only stores the given references; it does not register
        or look up anything itself.

        :param catalog: DeltaCAT Catalog object. Its `impl` and `inner` attributes are used
            for every call delegated by this wrapper.

        :param name: Name returned by the `name` property of this Daft catalog.
        """
        self.dc_catalog = catalog
        self._name = name

    @property
    def name(self) -> str:
        """Return the catalog name given at construction time."""
        return self._name

    ###
    # create_*
    ###
    def create_namespace(self, identifier: Identifier | str):
        """Create a new namespace in the catalog."""
        if isinstance(identifier, Identifier):
            identifier = str(identifier)
        self.dc_catalog.impl.create_namespace(identifier, inner=self.dc_catalog.inner)

    def create_table(
        self, identifier: Identifier | str, source: Schema | DataFrame, **kwargs
    ) -> Table:
        """
        Create a DeltaCAT table via Daft catalog API

        End users calling create_table through the daft table API may provide kwargs which are plumbed through
        to the DeltaCAT catalog implementation's create_table.

        Note: as of 4/22, Daft create_table does not yet support kwargs. Tracked at: https://github.com/Eventual-Inc/Daft/issues/4195

        :param identifier: Daft table identifier. Sequence of strings of the format (table) or (namespace, table)
         or (namespace, table, table version). If this is a string, it is a dot delimited string of the same format.
         Identifiers can be created either like Identifier("namespace", "table", "version") OR
         Identifier.from_str("namespace.table.version")

        :param source: either a Daft DataFrame or a Daft Schema. Any other type is rejected.

        :param kwargs: extra keyword arguments forwarded to the DeltaCAT create_table call.

        :raises TypeError: if `source` is neither a Schema nor a DataFrame.
        """
        # Forward kwargs so that the documented pass-through actually happens.
        if isinstance(source, DataFrame):
            return self._create_table_from_df(identifier, source, **kwargs)
        if isinstance(source, Schema):
            return self._create_table_from_schema(identifier, source, **kwargs)
        # TypeError is a subclass of Exception, so existing `except Exception`
        # handlers keep working.
        raise TypeError(
            f"Expected table source to be Schema or DataFrame. Found: {type(source)}"
        )

    def _create_table_from_df(
        self, ident: Identifier | str, source: DataFrame, **kwargs
    ) -> Table:
        """
        Create a table from a DataFrame.

        Only the DataFrame's schema is used; no rows are written.
        """
        t = self._create_table_from_schema(ident, source.schema(), **kwargs)
        # TODO (mccember) append data upon creation
        return t

    def _create_table_from_schema(
        self, ident: Identifier | str, source: Schema, **kwargs
    ) -> Table:
        """
        Create a table from a schema.

        :param ident: table identifier, see `_extract_namespace_name_version`.
        :param source: Daft schema converted to a DeltaCAT schema before creation.
        :param kwargs: extra keyword arguments forwarded to the catalog implementation.
        :return: a DaftTable wrapping the created table definition.
        """
        namespace, name, version = self._extract_namespace_name_version(ident)

        # Convert the Daft schema to a DeltaCAT schema
        # This is a simplified version, would need to be enhanced for production
        deltacat_schema = self._convert_schema_to_deltacat(source)

        # Create the table in DeltaCAT
        # NOTE(review): get_table passes the version as `table_version=` while this
        # call uses `version=` - confirm which keyword the catalog impl expects.
        table_def = self.dc_catalog.impl.create_table(
            name,
            namespace=namespace,
            version=version,
            schema=deltacat_schema,
            inner=self.dc_catalog.inner,
            **kwargs,
        )

        return DaftTable._from_obj(table_def)

    ###
    # drop_*
    ###

    def drop_namespace(self, identifier: Identifier | str):
        """Not supported yet."""
        raise NotImplementedError()

    def drop_table(self, identifier: Identifier | str):
        """Not supported yet."""
        raise NotImplementedError()

    ###
    # get_*
    ###

    def get_table(self, identifier: Identifier | str, **kwargs) -> Table:
        """
        Look up a table through the DeltaCAT catalog implementation.

        :param identifier: table identifier, see `_extract_namespace_name_version`.
        :param kwargs: extra keyword arguments forwarded to the catalog implementation.
        :raises ValueError: if the implementation returns a falsy table definition.
        """
        namespace, table, version = self._extract_namespace_name_version(identifier)

        table_def = self.dc_catalog.impl.get_table(
            table,
            namespace=namespace,
            table_version=version,
            inner=self.dc_catalog.inner,
            **kwargs,
        )

        if not table_def:
            raise ValueError(f"Table {identifier} not found")

        return DaftTable._from_obj(table_def)

    ###
    # list_*
    ###

    def list_namespaces(self, pattern: str | None = None) -> list[Identifier]:
        """Not supported yet."""
        raise NotImplementedError("Not implemented")

    def list_tables(self, pattern: str | None = None) -> list[str]:
        """Not supported yet."""
        raise NotImplementedError("Not implemented")

    def _extract_namespace_name_version(
        self, ident: Identifier | str
    ) -> Tuple[str, str, Optional[str]]:
        """
        Extract namespace, name, version from identifier

        Returns a 3-tuple. If no namespace is provided, uses DeltaCAT default namespace

        :raises TypeError: if `ident` is neither a str nor an Identifier.
        :raises ValueError: if the identifier does not have 1, 2 or 3 parts.
        """
        if isinstance(ident, str):
            ident = Identifier.from_str(ident)

        # Fail loudly here instead of returning None, which previously surfaced
        # in callers as an opaque "cannot unpack" error.
        if not isinstance(ident, Identifier):
            raise TypeError(
                f"Expected table identifier to be an Identifier or str. Found: {type(ident)}"
            )

        part_count = len(ident)
        if part_count == 1:
            return (DEFAULT_NAMESPACE, ident[0], None)
        if part_count == 2:
            return (ident[0], ident[1], None)
        if part_count == 3:
            return (ident[0], ident[1], ident[2])
        raise ValueError(
            "Expected table identifier to be in format (table) or (namespace, table) "
            f"or (namespace, table, version). Found: {ident}"
        )

    def _convert_schema_to_deltacat(self, schema: Schema):
        """Convert Daft schema to DeltaCAT schema.
        For now, just use PyArrow schema as intermediary
        TODO look into how enhancements on schema can be propagated between Daft<=>DeltaCAT
        """
        from deltacat.storage.model.schema import Schema as DeltaCATSchema

        return DeltaCATSchema.of(schema=schema.to_pyarrow_schema())
197
+
198
class DaftTable(Table):
    """
    Daft table adapter backed by a DeltaCAT table definition.
    """

    # DeltaCAT table definition this Daft table delegates to.
    _inner: TableDefinition

    _read_options = set()
    _write_options = set()

    def __init__(self, inner: TableDefinition):
        """Wrap the given DeltaCAT table definition."""
        self._inner = inner

    @property
    def name(self) -> str:
        """Table name taken from the wrapped definition's table version."""
        table_version = self._inner.table_version
        return table_version.table_name

    @staticmethod
    def _from_obj(obj: object) -> DaftTable:
        """
        Adapt `obj` into a DaftTable.

        Only TableDefinition instances are accepted; anything else raises ValueError.
        """
        if not isinstance(obj, TableDefinition):
            raise ValueError(f"Unsupported DeltaCAT table type: {type(obj)}")
        # Allocate without running __init__, then attach the definition directly.
        wrapped = DaftTable.__new__(DaftTable)
        wrapped._inner = obj
        return wrapped

    def read(self, **options) -> DataFrame:
        """Not supported yet."""
        raise NotImplementedError("Not implemented")

    def write(self, df: DataFrame | object, mode: str = "append", **options):
        """Not supported yet."""
        raise NotImplementedError("Not implemented")
File without changes
File without changes
@@ -0,0 +1,129 @@
1
+ import logging
2
+ from typing import Optional, Any, Set
3
+
4
+ from pyiceberg.catalog import Catalog
5
+ from pyiceberg.table import Table
6
+ import deltacat.logs as logs
7
+
8
+ from deltacat.storage.model.scan.push_down import Pushdown, PartitionFilter
9
+ from deltacat.storage.model.scan.scan_plan import ScanPlan
10
+ from deltacat.storage.model.scan.scan_task import FileScanTask, DataFile
11
+ from deltacat.storage.util.scan_planner import ScanPlanner
12
+ from deltacat.experimental.storage.iceberg.impl import _try_load_iceberg_table
13
+ from deltacat.experimental.storage.iceberg.visitor import IcebergExpressionVisitor
14
+
15
+ # Initialize DeltaCAT logger
16
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
17
+
18
+
19
class IcebergScanPlanner(ScanPlanner):
    """
    Scan planner that builds DeltaCAT scan plans for tables loaded from a
    PyIceberg catalog.
    """

    # Attribute names under which an expression node stores ONE nested
    # expression. "left"/"right" are binary operands and "expr" is the
    # PartitionFilter wrapper. The rest mirror what IcebergExpressionVisitor
    # reads when converting: "operand" (NOT, IS NULL), "value" (IN, BETWEEN,
    # LIKE), "lower"/"upper" (BETWEEN) and "pattern" (LIKE). Without them a
    # column referenced inside those nodes would escape validation.
    _NESTED_EXPRESSION_ATTRS = (
        "left",
        "right",
        "expr",
        "operand",
        "value",
        "lower",
        "upper",
        "pattern",
    )

    def __init__(self, catalog: Catalog):
        self.catalog = catalog
        self.expression_visitor = IcebergExpressionVisitor()

    @classmethod
    def _child_expressions(cls, expr: Any) -> list:
        """
        Lists the expressions nested directly inside ``expr``.

        Args:
            expr: The expression node to inspect

        Returns:
            The nested expressions found on the single-child attributes
            followed by the members of ``expr.values`` (if present). A
            ``Literal`` yields no children.
        """
        from deltacat.storage.model.expression import Literal

        # A Literal's "value" is the raw constant (or PyArrow scalar), not a
        # nested expression, so it must not be traversed.
        if isinstance(expr, Literal):
            return []
        children = [
            getattr(expr, attr)
            for attr in cls._NESTED_EXPRESSION_ATTRS
            if hasattr(expr, attr)
        ]
        if hasattr(expr, "values"):
            children.extend(expr.values)
        return children

    @classmethod
    def _collect_filter_fields(cls, expr: Any) -> Set[str]:
        """
        Collects all field names referenced in the filter expression.

        Args:
            expr: The expression to analyze

        Returns:
            Set of field names referenced in the expression
        """
        fields = set()
        if hasattr(expr, "field"):  # Reference type expression
            fields.add(expr.field)
        for child in cls._child_expressions(expr):
            fields.update(cls._collect_filter_fields(child))
        return fields

    def create_scan_plan(
        self,
        table_name: str,
        namespace: Optional[str] = None,
        pushdown: Optional[Pushdown] = None,
    ) -> ScanPlan:
        """
        Plans a scan of an Iceberg table as a list of single-file scan tasks.

        Args:
            table_name: Name of the table to load from the catalog
            namespace: Namespace of the table
            pushdown: Optional pushdown; only its partition filter is used,
                and only when the table's partition spec has fields

        Returns:
            ScanPlan holding one FileScanTask per file planned by Iceberg

        Raises:
            ValueError: If the partition filter references a non-partition
                column
        """
        iceberg_table = _try_load_iceberg_table(
            self.catalog, namespace=namespace, table_name=table_name
        )

        # TODO: implement row, column predicate pushdown to Iceberg

        # The table is partitioned when its partition spec has any field
        is_partitioned = len(iceberg_table.spec().fields) > 0

        scan = iceberg_table.scan()
        if is_partitioned and pushdown and pushdown.partition_filter:
            filter_fields = self._collect_filter_fields(pushdown.partition_filter)
            logger.info(
                f"Pushdown partition filter is enabled, converting to Iceberg. Fields discovered in filter: {', '.join(sorted(filter_fields))}"
            )
            # Handle partition filter if present, DeltaCAT only supports partition-level filters right now
            iceberg_expression = self._convert_partition_filter(
                iceberg_table, pushdown.partition_filter
            )
            scan = scan.filter(iceberg_expression)

        file_scan_tasks = [
            FileScanTask([DataFile(scan_task.file.file_path)])
            for scan_task in scan.plan_files()
        ]
        return ScanPlan(file_scan_tasks)

    @classmethod
    def _validate_partition_references(
        cls, expr: Any, partition_cols: Set[str]
    ) -> None:
        """
        Validates that the expression only references partition columns.

        Args:
            expr: The expression to validate
            partition_cols: Set of valid partition column names

        Raises:
            ValueError: If the expression references a non-partition column
        """
        if hasattr(expr, "field"):  # Reference type expression
            if expr.field not in partition_cols:
                raise ValueError(
                    f"Filter references non-partition column: {expr.field}. "
                    f"Partition columns are: {partition_cols}"
                )
        # Recursively validate nested expressions
        for child in cls._child_expressions(expr):
            cls._validate_partition_references(child, partition_cols)

    def _convert_partition_filter(
        self, table: Table, partition_filter: PartitionFilter
    ):
        """
        Convert DeltaCAT partition filter to PyIceberg expression,
        validating that only partition columns are referenced.

        Args:
            table: Iceberg table whose partition spec names the valid columns
            partition_filter: DeltaCAT partition filter to convert

        Returns:
            The result of visiting the filter with the Iceberg expression
            visitor

        Raises:
            ValueError: If the filter references a non-partition column
        """
        partition_cols = {field.name for field in table.spec().fields}

        # Validate before converting
        self._validate_partition_references(partition_filter, partition_cols)

        # Convert to PyIceberg expression
        return self.expression_visitor.visit(partition_filter)
@@ -32,7 +32,7 @@ from deltacat.storage import (
32
32
  NamespaceProperties,
33
33
  )
34
34
  from deltacat.storage.model.manifest import Manifest
35
- from deltacat.storage.iceberg.model import (
35
+ from deltacat.experimental.storage.iceberg.model import (
36
36
  SchemaMapper,
37
37
  PartitionSchemeMapper,
38
38
  SortSchemeMapper,
@@ -41,7 +41,7 @@ from deltacat.storage.iceberg.model import (
41
41
  NamespaceMapper,
42
42
  TableMapper,
43
43
  )
44
- from deltacat.types.media import ContentType, StorageType, TableType
44
+ from deltacat.types.media import ContentType, StorageType, DatasetType
45
45
  from deltacat.utils.common import ReadKwargsProvider
46
46
 
47
47
  from pyiceberg.catalog import Catalog
@@ -281,7 +281,7 @@ def get_latest_delta(
281
281
 
282
282
  def download_delta(
283
283
  delta_like: Union[Delta, DeltaLocator],
284
- table_type: TableType = TableType.PYARROW,
284
+ table_type: DatasetType = DatasetType.PYARROW,
285
285
  storage_type: StorageType = StorageType.DISTRIBUTED,
286
286
  max_parallelism: Optional[int] = None,
287
287
  columns: Optional[List[str]] = None,
@@ -303,7 +303,7 @@ def download_delta(
303
303
  def download_delta_manifest_entry(
304
304
  delta_like: Union[Delta, DeltaLocator],
305
305
  entry_index: int,
306
- table_type: TableType = TableType.PYARROW,
306
+ table_type: DatasetType = DatasetType.PYARROW,
307
307
  columns: Optional[List[str]] = None,
308
308
  file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
309
309
  *args,
@@ -603,6 +603,8 @@ def stage_delta(
603
603
  properties: Optional[DeltaProperties] = None,
604
604
  s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
605
605
  content_type: ContentType = ContentType.PARQUET,
606
+ schema: Optional[Schema] = None,
607
+ sort_scheme_id: Optional[str] = None,
606
608
  *args,
607
609
  **kwargs,
608
610
  ) -> Delta:
@@ -66,6 +66,7 @@ from deltacat.storage import (
66
66
  TableVersionLocator,
67
67
  Transform,
68
68
  TransformName,
69
+ TruncateStrategy,
69
70
  TruncateTransform,
70
71
  TruncateTransformParameters,
71
72
  UnknownTransform,
@@ -227,7 +228,10 @@ class TransformMapper(ModelMapper[IcebergTransform, Transform]):
227
228
  )
228
229
  if isinstance(obj, IcebergTruncateTransform):
229
230
  return TruncateTransform.of(
230
- TruncateTransformParameters.of(width=obj.width),
231
+ TruncateTransformParameters.of(
232
+ width=obj.width,
233
+ truncate_strategy=TruncateStrategy.ICEBERG,
234
+ ),
231
235
  )
232
236
  return UnknownTransform.of()
233
237
 
@@ -323,7 +327,7 @@ class PartitionSchemeMapper(ModelMapper[PartitionSpec, PartitionScheme]):
323
327
  elif not schema:
324
328
  err_msg = "Schema is required for Partition Spec conversion."
325
329
  raise ValueError(err_msg)
326
- keys = [PartitionKeyMapper.map(field, schema) for field in obj.fields]
330
+ keys = [PartitionKeyMapper.map(field, schema) for field in obj.fields] or None
327
331
  return PartitionScheme.of(
328
332
  keys=keys,
329
333
  name=name,
@@ -425,7 +429,7 @@ class SortSchemeMapper(ModelMapper[IcebergSortOrder, SortScheme]):
425
429
  elif not schema:
426
430
  err_msg = "Schema is required for Sort Order conversion."
427
431
  raise ValueError(err_msg)
428
- keys = [SortKeyMapper.map(field, schema) for field in obj.fields]
432
+ keys = [SortKeyMapper.map(field, schema) for field in obj.fields] or None
429
433
  return SortScheme.of(
430
434
  keys=keys,
431
435
  name=name,
@@ -0,0 +1,119 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import pyarrow
5
+ from deltacat.storage.model.scan.push_down import PartitionFilter
6
+
7
+ import deltacat.logs as logs
8
+ from deltacat.storage.model.expression import Reference, Literal
9
+ from deltacat.storage.model.expression.visitor import ExpressionVisitor
10
+ from pyiceberg.expressions import (
11
+ And,
12
+ Or,
13
+ Not,
14
+ EqualTo,
15
+ NotEqualTo,
16
+ GreaterThan,
17
+ GreaterThanOrEqual,
18
+ LessThan,
19
+ LessThanOrEqual,
20
+ IsNull,
21
+ In,
22
+ )
23
+
24
+ # Initialize DeltaCAT logger
25
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
26
+
27
+
28
class IcebergExpressionVisitor(ExpressionVisitor[None, Any]):
    """
    Visitor that translates DeltaCAT expressions to PyIceberg expressions.

    A predicate that cannot be translated (currently only LIKE) is
    represented as ``None``. The boolean combinators propagate that marker
    conservatively so the resulting filter never excludes rows the original
    filter would have kept, and a ``PartitionFilter`` that is entirely
    untranslatable becomes ``AlwaysTrue()`` instead of ``None``.
    """

    def visit(self, expr, context=None):
        # Handle PartitionFilter by extracting and visiting the inner expression
        if isinstance(expr, PartitionFilter):
            from pyiceberg.expressions import AlwaysTrue

            converted = self.visit(expr.expr, context)
            # None means nothing could be pushed down; hand callers a real
            # match-everything expression rather than None.
            return AlwaysTrue() if converted is None else converted
        # Handle all other expressions using the parent's visit method
        return super().visit(expr, context)

    def visit_reference(self, expr: Reference, context=None) -> str:
        # References are passed to PyIceberg by field name.
        return expr.field

    def visit_literal(self, expr: Literal, context=None) -> Any:
        # Convert PyArrow scalar to Python native type
        return (
            expr.value.as_py() if isinstance(expr.value, pyarrow.Scalar) else expr.value
        )

    def visit_and(self, expr, context=None):
        left = self.visit(expr.left, context)
        right = self.visit(expr.right, context)
        # Dropping an untranslatable side of a conjunction only widens the
        # match, so keep whichever side did translate (or None if neither).
        if left is None:
            return right
        if right is None:
            return left
        return And(left, right)

    def visit_or(self, expr, context=None):
        left = self.visit(expr.left, context)
        right = self.visit(expr.right, context)
        # If either side is untranslatable the disjunction could match
        # anything, so the whole OR is untranslatable.
        if left is None or right is None:
            return None
        return Or(left, right)

    def visit_not(self, expr, context=None):
        operand = self.visit(expr.operand, context)
        # Negating an untranslatable predicate is itself untranslatable.
        if operand is None:
            return None
        return Not(operand)

    def visit_equal(self, expr, context=None):
        left = self.visit(expr.left, context)
        right = self.visit(expr.right, context)
        return EqualTo(left, right)

    def visit_not_equal(self, expr, context=None):
        left = self.visit(expr.left, context)
        right = self.visit(expr.right, context)
        return NotEqualTo(left, right)

    def visit_greater_than(self, expr, context=None):
        left = self.visit(expr.left, context)
        right = self.visit(expr.right, context)
        return GreaterThan(left, right)

    def visit_greater_than_equal(self, expr, context=None):
        left = self.visit(expr.left, context)
        right = self.visit(expr.right, context)
        return GreaterThanOrEqual(left, right)

    def visit_less_than(self, expr, context=None):
        left = self.visit(expr.left, context)
        right = self.visit(expr.right, context)
        return LessThan(left, right)

    def visit_less_than_equal(self, expr, context=None):
        left = self.visit(expr.left, context)
        right = self.visit(expr.right, context)
        return LessThanOrEqual(left, right)

    def visit_is_null(self, expr, context=None):
        operand = self.visit(expr.operand, context)
        return IsNull(operand)

    def visit_in(self, expr, context=None):
        value = self.visit(expr.value, context)
        values = [self.visit(v, context) for v in expr.values]
        return In(value, values)

    def visit_between(self, expr, context=None):
        # BETWEEN is expressed as an inclusive range: lower <= value <= upper.
        value = self.visit(expr.value, context)
        lower = self.visit(expr.lower, context)
        upper = self.visit(expr.upper, context)
        return And(GreaterThanOrEqual(value, lower), LessThanOrEqual(value, upper))

    # PyIceberg does not have a direct equivalent of LIKE
    def visit_like(self, expr, context=None):
        value = self.visit(expr.value, context)
        pattern = self.visit(expr.pattern, context)
        logger.warning(
            f"LIKE operation is not supported in PyIceberg. Ignoring LIKE filter: {value} LIKE '{pattern}'. "
            "This may result in more data being returned than expected."
        )
        # None marks this predicate as untranslatable; visit_and / visit_or /
        # visit_not and the PartitionFilter branch of visit() handle it.
        return None
@@ -0,0 +1,11 @@
1
+ from deltacat.experimental.storage.rivulet.schema.schema import Schema
2
+ from deltacat.experimental.storage.rivulet.schema.schema import Field
3
+ from deltacat.experimental.storage.rivulet.dataset import Dataset
4
+ from deltacat.experimental.storage.rivulet.schema.schema import Datatype
5
+
6
+ __all__ = [
7
+ "Schema",
8
+ "Field",
9
+ "Dataset",
10
+ "Datatype",
11
+ ]
@@ -2,10 +2,13 @@ from abc import ABC, abstractmethod
2
2
  from typing import Iterator, List, Any
3
3
  import pyarrow as pa
4
4
 
5
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
6
- from deltacat.storage.rivulet import Schema
7
- from deltacat.storage.rivulet.serializer import DataSerializer, MEMTABLE_DATA
8
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
5
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
6
+ from deltacat.experimental.storage.rivulet import Schema
7
+ from deltacat.experimental.storage.rivulet.serializer import (
8
+ DataSerializer,
9
+ MEMTABLE_DATA,
10
+ )
11
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
9
12
 
10
13
 
11
14
  class ArrowSerializer(DataSerializer, ABC):
@@ -24,19 +24,23 @@ from deltacat.storage.model.shard import Shard, ShardingStrategy
24
24
  from deltacat.storage.model.stream import Stream, StreamLocator
25
25
  from deltacat.storage.model.transaction import TransactionOperationList
26
26
  from deltacat.storage.model.types import CommitState, StreamFormat
27
- from deltacat.storage.rivulet.fs.file_store import FileStore
28
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
29
- from deltacat.storage.rivulet.reader.dataset_metastore import DatasetMetastore
30
- from deltacat.storage.rivulet import Schema, Field
27
+ from deltacat.experimental.storage.rivulet.fs.file_store import FileStore
28
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
29
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
30
+ DatasetMetastore,
31
+ )
32
+ from deltacat.experimental.storage.rivulet import Schema, Field
31
33
  from deltacat.utils.export import export_dataset
32
34
  from .schema.schema import Datatype
33
35
 
34
- from deltacat.storage.rivulet.reader.data_scan import DataScan
35
- from deltacat.storage.rivulet.reader.dataset_reader import DatasetReader
36
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
36
+ from deltacat.experimental.storage.rivulet.reader.data_scan import DataScan
37
+ from deltacat.experimental.storage.rivulet.reader.dataset_reader import DatasetReader
38
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
39
+ QueryExpression,
40
+ )
37
41
 
38
- from deltacat.storage.rivulet.writer.dataset_writer import DatasetWriter
39
- from deltacat.storage.rivulet.writer.memtable_dataset_writer import (
42
+ from deltacat.experimental.storage.rivulet.writer.dataset_writer import DatasetWriter
43
+ from deltacat.experimental.storage.rivulet.writer.memtable_dataset_writer import (
40
44
  MemtableDatasetWriter,
41
45
  )
42
46
 
@@ -48,7 +52,6 @@ from deltacat.storage import (
48
52
  TableVersion,
49
53
  TableVersionLocator,
50
54
  Transaction,
51
- TransactionType,
52
55
  TransactionOperation,
53
56
  TransactionOperationType,
54
57
  )
@@ -295,7 +298,6 @@ class Dataset:
295
298
  partition_values=DEFAULT_PARTITION_VALUES,
296
299
  partition_id=self._partition_id,
297
300
  ),
298
- schema=None,
299
301
  content_types=None,
300
302
  ),
301
303
  ]
@@ -308,7 +310,6 @@ class Dataset:
308
310
  ]
309
311
 
310
312
  transaction = Transaction.of(
311
- txn_type=TransactionType.APPEND,
312
313
  txn_operations=TransactionOperationList.of(txn_operations),
313
314
  )
314
315