deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,136 @@
1
+ import daft
2
+ from daft import Table, Identifier
3
+ import pytest
4
+ import uuid
5
+
6
+ from deltacat.catalog import Catalog as DeltaCATCatalog
7
+ from deltacat.catalog import CatalogProperties
8
+ from deltacat.experimental.daft.daft_catalog import DaftCatalog
9
+ import shutil
10
+ import tempfile
11
+
12
+ from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
13
+ from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
14
+
15
+ from pyiceberg.catalog import CatalogType
16
+
17
+
18
+ class TestCatalogIntegration:
19
+ @classmethod
20
+ def setup_method(cls):
21
+ cls.tmpdir = tempfile.mkdtemp()
22
+
23
+ @classmethod
24
+ def teardown_method(cls):
25
+ shutil.rmtree(cls.tmpdir)
26
+
27
+ def test_create_table(self):
28
+ """Demonstrate DeltaCAT-Daft integration."""
29
+ # Create a DeltaCAT catalog
30
+ catalog_props = CatalogProperties(root=self.tmpdir)
31
+ dc_catalog = DeltaCATCatalog(catalog_props)
32
+
33
+ # Use a random catalog name to prevent namespacing conflicts with other tests
34
+ # Convert the DeltaCAT catalog to a Daft catalog
35
+ catalog_name = f"deltacat_{uuid.uuid4().hex[:8]}"
36
+
37
+ daft_catalog = DaftCatalog(catalog=dc_catalog, name=catalog_name)
38
+
39
+ # Register the catalog with Daft's catalog system
40
+ daft.attach_catalog(daft_catalog, catalog_name)
41
+
42
+ # Create a sample DataFrame
43
+ df = daft.from_pydict({"id": [1, 2, 3], "value": ["a", "b", "c"]})
44
+ # Create then get table
45
+ daft_catalog.create_table(Identifier("example_table"), df)
46
+ table: Table = daft_catalog.get_table(Identifier("example_table"))
47
+ assert table.name == "example_table"
48
+
49
+ def test_get_table(self):
50
+ """Test getting a table from the DeltaCAT-Daft catalog."""
51
+ # Create a DeltaCAT catalog using the existing tmpdir
52
+ catalog_props = CatalogProperties(root=self.tmpdir)
53
+ dc_catalog = DeltaCATCatalog(catalog_props)
54
+
55
+ # Convert to DaftCatalog and attach to Daft
56
+ catalog_name = f"deltacat_{uuid.uuid4().hex[:8]}"
57
+ daft_catalog = DaftCatalog(dc_catalog, catalog_name)
58
+ daft.attach_catalog(daft_catalog, catalog_name)
59
+
60
+ # Create a sample DataFrame and table
61
+ df = daft.from_pydict({"id": [1, 2, 3], "value": ["a", "b", "c"]})
62
+ table_name = "test_get_table"
63
+ daft_catalog.create_table(Identifier(table_name), df)
64
+
65
+ # Get the table using different forms of identifiers
66
+ table2 = daft_catalog.get_table(Identifier(table_name))
67
+ assert table2 is not None
68
+ assert table2.name == table_name
69
+
70
+ # With an explicit namespace. DeltaCAT used the default namespace at creation since none was provided
71
+ table3 = daft_catalog.get_table(Identifier("default", table_name))
72
+ assert table3 is not None
73
+ assert table3.name == table_name
74
+
75
+ # Test non-existent table raises an appropriate error
76
+ with pytest.raises(ValueError, match="Table nonexistent_table not found"):
77
+ daft_catalog.get_table(Identifier("nonexistent_table"))
78
+
79
+
80
+ class TestIcebergCatalogIntegration:
81
+ @classmethod
82
+ def setup_method(cls):
83
+ cls.tmpdir = tempfile.mkdtemp()
84
+
85
+ @classmethod
86
+ def teardown_method(cls):
87
+ shutil.rmtree(cls.tmpdir)
88
+
89
+ def test_iceberg_catalog_integration(self):
90
+ # Create a unique warehouse path for this test
91
+ warehouse_path = self.tmpdir
92
+
93
+ # Configure an Iceberg catalog with the warehouse path
94
+ config = IcebergCatalogConfig(
95
+ type=CatalogType.SQL,
96
+ properties={
97
+ "warehouse": warehouse_path,
98
+ "uri": f"sqlite:////{warehouse_path}/sql-catalog.db",
99
+ },
100
+ )
101
+ dc_catalog = IcebergCatalog.from_config(config)
102
+
103
+ # Convert the DeltaCAT catalog to a Daft catalog
104
+ catalog_name = f"deltacat_iceberg_{uuid.uuid4().hex[:8]}"
105
+ daft_catalog = DaftCatalog(dc_catalog, catalog_name)
106
+ daft.attach_catalog(daft_catalog, catalog_name)
107
+
108
+ # Create a sample DataFrame
109
+ df = daft.from_pydict({"id": [1, 2, 3], "value": ["a", "b", "c"]})
110
+
111
+ # Create a table with the Daft catalog
112
+ table_name = "example_table"
113
+ namespace = "example_namespace"
114
+ daft_catalog.create_table(Identifier(namespace, table_name), df)
115
+
116
+ # Query that Iceberg table exists using PyIceberg
117
+ iceberg_catalog = dc_catalog.inner
118
+
119
+ # Verify the table exists in the Iceberg catalog
120
+ tables = iceberg_catalog.list_tables(namespace)
121
+
122
+ assert any(
123
+ t[0] == namespace and t[1] == table_name for t in tables
124
+ ), f"Table {table_name} not found in Iceberg catalog"
125
+
126
+ # Load the table from Iceberg catalog and verify its properties
127
+ iceberg_table = iceberg_catalog.load_table(f"{namespace}.{table_name}")
128
+
129
+ # Check that the schema matches our DataFrame
130
+ schema = iceberg_table.schema()
131
+ assert (
132
+ schema.find_field("id") is not None
133
+ ), "Field 'id' not found in table schema"
134
+ assert (
135
+ schema.find_field("value") is not None
136
+ ), "Field 'value' not found in table schema"
File without changes
@@ -3,9 +3,9 @@ import io
3
3
  import pytest
4
4
  from faker import Faker
5
5
 
6
- from deltacat.storage.rivulet.schema.datatype import Datatype
7
- from deltacat.storage.rivulet.mvp.Table import MvpTable
8
- from deltacat.storage.rivulet.schema.schema import Schema
6
+ from deltacat.experimental.storage.rivulet.schema.datatype import Datatype
7
+ from deltacat.experimental.storage.rivulet.mvp.Table import MvpTable
8
+ from deltacat.experimental.storage.rivulet.schema.schema import Schema
9
9
  import random
10
10
  import string
11
11
  from PIL import Image
@@ -2,8 +2,9 @@ import pytest
2
2
 
3
3
  import pyarrow as pa
4
4
  import pyarrow.parquet as pq
5
- from deltacat import Datatype, Dataset
6
- from deltacat.storage.rivulet import Schema, Field
5
+ from deltacat.experimental.storage.rivulet.schema.datatype import Datatype
6
+ from deltacat.experimental.storage.rivulet.dataset import Dataset
7
+ from deltacat.experimental.storage.rivulet import Schema, Field
7
8
  from deltacat.utils.metafile_locator import _find_partition_path
8
9
 
9
10
 
@@ -0,0 +1,80 @@
1
+ import pytest
2
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
3
+ QueryExpression,
4
+ )
5
+ from deltacat.experimental.storage.rivulet.shard.range_shard import RangeShard
6
+
7
+
8
+ @pytest.fixture
9
+ def sample_range_shard():
10
+ return RangeShard(min_key=5, max_key=15)
11
+
12
+
13
+ @pytest.fixture
14
+ def sample_string_shard():
15
+ return RangeShard(min_key="apple", max_key="zebra")
16
+
17
+
18
+ def test_with_key():
19
+ query = QueryExpression[int]()
20
+ query.with_key(5)
21
+ assert query.min_key == 5
22
+ assert query.max_key == 5
23
+ with pytest.raises(ValueError):
24
+ query.with_key(10)
25
+
26
+
27
+ def test_with_range():
28
+ query = QueryExpression[int]()
29
+ query.with_range(10, 5)
30
+ assert query.min_key == 5
31
+ assert query.max_key == 10
32
+ with pytest.raises(ValueError):
33
+ query.with_range(20, 25)
34
+
35
+
36
+ def test_matches_query():
37
+ query = QueryExpression[int]()
38
+ assert query.matches_query(5)
39
+ assert query.matches_query(-999)
40
+ query.with_range(10, 20)
41
+ assert query.matches_query(15)
42
+ assert not query.matches_query(25)
43
+ assert not query.matches_query(5)
44
+
45
+
46
+ def test_below_query_range():
47
+ query = QueryExpression[int]()
48
+ assert not query.below_query_range(5)
49
+ query.with_range(10, 20)
50
+ assert query.below_query_range(5)
51
+ assert not query.below_query_range(15)
52
+ assert not query.below_query_range(25)
53
+
54
+
55
+ def test_with_shard_existing_query(sample_range_shard):
56
+ query = QueryExpression[int]().with_range(10, 20)
57
+ new_query = QueryExpression.with_shard(query, sample_range_shard)
58
+ assert new_query.min_key == 5
59
+ assert new_query.max_key == 20
60
+
61
+
62
+ def test_with_shard_none_shard():
63
+ query = QueryExpression[int]().with_range(10, 20)
64
+ result = QueryExpression.with_shard(query, None)
65
+ assert result.min_key == 10
66
+ assert result.max_key == 20
67
+
68
+
69
+ def test_with_shard_existing_query_string(sample_string_shard):
70
+ query = QueryExpression[str]().with_range("banana", "yellow")
71
+ new_query = QueryExpression.with_shard(query, sample_string_shard)
72
+ assert new_query.min_key == "apple"
73
+ assert new_query.max_key == "zebra"
74
+
75
+
76
+ def test_query_expression_string_matches():
77
+ query = QueryExpression[str]().with_range("apple", "cat")
78
+ assert query.matches_query("apple")
79
+ assert query.matches_query("banana")
80
+ assert not query.matches_query("dog")
@@ -0,0 +1,119 @@
1
+ import pytest
2
+ from deltacat.tests.experimental.storage.rivulet.test_utils import verify_pyarrow_scan
3
+ import pyarrow as pa
4
+ from deltacat.experimental.storage.rivulet import Schema, Field, Datatype
5
+ from deltacat.experimental.storage.rivulet.dataset import Dataset
6
+
7
+
8
+ @pytest.fixture
9
+ def combined_schema():
10
+ return Schema(
11
+ fields=[
12
+ Field("id", Datatype.int64(), is_merge_key=True),
13
+ Field("name", Datatype.string()),
14
+ Field("age", Datatype.int32()),
15
+ Field("height", Datatype.int64()),
16
+ Field("gender", Datatype.string()),
17
+ ]
18
+ )
19
+
20
+
21
+ @pytest.fixture
22
+ def initial_schema():
23
+ return Schema(
24
+ fields=[
25
+ Field("id", Datatype.int32(), is_merge_key=True),
26
+ Field("name", Datatype.string()),
27
+ Field("age", Datatype.int32()),
28
+ ]
29
+ )
30
+
31
+
32
+ @pytest.fixture
33
+ def extended_schema():
34
+ return Schema(
35
+ fields=[
36
+ Field("id", Datatype.int64(), is_merge_key=True),
37
+ Field("height", Datatype.int64()),
38
+ Field("gender", Datatype.string()),
39
+ ]
40
+ )
41
+
42
+
43
+ @pytest.fixture
44
+ def sample_data():
45
+ return {
46
+ "id": [1, 2, 3],
47
+ "name": ["Alice", "Bob", "Charlie"],
48
+ "age": [25, 30, 35],
49
+ }
50
+
51
+
52
+ @pytest.fixture
53
+ def extended_data():
54
+ return {
55
+ "id": [1, 2, 3],
56
+ "height": [150, 160, 159],
57
+ "gender": ["male", "female", "male"],
58
+ }
59
+
60
+
61
+ @pytest.fixture
62
+ def combined_data(sample_data, extended_data):
63
+ data = sample_data.copy()
64
+ data.update(extended_data)
65
+ return data
66
+
67
+
68
+ @pytest.fixture
69
+ def parquet_data(tmp_path, sample_data):
70
+ parquet_path = tmp_path / "test.parquet"
71
+ table = pa.Table.from_pydict(sample_data)
72
+ pa.parquet.write_table(table, parquet_path)
73
+ return parquet_path
74
+
75
+
76
+ @pytest.fixture
77
+ def sample_dataset(parquet_data, tmp_path):
78
+ return Dataset.from_parquet(
79
+ name="test_dataset",
80
+ file_uri=str(parquet_data),
81
+ metadata_uri=str(tmp_path),
82
+ merge_keys="id",
83
+ )
84
+
85
+
86
+ def test_end_to_end_scan_with_multiple_schemas(
87
+ sample_dataset,
88
+ initial_schema,
89
+ extended_schema,
90
+ combined_schema,
91
+ sample_data,
92
+ extended_data,
93
+ combined_data,
94
+ ):
95
+ # Verify initial scan.
96
+ verify_pyarrow_scan(sample_dataset.scan().to_arrow(), initial_schema, sample_data)
97
+
98
+ # Add a new schema to the dataset
99
+ sample_dataset.add_schema(schema=extended_schema, schema_name="schema2")
100
+ new_data = [
101
+ {"id": 1, "height": 150, "gender": "male"},
102
+ {"id": 2, "height": 160, "gender": "female"},
103
+ {"id": 3, "height": 159, "gender": "male"},
104
+ ]
105
+ writer = sample_dataset.writer(schema_name="schema2")
106
+ writer.write(new_data)
107
+ writer.flush()
108
+
109
+ # Verify scan with the extended schema retrieves only extended data
110
+ verify_pyarrow_scan(
111
+ sample_dataset.scan(schema_name="schema2").to_arrow(),
112
+ extended_schema,
113
+ extended_data,
114
+ )
115
+
116
+ # Verify a combined scan retrieves data matching the combined schema
117
+ verify_pyarrow_scan(
118
+ sample_dataset.scan().to_arrow(), combined_schema, combined_data
119
+ )
@@ -0,0 +1,71 @@
1
+ import pytest
2
+ import os
3
+
4
+ from deltacat.experimental.storage.rivulet.metastore.delta import DeltacatManifestIO
5
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
6
+ DatasetMetastore,
7
+ )
8
+ from deltacat.experimental.storage.rivulet.schema.datatype import Datatype
9
+ from deltacat.experimental.storage.rivulet.dataset import Dataset
10
+ from deltacat.experimental.storage.rivulet import Schema
11
+
12
+
13
+ @pytest.fixture
14
+ def sample_schema():
15
+ return Schema(
16
+ {("id", Datatype.int32()), ("name", Datatype.string())},
17
+ "id",
18
+ )
19
+
20
+
21
+ @pytest.fixture
22
+ def sample_pydict():
23
+ return {"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]}
24
+
25
+
26
+ def test_dataset_metastore_e2e(sample_schema, tmp_path):
27
+ # Setup
28
+ dataset = Dataset(metadata_uri=tmp_path, dataset_name="dataset")
29
+ file_provider = dataset._file_provider
30
+ manifest_io = DeltacatManifestIO(file_provider.uri, dataset._locator)
31
+
32
+ # Create multiple manifests
33
+ manifests_data = [
34
+ {"sst_files": ["sst1.sst", "sst2.sst"], "level": 1},
35
+ {"sst_files": ["sst3.sst", "sst4.sst"], "level": 2},
36
+ ]
37
+
38
+ # Create SST files and manifests
39
+ manifest_paths = []
40
+ for manifest_data in manifests_data:
41
+ sst_files = manifest_data["sst_files"]
42
+ for sst in sst_files:
43
+ with open(os.path.join(file_provider.uri, sst), "w") as f:
44
+ f.write("test data")
45
+
46
+ manifest_path = manifest_io.write(
47
+ sst_files, sample_schema, manifest_data["level"]
48
+ )
49
+ manifest_paths.append(manifest_path)
50
+
51
+ # Initialize DatasetMetastore
52
+ metastore = DatasetMetastore(
53
+ file_provider.uri,
54
+ file_provider,
55
+ file_provider._locator,
56
+ manifest_io=manifest_io,
57
+ )
58
+
59
+ # Test manifest generation
60
+ manifest_accessors = list(metastore.generate_manifests())
61
+ assert len(manifest_accessors) == len(manifests_data)
62
+
63
+ # Verify each manifest accessor
64
+ for accessor in manifest_accessors:
65
+ assert accessor.context.schema == sample_schema
66
+ manifests_data_index = 0 if accessor.context.level == 1 else 1
67
+ assert accessor.context.level == manifests_data[manifests_data_index]["level"]
68
+ assert (
69
+ accessor.manifest.sst_files
70
+ == manifests_data[manifests_data_index]["sst_files"]
71
+ )
@@ -1,6 +1,6 @@
1
1
  import pytest
2
2
  import pyarrow as pa
3
- from deltacat.storage.rivulet import Schema, Field, Datatype
3
+ from deltacat.experimental.storage.rivulet import Schema, Field, Datatype
4
4
 
5
5
 
6
6
  def test_field_initialization():
@@ -0,0 +1,162 @@
1
+ import pytest
2
+ import pyarrow as pa
3
+ import pyarrow.parquet as pq
4
+
5
+ from deltacat.experimental.storage.rivulet.dataset import Dataset
6
+ from deltacat.experimental.storage.rivulet.shard.range_shard import (
7
+ RangeShard,
8
+ RangeShardingStrategy,
9
+ )
10
+
11
+
12
+ @pytest.fixture
13
+ def sample_numeric_dataset(tmp_path):
14
+ """
15
+ Creates a small Parquet file with integer-based min/max keys and
16
+ initializes a Dataset from it. Merge key is 'id' with values [1,2,3].
17
+ So min_key=1, max_key=3.
18
+ """
19
+ data = {
20
+ "id": [1, 2, 3],
21
+ "name": ["Alice", "Bob", "Charlie"],
22
+ "age": [25, 30, 35],
23
+ }
24
+ table = pa.Table.from_pydict(data)
25
+ parquet_file = tmp_path / "numeric_data.parquet"
26
+ pq.write_table(table, parquet_file)
27
+
28
+ ds = Dataset.from_parquet(
29
+ name="numeric_dataset",
30
+ file_uri=str(parquet_file),
31
+ metadata_uri=tmp_path,
32
+ merge_keys="id",
33
+ )
34
+ return ds
35
+
36
+
37
+ @pytest.fixture
38
+ def sample_string_dataset(tmp_path):
39
+ """
40
+ Creates a small Parquet file with a string-based merge key ('name')
41
+ and initializes a Dataset from it. Merge key has values
42
+ ['Alice', 'Bob', 'Charlie'] => min_key='Alice', max_key='Charlie'.
43
+ """
44
+ data = {
45
+ "name": ["Alice", "Charlie", "Bob"], # random order
46
+ "value": [100, 200, 150],
47
+ }
48
+ table = pa.Table.from_pydict(data)
49
+ parquet_file = tmp_path / "string_data.parquet"
50
+ pq.write_table(table, parquet_file)
51
+
52
+ ds = Dataset.from_parquet(
53
+ name="string_dataset",
54
+ file_uri=str(parquet_file),
55
+ metadata_uri=tmp_path,
56
+ merge_keys="name",
57
+ )
58
+ return ds
59
+
60
+
61
+ def test_shards(sample_numeric_dataset, sample_string_dataset):
62
+ shards = sample_numeric_dataset.shards(num_shards=2)
63
+
64
+ num_shards = len(list(shards))
65
+ assert num_shards == 2
66
+
67
+ shard = shards[0]
68
+ records = list(sample_numeric_dataset.scan(shard=shard).to_pydict())
69
+ num_records = len(records)
70
+ assert num_records == 2
71
+
72
+ assert records[0]["id"] == 1
73
+ assert records[0]["name"] == "Alice"
74
+
75
+ assert records[1]["id"] == 2
76
+ assert records[1]["name"] == "Bob"
77
+
78
+
79
+ def test_range_shard_repr():
80
+ shard = RangeShard(min_key=5, max_key=15)
81
+ assert repr(shard) == "Shard(type=range, min_key=5, max_key=15)"
82
+
83
+
84
+ def test_range_shard_split_integers():
85
+ shards = RangeShard.split(global_min=1, global_max=10, num_shards=2)
86
+ assert len(shards) == 2
87
+
88
+ assert shards[0].min_key == 1
89
+ assert shards[0].max_key == 5
90
+ assert shards[1].min_key == 6
91
+ assert shards[1].max_key == 10
92
+
93
+
94
+ def test_range_shard_split_integers_single_shard():
95
+ shards = RangeShard.split(global_min=1, global_max=10, num_shards=1)
96
+ assert len(shards) == 1
97
+ assert shards[0].min_key == 1
98
+ assert shards[0].max_key == 10
99
+
100
+
101
+ def test_range_shard_split_integers_same_value():
102
+ shards = RangeShard.split(global_min=5, global_max=5, num_shards=3)
103
+ assert len(shards) == 1
104
+
105
+
106
+ def test_range_sharding_strategy_integers(sample_numeric_dataset):
107
+ strategy = RangeShardingStrategy()
108
+ shards = list(
109
+ strategy.shards(num_shards=2, metastore=sample_numeric_dataset._metastore)
110
+ )
111
+
112
+ assert len(shards) == 2, "Expected 2 shards for dataset with keys [1,2,3]"
113
+
114
+ shard1, shard2 = shards
115
+ assert isinstance(shard1, RangeShard)
116
+ assert isinstance(shard2, RangeShard)
117
+ assert shard1.min_key == 1
118
+ assert shard1.max_key == 2
119
+ assert shard2.min_key == 3
120
+ assert shard2.max_key == 3
121
+
122
+
123
+ def test_range_sharding_strategy_integers_single_shard(sample_numeric_dataset):
124
+ strategy = RangeShardingStrategy()
125
+ shards = list(
126
+ strategy.shards(num_shards=1, metastore=sample_numeric_dataset._metastore)
127
+ )
128
+ assert len(shards) == 1
129
+ shard = shards[0]
130
+ assert shard.min_key == 1
131
+ assert shard.max_key == 3
132
+
133
+
134
+ def test_range_sharding_strategy_strings(sample_string_dataset):
135
+ strategy = RangeShardingStrategy()
136
+ shards = list(
137
+ strategy.shards(num_shards=2, metastore=sample_string_dataset._metastore)
138
+ )
139
+
140
+ assert len(shards) == 2, "Expected 2 shards for string-based dataset"
141
+ shard1, shard2 = shards
142
+ assert isinstance(shard1, RangeShard)
143
+ assert isinstance(shard2, RangeShard)
144
+
145
+ assert shard1.min_key == "Alice"
146
+ assert shard1.max_key < "Charlie"
147
+
148
+ assert shard2.min_key == shard1.max_key
149
+ assert shard2.max_key == "Charlie"
150
+
151
+
152
+ def test_range_sharding_strategy_strings_single_shard(sample_string_dataset):
153
+ strategy = RangeShardingStrategy()
154
+ shards = list(
155
+ strategy.shards(num_shards=1, metastore=sample_string_dataset._metastore)
156
+ )
157
+
158
+ assert len(shards) == 1
159
+
160
+ shard = shards[0]
161
+ assert shard.min_key == "Alice"
162
+ assert shard.max_key == "Charlie"
@@ -3,9 +3,11 @@ from deltacat.utils.metafile_locator import _find_partition_path
3
3
  import pytest
4
4
 
5
5
  import pyarrow as pa
6
- from deltacat.storage.rivulet import Schema, Field, Datatype
7
- from deltacat.storage.rivulet.dataset import Dataset
8
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
6
+ from deltacat.experimental.storage.rivulet import Schema, Field, Datatype
7
+ from deltacat.experimental.storage.rivulet.dataset import Dataset
8
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
9
+ QueryExpression,
10
+ )
9
11
 
10
12
 
11
13
  @pytest.fixture
@@ -57,7 +59,7 @@ def test_dataset_creation_metadata_structure(tmp_path):
57
59
  dataset = Dataset(dataset_name="test_dataset", metadata_uri=str(tmp_path))
58
60
 
59
61
  assert dataset._metadata_folder.startswith(".riv-meta")
60
- assert dataset._namespace == "DEFAULT"
62
+ assert dataset._namespace == "default"
61
63
  assert dataset.dataset_name == "test_dataset"
62
64
  assert dataset._metadata_path == str(tmp_path / ".riv-meta-test_dataset")
63
65
 
@@ -2,11 +2,11 @@ import os
2
2
 
3
3
  import pytest
4
4
 
5
- from deltacat import Dataset
6
- from deltacat.storage.rivulet.fs.file_store import FileStore
7
- from deltacat.storage.rivulet.schema.datatype import Datatype
8
- from deltacat.storage.rivulet.metastore.delta import DeltacatManifestIO
9
- from deltacat.storage.rivulet import Schema, Field
5
+ from deltacat.experimental.storage.rivulet.dataset import Dataset
6
+ from deltacat.experimental.storage.rivulet.fs.file_store import FileStore
7
+ from deltacat.experimental.storage.rivulet.schema.datatype import Datatype
8
+ from deltacat.experimental.storage.rivulet.metastore.delta import DeltacatManifestIO
9
+ from deltacat.experimental.storage.rivulet import Schema, Field
10
10
  import pyarrow as pa
11
11
  import pyarrow.parquet
12
12
 
@@ -2,16 +2,16 @@ from typing import List, FrozenSet, Dict
2
2
 
3
3
  import pytest
4
4
 
5
- from deltacat.storage.rivulet.metastore.delta import DeltaContext
6
- from deltacat.storage.rivulet.metastore.sst import SSTable, SSTableRow
7
- from deltacat.storage.rivulet.metastore.sst_interval_tree import (
5
+ from deltacat.experimental.storage.rivulet.metastore.delta import DeltaContext
6
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTable, SSTableRow
7
+ from deltacat.experimental.storage.rivulet.metastore.sst_interval_tree import (
8
8
  BlockIntervalTree,
9
9
  BlockGroup,
10
10
  OrderedBlockGroups,
11
11
  Block,
12
12
  )
13
- from deltacat.storage.rivulet.schema.datatype import Datatype
14
- from deltacat.storage.rivulet import Schema
13
+ from deltacat.experimental.storage.rivulet.schema.datatype import Datatype
14
+ from deltacat.experimental.storage.rivulet import Schema
15
15
 
16
16
 
17
17
  @pytest.fixture