deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
File without changes
@@ -0,0 +1,16 @@
1
+ import unittest
2
+ from unittest.mock import MagicMock
3
+
4
+ from deltacat import TableDefinition
5
+
6
+
7
+ class TestTableDefinition(unittest.TestCase):
8
+ def test_create_scan_plan_not_initialized(self):
9
+ mock_table = MagicMock()
10
+ mock_table.table_name = "mock_table_name"
11
+ mock_table.namespace = "mock_namespace"
12
+
13
+ table_definition = TableDefinition({"table": mock_table})
14
+ with self.assertRaises(RuntimeError) as context:
15
+ table_definition.create_scan_plan()
16
+ self.assertIn("ScanPlanner is not initialized", str(context.exception))
@@ -1,23 +1,24 @@
1
- import unittest
2
1
  import pytest
3
- import ray
4
2
  import tempfile
5
3
  import shutil
6
4
  import uuid
7
5
  from unittest import mock
8
6
  import os
9
7
 
10
- from deltacat.catalog import CatalogProperties
11
- from pyiceberg.catalog import Catalog as IcebergCatalog
12
-
13
- from deltacat.catalog.model.catalog import (
8
+ from deltacat.catalog import (
9
+ CatalogProperties,
14
10
  Catalog,
15
- init,
11
+ clear_catalogs,
16
12
  get_catalog,
17
- put_catalog,
13
+ init,
14
+ init_local,
18
15
  is_initialized,
16
+ put_catalog,
19
17
  )
20
- from deltacat.catalog.iceberg.iceberg_catalog_config import IcebergCatalogConfig
18
+ from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
19
+ from pyiceberg.catalog import Catalog as PyIcebergCatalog
20
+
21
+ from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
21
22
 
22
23
  from pyiceberg.catalog import CatalogType
23
24
 
@@ -25,75 +26,54 @@ from pyiceberg.catalog import CatalogType
25
26
  # Test module to mock a catalog implementation
26
27
  class MockCatalogImpl:
27
28
  @staticmethod
28
- def initialize(*args, **kwargs):
29
+ def initialize(config, *args, **kwargs):
29
30
  # Return some state that the catalog would normally maintain
30
- return {"initialized": True, "args": args, "kwargs": kwargs}
31
+ return {
32
+ "initialized": True,
33
+ "config": config,
34
+ "args": args,
35
+ "kwargs": kwargs,
36
+ }
31
37
 
32
38
 
33
39
  @pytest.fixture(scope="function")
34
- def reset_catalogs_ray_actor():
35
- """
36
- Setup and teardown for Ray environment for tests.
40
+ def reset_catalogs():
41
+ clear_catalogs()
37
42
 
38
- This will kill the actor all_catalogs, essentially wiping global state for catalogs
39
-
40
- NOTE: tests using this fixture must be run serially. As of April 7 2025, the unit test suite had various
41
- failures if run in parallel, in part because the state of all_catalogs in ray is shared across tests.
42
-
43
- NOTE: when using this fixture, ensure you pass ray_init_args={"ignore_reinit_error": True} into all
44
- functions which may re-initialize ray. This is because the production code checks the all_catalogs actor
45
- in order to determine whether it needs to initialize Ray
46
- """
47
- # Reset the global catalog_actor state before each test
48
- import deltacat.catalog.model.catalog as catalog_module
49
-
50
- # Initialize Ray if not already initialized
51
- if not ray.is_initialized():
52
- ray.init(ignore_reinit_error=True)
53
- yield
54
-
55
- # Clean up the actor if it exists
56
- if catalog_module.all_catalogs is not None:
57
- try:
58
- ray.kill(catalog_module.all_catalogs)
59
- except Exception:
60
- pass
61
- finally:
62
- catalog_module.all_catalogs = None
63
43
 
64
-
65
- class TestCatalog(unittest.TestCase):
44
+ class TestCatalog:
66
45
  """Tests for the Catalog class itself, without Ray initialization."""
67
46
 
68
47
  def test_catalog_constructor(self):
69
48
  """Test that the Catalog constructor correctly initializes with the given implementation."""
70
49
  catalog = Catalog(impl=MockCatalogImpl)
71
50
 
72
- self.assertEqual(catalog.impl, MockCatalogImpl)
51
+ assert catalog.impl == MockCatalogImpl
73
52
 
74
53
  # Check that inner state was correctly initialized
75
54
  # This just asserts that kwargs were plumbed through from Catalog constructor
76
- self.assertTrue(catalog.inner["initialized"])
77
- self.assertEqual(catalog.inner["args"], ())
78
- self.assertEqual(catalog.inner["kwargs"], {})
55
+ assert catalog.inner["initialized"]
56
+ assert catalog.inner["config"] is None
57
+ assert catalog.inner["args"] == ()
58
+ assert catalog.inner["kwargs"] == {}
79
59
 
80
60
  def test_iceberg_factory_method(self):
81
61
  """Test the iceberg factory method correctly creates an Iceberg catalog."""
82
62
  # Create a mock for the Iceberg catalog module
83
63
  with mock.patch(
84
- "deltacat.catalog.model.catalog.IcebergCatalog"
64
+ "deltacat.experimental.catalog.iceberg.impl.IcebergCatalog"
85
65
  ) as mock_iceberg_catalog:
86
66
  # Configure the mock to return a known value when initialize is called
87
67
  mock_iceberg_catalog.initialize.return_value = {"iceberg": True}
88
68
 
89
69
  # Create an Iceberg catalog config and invoke iceberg factory method
90
70
  config = IcebergCatalogConfig(type=CatalogType.IN_MEMORY, properties={})
91
- catalog = Catalog.iceberg(config)
71
+ catalog = IcebergCatalog.from_config(config)
92
72
 
93
73
  # Check that the implementation is set to iceberg_catalog
94
- self.assertEqual(catalog.impl, mock_iceberg_catalog)
74
+ assert catalog.impl == mock_iceberg_catalog
95
75
  # Check that the inner state is set to the output of initialize
96
- self.assertEqual(catalog.inner, {"iceberg": True})
76
+ assert catalog.inner == {"iceberg": True}
97
77
 
98
78
 
99
79
  class TestCatalogsIntegration:
@@ -109,8 +89,7 @@ class TestCatalogsIntegration:
109
89
  catalog = Catalog(impl=MockCatalogImpl)
110
90
  init(
111
91
  catalog,
112
- ray_init_args={"ignore_reinit_error": True},
113
- **{"force_reinitialize": True},
92
+ force=True,
114
93
  )
115
94
 
116
95
  @classmethod
@@ -118,17 +97,13 @@ class TestCatalogsIntegration:
118
97
  if cls.temp_dir and os.path.exists(cls.temp_dir):
119
98
  shutil.rmtree(cls.temp_dir)
120
99
 
121
- def test_init_single_catalog(self, reset_catalogs_ray_actor):
100
+ def test_init_single_catalog(self, reset_catalogs):
122
101
  """Test initializing a single catalog."""
123
102
 
124
103
  catalog = Catalog(impl=MockCatalogImpl)
125
104
 
126
105
  # Initialize with a single catalog and Ray init args including the namespace
127
- init(
128
- catalog,
129
- ray_init_args={"ignore_reinit_error": True},
130
- **{"force_reinitialize": True},
131
- )
106
+ init(catalog, force=True)
132
107
 
133
108
  assert is_initialized()
134
109
 
@@ -137,7 +112,7 @@ class TestCatalogsIntegration:
137
112
  assert retrieved_catalog.impl == MockCatalogImpl
138
113
  assert retrieved_catalog.inner["initialized"]
139
114
 
140
- def test_init_multiple_catalogs(self, reset_catalogs_ray_actor):
115
+ def test_init_multiple_catalogs(self, reset_catalogs):
141
116
  """Test initializing multiple catalogs."""
142
117
  # Create catalogs
143
118
  catalog1 = Catalog(impl=MockCatalogImpl, id=1)
@@ -145,11 +120,7 @@ class TestCatalogsIntegration:
145
120
 
146
121
  # Initialize with multiple catalogs and Ray init args including the namespace
147
122
  catalogs_dict = {"catalog1": catalog1, "catalog2": catalog2}
148
- init(
149
- catalogs_dict,
150
- ray_init_args={"ignore_reinit_error": True},
151
- **{"force_reinitialize": True},
152
- )
123
+ init(catalogs_dict, force=True)
153
124
 
154
125
  assert is_initialized()
155
126
 
@@ -162,7 +133,7 @@ class TestCatalogsIntegration:
162
133
  assert retrieved_catalog2.impl == MockCatalogImpl
163
134
  assert retrieved_catalog2.inner["kwargs"]["id"] == 2
164
135
 
165
- def test_init_with_default_catalog_name(self, reset_catalogs_ray_actor):
136
+ def test_init_with_default_catalog_name(self, reset_catalogs):
166
137
  """Test initializing with a specified default catalog name."""
167
138
  # Create catalogs
168
139
  catalog1 = Catalog(impl=MockCatalogImpl, id=1)
@@ -173,8 +144,7 @@ class TestCatalogsIntegration:
173
144
  init(
174
145
  catalogs_dict,
175
146
  default="catalog2",
176
- ray_init_args={"ignore_reinit_error": True},
177
- **{"force_reinitialize": True},
147
+ force=True,
178
148
  )
179
149
 
180
150
  # Get the default catalog and check it's catalog2
@@ -182,16 +152,12 @@ class TestCatalogsIntegration:
182
152
  assert default_catalog.impl == MockCatalogImpl
183
153
  assert default_catalog.inner["kwargs"]["id"] == 2
184
154
 
185
- def test_put_catalog(self, reset_catalogs_ray_actor):
155
+ def test_put_catalog(self, reset_catalogs):
186
156
  """Test adding a catalog after initialization."""
187
157
  # Initialize with a single catalog
188
158
  catalog1 = Catalog(impl=MockCatalogImpl, id=1)
189
159
  catalog2 = Catalog(impl=MockCatalogImpl, id=2)
190
- init(
191
- {"catalog1": catalog1},
192
- ray_init_args={"ignore_reinit_error": True},
193
- **{"force_reinitialize": True},
194
- )
160
+ init({"catalog1": catalog1}, force=True)
195
161
 
196
162
  # Add a second catalog
197
163
  put_catalog("catalog2", catalog2)
@@ -203,21 +169,19 @@ class TestCatalogsIntegration:
203
169
  retrieved_catalog2 = get_catalog("catalog2")
204
170
  assert retrieved_catalog2.inner["kwargs"]["id"] == 2
205
171
 
206
- def test_put_catalog_that_already_exists(self, reset_catalogs_ray_actor):
172
+ def test_put_catalog_that_already_exists(self, reset_catalogs):
207
173
  catalog = Catalog(impl=MockCatalogImpl, id=1)
208
174
  catalog2 = Catalog(impl=MockCatalogImpl, id=2)
209
175
  put_catalog(
210
176
  "test_catalog",
211
177
  catalog,
212
178
  id=1,
213
- ray_init_args={"ignore_reinit_error": True},
214
179
  )
215
180
 
216
181
  # Try to add another catalog with the same name. Should not error
217
182
  put_catalog(
218
183
  "test_catalog",
219
184
  catalog2,
220
- ray_init_args={"ignore_reinit_error": True},
221
185
  )
222
186
 
223
187
  retrieved_catalog = get_catalog("test_catalog")
@@ -228,40 +192,76 @@ class TestCatalogsIntegration:
228
192
  put_catalog(
229
193
  "test_catalog",
230
194
  catalog,
231
- ray_init_args={"ignore_reinit_error": True},
232
195
  fail_if_exists=True,
233
196
  )
234
197
 
235
- def test_get_catalog_nonexistent(self, reset_catalogs_ray_actor):
198
+ def test_get_catalog_nonexistent(self, reset_catalogs):
236
199
  """Test that trying to get a nonexistent catalog raises an error."""
237
200
  # Initialize with a catalog
238
201
  catalog = Catalog(impl=MockCatalogImpl)
239
- init(
240
- {"test_catalog": catalog},
241
- ray_init_args={"ignore_reinit_error": True},
242
- **{"force_reinitialize": True},
243
- )
202
+ init({"test_catalog": catalog}, force=True)
244
203
 
245
204
  # Try to get a nonexistent catalog
246
205
  with pytest.raises(ValueError):
247
206
  get_catalog("nonexistent")
248
207
 
249
- def test_get_catalog_no_default(self, reset_catalogs_ray_actor):
208
+ def test_get_catalog_no_default(self, reset_catalogs):
250
209
  """Test that trying to get the default catalog when none is set raises an error."""
251
210
  # Initialize with multiple catalogs but no default
252
211
  catalog1 = Catalog(impl=MockCatalogImpl, id=1)
253
212
  catalog2 = Catalog(impl=MockCatalogImpl, id=2)
254
- init(
255
- {"catalog1": catalog1, "catalog2": catalog2},
256
- ray_init_args={"ignore_reinit_error": True},
257
- **{"force_reinitialize": True},
258
- )
213
+ init({"catalog1": catalog1, "catalog2": catalog2}, force=True)
259
214
 
260
215
  # Try to get the default catalog
261
216
  with pytest.raises(ValueError):
262
217
  get_catalog()
263
218
 
264
- def test_default_catalog_initialization(self, reset_catalogs_ray_actor):
219
+ def test_init_local(self, reset_catalogs):
220
+ """Test that init_local() creates a default local catalog."""
221
+ # Initialize with default local catalog
222
+ init_local(force=True)
223
+
224
+ assert is_initialized()
225
+
226
+ # Should be able to get the default catalog
227
+ default_catalog = get_catalog()
228
+ assert default_catalog is not None
229
+
230
+ # The default catalog should be accessible by name "default"
231
+ named_catalog = get_catalog("default")
232
+ assert named_catalog is not None
233
+ assert named_catalog.impl.__name__ == "deltacat.catalog.main.impl"
234
+
235
+ def test_init_local_with_path(self, reset_catalogs):
236
+ """Test that init_local(path) creates a default local catalog with specified path."""
237
+ # Create a temporary directory for the test
238
+ custom_path = tempfile.mkdtemp()
239
+
240
+ try:
241
+ # Initialize with custom path
242
+ init_local(path=custom_path, force=True)
243
+
244
+ assert is_initialized()
245
+
246
+ # Should be able to get the default catalog
247
+ default_catalog = get_catalog()
248
+ assert default_catalog is not None
249
+
250
+ # The default catalog should be accessible by name "default"
251
+ named_catalog = get_catalog("default")
252
+ assert named_catalog is not None
253
+ assert named_catalog.impl.__name__ == "deltacat.catalog.main.impl"
254
+
255
+ # Verify the catalog is using the custom path
256
+ catalog_properties = named_catalog.inner
257
+ assert catalog_properties.root == custom_path
258
+
259
+ finally:
260
+ # Clean up the temporary directory
261
+ if os.path.exists(custom_path):
262
+ shutil.rmtree(custom_path)
263
+
264
+ def test_default_catalog_initialization(self, reset_catalogs):
265
265
  """Test that a Default catalog can be initialized and accessed using the factory method."""
266
266
  from deltacat.catalog.model.properties import CatalogProperties
267
267
 
@@ -270,15 +270,11 @@ class TestCatalogsIntegration:
270
270
  # Create the catalog properties
271
271
  config = CatalogProperties(root=self.temp_dir)
272
272
 
273
- # Create the catalog using the factory method
274
- catalog = Catalog.default(config)
273
+ # Create the catalog
274
+ catalog = Catalog(config)
275
275
 
276
276
  # Initialize DeltaCAT with this catalog
277
- init(
278
- {catalog_name: catalog},
279
- ray_init_args={"ignore_reinit_error": True},
280
- **{"force_reinitialize": True},
281
- )
277
+ init({catalog_name: catalog}, force=True)
282
278
 
283
279
  # Retrieve the catalog and verify it's the same one
284
280
  retrieved_catalog = get_catalog(catalog_name)
@@ -286,16 +282,14 @@ class TestCatalogsIntegration:
286
282
  assert isinstance(retrieved_catalog.inner, CatalogProperties)
287
283
  assert retrieved_catalog.inner.root == self.temp_dir
288
284
 
289
- def test_default_catalog_initialization_from_kwargs(self, reset_catalogs_ray_actor):
285
+ def test_default_catalog_initialization_from_kwargs(self, reset_catalogs):
290
286
 
291
287
  catalog_name = str(uuid.uuid4())
292
- # Initialize DeltaCAT with this catalog
293
- from deltacat.catalog.main import impl as DeltacatCatalog
294
288
 
289
+ # Initialize DeltaCAT with this catalog
295
290
  put_catalog(
296
291
  catalog_name,
297
- Catalog(DeltacatCatalog, **{"root": "test_root"}),
298
- ray_init_args={"ignore_reinit_error": True},
292
+ Catalog(root="test_root"),
299
293
  )
300
294
 
301
295
  # Retrieve the catalog and verify it's the same one
@@ -304,7 +298,7 @@ class TestCatalogsIntegration:
304
298
  assert isinstance(retrieved_catalog.inner, CatalogProperties)
305
299
  assert retrieved_catalog.inner.root == "test_root"
306
300
 
307
- def test_iceberg_catalog_initialization(self, reset_catalogs_ray_actor):
301
+ def test_iceberg_catalog_initialization(self, reset_catalogs):
308
302
  """Test that an Iceberg catalog can be initialized and accessed."""
309
303
  catalog_name = str(uuid.uuid4())
310
304
 
@@ -314,11 +308,14 @@ class TestCatalogsIntegration:
314
308
  )
315
309
 
316
310
  # Create the catalog using the factory method
317
- catalog = Catalog.iceberg(config)
311
+ catalog = IcebergCatalog.from_config(config)
318
312
 
319
- put_catalog(catalog_name, catalog, ray_init_args={"ignore_reinit_error": True})
313
+ put_catalog(catalog_name, catalog)
320
314
 
321
315
  # Retrieve the catalog and verify it's the same one
322
316
  retrieved_catalog = get_catalog(catalog_name)
323
- assert retrieved_catalog.impl.__name__ == "deltacat.catalog.iceberg.impl"
324
- assert isinstance(retrieved_catalog.inner, IcebergCatalog)
317
+ assert (
318
+ retrieved_catalog.impl.__name__
319
+ == "deltacat.experimental.catalog.iceberg.impl"
320
+ )
321
+ assert isinstance(retrieved_catalog.inner, PyIcebergCatalog)