deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
File without changes
@@ -0,0 +1,16 @@
1
+ import unittest
2
+ from unittest.mock import MagicMock
3
+
4
+ from deltacat import TableDefinition
5
+
6
+
7
class TestTableDefinition(unittest.TestCase):
    """Unit tests for ``TableDefinition``."""

    def test_create_scan_plan_not_initialized(self):
        """``create_scan_plan`` is expected to raise ``RuntimeError`` when the
        definition was built from a table entry alone."""
        # Stand-in table exposing only the two attributes this test sets.
        table_stub = MagicMock(
            table_name="mock_table_name",
            namespace="mock_namespace",
        )
        definition = TableDefinition({"table": table_stub})

        with self.assertRaises(RuntimeError) as raised:
            definition.create_scan_plan()

        message = str(raised.exception)
        self.assertIn("ScanPlanner is not initialized", message)
@@ -0,0 +1,321 @@
1
+ import pytest
2
+ import tempfile
3
+ import shutil
4
+ import uuid
5
+ from unittest import mock
6
+ import os
7
+
8
+ from deltacat.catalog import (
9
+ CatalogProperties,
10
+ Catalog,
11
+ clear_catalogs,
12
+ get_catalog,
13
+ init,
14
+ init_local,
15
+ is_initialized,
16
+ put_catalog,
17
+ )
18
+ from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
19
+ from pyiceberg.catalog import Catalog as PyIcebergCatalog
20
+
21
+ from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
22
+
23
+ from pyiceberg.catalog import CatalogType
24
+
25
+
26
+ # Test module to mock a catalog implementation
27
class MockCatalogImpl:
    """Minimal stand-in for a catalog implementation, used by the tests below."""

    @staticmethod
    def initialize(config, *args, **kwargs):
        """Echo the initialization inputs back as the catalog's state.

        Args:
            config: Configuration object handed to the implementation.
            *args: Extra positional arguments, captured as a tuple.
            **kwargs: Extra keyword arguments, captured as a dict.

        Returns:
            A dict with ``"initialized"`` set to ``True`` together with the
            received ``"config"``, ``"args"`` and ``"kwargs"``.
        """
        # Mirrors the state a real implementation would hand back, so tests
        # can inspect exactly what was plumbed through to ``initialize``.
        return dict(initialized=True, config=config, args=args, kwargs=kwargs)
37
+
38
+
39
@pytest.fixture
def reset_catalogs():
    """Call ``clear_catalogs()`` before each test that requests this fixture.

    Function scope is pytest's default, so the fixture runs once per
    requesting test. Nothing is yielded and no teardown is performed.
    """
    clear_catalogs()
42
+
43
+
44
class TestCatalog:
    """Exercises the ``Catalog`` class directly, with no Ray initialization."""

    def test_catalog_constructor(self):
        """Constructing a ``Catalog`` keeps the impl and its ``initialize`` output."""
        catalog = Catalog(impl=MockCatalogImpl)
        assert catalog.impl == MockCatalogImpl

        # ``inner`` should hold what MockCatalogImpl.initialize produced when
        # no config, positional arguments, or keyword arguments were supplied.
        state = catalog.inner
        assert state["initialized"]
        assert state["config"] is None
        assert state["args"] == ()
        assert state["kwargs"] == {}

    def test_iceberg_factory_method(self):
        """``from_config`` yields a catalog backed by the patched Iceberg impl."""
        patch_target = "deltacat.experimental.catalog.iceberg.impl.IcebergCatalog"
        with mock.patch(patch_target) as patched_impl:
            # Give the patched implementation a recognizable initialize() result.
            patched_impl.initialize.return_value = {"iceberg": True}

            iceberg_config = IcebergCatalogConfig(
                type=CatalogType.IN_MEMORY, properties={}
            )
            catalog = IcebergCatalog.from_config(iceberg_config)

            # The catalog must point at the patched impl and carry its state.
            assert catalog.impl == patched_impl
            assert catalog.inner == {"iceberg": True}
77
+
78
+
79
class TestCatalogsIntegration:
    """Integration-style tests for registering and retrieving catalogs."""

    temp_dir = None

    @classmethod
    def setup_class(cls):
        """Create the shared temp directory and pre-initialize a mock catalog."""
        cls.temp_dir = tempfile.mkdtemp()
        # In a full test session other tests will already have initialized a
        # catalog. Initialize one here too, so that running this class
        # individually mimics running it alongside those other tests.
        init(Catalog(impl=MockCatalogImpl), force=True)

    @classmethod
    def teardown_class(cls):
        """Remove the shared temp directory if it was created and still exists."""
        if not cls.temp_dir or not os.path.exists(cls.temp_dir):
            return
        shutil.rmtree(cls.temp_dir)

    def test_init_single_catalog(self, reset_catalogs):
        """A lone catalog passed to ``init`` is returned by ``get_catalog()``."""
        init(Catalog(impl=MockCatalogImpl), force=True)
        assert is_initialized()

        # With no name given, the catalog we just registered should come back.
        default_catalog = get_catalog()
        assert default_catalog.impl == MockCatalogImpl
        assert default_catalog.inner["initialized"]

    def test_init_multiple_catalogs(self, reset_catalogs):
        """Several named catalogs passed to ``init`` are retrievable by name."""
        registry = {
            "catalog1": Catalog(impl=MockCatalogImpl, id=1),
            "catalog2": Catalog(impl=MockCatalogImpl, id=2),
        }
        init(registry, force=True)
        assert is_initialized()

        # Each name must resolve to the catalog constructed with the same id.
        for name, expected_id in (("catalog1", 1), ("catalog2", 2)):
            fetched = get_catalog(name)
            assert fetched.impl == MockCatalogImpl
            assert fetched.inner["kwargs"]["id"] == expected_id

    def test_init_with_default_catalog_name(self, reset_catalogs):
        """``init(..., default=name)`` makes that catalog the default one."""
        registry = {
            "catalog1": Catalog(impl=MockCatalogImpl, id=1),
            "catalog2": Catalog(impl=MockCatalogImpl, id=2),
        }
        init(registry, default="catalog2", force=True)

        # The unnamed lookup should resolve to catalog2.
        chosen = get_catalog()
        assert chosen.impl == MockCatalogImpl
        assert chosen.inner["kwargs"]["id"] == 2

    def test_put_catalog(self, reset_catalogs):
        """A catalog added via ``put_catalog`` after ``init`` is retrievable."""
        first = Catalog(impl=MockCatalogImpl, id=1)
        second = Catalog(impl=MockCatalogImpl, id=2)
        init({"catalog1": first}, force=True)

        put_catalog("catalog2", second)

        # Both the initial and the later-added catalog must be available.
        assert get_catalog("catalog1").inner["kwargs"]["id"] == 1
        assert get_catalog("catalog2").inner["kwargs"]["id"] == 2

    def test_put_catalog_that_already_exists(self, reset_catalogs):
        """Re-putting a name replaces the catalog unless ``fail_if_exists`` is set."""
        original = Catalog(impl=MockCatalogImpl, id=1)
        replacement = Catalog(impl=MockCatalogImpl, id=2)
        put_catalog("test_catalog", original, id=1)

        # Re-using the name without fail_if_exists must not raise, and the
        # newer catalog should be the one returned afterwards.
        put_catalog("test_catalog", replacement)
        assert get_catalog("test_catalog").inner["kwargs"]["id"] == 2

        # With fail_if_exists=True the same put is expected to be rejected.
        with pytest.raises(ValueError):
            put_catalog("test_catalog", original, fail_if_exists=True)

    def test_get_catalog_nonexistent(self, reset_catalogs):
        """Looking up an unregistered catalog name raises ``ValueError``."""
        init({"test_catalog": Catalog(impl=MockCatalogImpl)}, force=True)

        with pytest.raises(ValueError):
            get_catalog("nonexistent")

    def test_get_catalog_no_default(self, reset_catalogs):
        """``get_catalog()`` raises ``ValueError`` when several catalogs are
        registered and no default was specified."""
        registry = {
            "catalog1": Catalog(impl=MockCatalogImpl, id=1),
            "catalog2": Catalog(impl=MockCatalogImpl, id=2),
        }
        init(registry, force=True)

        with pytest.raises(ValueError):
            get_catalog()

    def test_init_local(self, reset_catalogs):
        """``init_local()`` registers a default catalog reachable as "default"."""
        init_local(force=True)
        assert is_initialized()

        # Reachable both as the unnamed default ...
        assert get_catalog() is not None

        # ... and explicitly under the name "default".
        by_name = get_catalog("default")
        assert by_name is not None
        assert by_name.impl.__name__ == "deltacat.catalog.main.impl"

    def test_init_local_with_path(self, reset_catalogs):
        """``init_local(path=...)`` registers a default catalog rooted at that path."""
        custom_path = tempfile.mkdtemp()
        try:
            init_local(path=custom_path, force=True)
            assert is_initialized()

            # Reachable as the unnamed default ...
            assert get_catalog() is not None

            # ... and explicitly under the name "default".
            by_name = get_catalog("default")
            assert by_name is not None
            assert by_name.impl.__name__ == "deltacat.catalog.main.impl"

            # The catalog's inner state must point at the directory we passed.
            assert by_name.inner.root == custom_path
        finally:
            # Always remove the scratch directory, even if an assertion failed.
            if os.path.exists(custom_path):
                shutil.rmtree(custom_path)

    def test_default_catalog_initialization(self, reset_catalogs):
        """A ``Catalog`` built from ``CatalogProperties`` can be registered and
        retrieved under an arbitrary name."""
        from deltacat.catalog.model.properties import CatalogProperties

        catalog_name = str(uuid.uuid4())
        properties = CatalogProperties(root=self.temp_dir)
        init({catalog_name: Catalog(properties)}, force=True)

        fetched = get_catalog(catalog_name)
        assert fetched.impl.__name__ == "deltacat.catalog.main.impl"
        assert isinstance(fetched.inner, CatalogProperties)
        assert fetched.inner.root == self.temp_dir

    def test_default_catalog_initialization_from_kwargs(self, reset_catalogs):
        """A ``Catalog`` built from keyword arguments alone exposes
        ``CatalogProperties`` carrying the given root."""
        catalog_name = str(uuid.uuid4())
        put_catalog(catalog_name, Catalog(root="test_root"))

        fetched = get_catalog(catalog_name)
        assert fetched.impl.__name__ == "deltacat.catalog.main.impl"
        assert isinstance(fetched.inner, CatalogProperties)
        assert fetched.inner.root == "test_root"

    def test_iceberg_catalog_initialization(self, reset_catalogs):
        """An Iceberg catalog created via ``from_config`` can be registered and
        retrieved by name."""
        catalog_name = str(uuid.uuid4())
        iceberg_config = IcebergCatalogConfig(
            type=CatalogType.IN_MEMORY, properties={"warehouse": self.temp_dir}
        )

        # Build through the module-level factory, then register by name.
        put_catalog(catalog_name, IcebergCatalog.from_config(iceberg_config))

        fetched = get_catalog(catalog_name)
        expected_impl = "deltacat.experimental.catalog.iceberg.impl"
        assert fetched.impl.__name__ == expected_impl
        assert isinstance(fetched.inner, PyIcebergCatalog)