deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,248 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Generic, TypeVar, Callable, Optional
3
+ from functools import singledispatchmethod
4
+ import re
5
+
6
+ from deltacat.storage.model.expression import (
7
+ Expression,
8
+ Reference,
9
+ Literal,
10
+ BinaryExpression,
11
+ UnaryExpression,
12
+ In,
13
+ Between,
14
+ Like,
15
+ )
16
+
17
+
18
+ C = TypeVar("C") # Type of the optional context object passed to every visit call
19
+ R = TypeVar("R") # Type of the result returned by every visit call
20
+
21
+
22
class ExpressionVisitor(ABC, Generic[C, R]):
    """
    Visitor pattern for deltacat expressions.

    This base class provides two ways to implement visitors:
    1. Using a procedure dictionary (_PROCEDURES) - for simple, declarative visitors
    2. Using specialized visit_xyz methods with snake_case naming - for more control

    Subclasses need only implement visit_reference and visit_literal, plus either:
    - Define _PROCEDURES dictionary with functions for handling different expression types
    - Implement specific visit_xyz methods (using snake_case) for individual expressions

    Dispatch order for binary and unary expressions:
    1. A specialized ``visit_<snake_case_type>(expr, context)`` method. It
       receives the unvisited expression, so it is responsible for visiting
       the children itself.
    2. A ``_PROCEDURES`` entry keyed by the expression's class name, applied
       to the already-visited children.
    3. The generic ``visit_binary_expression`` / ``visit_unary_expression``
       fallback, which also receives the already-visited children.
    """

    # Default procedure dictionary for subclasses to override
    _PROCEDURES: Dict[str, Callable] = {}

    # Zero-width match in front of every uppercase letter except the first
    # character of the string. Compiled once instead of on every dispatch.
    _SNAKE_CASE_BOUNDARY = re.compile(r"(?<!^)(?=[A-Z])")

    # The generic fallbacks take pre-visited children as extra positional
    # arguments, so they must never be picked as a specialized
    # ``(expr, context)`` handler, even when the expression class is literally
    # named BinaryExpression / UnaryExpression.
    _GENERIC_FALLBACK_NAMES = frozenset(
        {"visit_binary_expression", "visit_unary_expression"}
    )

    def __init__(self):
        """
        Initialize visitor and validate required methods.

        Raises:
            NotImplementedError: If visit_reference or visit_literal is
                missing or not callable on this instance.
        """
        for required in ("visit_reference", "visit_literal"):
            if not callable(getattr(self, required, None)):
                raise NotImplementedError(f"Subclasses must implement {required}")
        self._setup_default_procedure_handlers()

    def _to_snake_case(self, name: str) -> str:
        """Convert PascalCase or camelCase to snake_case."""
        return self._SNAKE_CASE_BOUNDARY.sub("_", name).lower()

    def _setup_default_procedure_handlers(self):
        """
        Set up default procedure application methods if not overridden.

        Each ``_apply_*`` hook receives the procedure looked up in
        _PROCEDURES followed by the visited operands. The defaults simply
        call the procedure with those operands.
        """
        defaults = {
            "_apply_binary": lambda proc, left, right: proc(left, right),
            "_apply_unary": lambda proc, operand: proc(operand),
            "_apply_in": lambda proc, value, values: proc(value, values),
            "_apply_between": lambda proc, value, lower, upper: proc(
                value, lower, upper
            ),
            "_apply_like": lambda proc, value, pattern: proc(value, pattern),
        }
        for hook_name, default_hook in defaults.items():
            # Only fill in hooks the subclass did not provide as a callable.
            if not callable(getattr(self, hook_name, None)):
                setattr(self, hook_name, default_hook)

    def _specialized_handler(self, expr_type: str) -> Optional[Callable]:
        """
        Look up the ``visit_<snake_case_type>`` method for an expression type.

        Args:
            expr_type: Class name of the expression being visited.

        Returns:
            The handler taking ``(expr, context)``, or None when no such
            attribute exists or the name is one of the generic fallbacks.
        """
        method_name = f"visit_{self._to_snake_case(expr_type)}"
        if method_name in self._GENERIC_FALLBACK_NAMES:
            return None
        return getattr(self, method_name, None)

    @singledispatchmethod
    def visit(self, expr: Expression, context: Optional[C] = None) -> R:
        """
        Generic visit method that dispatches to specific methods based on expression type.

        Args:
            expr: The expression to visit
            context: Optional context to pass through the visitor

        Returns:
            Result of visiting the expression

        Raises:
            NotImplementedError: If no visit method is registered for the
                type of ``expr``.
        """
        expr_type = type(expr).__name__
        raise NotImplementedError(f"No visit method for type {expr_type}")

    @visit.register
    def _visit_reference(self, expr: Reference, context: Optional[C] = None) -> R:
        """Visit a Reference expression."""
        return self.visit_reference(expr, context)

    @visit.register
    def _visit_literal(self, expr: Literal, context: Optional[C] = None) -> R:
        """Visit a Literal expression."""
        return self.visit_literal(expr, context)

    @visit.register
    def _visit_binary(self, expr: BinaryExpression, context: Optional[C] = None) -> R:
        """
        Visit a binary expression using method specialization or procedures.

        Raises:
            NotImplementedError: If no specialized method, procedure, or
                fallback handles the expression type.
        """
        expr_type = type(expr).__name__

        # A specialized handler gets the raw expression and visits its own
        # children, so visiting them here first would only duplicate work.
        handler = self._specialized_handler(expr_type)
        if handler is not None:
            return handler(expr, context)

        left_result = self.visit(expr.left, context)
        right_result = self.visit(expr.right, context)

        if expr_type in self._PROCEDURES:
            return self._apply_binary(
                self._PROCEDURES[expr_type], left_result, right_result
            )

        try:
            return self.visit_binary_expression(
                expr, left_result, right_result, context
            )
        except NotImplementedError as err:
            raise NotImplementedError(f"No handler for {expr_type}") from err

    @visit.register
    def _visit_unary(self, expr: UnaryExpression, context: Optional[C] = None) -> R:
        """
        Visit a unary expression using method specialization or procedures.

        Raises:
            NotImplementedError: If no specialized method, procedure, or
                fallback handles the expression type.
        """
        expr_type = type(expr).__name__

        # See _visit_binary: the operand is only visited when the result is
        # actually consumed by a procedure or the generic fallback.
        handler = self._specialized_handler(expr_type)
        if handler is not None:
            return handler(expr, context)

        operand_result = self.visit(expr.operand, context)

        if expr_type in self._PROCEDURES:
            return self._apply_unary(self._PROCEDURES[expr_type], operand_result)

        try:
            return self.visit_unary_expression(expr, operand_result, context)
        except NotImplementedError as err:
            raise NotImplementedError(f"No handler for {expr_type}") from err

    @visit.register
    def _visit_in(self, expr: In, context: Optional[C] = None) -> R:
        """
        Visit an In expression.

        Raises:
            NotImplementedError: If there is neither a visit_in method nor
                an "In" procedure.
        """
        if hasattr(self, "visit_in"):
            return self.visit_in(expr, context)

        if "In" in self._PROCEDURES:
            value_result = self.visit(expr.value, context)
            values_results = [self.visit(v, context) for v in expr.values]
            return self._apply_in(self._PROCEDURES["In"], value_result, values_results)

        raise NotImplementedError("No handler for In expression")

    @visit.register
    def _visit_between(self, expr: Between, context: Optional[C] = None) -> R:
        """
        Visit a Between expression.

        Raises:
            NotImplementedError: If there is neither a visit_between method
                nor a "Between" procedure.
        """
        if hasattr(self, "visit_between"):
            return self.visit_between(expr, context)

        if "Between" in self._PROCEDURES:
            value_result = self.visit(expr.value, context)
            lower_result = self.visit(expr.lower, context)
            upper_result = self.visit(expr.upper, context)
            return self._apply_between(
                self._PROCEDURES["Between"], value_result, lower_result, upper_result
            )

        raise NotImplementedError("No handler for Between expression")

    @visit.register
    def _visit_like(self, expr: Like, context: Optional[C] = None) -> R:
        """
        Visit a Like expression.

        Raises:
            NotImplementedError: If there is neither a visit_like method nor
                a "Like" procedure.
        """
        if hasattr(self, "visit_like"):
            return self.visit_like(expr, context)

        if "Like" in self._PROCEDURES:
            value_result = self.visit(expr.value, context)
            pattern_result = self.visit(expr.pattern, context)
            return self._apply_like(
                self._PROCEDURES["Like"], value_result, pattern_result
            )

        raise NotImplementedError("No handler for Like expression")

    @abstractmethod
    def visit_reference(self, expr: Reference, context: Optional[C] = None) -> R:
        """Visit a Reference expression."""
        pass

    @abstractmethod
    def visit_literal(self, expr: Literal, context: Optional[C] = None) -> R:
        """Visit a Literal expression."""
        pass

    def visit_binary_expression(
        self, expr: BinaryExpression, left: R, right: R, context: Optional[C] = None
    ) -> R:
        """
        Default fallback handler for binary expressions.

        Args:
            expr: The binary expression being visited.
            left: Result of visiting the left child.
            right: Result of visiting the right child.
            context: Optional context passed through the visitor.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(f"No handler for {type(expr).__name__}")

    def visit_unary_expression(
        self, expr: UnaryExpression, operand: R, context: Optional[C] = None
    ) -> R:
        """
        Default fallback handler for unary expressions.

        Args:
            expr: The unary expression being visited.
            operand: Result of visiting the operand.
            context: Optional context passed through the visitor.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(f"No handler for {type(expr).__name__}")
214
+
215
+
216
class DisplayVisitor(ExpressionVisitor[Expression, str]):
    """
    Render an expression tree as a string in infix notation.

    Example output: "a = b AND c > d" rather than the prefix form
    "(AND (= a b) (> c d))".
    """

    # One formatting procedure per expression class name. Every procedure
    # receives the already-rendered operands.
    _PROCEDURES = {
        # Comparisons
        "Equal": lambda left, right: "{} = {}".format(left, right),
        "NotEqual": lambda left, right: "{} <> {}".format(left, right),
        "GreaterThan": lambda left, right: "{} > {}".format(left, right),
        "LessThan": lambda left, right: "{} < {}".format(left, right),
        "GreaterThanEqual": lambda left, right: "{} >= {}".format(left, right),
        "LessThanEqual": lambda left, right: "{} <= {}".format(left, right),
        # Logical connectives are wrapped in parentheses
        "And": lambda left, right: "({} AND {})".format(left, right),
        "Or": lambda left, right: "({} OR {})".format(left, right),
        # Single-operand forms
        "Not": lambda operand: "NOT ({})".format(operand),
        "IsNull": lambda operand: "({}) IS NULL".format(operand),
        # Membership, range and pattern predicates
        "In": lambda value, values: "{} IN ({})".format(value, ", ".join(values)),
        "Between": lambda value, lower, upper: "{} BETWEEN {} AND {}".format(
            value, lower, upper
        ),
        "Like": lambda value, pattern: "{} LIKE {}".format(value, pattern),
    }

    def visit_reference(self, expr: Reference, context=None) -> str:
        """Render a field reference as its field attribute."""
        field_name = expr.field
        return field_name

    def visit_literal(self, expr: Literal, context=None) -> str:
        """Render a literal by passing its value through str()."""
        literal_value = expr.value
        return str(literal_value)
@@ -0,0 +1,24 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Generic, Optional, TypeVar
3
+
4
# Generic placeholders for the two model types a mapper converts between.
T = TypeVar("T")
U = TypeVar("U")


class ModelMapper(ABC, Generic[T, U]):
    """
    Abstract two-way mapper between a model of type ``T`` and a model of
    type ``U``. Concrete mappers must override both static methods.
    """

    @staticmethod
    @abstractmethod
    def map(obj: Optional[T], *args, **kwargs) -> Optional[U]:
        """
        Map ``obj`` from type ``T`` to type ``U``.

        Extra positional and keyword arguments are accepted for use by
        implementations. This abstract default returns None.
        """
        ...

    @staticmethod
    @abstractmethod
    def unmap(obj: Optional[U], **kwargs) -> Optional[T]:
        """
        Map ``obj`` from type ``U`` back to type ``T``.

        Extra keyword arguments are accepted for use by implementations.
        This abstract default returns None.
        """
        ...
18
+
19
+
20
class OneWayModelMapper(ABC, Generic[T, U]):
    """
    Abstract one-way mapper from a model of type ``T`` to a model of type
    ``U``. Concrete mappers must override ``map``.
    """

    @staticmethod
    @abstractmethod
    def map(obj: Optional[T], **kwargs) -> Optional[U]:
        """
        Map ``obj`` from type ``T`` to type ``U``.

        Extra keyword arguments are accepted for use by implementations.
        This abstract default returns None.
        """
        ...
@@ -21,6 +21,14 @@ class ListResult(dict, Generic[T]):
21
21
  list_result["nextPageProvider"] = next_page_provider
22
22
  return list_result
23
23
 
24
@staticmethod
def empty() -> ListResult:
    """
    Build a ListResult whose "items" entry is an empty list and whose
    "paginationKey" and "nextPageProvider" entries are both None.
    """
    blank = ListResult()
    # Assign through item access, in the same key order as before.
    for key, value in (
        ("items", []),
        ("paginationKey", None),
        ("nextPageProvider", None),
    ):
        blank[key] = value
    return blank
31
+
24
32
  def read_page(self) -> Optional[List[T]]:
25
33
  return self.get("items")
26
34
 
@@ -1,32 +1,116 @@
1
+ # Allow classes to use self-referencing Type hints in Python 3.7.
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional, List
5
+
1
6
  from deltacat.utils.common import sha1_digest, sha1_hexdigest
2
7
 
8
DEFAULT_NAME_SEPARATOR = "|"
DEFAULT_PATH_SEPARATOR = "/"


class LocatorName:
    """
    Assigns a name to a catalog object. All sibling catalog objects must be
    assigned unique names (e.g., all namespaces in a catalog must be assigned
    unique locator names, all tables under a namespace must be assigned unique
    locator names, etc.). Names may be mutable (e.g., namespace and table names)
    or immutable (e.g., partition/stream IDs and delta stream positions). Names
    may be single or multi-part.

    This base class only defines the interface: ``immutable_id`` and
    ``parts`` raise NotImplementedError until a subclass overrides them.
    """

    @property
    def immutable_id(self) -> Optional[str]:
        """
        If this locator name is immutable (i.e., if the object it refers to
        can't be renamed) then returns an immutable ID suitable for use in
        URLs or filesystem paths. Returns None if this locator name is mutable
        (i.e., if the object it refers to can be renamed).

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError()

    @immutable_id.setter
    def immutable_id(self, immutable_id: Optional[str]) -> None:
        """
        If this locator name is immutable (i.e., if the object it refers to
        can't be renamed), then sets an immutable ID for this
        locator name suitable for use in URLs or filesystem paths. Note that
        the ID is only considered immutable in durable catalog storage, and
        remains mutable in transient memory (i.e., this setter remains
        functional regardless of whether an ID is already assigned, but each
        update causes it to refer to a new, distinct object in durable storage).

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError()

    def parts(self) -> List[str]:
        """
        Returns the ordered parts of this locator's name.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError()

    def join(self, separator: str = DEFAULT_NAME_SEPARATOR) -> str:
        """
        Returns this locator name as a string by joining its parts with the
        given separator.
        """
        return separator.join(self.parts())

    def exists(self) -> bool:
        """
        Returns True if this locator name is defined, False otherwise.

        A name counts as defined when it has a truthy immutable ID or, failing
        that, when every one of its parts is truthy.
        """
        # Coerce the ID so that callers always get a real bool, as annotated,
        # rather than the raw ID string whenever one is set.
        return bool(self.immutable_id) or all(self.parts())
63
+
3
64
 
4
65
  class Locator:
5
- def canonical_string(self) -> str:
66
+ """
67
+ Creates a globally unique reference to any named catalog object. Locators
68
+ are composed of the name of the referenced catalog object and its parent
69
+ Locator (if any). Every Locator has a canonical string representation that
70
+ can be used for global equality checks. Cryptographic digests of this
71
+ canonical string can be used for uniform random hash distribution and
72
+ path-based references to the underlying catalog object in filesystems or
73
+ URLs.
74
+ """
75
+
76
+ @property
77
+ def name(self) -> LocatorName:
6
78
  """
7
- Returns a unique string for the given locator that can be used
8
- for equality checks (i.e. two locators are equal if they have
9
- the same canonical string).
79
+ Returns the name of this locator.
10
80
  """
11
81
  raise NotImplementedError()
12
82
 
83
+ @property
84
+ def parent(self) -> Optional[Locator]:
85
+ """
86
+ Returns the parent of this locator, if any.
87
+ """
88
+ raise NotImplementedError()
89
+
90
+ def canonical_string(self, separator: str = DEFAULT_NAME_SEPARATOR) -> str:
91
+ """
92
+ Returns a unique string for the given locator that can be used
93
+ for equality checks between objects with the same parent.
94
+ """
95
+ return separator.join([str(part) for part in self.name.parts()])
96
+
13
97
  def digest(self) -> bytes:
14
98
  """
15
99
  Return a digest of the given locator that can be used for
16
- equality checks (i.e. two locators are equal if they have the
17
- same digest) and uniform random hash distribution.
100
+ equality checks between objects with the same parent and uniform
101
+ random hash distribution.
18
102
  """
19
103
  return sha1_digest(self.canonical_string().encode("utf-8"))
20
104
 
21
105
  def hexdigest(self) -> str:
22
106
  """
23
107
  Returns a hexdigest of the given locator suitable
24
- for use in equality (i.e. two locators are equal if they have the same
25
- hexdigest) and inclusion in URLs.
108
+ equality checks between objects with the same parent and
109
+ inclusion in URLs.
26
110
  """
27
111
  return sha1_hexdigest(self.canonical_string().encode("utf-8"))
28
112
 
29
- def path(self, root: str, separator: str = "/") -> str:
113
+ def path(self, root: str, separator: str = DEFAULT_PATH_SEPARATOR) -> str:
30
114
  """
31
115
  Returns a path for the locator of the form: "{root}/{hexdigest}", where
32
116
  the default path separator of "/" may optionally be overridden with