deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,36 +1,55 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
+ import json
5
+ import posixpath
4
6
  from typing import Any, Dict, List, Optional
5
7
 
6
- from deltacat.aws.redshift import Manifest, ManifestAuthor, ManifestMeta
7
- from deltacat.storage.model.delete_parameters import DeleteParameters
8
- from deltacat.storage.model.locator import Locator
8
+ import pyarrow
9
+
10
+ from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
11
+ from deltacat.constants import TXN_DIR_NAME
12
+ from deltacat.storage.model.manifest import (
13
+ Manifest,
14
+ ManifestMeta,
15
+ ManifestAuthor,
16
+ )
17
+ from deltacat.storage.model.locator import (
18
+ Locator,
19
+ LocatorName,
20
+ )
9
21
  from deltacat.storage.model.namespace import NamespaceLocator
10
- from deltacat.storage.model.partition import PartitionLocator
22
+ from deltacat.storage.model.partition import (
23
+ PartitionLocator,
24
+ PartitionValues,
25
+ )
11
26
  from deltacat.storage.model.stream import StreamLocator
12
- from deltacat.storage.model.table import TableLocator
27
+ from deltacat.storage.model.table import (
28
+ TableLocator,
29
+ Table,
30
+ )
13
31
  from deltacat.storage.model.table_version import TableVersionLocator
14
- from deltacat.storage.model.types import DeltaType
15
- from deltacat.storage.model.partition_spec import DeltaPartitionSpec, PartitionValues
32
+ from deltacat.storage.model.types import (
33
+ DeltaType,
34
+ StreamFormat,
35
+ )
16
36
 
37
+ DeltaProperties = Dict[str, Any]
17
38
 
18
- class Delta(dict):
39
+
40
+ class Delta(Metafile):
19
41
  @staticmethod
20
42
  def of(
21
43
  locator: Optional[DeltaLocator],
22
44
  delta_type: Optional[DeltaType],
23
45
  meta: Optional[ManifestMeta],
24
- properties: Optional[Dict[str, str]],
46
+ properties: Optional[DeltaProperties],
25
47
  manifest: Optional[Manifest],
26
48
  previous_stream_position: Optional[int] = None,
27
- delete_parameters: Optional[DeleteParameters] = None,
28
- partition_spec: Optional[DeltaPartitionSpec] = None,
29
49
  ) -> Delta:
30
50
  """
31
51
  Creates a Delta metadata model with the given Delta Locator, Delta Type,
32
- manifest metadata, properties, manifest, and previous delta stream
33
- position.
52
+ manifest metadata, properties, manifest, and previous delta stream position.
34
53
  """
35
54
  delta = Delta()
36
55
  delta.locator = locator
@@ -39,8 +58,6 @@ class Delta(dict):
39
58
  delta.properties = properties
40
59
  delta.manifest = manifest
41
60
  delta.previous_stream_position = previous_stream_position
42
- delta.delete_parameters = delete_parameters
43
- delta.partition_spec = partition_spec
44
61
  return delta
45
62
 
46
63
  @staticmethod
@@ -48,7 +65,7 @@ class Delta(dict):
48
65
  deltas: List[Delta],
49
66
  manifest_author: Optional[ManifestAuthor] = None,
50
67
  stream_position: Optional[int] = None,
51
- properties: Optional[Dict[str, str]] = None,
68
+ properties: Optional[DeltaProperties] = None,
52
69
  ) -> Delta:
53
70
  """
54
71
  Merges the input list of deltas into a single delta. All input deltas to
@@ -93,25 +110,10 @@ class Delta(dict):
93
110
  f"Deltas to merge must all share the same delta type "
94
111
  f"(found {len(distinct_delta_types)} delta types)."
95
112
  )
96
- distinct_partition_spec = set([d.partition_spec for d in deltas])
97
- if len(distinct_partition_spec) > 1:
98
- raise ValueError(
99
- f"Deltas to merge must all share the same partition spec "
100
- f"(found {len(distinct_partition_spec)} partition specs)."
101
- )
102
113
  merged_manifest = Manifest.merge_manifests(
103
114
  manifests,
104
115
  manifest_author,
105
116
  )
106
- distinct_delta_type = list(distinct_delta_types)[0]
107
- merged_delete_parameters = None
108
- if distinct_delta_type is DeltaType.DELETE:
109
- delete_parameters: List[DeleteParameters] = [
110
- d.delete_parameters for d in deltas if d.delete_parameters
111
- ]
112
- merged_delete_parameters: Optional[
113
- DeleteParameters
114
- ] = DeleteParameters.merge_delete_parameters(delete_parameters)
115
117
  partition_locator = deltas[0].partition_locator
116
118
  prev_positions = [d.previous_stream_position for d in deltas]
117
119
  prev_position = None if None in prev_positions else max(prev_positions)
@@ -122,7 +124,6 @@ class Delta(dict):
122
124
  properties,
123
125
  merged_manifest,
124
126
  prev_position,
125
- merged_delete_parameters,
126
127
  )
127
128
 
128
129
  @property
@@ -148,11 +149,11 @@ class Delta(dict):
148
149
  self["meta"] = meta
149
150
 
150
151
  @property
151
- def properties(self) -> Optional[Dict[str, str]]:
152
+ def properties(self) -> Optional[DeltaProperties]:
152
153
  return self.get("properties")
153
154
 
154
155
  @properties.setter
155
- def properties(self, properties: Optional[Dict[str, str]]) -> None:
156
+ def properties(self, properties: Optional[DeltaProperties]) -> None:
156
157
  self["properties"] = properties
157
158
 
158
159
  @property
@@ -222,7 +223,7 @@ class Delta(dict):
222
223
  def storage_type(self) -> Optional[str]:
223
224
  delta_locator = self.locator
224
225
  if delta_locator:
225
- return delta_locator.storage_type
226
+ return delta_locator.stream_format
226
227
  return None
227
228
 
228
229
  @property
@@ -253,6 +254,13 @@ class Delta(dict):
253
254
  return delta_locator.stream_id
254
255
  return None
255
256
 
257
+ @property
258
+ def stream_format(self) -> Optional[str]:
259
+ delta_locator = self.locator
260
+ if delta_locator:
261
+ return delta_locator.stream_format
262
+ return None
263
+
256
264
  @property
257
265
  def partition_id(self) -> Optional[str]:
258
266
  delta_locator = self.locator
@@ -267,6 +275,13 @@ class Delta(dict):
267
275
  return delta_locator.partition_values
268
276
  return None
269
277
 
278
+ @property
279
+ def partition_values_json(self) -> Optional[str]:
280
+ partition_values = (
281
+ self.partition_values if self.partition_values is not None else None
282
+ )
283
+ return json.dumps(partition_values)
284
+
270
285
  @property
271
286
  def stream_position(self) -> Optional[int]:
272
287
  delta_locator = self.locator
@@ -274,27 +289,71 @@ class Delta(dict):
274
289
  return delta_locator.stream_position
275
290
  return None
276
291
 
277
- @property
278
- def delete_parameters(self) -> Optional[DeleteParameters]:
279
- delete_parameters = self.get("delete_parameters")
292
+ def url(self, catalog_name: Optional[str] = None) -> str:
280
293
  return (
281
- None if delete_parameters is None else DeleteParameters(delete_parameters)
294
+ f"dc://{catalog_name}/{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{self.partition_values_json}/{self.stream_position}/"
295
+ if catalog_name
296
+ else f"table://{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{self.partition_values_json}/{self.stream_position}/"
282
297
  )
283
298
 
284
- @delete_parameters.setter
285
- def delete_parameters(self, delete_parameters: Optional[DeleteParameters]) -> None:
286
- self["delete_parameters"] = delete_parameters
299
+ def to_serializable(self) -> Delta:
300
+ serializable = self
301
+ if serializable.table_locator:
302
+ serializable: Delta = Delta.update_for(self)
303
+ # remove the mutable table locator
304
+ serializable.table_version_locator.table_locator = TableLocator.at(
305
+ namespace=self.id,
306
+ table_name=self.id,
307
+ )
308
+ return serializable
309
+
310
+ def from_serializable(
311
+ self,
312
+ path: str,
313
+ filesystem: Optional[pyarrow.fs.FileSystem] = None,
314
+ ) -> Delta:
315
+ # TODO(pdames): Lazily restore table locator on 1st property get.
316
+ # Cache Metafile ID <-> Table/Namespace-Name map at Catalog Init, then
317
+ # swap only Metafile IDs with Names here.
318
+ if self.table_locator and self.table_locator.table_name == self.id:
319
+ parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
320
+ base_metafile_path=path,
321
+ parent_number=4,
322
+ )
323
+ txn_log_dir = posixpath.join(
324
+ posixpath.dirname(
325
+ posixpath.dirname(
326
+ posixpath.dirname(parent_rev_dir_path),
327
+ )
328
+ ),
329
+ TXN_DIR_NAME,
330
+ )
331
+ table = Table.read(
332
+ MetafileRevisionInfo.latest_revision(
333
+ revision_dir_path=parent_rev_dir_path,
334
+ filesystem=filesystem,
335
+ success_txn_log_dir=txn_log_dir,
336
+ ).path,
337
+ filesystem,
338
+ )
339
+ self.table_version_locator.table_locator = table.locator
340
+ return self
341
+
342
+
343
+ class DeltaLocatorName(LocatorName):
344
+ def __init__(self, locator: DeltaLocator):
345
+ self.locator = locator
287
346
 
288
347
  @property
289
- def partition_spec(self) -> Optional[DeltaPartitionSpec]:
290
- val: Dict[str, Any] = self.get("partitionSpec")
291
- if val is not None and not isinstance(val, DeltaPartitionSpec):
292
- self.partition_spec = val = DeltaPartitionSpec(val)
293
- return val
348
+ def immutable_id(self) -> Optional[str]:
349
+ return str(self.locator.stream_position)
350
+
351
+ @immutable_id.setter
352
+ def immutable_id(self, immutable_id: Optional[str]):
353
+ self.locator.stream_position = int(immutable_id)
294
354
 
295
- @partition_spec.setter
296
- def partition_spec(self, value: Optional[DeltaPartitionSpec]) -> None:
297
- self["partitionSpec"] = value
355
+ def parts(self) -> List[str]:
356
+ return [str(self.locator.stream_position)]
298
357
 
299
358
 
300
359
  class DeltaLocator(Locator, dict):
@@ -318,25 +377,47 @@ class DeltaLocator(Locator, dict):
318
377
  table_name: Optional[str],
319
378
  table_version: Optional[str],
320
379
  stream_id: Optional[str],
321
- storage_type: Optional[str],
380
+ stream_format: Optional[StreamFormat],
322
381
  partition_values: Optional[PartitionValues],
323
382
  partition_id: Optional[str],
324
383
  stream_position: Optional[int],
325
384
  ) -> DeltaLocator:
326
- partition_locator = PartitionLocator.at(
327
- namespace,
328
- table_name,
329
- table_version,
330
- stream_id,
331
- storage_type,
332
- partition_values,
333
- partition_id,
385
+ partition_locator = (
386
+ PartitionLocator.at(
387
+ namespace,
388
+ table_name,
389
+ table_version,
390
+ stream_id,
391
+ stream_format,
392
+ partition_values,
393
+ partition_id,
394
+ )
395
+ if any(
396
+ [
397
+ partition_id,
398
+ partition_values,
399
+ stream_id,
400
+ stream_format,
401
+ table_name,
402
+ table_version,
403
+ namespace,
404
+ ]
405
+ )
406
+ else None
334
407
  )
335
408
  return DeltaLocator.of(
336
409
  partition_locator,
337
410
  stream_position,
338
411
  )
339
412
 
413
+ @property
414
+ def name(self):
415
+ return DeltaLocatorName(self)
416
+
417
+ @property
418
+ def parent(self) -> Optional[PartitionLocator]:
419
+ return self.partition_locator
420
+
340
421
  @property
341
422
  def partition_locator(self) -> Optional[PartitionLocator]:
342
423
  val: Dict[str, Any] = self.get("partitionLocator")
@@ -406,10 +487,10 @@ class DeltaLocator(Locator, dict):
406
487
  return None
407
488
 
408
489
  @property
409
- def storage_type(self) -> Optional[str]:
490
+ def stream_format(self) -> Optional[str]:
410
491
  partition_locator = self.partition_locator
411
492
  if partition_locator:
412
- return partition_locator.storage_type
493
+ return partition_locator.stream_format
413
494
  return None
414
495
 
415
496
  @property
@@ -432,13 +513,3 @@ class DeltaLocator(Locator, dict):
432
513
  if partition_locator:
433
514
  return partition_locator.table_version
434
515
  return None
435
-
436
- def canonical_string(self) -> str:
437
- """
438
- Returns a unique string for the given locator that can be used
439
- for equality checks (i.e. two locators are equal if they have
440
- the same canonical string).
441
- """
442
- pl_hexdigest = self.partition_locator.hexdigest()
443
- stream_position = self.stream_position
444
- return f"{pl_hexdigest}|{stream_position}"
@@ -0,0 +1,47 @@
1
+ from deltacat.storage.model.expression.expression import (
2
+ Expression,
3
+ UnaryExpression,
4
+ BinaryExpression,
5
+ BooleanExpression,
6
+ Reference,
7
+ Literal,
8
+ Equal,
9
+ NotEqual,
10
+ GreaterThan,
11
+ LessThan,
12
+ GreaterThanEqual,
13
+ LessThanEqual,
14
+ And,
15
+ Or,
16
+ Not,
17
+ In,
18
+ Between,
19
+ Like,
20
+ IsNull,
21
+ )
22
+
23
+ from deltacat.storage.model.expression.visitor import ExpressionVisitor, DisplayVisitor
24
+
25
+ __all__ = [
26
+ "Expression",
27
+ "UnaryExpression",
28
+ "BinaryExpression",
29
+ "BooleanExpression",
30
+ "Reference",
31
+ "Literal",
32
+ "Equal",
33
+ "NotEqual",
34
+ "GreaterThan",
35
+ "LessThan",
36
+ "GreaterThanEqual",
37
+ "LessThanEqual",
38
+ "And",
39
+ "Or",
40
+ "Not",
41
+ "In",
42
+ "Between",
43
+ "Like",
44
+ "IsNull",
45
+ "ExpressionVisitor",
46
+ "DisplayVisitor",
47
+ ]