deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,397 +0,0 @@
1
- # Allow classes to use self-referencing Type hints in Python 3.7.
2
- from __future__ import annotations
3
- from typing import Any, Dict, List, Optional, Set, Tuple
4
- import pyarrow as pa
5
-
6
- from deltacat.tests.compute.test_util_common import (
7
- PartitionKey,
8
- )
9
-
10
- from deltacat.storage import (
11
- Delta,
12
- DeltaType,
13
- Partition,
14
- PartitionLocator,
15
- Stream,
16
- )
17
- from deltacat.tests.compute.test_util_common import (
18
- create_src_table,
19
- create_destination_table,
20
- create_rebase_table,
21
- )
22
- import logging
23
- from deltacat import logs
24
-
25
- logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
26
-
27
-
28
- def _add_deltas_to_partition(
29
- deltas_ingredients: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
30
- partition: Optional[Partition],
31
- ds_mock_kwargs: Optional[Dict[str, Any]],
32
- ) -> List[Optional[Delta], int]:
33
- import deltacat.tests.local_deltacat_storage as ds
34
-
35
- all_deltas_length = 0
36
- for (delta_data, delta_type, delete_parameters) in deltas_ingredients:
37
- staged_delta: Delta = ds.stage_delta(
38
- delta_data,
39
- partition,
40
- delta_type,
41
- delete_parameters=delete_parameters,
42
- **ds_mock_kwargs,
43
- )
44
- incremental_delta = ds.commit_delta(
45
- staged_delta,
46
- **ds_mock_kwargs,
47
- )
48
- all_deltas_length += len(delta_data) if delta_data else 0
49
- return incremental_delta, all_deltas_length
50
-
51
-
52
- def add_late_deltas_to_partition(
53
- late_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
54
- source_partition: Optional[Partition],
55
- ds_mock_kwargs: Optional[Dict[str, Any]],
56
- ) -> List[Optional[Delta], int]:
57
- return _add_deltas_to_partition(late_deltas, source_partition, ds_mock_kwargs)
58
-
59
-
60
- def create_incremental_deltas_on_source_table(
61
- source_namespace: str,
62
- source_table_name: str,
63
- source_table_version: str,
64
- source_table_stream: Stream,
65
- partition_values_param,
66
- incremental_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
67
- ds_mock_kwargs: Optional[Dict[str, Any]] = None,
68
- ) -> Tuple[PartitionLocator, Delta, int, bool]:
69
- import deltacat.tests.local_deltacat_storage as ds
70
-
71
- incremental_delta_length = 0
72
- is_delete = False
73
- src_partition: Partition = ds.get_partition(
74
- source_table_stream.locator,
75
- partition_values_param,
76
- **ds_mock_kwargs,
77
- )
78
- for (
79
- incremental_data,
80
- incremental_delta_type,
81
- incremental_delete_parameters,
82
- ) in incremental_deltas:
83
- if incremental_delta_type is DeltaType.DELETE:
84
- is_delete = True
85
- incremental_delta: Delta = ds.commit_delta(
86
- ds.stage_delta(
87
- incremental_data,
88
- src_partition,
89
- incremental_delta_type,
90
- delete_parameters=incremental_delete_parameters,
91
- **ds_mock_kwargs,
92
- ),
93
- **ds_mock_kwargs,
94
- )
95
- incremental_delta_length += len(incremental_data) if incremental_data else 0
96
- src_table_stream_after_committed_delta: Stream = ds.get_stream(
97
- source_namespace,
98
- source_table_name,
99
- source_table_version,
100
- **ds_mock_kwargs,
101
- )
102
- src_partition_after_committed_delta: Partition = ds.get_partition(
103
- src_table_stream_after_committed_delta.locator,
104
- partition_values_param,
105
- **ds_mock_kwargs,
106
- )
107
- return (
108
- src_partition_after_committed_delta.locator,
109
- incremental_delta,
110
- incremental_delta_length,
111
- is_delete,
112
- )
113
-
114
-
115
- def create_src_w_deltas_destination_plus_destination(
116
- primary_keys: Set[str],
117
- sort_keys: Optional[List[Any]],
118
- partition_keys: Optional[List[PartitionKey]],
119
- input_deltas: pa.Table,
120
- input_delta_type: DeltaType,
121
- partition_values: Optional[List[Any]],
122
- ds_mock_kwargs: Optional[Dict[str, Any]],
123
- simulate_is_inplace: bool = False,
124
- ) -> Tuple[Stream, Stream, Optional[Stream], str, str, str]:
125
- import deltacat.tests.local_deltacat_storage as ds
126
-
127
- source_namespace, source_table_name, source_table_version = create_src_table(
128
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
129
- )
130
-
131
- source_table_stream: Stream = ds.get_stream(
132
- namespace=source_namespace,
133
- table_name=source_table_name,
134
- table_version=source_table_version,
135
- **ds_mock_kwargs,
136
- )
137
- staged_partition: Partition = ds.stage_partition(
138
- source_table_stream, partition_values, **ds_mock_kwargs
139
- )
140
- ds.commit_delta(
141
- ds.stage_delta(
142
- input_deltas, staged_partition, input_delta_type, **ds_mock_kwargs
143
- ),
144
- **ds_mock_kwargs,
145
- )
146
- ds.commit_partition(staged_partition, **ds_mock_kwargs)
147
- source_table_stream_after_committed: Stream = ds.get_stream(
148
- namespace=source_namespace,
149
- table_name=source_table_name,
150
- table_version=source_table_version,
151
- **ds_mock_kwargs,
152
- )
153
- destination_table_namespace: Optional[str] = None
154
- destination_table_name: Optional[str] = None
155
- destination_table_version: Optional[str] = None
156
- if not simulate_is_inplace:
157
- (
158
- destination_table_namespace,
159
- destination_table_name,
160
- destination_table_version,
161
- ) = create_destination_table(
162
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
163
- )
164
- else:
165
- # not creating a table as in-place
166
- destination_table_namespace = source_namespace
167
- destination_table_name = source_table_name
168
- destination_table_version = source_table_version
169
-
170
- destination_table_stream: Stream = ds.get_stream(
171
- namespace=destination_table_namespace,
172
- table_name=destination_table_name,
173
- table_version=destination_table_version,
174
- **ds_mock_kwargs,
175
- )
176
- return (
177
- source_table_stream_after_committed,
178
- destination_table_stream,
179
- None,
180
- source_namespace,
181
- source_table_name,
182
- source_table_version,
183
- )
184
-
185
-
186
- def create_src_w_deltas_destination_rebase_w_deltas_strategy(
187
- primary_keys: Set[str],
188
- sort_keys: Optional[List[Any]],
189
- partition_keys: Optional[List[PartitionKey]],
190
- input_deltas: pa.Table,
191
- input_delta_type: DeltaType,
192
- partition_values: Optional[List[Any]],
193
- ds_mock_kwargs: Optional[Dict[str, Any]],
194
- ) -> Tuple[Stream, Stream, Optional[Stream]]:
195
- import deltacat.tests.local_deltacat_storage as ds
196
- from deltacat.storage import Delta
197
- from deltacat.utils.common import current_time_ms
198
-
199
- last_stream_position = current_time_ms()
200
- source_namespace, source_table_name, source_table_version = create_src_table(
201
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
202
- )
203
-
204
- source_table_stream: Stream = ds.get_stream(
205
- namespace=source_namespace,
206
- table_name=source_table_name,
207
- table_version=source_table_version,
208
- **ds_mock_kwargs,
209
- )
210
- staged_partition: Partition = ds.stage_partition(
211
- source_table_stream, partition_values, **ds_mock_kwargs
212
- )
213
- staged_delta: Delta = ds.stage_delta(
214
- input_deltas, staged_partition, input_delta_type, **ds_mock_kwargs
215
- )
216
- staged_delta.locator.stream_position = last_stream_position
217
- ds.commit_delta(
218
- staged_delta,
219
- **ds_mock_kwargs,
220
- )
221
- ds.commit_partition(staged_partition, **ds_mock_kwargs)
222
- source_table_stream_after_committed: Stream = ds.get_stream(
223
- namespace=source_namespace,
224
- table_name=source_table_name,
225
- table_version=source_table_version,
226
- **ds_mock_kwargs,
227
- )
228
- # create the destination table
229
- (
230
- destination_table_namespace,
231
- destination_table_name,
232
- destination_table_version,
233
- ) = create_destination_table(
234
- primary_keys, sort_keys, partition_keys, ds_mock_kwargs
235
- )
236
- # create the rebase table
237
- (
238
- rebase_table_namespace,
239
- rebase_table_name,
240
- rebase_table_version,
241
- ) = create_rebase_table(primary_keys, sort_keys, partition_keys, ds_mock_kwargs)
242
- rebasing_table_stream: Stream = ds.get_stream(
243
- namespace=rebase_table_namespace,
244
- table_name=rebase_table_name,
245
- table_version=rebase_table_version,
246
- **ds_mock_kwargs,
247
- )
248
- staged_partition: Partition = ds.stage_partition(
249
- rebasing_table_stream, partition_values, **ds_mock_kwargs
250
- )
251
- staged_delta: Delta = ds.stage_delta(
252
- input_deltas, staged_partition, **ds_mock_kwargs
253
- )
254
- staged_delta.locator.stream_position = last_stream_position
255
- ds.commit_delta(
256
- staged_delta,
257
- **ds_mock_kwargs,
258
- )
259
- ds.commit_partition(staged_partition, **ds_mock_kwargs)
260
-
261
- # get streams
262
- # TODO: Add deltas to destination stream
263
- destination_table_stream: Stream = ds.get_stream(
264
- namespace=destination_table_namespace,
265
- table_name=destination_table_name,
266
- table_version=destination_table_version,
267
- **ds_mock_kwargs,
268
- )
269
- rebased_stream_after_committed: Stream = ds.get_stream(
270
- namespace=rebase_table_namespace,
271
- table_name=rebase_table_name,
272
- table_version=rebase_table_version,
273
- **ds_mock_kwargs,
274
- )
275
- return (
276
- source_table_stream_after_committed,
277
- destination_table_stream,
278
- rebased_stream_after_committed,
279
- )
280
-
281
-
282
def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
    primary_keys: Set[str],
    sort_keys: Optional[List[Any]],
    partition_keys: Optional[List[PartitionKey]],
    input_deltas: List[Tuple[pa.Table, "DeltaType", Optional[Any]]],
    partition_values: Optional[List[Any]],
    ds_mock_kwargs: Optional[Dict[str, Any]],
) -> Tuple[Stream, Stream, Optional[Stream], bool]:
    """Create source, destination and rebase tables and seed two of them with deltas.

    Every entry of ``input_deltas`` is staged and committed, in order, to a
    freshly staged partition of the source table and then again to a freshly
    staged partition of the rebase table. The destination table is created
    but receives no deltas here.

    Args:
        primary_keys: Forwarded to the table-creation helpers.
        sort_keys: Forwarded to the table-creation helpers.
        partition_keys: Forwarded to the table-creation helpers.
        input_deltas: Sequence of ``(delta, delta_type, delete_parameters)``
            triples. ``delete_parameters`` is passed through to
            ``ds.stage_delta`` unchanged.
        partition_values: Values used when staging each partition.
        ds_mock_kwargs: Keyword arguments expanded into every storage call.

    Returns:
        A 4-tuple ``(source_stream, destination_stream, rebase_stream,
        is_delete)``. The source and rebase streams are re-fetched after
        their partitions are committed; ``is_delete`` is True when at least
        one input delta has type ``DeltaType.DELETE``.
    """
    import deltacat.tests.local_deltacat_storage as ds
    from deltacat.storage import Partition, Stream

    def _stage_and_commit_deltas(stream: Stream) -> bool:
        # Commit every input delta to a new partition of `stream` and report
        # whether any of them was a DELETE.
        saw_delete = False
        staged_partition: Partition = ds.stage_partition(
            stream, partition_values, **ds_mock_kwargs
        )
        for input_delta, input_delta_type, input_delta_parameters in input_deltas:
            if input_delta_type is DeltaType.DELETE:
                saw_delete = True
            staged_delta = ds.stage_delta(
                input_delta,
                staged_partition,
                input_delta_type,
                delete_parameters=input_delta_parameters,
                **ds_mock_kwargs,
            )
            ds.commit_delta(staged_delta, **ds_mock_kwargs)
        ds.commit_partition(staged_partition, **ds_mock_kwargs)
        return saw_delete

    # create the source table and fill it with the input deltas
    source_namespace, source_table_name, source_table_version = create_src_table(
        primary_keys, sort_keys, partition_keys, ds_mock_kwargs
    )
    source_table_stream: Stream = ds.get_stream(
        namespace=source_namespace,
        table_name=source_table_name,
        table_version=source_table_version,
        **ds_mock_kwargs,
    )
    source_has_delete = _stage_and_commit_deltas(source_table_stream)
    source_table_stream_after_committed: Stream = ds.get_stream(
        namespace=source_namespace,
        table_name=source_table_name,
        table_version=source_table_version,
        **ds_mock_kwargs,
    )

    # create the destination table
    (
        destination_table_namespace,
        destination_table_name,
        destination_table_version,
    ) = create_destination_table(
        primary_keys, sort_keys, partition_keys, ds_mock_kwargs
    )

    # create the rebase table and fill it with the same input deltas
    (
        rebase_table_namespace,
        rebase_table_name,
        rebase_table_version,
    ) = create_rebase_table(primary_keys, sort_keys, partition_keys, ds_mock_kwargs)
    rebasing_table_stream: Stream = ds.get_stream(
        namespace=rebase_table_namespace,
        table_name=rebase_table_name,
        table_version=rebase_table_version,
        **ds_mock_kwargs,
    )
    rebase_has_delete = _stage_and_commit_deltas(rebasing_table_stream)
    is_delete = source_has_delete or rebase_has_delete

    # get streams
    destination_table_stream: Stream = ds.get_stream(
        namespace=destination_table_namespace,
        table_name=destination_table_name,
        table_version=destination_table_version,
        **ds_mock_kwargs,
    )
    rebased_stream_after_committed: Stream = ds.get_stream(
        namespace=rebase_table_namespace,
        table_name=rebase_table_name,
        table_version=rebase_table_version,
        **ds_mock_kwargs,
    )
    return (
        source_table_stream_after_committed,
        destination_table_stream,
        rebased_stream_after_committed,
        is_delete,
    )