deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,388 +0,0 @@
1
- # Allow classes to use self-referencing Type hints in Python 3.7.
2
- from __future__ import annotations
3
- from typing import Any, Dict, List, Optional, Tuple
4
- import pyarrow as pa
5
-
6
- from deltacat.tests.compute.test_util_common import (
7
- PartitionKey,
8
- )
9
-
10
- from deltacat.storage import (
11
- Delta,
12
- DeltaType,
13
- Partition,
14
- PartitionLocator,
15
- Stream,
16
- )
17
- from deltacat.tests.compute.test_util_common import (
18
- create_src_table,
19
- create_destination_table,
20
- create_rebase_table,
21
- )
22
- import logging
23
- from deltacat import logs
24
-
25
- logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
26
-
27
-
28
def _add_deltas_to_partition(
    deltas_ingredients: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
    partition: Optional[Partition],
    ds_mock_kwargs: Optional[Dict[str, Any]],
) -> Tuple[Optional[Delta], int]:
    """Stage and commit each delta in ``deltas_ingredients`` against ``partition``.

    Args:
        deltas_ingredients: ``(data, delta_type, entry_params)`` triples. They are
            staged and committed one at a time, in iteration order.
        partition: Partition passed to ``ds.stage_delta`` for every delta.
        ds_mock_kwargs: Extra keyword arguments forwarded to every storage call.
            ``None`` is treated as "no extra arguments".

    Returns:
        A ``(last_committed_delta, total_length)`` pair. ``last_committed_delta``
        is ``None`` when ``deltas_ingredients`` is empty; ``total_length`` is the
        sum of ``len(data)`` over every truthy ``data`` item.
    """
    import deltacat.tests.local_deltacat_storage as ds

    # ``**None`` raises TypeError, so normalise the optional kwargs once.
    storage_kwargs = ds_mock_kwargs or {}
    # Pre-bind the result so an empty input returns (None, 0) instead of raising
    # UnboundLocalError at the return statement.
    incremental_delta: Optional[Delta] = None
    all_deltas_length = 0
    for delta_data, delta_type, delete_parameters in deltas_ingredients:
        staged_delta: Delta = ds.stage_delta(
            delta_data,
            partition,
            delta_type,
            entry_params=delete_parameters,
            **storage_kwargs,
        )
        incremental_delta = ds.commit_delta(
            staged_delta,
            **storage_kwargs,
        )
        all_deltas_length += len(delta_data) if delta_data else 0
    return incremental_delta, all_deltas_length
50
-
51
-
52
def add_late_deltas_to_partition(
    late_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
    source_partition: Optional[Partition],
    ds_mock_kwargs: Optional[Dict[str, Any]],
) -> Tuple[Optional[Delta], int]:
    """Stage and commit ``late_deltas`` on ``source_partition``.

    Thin public wrapper that forwards all three arguments unchanged to
    ``_add_deltas_to_partition`` and returns its result.

    Args:
        late_deltas: ``(data, delta_type, entry_params)`` triples to commit.
        source_partition: Partition the deltas are staged against.
        ds_mock_kwargs: Extra keyword arguments for the storage calls.

    Returns:
        Whatever ``_add_deltas_to_partition`` returns: a two-element
        ``(delta, total_length)`` pair.
    """
    return _add_deltas_to_partition(late_deltas, source_partition, ds_mock_kwargs)
58
-
59
-
60
def create_incremental_deltas_on_source_table(
    source_namespace: str,
    source_table_name: str,
    source_table_version: str,
    source_table_stream: Stream,
    partition_values_param,
    incremental_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
    ds_mock_kwargs: Optional[Dict[str, Any]] = None,
) -> Tuple[PartitionLocator, Optional[Delta], int, bool]:
    """Commit ``incremental_deltas`` to the source partition and re-read its locator.

    Args:
        source_namespace: Namespace used to re-fetch the source stream.
        source_table_name: Table name used to re-fetch the source stream.
        source_table_version: Table version used to re-fetch the source stream.
        source_table_stream: Stream whose locator is used to look up the
            partition that the deltas are staged against.
        partition_values_param: Partition values passed to ``ds.get_partition``.
        incremental_deltas: ``(data, delta_type, entry_params)`` triples, staged
            and committed in iteration order.
        ds_mock_kwargs: Extra keyword arguments forwarded to every storage call.
            ``None`` (the default) is treated as "no extra arguments".

    Returns:
        A 4-tuple of:
        - the partition locator, fetched again after all commits;
        - the last committed delta, or ``None`` if ``incremental_deltas`` is empty;
        - the summed ``len(data)`` over every truthy ``data`` item;
        - whether any delta had type ``DeltaType.DELETE``.
    """
    import deltacat.tests.local_deltacat_storage as ds

    # The parameter defaults to None, and ``**None`` raises TypeError.
    storage_kwargs = ds_mock_kwargs or {}
    # Pre-bind so an empty ``incremental_deltas`` does not raise
    # UnboundLocalError when building the return tuple.
    incremental_delta: Optional[Delta] = None
    incremental_delta_length = 0
    is_delete = False
    src_partition: Partition = ds.get_partition(
        source_table_stream.locator,
        partition_values_param,
        **storage_kwargs,
    )
    for (
        incremental_data,
        incremental_delta_type,
        incremental_delete_parameters,
    ) in incremental_deltas:
        if incremental_delta_type is DeltaType.DELETE:
            is_delete = True
        incremental_delta = ds.commit_delta(
            ds.stage_delta(
                incremental_data,
                src_partition,
                incremental_delta_type,
                entry_params=incremental_delete_parameters,
                **storage_kwargs,
            ),
            **storage_kwargs,
        )
        incremental_delta_length += len(incremental_data) if incremental_data else 0
    # Re-read the stream and partition after committing so the returned locator
    # comes from the post-commit lookup rather than from ``src_partition``.
    src_table_stream_after_committed_delta: Stream = ds.get_stream(
        source_namespace,
        source_table_name,
        source_table_version,
        **storage_kwargs,
    )
    src_partition_after_committed_delta: Partition = ds.get_partition(
        src_table_stream_after_committed_delta.locator,
        partition_values_param,
        **storage_kwargs,
    )
    return (
        src_partition_after_committed_delta.locator,
        incremental_delta,
        incremental_delta_length,
        is_delete,
    )
113
-
114
-
115
def create_src_w_deltas_destination_plus_destination(
    sort_keys: Optional[List[Any]],
    partition_keys: Optional[List[PartitionKey]],
    input_deltas: pa.Table,
    input_delta_type: DeltaType,
    partition_values: Optional[List[Any]],
    ds_mock_kwargs: Optional[Dict[str, Any]],
    simulate_is_inplace: bool = False,
) -> Tuple[Stream, Stream, Optional[Stream], str, str, str]:
    """Create a source table holding one committed delta, plus a destination stream.

    The source table is created, a partition is staged on its stream,
    ``input_deltas`` is staged and committed into that partition, and the
    partition is committed. A separate destination table is created unless
    ``simulate_is_inplace`` is true, in which case the source table doubles as
    the destination.

    Returns:
        ``(source_stream, destination_stream, None, source_namespace,
        source_table_name, source_table_version)``. The source stream is fetched
        again after the partition commit; the third element is always ``None``.
    """
    import deltacat.tests.local_deltacat_storage as ds

    def _lookup_stream(namespace, table_name, table_version) -> Stream:
        # Every stream lookup in this function uses the same keyword form.
        return ds.get_stream(
            namespace=namespace,
            table_name=table_name,
            table_version=table_version,
            **ds_mock_kwargs,
        )

    src_namespace, src_table_name, src_table_version = create_src_table(
        sort_keys, partition_keys, ds_mock_kwargs
    )

    # Stage a partition on the fresh source stream and commit the input delta.
    src_partition: Partition = ds.stage_partition(
        _lookup_stream(src_namespace, src_table_name, src_table_version),
        partition_values,
        **ds_mock_kwargs,
    )
    pending_delta = ds.stage_delta(
        input_deltas, src_partition, input_delta_type, **ds_mock_kwargs
    )
    ds.commit_delta(pending_delta, **ds_mock_kwargs)
    ds.commit_partition(src_partition, **ds_mock_kwargs)
    committed_src_stream = _lookup_stream(
        src_namespace, src_table_name, src_table_version
    )

    if simulate_is_inplace:
        # In-place: no destination table is created; reuse the source table.
        dest_namespace = src_namespace
        dest_table_name = src_table_name
        dest_table_version = src_table_version
    else:
        dest_namespace, dest_table_name, dest_table_version = create_destination_table(
            sort_keys, partition_keys, ds_mock_kwargs
        )

    dest_stream = _lookup_stream(dest_namespace, dest_table_name, dest_table_version)
    return (
        committed_src_stream,
        dest_stream,
        None,
        src_namespace,
        src_table_name,
        src_table_version,
    )
181
-
182
-
183
def create_src_w_deltas_destination_rebase_w_deltas_strategy(
    sort_keys: Optional[List[Any]],
    partition_keys: Optional[List[PartitionKey]],
    input_deltas: pa.Table,
    input_delta_type: DeltaType,
    partition_values: Optional[List[Any]],
    ds_mock_kwargs: Optional[Dict[str, Any]],
) -> Tuple[Stream, Stream, Optional[Stream]]:
    """Create source, destination and rebase tables for a rebase scenario.

    ``input_deltas`` is committed once into a staged partition of the source
    table and once into a staged partition of the rebase table. Both staged
    deltas are given the same stream position, captured from
    ``current_time_ms()`` before any table is created. The destination table is
    created but no delta is written to it.

    Returns:
        ``(source_stream, destination_stream, rebase_stream)``, each fetched
        after the corresponding partition commits.
    """
    import deltacat.tests.local_deltacat_storage as ds
    from deltacat.storage import Delta
    from deltacat.utils.common import current_time_ms

    def _lookup_stream(namespace, table_name, table_version) -> Stream:
        # Every stream lookup in this function uses the same keyword form.
        return ds.get_stream(
            namespace=namespace,
            table_name=table_name,
            table_version=table_version,
            **ds_mock_kwargs,
        )

    # Captured up front so the source and rebase deltas share one position.
    shared_stream_position = current_time_ms()

    # --- source table: stage partition, commit one delta, commit partition ---
    src_namespace, src_table_name, src_table_version = create_src_table(
        sort_keys, partition_keys, ds_mock_kwargs
    )
    src_partition: Partition = ds.stage_partition(
        _lookup_stream(src_namespace, src_table_name, src_table_version),
        partition_values,
        **ds_mock_kwargs,
    )
    src_delta: Delta = ds.stage_delta(
        input_deltas, src_partition, input_delta_type, **ds_mock_kwargs
    )
    src_delta.locator.stream_position = shared_stream_position
    ds.commit_delta(src_delta, **ds_mock_kwargs)
    ds.commit_partition(src_partition, **ds_mock_kwargs)
    committed_src_stream = _lookup_stream(
        src_namespace, src_table_name, src_table_version
    )

    # --- destination table: created only; its stream is fetched at the end ---
    dest_namespace, dest_table_name, dest_table_version = create_destination_table(
        sort_keys, partition_keys, ds_mock_kwargs
    )

    # --- rebase table: same data and stream position as the source ---
    rebase_namespace, rebase_table_name, rebase_table_version = create_rebase_table(
        sort_keys, partition_keys, ds_mock_kwargs
    )
    rebase_partition: Partition = ds.stage_partition(
        _lookup_stream(rebase_namespace, rebase_table_name, rebase_table_version),
        partition_values,
        **ds_mock_kwargs,
    )
    # NOTE(review): unlike the source delta above, this delta is staged without
    # ``input_delta_type``, so ``ds.stage_delta`` applies its own default type.
    # Confirm that this is intended.
    rebase_delta: Delta = ds.stage_delta(
        input_deltas, rebase_partition, **ds_mock_kwargs
    )
    rebase_delta.locator.stream_position = shared_stream_position
    ds.commit_delta(rebase_delta, **ds_mock_kwargs)
    ds.commit_partition(rebase_partition, **ds_mock_kwargs)

    # TODO: Add deltas to destination stream
    dest_stream = _lookup_stream(dest_namespace, dest_table_name, dest_table_version)
    committed_rebase_stream = _lookup_stream(
        rebase_namespace, rebase_table_name, rebase_table_version
    )
    return (
        committed_src_stream,
        dest_stream,
        committed_rebase_stream,
    )
274
-
275
-
276
def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
    sort_keys: Optional[List[Any]],
    partition_keys: Optional[List[PartitionKey]],
    input_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
    partition_values: Optional[List[Any]],
    ds_mock_kwargs: Optional[Dict[str, Any]],
) -> Tuple[Stream, Stream, Optional[Stream], bool]:
    """Create source, destination and rebase tables from a sequence of deltas.

    Every ``(data, delta_type, entry_params)`` triple in ``input_deltas`` is
    staged and committed, in order, first into a staged partition of the source
    table and then into a staged partition of the rebase table. The destination
    table is created but no delta is written to it.

    Args:
        sort_keys: Forwarded to the table-creation helpers.
        partition_keys: Forwarded to the table-creation helpers.
        input_deltas: Delta triples to commit. The sequence is iterated twice
            (once per table), so it must be re-iterable.
        partition_values: Partition values used when staging both partitions.
        ds_mock_kwargs: Extra keyword arguments forwarded to every storage call.

    Returns:
        ``(source_stream, destination_stream, rebase_stream, is_delete)``. The
        streams are fetched after the partition commits; ``is_delete`` is true
        when any input delta has type ``DeltaType.DELETE``.
    """
    import deltacat.tests.local_deltacat_storage as ds
    from deltacat.storage import Partition, Stream

    source_namespace, source_table_name, source_table_version = create_src_table(
        sort_keys, partition_keys, ds_mock_kwargs
    )

    source_table_stream: Stream = ds.get_stream(
        namespace=source_namespace,
        table_name=source_table_name,
        table_version=source_table_version,
        **ds_mock_kwargs,
    )
    source_partition: Partition = ds.stage_partition(
        source_table_stream, partition_values, **ds_mock_kwargs
    )
    source_has_delete = _stage_and_commit_input_deltas(
        ds, input_deltas, source_partition, ds_mock_kwargs
    )
    ds.commit_partition(source_partition, **ds_mock_kwargs)
    source_table_stream_after_committed: Stream = ds.get_stream(
        namespace=source_namespace,
        table_name=source_table_name,
        table_version=source_table_version,
        **ds_mock_kwargs,
    )
    # create the destination table
    (
        destination_table_namespace,
        destination_table_name,
        destination_table_version,
    ) = create_destination_table(sort_keys, partition_keys, ds_mock_kwargs)
    # create the rebase table
    (
        rebase_table_namespace,
        rebase_table_name,
        rebase_table_version,
    ) = create_rebase_table(sort_keys, partition_keys, ds_mock_kwargs)
    rebasing_table_stream: Stream = ds.get_stream(
        namespace=rebase_table_namespace,
        table_name=rebase_table_name,
        table_version=rebase_table_version,
        **ds_mock_kwargs,
    )
    rebase_partition: Partition = ds.stage_partition(
        rebasing_table_stream, partition_values, **ds_mock_kwargs
    )
    # Run the helper unconditionally and combine the flags afterwards; folding
    # the call into an ``or`` expression could short-circuit and skip the commits.
    rebase_has_delete = _stage_and_commit_input_deltas(
        ds, input_deltas, rebase_partition, ds_mock_kwargs
    )
    ds.commit_partition(rebase_partition, **ds_mock_kwargs)
    is_delete = source_has_delete or rebase_has_delete

    # get streams
    destination_table_stream: Stream = ds.get_stream(
        namespace=destination_table_namespace,
        table_name=destination_table_name,
        table_version=destination_table_version,
        **ds_mock_kwargs,
    )
    rebased_stream_after_committed: Stream = ds.get_stream(
        namespace=rebase_table_namespace,
        table_name=rebase_table_name,
        table_version=rebase_table_version,
        **ds_mock_kwargs,
    )
    return (
        source_table_stream_after_committed,
        destination_table_stream,
        rebased_stream_after_committed,
        is_delete,
    )


def _stage_and_commit_input_deltas(
    ds: Any,
    input_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
    staged_partition: Partition,
    ds_mock_kwargs: Optional[Dict[str, Any]],
) -> bool:
    """Stage and commit each delta triple on ``staged_partition``; report any DELETE.

    Args:
        ds: Storage module exposing ``stage_delta`` and ``commit_delta``.
        input_deltas: ``(data, delta_type, entry_params)`` triples.
        staged_partition: Partition every delta is staged against.
        ds_mock_kwargs: Extra keyword arguments forwarded to both storage calls.

    Returns:
        ``True`` if at least one triple has type ``DeltaType.DELETE``.
    """
    saw_delete = False
    for input_delta, input_delta_type, input_delta_parameters in input_deltas:
        if input_delta_type is DeltaType.DELETE:
            saw_delete = True
        staged_delta = ds.stage_delta(
            input_delta,
            staged_partition,
            input_delta_type,
            entry_params=input_delta_parameters,
            **ds_mock_kwargs,
        )
        ds.commit_delta(
            staged_delta,
            **ds_mock_kwargs,
        )
    return saw_delete