deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,45 +1,42 @@
1
- import ray
2
- import os
3
- from moto import mock_s3
1
+ import tempfile
2
+ from typing import Any, Dict, List, Optional, Set, Tuple, Callable
3
+ import uuid
4
4
  import pytest
5
- import boto3
6
- from boto3.resources.base import ServiceResource
5
+
7
6
  import pyarrow as pa
8
- from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
7
+ import ray
8
+ import pandas as pd
9
+
10
+ from deltacat.io.file_object_store import FileObjectStore
9
11
  from pytest_benchmark.fixture import BenchmarkFixture
10
12
 
11
13
  from deltacat.tests.compute.test_util_constant import (
12
14
  BASE_TEST_SOURCE_NAMESPACE,
13
15
  BASE_TEST_SOURCE_TABLE_NAME,
14
16
  BASE_TEST_SOURCE_TABLE_VERSION,
15
- TEST_S3_RCF_BUCKET_NAME,
16
17
  DEFAULT_NUM_WORKERS,
17
18
  DEFAULT_WORKER_INSTANCE_CPUS,
18
19
  )
19
20
  from deltacat.compute.compactor.model.compactor_version import CompactorVersion
20
21
  from deltacat.tests.compute.test_util_common import (
21
- get_rcf,
22
- )
23
- from deltacat.tests.test_utils.utils import read_s3_contents
24
- from deltacat.tests.compute.test_util_common import (
25
- get_compacted_delta_locator_from_rcf,
26
- )
27
- from deltacat.tests.compute.test_util_create_table_deltas_repo import (
28
- create_incremental_deltas_on_source_table,
29
- )
30
- from deltacat.tests.compute.test_util_create_table_deltas_repo import (
31
- create_src_w_deltas_destination_rebase_w_deltas_strategy,
22
+ create_src_w_deltas_destination_rebase_w_deltas_strategy_main,
23
+ create_incremental_deltas_on_source_table_main,
24
+ get_rci_from_partition,
25
+ read_audit_file,
26
+ PartitionKey,
27
+ get_compacted_delta_locator_from_partition,
32
28
  )
33
29
  from deltacat.tests.compute.compact_partition_rebase_then_incremental_test_cases import (
34
30
  REBASE_THEN_INCREMENTAL_TEST_CASES,
35
31
  )
36
- from typing import Any, Callable, Dict, List, Optional, Set, Tuple
32
+
37
33
  from deltacat.types.media import StorageType
38
34
  from deltacat.storage import (
39
35
  DeltaType,
40
36
  DeltaLocator,
41
37
  Partition,
42
38
  PartitionLocator,
39
+ metastore,
43
40
  )
44
41
  from deltacat.types.media import ContentType
45
42
  from deltacat.compute.compactor.model.compact_partition_params import (
@@ -64,29 +61,24 @@ def setup_ray_cluster():
64
61
  ray.shutdown()
65
62
 
66
63
 
67
- @pytest.fixture(autouse=True, scope="module")
68
- def mock_aws_credential():
69
- os.environ["AWS_ACCESS_KEY_ID"] = "testing"
70
- os.environ["AWS_SECRET_ACCESS_ID"] = "testing"
71
- os.environ["AWS_SECURITY_TOKEN"] = "testing"
72
- os.environ["AWS_SESSION_TOKEN"] = "testing"
73
- os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
74
- yield
75
-
64
+ """
65
+ FUNCTION scoped fixtures
66
+ """
76
67
 
77
- @pytest.fixture(scope="module")
78
- def s3_resource(mock_aws_credential):
79
- with mock_s3():
80
- yield boto3.resource("s3")
81
68
 
69
+ @pytest.fixture(autouse=True, scope="function")
70
+ def enable_bucketing_spec_validation(monkeypatch):
71
+ """
72
+ Enable the bucketing spec validation for all tests.
73
+ This will help catch hash bucket drift in testing.
74
+ """
75
+ import deltacat.compute.compactor_v2.steps.merge
82
76
 
83
- @pytest.fixture(autouse=True, scope="module")
84
- def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
85
- s3_resource.create_bucket(
86
- ACL="authenticated-read",
87
- Bucket=TEST_S3_RCF_BUCKET_NAME,
77
+ monkeypatch.setattr(
78
+ deltacat.compute.compactor_v2.steps.merge,
79
+ "BUCKETING_SPEC_COMPLIANCE_PROFILE",
80
+ "ASSERT",
88
81
  )
89
- yield
90
82
 
91
83
 
92
84
  @pytest.mark.parametrize(
@@ -162,13 +154,12 @@ def setup_compaction_artifacts_s3_bucket(s3_resource: ServiceResource):
162
154
  ],
163
155
  ids=[test_name for test_name in REBASE_THEN_INCREMENTAL_TEST_CASES],
164
156
  )
165
- def test_compact_partition_rebase_then_incremental(
166
- s3_resource: ServiceResource,
167
- local_deltacat_storage_kwargs: Dict[str, Any],
157
+ def test_compact_partition_rebase_then_incremental_main(
158
+ main_deltacat_storage_kwargs: Dict[str, Any],
168
159
  test_name: str,
169
160
  primary_keys: Set[str],
170
161
  sort_keys: List[Optional[Any]],
171
- partition_keys_param: Optional[List[Any]],
162
+ partition_keys_param: Optional[List[PartitionKey]],
172
163
  partition_values_param: List[Optional[str]],
173
164
  input_deltas_param: List[pa.Array],
174
165
  input_deltas_delta_type: str,
@@ -188,9 +179,15 @@ def test_compact_partition_rebase_then_incremental(
188
179
  compact_partition_func: Callable,
189
180
  benchmark: BenchmarkFixture,
190
181
  ):
191
- import deltacat.tests.local_deltacat_storage as ds
182
+ ds_mock_kwargs = main_deltacat_storage_kwargs
183
+ """
184
+ This test performs rebase compaction first, then incremental compaction on the same data.
185
+ This tests the scenario where we first do a rebase (with different source/destination partitions)
186
+ and then follow up with incremental compaction using the result of the rebase.
187
+
188
+ This version uses the main metastore implementation instead of local storage.
189
+ """
192
190
 
193
- ds_mock_kwargs = local_deltacat_storage_kwargs
194
191
  """
195
192
  REBASE
196
193
  """
@@ -199,7 +196,7 @@ def test_compact_partition_rebase_then_incremental(
199
196
  source_table_stream,
200
197
  destination_table_stream,
201
198
  rebased_table_stream,
202
- ) = create_src_w_deltas_destination_rebase_w_deltas_strategy(
199
+ ) = create_src_w_deltas_destination_rebase_w_deltas_strategy_main(
203
200
  sort_keys,
204
201
  partition_keys,
205
202
  input_deltas_param,
@@ -207,19 +204,48 @@ def test_compact_partition_rebase_then_incremental(
207
204
  partition_values_param,
208
205
  ds_mock_kwargs,
209
206
  )
210
- source_partition: Partition = ds.get_partition(
207
+
208
+ # Convert partition values for partition lookup (same as in other helper functions)
209
+ converted_partition_values_for_lookup = partition_values_param
210
+ if partition_values_param and partition_keys:
211
+ converted_partition_values_for_lookup = []
212
+ for i, (value, pk) in enumerate(zip(partition_values_param, partition_keys)):
213
+ if pk.key_type.value == "int": # Use .value to get string representation
214
+ converted_partition_values_for_lookup.append(int(value))
215
+ elif pk.key_type.value == "timestamp":
216
+ # Handle timestamp partition values
217
+ if isinstance(value, str) and "T" in value and value.endswith("Z"):
218
+ ts = pd.to_datetime(value)
219
+ # Convert to microseconds since epoch for PyArrow timestamp[us]
220
+ converted_partition_values_for_lookup.append(
221
+ int(ts.timestamp() * 1_000_000)
222
+ )
223
+ else:
224
+ converted_partition_values_for_lookup.append(value)
225
+ else:
226
+ converted_partition_values_for_lookup.append(value)
227
+
228
+ source_partition: Partition = metastore.get_partition(
211
229
  source_table_stream.locator,
212
- partition_values_param,
230
+ converted_partition_values_for_lookup,
213
231
  **ds_mock_kwargs,
214
232
  )
233
+ # Generate a destination partition ID based on the source partition
234
+ destination_partition_id = str(uuid.uuid4())
215
235
  destination_partition_locator: PartitionLocator = PartitionLocator.of(
216
236
  destination_table_stream.locator,
217
- partition_values_param,
218
- None,
237
+ converted_partition_values_for_lookup,
238
+ destination_partition_id,
239
+ )
240
+ all_column_names = metastore.get_table_version_column_names(
241
+ destination_partition_locator.namespace,
242
+ destination_partition_locator.table_name,
243
+ destination_partition_locator.table_version,
244
+ **ds_mock_kwargs,
219
245
  )
220
- rebased_partition: Partition = ds.get_partition(
246
+ rebased_partition: Partition = metastore.get_partition(
221
247
  rebased_table_stream.locator,
222
- partition_values_param,
248
+ converted_partition_values_for_lookup,
223
249
  **ds_mock_kwargs,
224
250
  )
225
251
  num_workers, worker_instance_cpu = DEFAULT_NUM_WORKERS, DEFAULT_WORKER_INSTANCE_CPUS
@@ -230,151 +256,195 @@ def test_compact_partition_rebase_then_incremental(
230
256
  pgm = PlacementGroupManager(
231
257
  1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
232
258
  ).pgs[0]
233
- compact_partition_params = CompactPartitionParams.of(
234
- {
235
- "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
236
- "compacted_file_content_type": ContentType.PARQUET,
237
- "dd_max_parallelism_ratio": 1.0,
238
- "deltacat_storage": ds,
239
- "deltacat_storage_kwargs": ds_mock_kwargs,
240
- "destination_partition_locator": destination_partition_locator,
241
- "hash_bucket_count": hash_bucket_count_param,
242
- "last_stream_position_to_compact": source_partition.stream_position,
243
- "list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
244
- "object_store": RayPlasmaObjectStore(),
245
- "pg_config": pgm,
246
- "primary_keys": primary_keys,
247
- "read_kwargs_provider": read_kwargs_provider_param,
248
- "rebase_source_partition_locator": source_partition.locator,
249
- "records_per_compacted_file": records_per_compacted_file_param,
250
- "s3_client_kwargs": {},
251
- "source_partition_locator": rebased_partition.locator,
252
- "sort_keys": sort_keys if sort_keys else None,
253
- }
254
- )
255
- # execute
256
- rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
257
- compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
258
- s3_resource, rcf_file_s3_uri
259
- )
260
- tables = ds.download_delta(
261
- compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
262
- )
263
- actual_rebase_compacted_table = pa.concat_tables(tables)
264
- # if no primary key is specified then sort by sort_key for consistent assertion
265
- sorting_cols: List[Any] = (
266
- [(val, "ascending") for val in primary_keys]
267
- if primary_keys
268
- else [pa_key for key in sort_keys for pa_key in key.arrow]
269
- if sort_keys
270
- else []
271
- )
272
- rebase_expected_compact_partition_result = (
273
- rebase_expected_compact_partition_result.combine_chunks().sort_by(sorting_cols)
274
- )
275
- actual_rebase_compacted_table = (
276
- actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
277
- )
278
- assert actual_rebase_compacted_table.equals(
279
- rebase_expected_compact_partition_result
280
- ), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
281
- """
282
- INCREMENTAL
283
- """
284
- (
285
- source_partition_locator_w_deltas,
286
- new_delta,
287
- incremental_delta_length,
288
- has_delete_deltas,
289
- ) = create_incremental_deltas_on_source_table(
290
- BASE_TEST_SOURCE_NAMESPACE,
291
- BASE_TEST_SOURCE_TABLE_NAME,
292
- BASE_TEST_SOURCE_TABLE_VERSION,
293
- source_table_stream,
294
- partition_values_param,
295
- incremental_deltas,
296
- ds_mock_kwargs,
297
- )
298
- compact_partition_params = CompactPartitionParams.of(
299
- {
300
- "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
301
- "compacted_file_content_type": ContentType.PARQUET,
302
- "dd_max_parallelism_ratio": 1.0,
303
- "deltacat_storage": ds,
304
- "deltacat_storage_kwargs": ds_mock_kwargs,
305
- "destination_partition_locator": compacted_delta_locator.partition_locator,
306
- "drop_duplicates": drop_duplicates_param,
307
- "hash_bucket_count": hash_bucket_count_param,
308
- "last_stream_position_to_compact": new_delta.stream_position,
309
- "list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
310
- "object_store": RayPlasmaObjectStore(),
311
- "pg_config": pgm,
312
- "primary_keys": primary_keys,
313
- "read_kwargs_provider": read_kwargs_provider_param,
314
- "rebase_source_partition_locator": None,
315
- "rebase_source_partition_high_watermark": None,
316
- "records_per_compacted_file": records_per_compacted_file_param,
317
- "s3_client_kwargs": {},
318
- "source_partition_locator": source_partition_locator_w_deltas,
319
- "sort_keys": sort_keys if sort_keys else None,
320
- }
321
- )
322
- if expected_terminal_exception:
323
- with pytest.raises(expected_terminal_exception) as exc_info:
324
- compact_partition_func(compact_partition_params)
325
- assert expected_terminal_exception_message in str(exc_info.value)
326
- return
327
- rcf_file_s3_uri = compact_partition_func(compact_partition_params)
328
- round_completion_info = get_rcf(s3_resource, rcf_file_s3_uri)
329
- compacted_delta_locator_incremental: DeltaLocator = (
330
- round_completion_info.compacted_delta_locator
331
- )
332
- # assert if RCF covers all files
333
- if compactor_version != CompactorVersion.V1.value:
334
- previous_end = None
335
- for start, end in round_completion_info.hb_index_to_entry_range.values():
336
- assert (previous_end is None and start == 0) or start == previous_end
337
- previous_end = end
338
- assert (
339
- previous_end == round_completion_info.compacted_pyarrow_write_result.files
340
- )
341
259
 
342
- audit_bucket, audit_key = round_completion_info.compaction_audit_url.replace(
343
- "s3://", ""
344
- ).split("/", 1)
345
- compaction_audit_obj: dict = read_s3_contents(s3_resource, audit_bucket, audit_key)
346
- compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
347
- **compaction_audit_obj
348
- )
260
+ with tempfile.TemporaryDirectory() as test_dir:
261
+ # Extract catalog from storage kwargs
262
+ catalog = ds_mock_kwargs.get("inner")
349
263
 
350
- tables = ds.download_delta(
351
- compacted_delta_locator_incremental,
352
- storage_type=StorageType.LOCAL,
353
- **ds_mock_kwargs,
354
- )
355
- actual_compacted_table = pa.concat_tables(tables)
356
- expected_terminal_compact_partition_result = (
357
- expected_terminal_compact_partition_result.combine_chunks().sort_by(
358
- sorting_cols
264
+ compact_partition_params = CompactPartitionParams.of(
265
+ {
266
+ "catalog": catalog,
267
+ "compacted_file_content_type": ContentType.PARQUET,
268
+ "dd_max_parallelism_ratio": 1.0,
269
+ "deltacat_storage": metastore,
270
+ "deltacat_storage_kwargs": ds_mock_kwargs,
271
+ "destination_partition_locator": destination_partition_locator,
272
+ "hash_bucket_count": hash_bucket_count_param,
273
+ "last_stream_position_to_compact": source_partition.stream_position,
274
+ "list_deltas_kwargs": {
275
+ **ds_mock_kwargs,
276
+ **{"equivalent_table_types": []},
277
+ },
278
+ "object_store": FileObjectStore(test_dir),
279
+ "original_fields": {
280
+ "pk_col_1",
281
+ "pk_col_2",
282
+ "sk_col_1",
283
+ "sk_col_2",
284
+ "col_1",
285
+ "col_2",
286
+ "region_id",
287
+ },
288
+ "pg_config": pgm,
289
+ "primary_keys": primary_keys,
290
+ "all_column_names": all_column_names,
291
+ "read_kwargs_provider": read_kwargs_provider_param,
292
+ "rebase_source_partition_locator": source_partition.locator,
293
+ "records_per_compacted_file": records_per_compacted_file_param,
294
+ "source_partition_locator": rebased_partition.locator,
295
+ "sort_keys": sort_keys if sort_keys else None,
296
+ }
359
297
  )
360
- )
361
- actual_compacted_table = actual_compacted_table.combine_chunks().sort_by(
362
- sorting_cols
363
- )
364
- # NOTE: if delete type-deltas are present this relationship no longer holds true
365
- if not has_delete_deltas:
366
- assert compaction_audit.input_records == (
367
- incremental_delta_length if incremental_deltas else 0
368
- ) + len(actual_rebase_compacted_table), (
369
- " Total input records must be equal to incremental deltas"
370
- " + previous compacted table size"
298
+ # execute
299
+ benchmark(compact_partition_func, compact_partition_params)
300
+ compacted_delta_locator: DeltaLocator = (
301
+ get_compacted_delta_locator_from_partition(
302
+ destination_partition_locator,
303
+ metastore,
304
+ catalog=catalog,
305
+ )
306
+ )
307
+ tables = metastore.download_delta(
308
+ compacted_delta_locator,
309
+ storage_type=StorageType.LOCAL,
310
+ **ds_mock_kwargs,
311
+ )
312
+ actual_rebase_compacted_table = pa.concat_tables(tables)
313
+ all_column_names = metastore.get_table_version_column_names(
314
+ destination_partition_locator.namespace,
315
+ destination_partition_locator.table_name,
316
+ destination_partition_locator.table_version,
317
+ **ds_mock_kwargs,
318
+ )
319
+ # if no primary key is specified then sort by sort_key for consistent assertion
320
+ sorting_cols: List[Any] = (
321
+ [(val, "ascending") for val in primary_keys]
322
+ if primary_keys
323
+ else [pa_key for key in sort_keys for pa_key in key.arrow]
324
+ if sort_keys
325
+ else []
326
+ )
327
+ rebase_expected_compact_partition_result = (
328
+ rebase_expected_compact_partition_result.combine_chunks().sort_by(
329
+ sorting_cols
330
+ )
331
+ )
332
+ actual_rebase_compacted_table = (
333
+ actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
334
+ )
335
+ assert actual_rebase_compacted_table.equals(
336
+ rebase_expected_compact_partition_result
337
+ ), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
338
+
339
+ """
340
+ INCREMENTAL
341
+ """
342
+ (
343
+ source_partition_locator_w_deltas,
344
+ new_delta,
345
+ incremental_delta_length,
346
+ has_delete_deltas,
347
+ ) = create_incremental_deltas_on_source_table_main(
348
+ BASE_TEST_SOURCE_NAMESPACE,
349
+ BASE_TEST_SOURCE_TABLE_NAME,
350
+ BASE_TEST_SOURCE_TABLE_VERSION,
351
+ source_table_stream,
352
+ partition_values_param,
353
+ incremental_deltas,
354
+ ds_mock_kwargs,
371
355
  )
372
356
 
373
- assert actual_compacted_table.equals(
374
- expected_terminal_compact_partition_result
375
- ), f"{actual_compacted_table} does not match {expected_terminal_compact_partition_result}"
357
+ # Handle empty incremental deltas case
358
+ if new_delta is None:
359
+ # For empty incremental deltas, the expected result should be the same as rebase result
360
+ # Skip incremental compaction and just verify the rebase result
361
+ actual_compact_partition_result = actual_rebase_compacted_table
362
+ compaction_audit = None
363
+ else:
364
+ # Perform incremental compaction when there are actual deltas
365
+ last_stream_position = new_delta.stream_position
376
366
 
377
- if assert_compaction_audit is not None:
378
- if not assert_compaction_audit(compactor_version, compaction_audit):
379
- assert False, "Compaction audit assertion failed"
380
- return
367
+ compact_partition_params = CompactPartitionParams.of(
368
+ {
369
+ "catalog": catalog,
370
+ "compacted_file_content_type": ContentType.PARQUET,
371
+ "dd_max_parallelism_ratio": 1.0,
372
+ "deltacat_storage": metastore,
373
+ "deltacat_storage_kwargs": ds_mock_kwargs,
374
+ "destination_partition_locator": compacted_delta_locator.partition_locator,
375
+ "drop_duplicates": drop_duplicates_param,
376
+ "hash_bucket_count": hash_bucket_count_param,
377
+ "last_stream_position_to_compact": last_stream_position,
378
+ "list_deltas_kwargs": {
379
+ **ds_mock_kwargs,
380
+ **{"equivalent_table_types": []},
381
+ },
382
+ "object_store": FileObjectStore(test_dir),
383
+ "original_fields": {
384
+ "pk_col_1",
385
+ "pk_col_2",
386
+ "sk_col_1",
387
+ "sk_col_2",
388
+ "col_1",
389
+ "col_2",
390
+ "region_id",
391
+ },
392
+ "pg_config": pgm,
393
+ "primary_keys": primary_keys,
394
+ "all_column_names": all_column_names,
395
+ "read_kwargs_provider": read_kwargs_provider_param,
396
+ "rebase_source_partition_locator": None,
397
+ "rebase_source_partition_high_watermark": None,
398
+ "records_per_compacted_file": records_per_compacted_file_param,
399
+ "source_partition_locator": source_partition_locator_w_deltas,
400
+ "sort_keys": sort_keys if sort_keys else None,
401
+ }
402
+ )
403
+ if expected_terminal_exception:
404
+ with pytest.raises(expected_terminal_exception) as exc_info:
405
+ compact_partition_func(compact_partition_params)
406
+ assert expected_terminal_exception_message in str(exc_info.value)
407
+ return
408
+ compact_partition_func(compact_partition_params)
409
+ # assert
410
+ compacted_delta_locator: DeltaLocator = (
411
+ get_compacted_delta_locator_from_partition(
412
+ destination_partition_locator, metastore, catalog=catalog
413
+ )
414
+ )
415
+ tables = metastore.download_delta(
416
+ compacted_delta_locator,
417
+ storage_type=StorageType.LOCAL,
418
+ **ds_mock_kwargs,
419
+ )
420
+ actual_compact_partition_result = pa.concat_tables(tables)
421
+
422
+ # Get compaction audit for verification if needed
423
+ round_completion_info = get_rci_from_partition(
424
+ destination_partition_locator, metastore, catalog=catalog
425
+ )
426
+ # Get catalog root for audit file resolution
427
+ catalog_root = catalog.root
428
+
429
+ compaction_audit_obj: dict = read_audit_file(
430
+ round_completion_info.compaction_audit_url, catalog_root
431
+ )
432
+ compaction_audit = CompactionSessionAuditInfo(**compaction_audit_obj)
433
+
434
+ # Verify the final result
435
+ actual_compact_partition_result = (
436
+ actual_compact_partition_result.combine_chunks().sort_by(sorting_cols)
437
+ )
438
+ expected_terminal_compact_partition_result = (
439
+ expected_terminal_compact_partition_result.combine_chunks().sort_by(
440
+ sorting_cols
441
+ )
442
+ )
443
+ assert actual_compact_partition_result.equals(
444
+ expected_terminal_compact_partition_result
445
+ ), f"{actual_compact_partition_result} does not match {expected_terminal_compact_partition_result}"
446
+
447
+ if assert_compaction_audit is not None and compaction_audit is not None:
448
+ if not assert_compaction_audit(compactor_version, compaction_audit):
449
+ pytest.fail("Compaction audit assertion failed")
450
+ return