deltacat 2.0.0b10__py3-none-any.whl → 2.0.0b12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298) hide show
  1. deltacat/__init__.py +96 -17
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +0 -18
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2435 -279
  12. deltacat/catalog/model/catalog.py +154 -77
  13. deltacat/catalog/model/properties.py +63 -22
  14. deltacat/compute/compactor/compaction_session.py +97 -75
  15. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  16. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  17. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  18. deltacat/compute/compactor/repartition_session.py +8 -21
  19. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  20. deltacat/compute/compactor/steps/materialize.py +9 -7
  21. deltacat/compute/compactor/steps/repartition.py +12 -11
  22. deltacat/compute/compactor/utils/io.py +6 -5
  23. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  24. deltacat/compute/compactor/utils/system_columns.py +3 -1
  25. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  26. deltacat/compute/compactor_v2/constants.py +30 -1
  27. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  28. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  29. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  30. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  31. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  32. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  33. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  34. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  35. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  36. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  37. deltacat/compute/compactor_v2/utils/io.py +11 -4
  38. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  39. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  40. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  41. deltacat/compute/converter/converter_session.py +145 -32
  42. deltacat/compute/converter/model/convert_input.py +26 -19
  43. deltacat/compute/converter/model/convert_input_files.py +33 -16
  44. deltacat/compute/converter/model/convert_result.py +35 -16
  45. deltacat/compute/converter/model/converter_session_params.py +24 -21
  46. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  47. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  48. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  49. deltacat/compute/converter/steps/convert.py +157 -50
  50. deltacat/compute/converter/steps/dedupe.py +24 -11
  51. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  52. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  53. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  54. deltacat/compute/converter/utils/io.py +101 -12
  55. deltacat/compute/converter/utils/s3u.py +33 -27
  56. deltacat/compute/janitor.py +205 -0
  57. deltacat/compute/jobs/client.py +25 -12
  58. deltacat/compute/resource_estimation/delta.py +38 -6
  59. deltacat/compute/resource_estimation/model.py +8 -0
  60. deltacat/constants.py +45 -2
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/env.py +10 -0
  64. deltacat/examples/basic_logging.py +1 -3
  65. deltacat/examples/compactor/aws/__init__.py +1 -0
  66. deltacat/examples/compactor/bootstrap.py +863 -0
  67. deltacat/examples/compactor/compactor.py +373 -0
  68. deltacat/examples/compactor/explorer.py +473 -0
  69. deltacat/examples/compactor/gcp/__init__.py +1 -0
  70. deltacat/examples/compactor/job_runner.py +439 -0
  71. deltacat/examples/compactor/utils/__init__.py +1 -0
  72. deltacat/examples/compactor/utils/common.py +261 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  79. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
  80. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  81. deltacat/examples/indexer/indexer.py +2 -2
  82. deltacat/examples/indexer/job_runner.py +1 -2
  83. deltacat/exceptions.py +66 -4
  84. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  85. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  86. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +29 -11
  87. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  88. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  89. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  90. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  91. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  92. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  93. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  94. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  95. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  96. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  97. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  98. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  99. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  100. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  101. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  102. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  103. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  104. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  105. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  107. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  108. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  109. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  110. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  111. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  112. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  113. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  114. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  115. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  116. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  117. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  118. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  119. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  120. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  121. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  122. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  123. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  124. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  125. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  126. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  127. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  128. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  129. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  130. deltacat/io/datasource/deltacat_datasource.py +0 -1
  131. deltacat/io/reader/deltacat_read_api.py +1 -1
  132. deltacat/storage/__init__.py +20 -2
  133. deltacat/storage/interface.py +54 -32
  134. deltacat/storage/main/impl.py +1494 -541
  135. deltacat/storage/model/delta.py +27 -3
  136. deltacat/storage/model/locator.py +6 -12
  137. deltacat/storage/model/manifest.py +182 -6
  138. deltacat/storage/model/metafile.py +151 -78
  139. deltacat/storage/model/namespace.py +8 -1
  140. deltacat/storage/model/partition.py +117 -42
  141. deltacat/storage/model/schema.py +2427 -159
  142. deltacat/storage/model/shard.py +6 -2
  143. deltacat/storage/model/sort_key.py +40 -0
  144. deltacat/storage/model/stream.py +9 -2
  145. deltacat/storage/model/table.py +12 -1
  146. deltacat/storage/model/table_version.py +11 -0
  147. deltacat/storage/model/transaction.py +1184 -208
  148. deltacat/storage/model/transform.py +81 -2
  149. deltacat/storage/model/types.py +48 -26
  150. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  151. deltacat/tests/aws/test_s3u.py +2 -31
  152. deltacat/tests/catalog/data/__init__.py +0 -0
  153. deltacat/tests/catalog/main/__init__.py +0 -0
  154. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  155. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  156. deltacat/tests/catalog/model/__init__.py +0 -0
  157. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  158. deltacat/tests/catalog/test_catalogs.py +103 -106
  159. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  160. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  161. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  162. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  163. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  164. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  165. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  166. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  167. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  168. deltacat/tests/compute/conftest.py +8 -44
  169. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  170. deltacat/tests/compute/converter/utils.py +15 -6
  171. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  172. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  173. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  174. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  175. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  176. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  177. deltacat/tests/compute/test_janitor.py +236 -0
  178. deltacat/tests/compute/test_util_common.py +716 -43
  179. deltacat/tests/compute/test_util_constant.py +0 -1
  180. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  181. deltacat/tests/daft/__init__.py +0 -0
  182. deltacat/tests/daft/test_model.py +97 -0
  183. deltacat/tests/experimental/__init__.py +1 -0
  184. deltacat/tests/experimental/catalog/__init__.py +0 -0
  185. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  186. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  187. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  188. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  189. deltacat/tests/experimental/daft/__init__.py +0 -0
  190. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  191. deltacat/tests/experimental/storage/__init__.py +0 -0
  192. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  193. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  194. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  195. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -3
  196. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  197. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  198. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  199. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  200. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  201. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  202. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  203. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  204. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +5 -3
  205. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  206. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  207. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  208. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  209. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  210. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  211. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  212. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  213. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  214. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  215. deltacat/tests/storage/model/test_schema.py +171 -0
  216. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  217. deltacat/tests/storage/model/test_shard.py +3 -1
  218. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  219. deltacat/tests/storage/model/test_transaction.py +393 -48
  220. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  221. deltacat/tests/test_deltacat_api.py +988 -4
  222. deltacat/tests/test_exceptions.py +9 -5
  223. deltacat/tests/test_utils/pyarrow.py +52 -21
  224. deltacat/tests/test_utils/storage.py +23 -34
  225. deltacat/tests/types/__init__.py +0 -0
  226. deltacat/tests/types/test_tables.py +104 -0
  227. deltacat/tests/utils/exceptions.py +22 -0
  228. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  229. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  230. deltacat/tests/utils/test_daft.py +121 -31
  231. deltacat/tests/utils/test_numpy.py +1193 -0
  232. deltacat/tests/utils/test_pandas.py +1106 -0
  233. deltacat/tests/utils/test_polars.py +1040 -0
  234. deltacat/tests/utils/test_pyarrow.py +1370 -89
  235. deltacat/types/media.py +224 -14
  236. deltacat/types/tables.py +2329 -59
  237. deltacat/utils/arguments.py +33 -1
  238. deltacat/utils/daft.py +823 -36
  239. deltacat/utils/export.py +3 -1
  240. deltacat/utils/filesystem.py +100 -0
  241. deltacat/utils/metafile_locator.py +2 -1
  242. deltacat/utils/numpy.py +118 -26
  243. deltacat/utils/pandas.py +577 -48
  244. deltacat/utils/polars.py +658 -27
  245. deltacat/utils/pyarrow.py +1258 -213
  246. deltacat/utils/ray_utils/dataset.py +101 -10
  247. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  248. deltacat/utils/url.py +57 -16
  249. deltacat-2.0.0b12.dist-info/METADATA +1163 -0
  250. deltacat-2.0.0b12.dist-info/RECORD +439 -0
  251. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
  252. deltacat/catalog/iceberg/__init__.py +0 -4
  253. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  254. deltacat/compute/merge_on_read/__init__.py +0 -4
  255. deltacat/compute/merge_on_read/daft.py +0 -40
  256. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  257. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  258. deltacat/daft/daft_scan.py +0 -115
  259. deltacat/daft/model.py +0 -258
  260. deltacat/daft/translator.py +0 -126
  261. deltacat/examples/common/fixtures.py +0 -15
  262. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  263. deltacat/storage/rivulet/__init__.py +0 -11
  264. deltacat/storage/rivulet/feather/__init__.py +0 -5
  265. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  266. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  267. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  268. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  269. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  270. deltacat/utils/s3fs.py +0 -21
  271. deltacat-2.0.0b10.dist-info/METADATA +0 -68
  272. deltacat-2.0.0b10.dist-info/RECORD +0 -381
  273. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  274. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  275. /deltacat/{daft → docs/autogen/schema}/__init__.py +0 -0
  276. /deltacat/{examples/common → docs/autogen/schema/inference}/__init__.py +0 -0
  277. /deltacat/examples/{iceberg → compactor}/__init__.py +0 -0
  278. /deltacat/{storage/iceberg → examples/experimental}/__init__.py +0 -0
  279. /deltacat/{storage/rivulet/arrow → examples/experimental/iceberg}/__init__.py +0 -0
  280. /deltacat/{storage/rivulet/fs → examples/experimental/iceberg/converter}/__init__.py +0 -0
  281. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  282. /deltacat/{storage/rivulet/reader → experimental/catalog}/__init__.py +0 -0
  283. /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
  284. /deltacat/{storage/rivulet/schema → experimental/compatibility}/__init__.py +0 -0
  285. /deltacat/{storage/rivulet/writer → experimental/converter_agent}/__init__.py +0 -0
  286. /deltacat/{tests/storage/rivulet → experimental/converter_agent/beam}/__init__.py +0 -0
  287. /deltacat/{tests/storage/rivulet/fs → experimental/storage}/__init__.py +0 -0
  288. /deltacat/{tests/storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
  289. /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
  290. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/storage/rivulet/fs/__init__.py} +0 -0
  291. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  292. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  293. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  294. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  295. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  296. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  297. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
  298. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -103,11 +103,6 @@ class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
103
103
  add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType, Optional[EntryParams]]]]
104
104
 
105
105
 
106
- @dataclass(frozen=True)
107
- class NoRCFOutputCompactionTestCaseParams(BaseCompactorTestCase):
108
- pass
109
-
110
-
111
106
  def with_compactor_version_func_test_param(
112
107
  test_cases: Dict[str, BaseCompactorTestCase] = None
113
108
  ):
@@ -135,7 +130,7 @@ def with_compactor_version_func_test_param(
135
130
 
136
131
 
137
132
  INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
138
- "1-incremental-pkstr-sknone-norcf": IncrementalCompactionTestCaseParams(
133
+ "1-incremental-pkstr-sknone-norci": IncrementalCompactionTestCaseParams(
139
134
  primary_keys={"pk_col_1"},
140
135
  sort_keys=ZERO_VALUED_SORT_KEY,
141
136
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
@@ -161,7 +156,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
161
156
  skip_enabled_compact_partition_drivers=None,
162
157
  assert_compaction_audit=assert_compaction_audit,
163
158
  ),
164
- "2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
159
+ "2-incremental-pkstr-skstr-norci": IncrementalCompactionTestCaseParams(
165
160
  primary_keys={"pk_col_1"},
166
161
  sort_keys=ZERO_VALUED_SORT_KEY,
167
162
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
@@ -190,7 +185,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
190
185
  skip_enabled_compact_partition_drivers=None,
191
186
  assert_compaction_audit=assert_compaction_audit,
192
187
  ),
193
- "3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
188
+ "3-incremental-pkstr-multiskstr-norci": IncrementalCompactionTestCaseParams(
194
189
  primary_keys={"pk_col_1"},
195
190
  sort_keys=[
196
191
  SortKey.of(key=["sk_col_1"]),
@@ -599,6 +594,38 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
599
594
  skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
600
595
  assert_compaction_audit=None,
601
596
  ),
597
+ "15-incremental-empty-input-with-single-hash-bucket": IncrementalCompactionTestCaseParams(
598
+ primary_keys={"pk_col_1"},
599
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
600
+ partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
601
+ partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
602
+ input_deltas=pa.Table.from_arrays(
603
+ [
604
+ pa.array([]),
605
+ pa.array([]),
606
+ ],
607
+ names=["pk_col_1", "sk_col_1"],
608
+ ),
609
+ input_deltas_delta_type=DeltaType.UPSERT,
610
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
611
+ [
612
+ pa.array([]),
613
+ pa.array([]),
614
+ ],
615
+ names=["pk_col_1", "sk_col_1"],
616
+ ),
617
+ expected_terminal_exception=None,
618
+ expected_terminal_exception_message=None,
619
+ do_create_placement_group=False,
620
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
621
+ hash_bucket_count=1,
622
+ read_kwargs_provider=None,
623
+ drop_duplicates=True,
624
+ is_inplace=False,
625
+ add_late_deltas=None,
626
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
627
+ assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
628
+ ),
602
629
  }
603
630
 
604
631
  INCREMENTAL_TEST_CASES = with_compactor_version_func_test_param(INCREMENTAL_TEST_CASES)
@@ -49,7 +49,7 @@ class TestRepartitionRange(unittest.TestCase):
49
49
  self.destination_partition: Partition = MagicMock()
50
50
  self.repartition_args = {"column": "last_updated", "ranges": [1678665487112747]}
51
51
  self.max_records_per_output_file = 2
52
- self.s3_table_writer_kwargs = {}
52
+ self.table_writer_kwargs = {}
53
53
  self.repartitioned_file_content_type = ContentType.PARQUET
54
54
  self.deltacat_storage = MagicMock()
55
55
  self.deltacat_storage_kwargs = MagicMock()
@@ -60,7 +60,7 @@ class TestRepartitionRange(unittest.TestCase):
60
60
  self.destination_partition,
61
61
  self.repartition_args,
62
62
  self.max_records_per_output_file,
63
- self.s3_table_writer_kwargs,
63
+ self.table_writer_kwargs,
64
64
  self.repartitioned_file_content_type,
65
65
  self.deltacat_storage,
66
66
  self.deltacat_storage_kwargs,
@@ -87,7 +87,7 @@ class TestRepartitionRange(unittest.TestCase):
87
87
  self.destination_partition,
88
88
  self.repartition_args,
89
89
  self.max_records_per_output_file,
90
- self.s3_table_writer_kwargs,
90
+ self.table_writer_kwargs,
91
91
  self.repartitioned_file_content_type,
92
92
  self.deltacat_storage,
93
93
  self.deltacat_storage_kwargs,
@@ -101,7 +101,7 @@ class TestRepartitionRange(unittest.TestCase):
101
101
  self.destination_partition,
102
102
  self.repartition_args,
103
103
  self.max_records_per_output_file,
104
- self.s3_table_writer_kwargs,
104
+ self.table_writer_kwargs,
105
105
  self.repartitioned_file_content_type,
106
106
  self.deltacat_storage,
107
107
  self.deltacat_storage_kwargs,
@@ -114,7 +114,7 @@ class TestRepartitionRange(unittest.TestCase):
114
114
  self.destination_partition,
115
115
  self.repartition_args,
116
116
  self.max_records_per_output_file,
117
- self.s3_table_writer_kwargs,
117
+ self.table_writer_kwargs,
118
118
  self.repartitioned_file_content_type,
119
119
  self.deltacat_storage,
120
120
  self.deltacat_storage_kwargs,
@@ -128,7 +128,7 @@ class TestRepartitionRange(unittest.TestCase):
128
128
  self.destination_partition,
129
129
  self.repartition_args,
130
130
  self.max_records_per_output_file,
131
- self.s3_table_writer_kwargs,
131
+ self.table_writer_kwargs,
132
132
  self.repartitioned_file_content_type,
133
133
  self.deltacat_storage,
134
134
  self.deltacat_storage_kwargs,
@@ -143,7 +143,7 @@ class TestRepartitionRange(unittest.TestCase):
143
143
  self.destination_partition,
144
144
  self.repartition_args,
145
145
  self.max_records_per_output_file,
146
- self.s3_table_writer_kwargs,
146
+ self.table_writer_kwargs,
147
147
  self.repartitioned_file_content_type,
148
148
  self.deltacat_storage,
149
149
  self.deltacat_storage_kwargs,
@@ -158,7 +158,7 @@ class TestRepartitionRange(unittest.TestCase):
158
158
  self.destination_partition,
159
159
  self.repartition_args,
160
160
  self.max_records_per_output_file,
161
- self.s3_table_writer_kwargs,
161
+ self.table_writer_kwargs,
162
162
  self.repartitioned_file_content_type,
163
163
  self.deltacat_storage,
164
164
  self.deltacat_storage_kwargs,
@@ -175,7 +175,7 @@ class TestRepartitionRange(unittest.TestCase):
175
175
  self.destination_partition,
176
176
  self.repartition_args,
177
177
  self.max_records_per_output_file,
178
- self.s3_table_writer_kwargs,
178
+ self.table_writer_kwargs,
179
179
  self.repartitioned_file_content_type,
180
180
  self.deltacat_storage,
181
181
  self.deltacat_storage_kwargs,
@@ -189,7 +189,7 @@ class TestRepartitionRange(unittest.TestCase):
189
189
  self.destination_partition,
190
190
  self.repartition_args,
191
191
  self.max_records_per_output_file,
192
- self.s3_table_writer_kwargs,
192
+ self.table_writer_kwargs,
193
193
  self.repartitioned_file_content_type,
194
194
  self.deltacat_storage,
195
195
  self.deltacat_storage_kwargs,
@@ -206,7 +206,7 @@ class TestRepartitionRange(unittest.TestCase):
206
206
  self.destination_partition,
207
207
  self.repartition_args,
208
208
  self.max_records_per_output_file,
209
- self.s3_table_writer_kwargs,
209
+ self.table_writer_kwargs,
210
210
  self.repartitioned_file_content_type,
211
211
  self.deltacat_storage,
212
212
  ),
@@ -233,7 +233,7 @@ class TestRepartitionRange(unittest.TestCase):
233
233
  self.destination_partition,
234
234
  self.repartition_args,
235
235
  self.max_records_per_output_file,
236
- self.s3_table_writer_kwargs,
236
+ self.table_writer_kwargs,
237
237
  self.repartitioned_file_content_type,
238
238
  self.deltacat_storage,
239
239
  self.deltacat_storage_kwargs,
@@ -1,131 +1,135 @@
1
- import unittest
1
+ import pytest
2
2
  from unittest import mock
3
3
 
4
- from deltacat.tests.compute.conftest import (
5
- create_local_deltacat_storage_file,
6
- clean_up_local_deltacat_storage_file,
7
- )
8
4
  from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
9
5
 
10
6
 
11
- class TestFitInputDeltas(unittest.TestCase):
12
- @classmethod
13
- def setUpClass(cls):
14
- cls.module_patcher = mock.patch.dict("sys.modules", {"ray": mock.MagicMock()})
15
- cls.module_patcher.start()
16
-
17
- from deltacat.compute.compactor.model.compaction_session_audit_info import (
18
- CompactionSessionAuditInfo,
7
+ @pytest.fixture(scope="module", autouse=True)
8
+ def mock_ray():
9
+ """Mock ray module for all tests in this module"""
10
+ module_patcher = mock.patch.dict("sys.modules", {"ray": mock.MagicMock()})
11
+ module_patcher.start()
12
+ yield
13
+ module_patcher.stop()
14
+
15
+
16
+ @pytest.fixture
17
+ def compaction_audit():
18
+ """Fixture for CompactionSessionAuditInfo"""
19
+ from deltacat.compute.compactor.model.compaction_session_audit_info import (
20
+ CompactionSessionAuditInfo,
21
+ )
22
+
23
+ return CompactionSessionAuditInfo("1.0", "2.3", "test")
24
+
25
+
26
+ def test_sanity(main_deltacat_storage_kwargs, compaction_audit):
27
+ from deltacat.compute.compactor.utils import io
28
+ from deltacat.storage import metastore
29
+
30
+ (
31
+ delta_list,
32
+ hash_bucket_count,
33
+ high_watermark,
34
+ require_multiple_rounds,
35
+ ) = io.fit_input_deltas(
36
+ [TEST_UPSERT_DELTA],
37
+ {"CPU": 1, "memory": 20000000},
38
+ compaction_audit,
39
+ None,
40
+ metastore,
41
+ main_deltacat_storage_kwargs,
42
+ )
43
+
44
+ assert hash_bucket_count is not None
45
+ assert len(delta_list) == 1
46
+ assert high_watermark is not None
47
+ assert require_multiple_rounds is False
48
+ assert compaction_audit.hash_bucket_count is not None
49
+ assert compaction_audit.input_file_count is not None
50
+ assert compaction_audit.input_size_bytes is not None
51
+ assert compaction_audit.total_cluster_memory_bytes is not None
52
+
53
+
54
+ def test_when_hash_bucket_count_overridden(
55
+ main_deltacat_storage_kwargs, compaction_audit
56
+ ):
57
+ from deltacat.compute.compactor.utils import io
58
+ from deltacat.storage import metastore
59
+
60
+ (
61
+ delta_list,
62
+ hash_bucket_count,
63
+ high_watermark,
64
+ require_multiple_rounds,
65
+ ) = io.fit_input_deltas(
66
+ [TEST_UPSERT_DELTA],
67
+ {"CPU": 1, "memory": 20000000},
68
+ compaction_audit,
69
+ 20,
70
+ metastore,
71
+ main_deltacat_storage_kwargs,
72
+ )
73
+
74
+ assert hash_bucket_count == 20
75
+ assert len(delta_list) == 1
76
+ assert high_watermark is not None
77
+ assert require_multiple_rounds is False
78
+
79
+
80
+ def test_when_not_enough_memory_splits_manifest_entries(
81
+ main_deltacat_storage_kwargs, compaction_audit
82
+ ):
83
+ from deltacat.compute.compactor.utils import io
84
+ from deltacat.storage import metastore
85
+
86
+ (
87
+ delta_list,
88
+ hash_bucket_count,
89
+ high_watermark,
90
+ require_multiple_rounds,
91
+ ) = io.fit_input_deltas(
92
+ [TEST_UPSERT_DELTA],
93
+ {"CPU": 2, "memory": 10},
94
+ compaction_audit,
95
+ 20,
96
+ metastore,
97
+ main_deltacat_storage_kwargs,
98
+ )
99
+
100
+ assert hash_bucket_count is not None
101
+ assert len(delta_list) == 2
102
+ assert high_watermark is not None
103
+ assert require_multiple_rounds is False
104
+
105
+
106
+ def test_when_no_input_deltas(main_deltacat_storage_kwargs, compaction_audit):
107
+ from deltacat.compute.compactor.utils import io
108
+ from deltacat.storage import metastore
109
+
110
+ with pytest.raises(AssertionError):
111
+ io.fit_input_deltas(
112
+ [],
113
+ {"CPU": 100, "memory": 20000.0},
114
+ compaction_audit,
115
+ None,
116
+ metastore,
117
+ main_deltacat_storage_kwargs,
19
118
  )
20
119
 
21
- cls.kwargs_for_local_deltacat_storage = create_local_deltacat_storage_file()
22
-
23
- cls.COMPACTION_AUDIT = CompactionSessionAuditInfo("1.0", "2.3", "test")
24
-
25
- super().setUpClass()
26
-
27
- @classmethod
28
- def tearDownClass(cls) -> None:
29
- cls.module_patcher.stop()
30
- clean_up_local_deltacat_storage_file(cls.kwargs_for_local_deltacat_storage)
31
120
 
32
- def test_sanity(self):
33
- from deltacat.compute.compactor.utils import io
34
- import deltacat.tests.local_deltacat_storage as ds
121
+ def test_when_cpu_resources_is_not_passed(
122
+ main_deltacat_storage_kwargs, compaction_audit
123
+ ):
124
+ from deltacat.compute.compactor.utils import io
125
+ from deltacat.storage import metastore
35
126
 
36
- (
37
- delta_list,
38
- hash_bucket_count,
39
- high_watermark,
40
- require_multiple_rounds,
41
- ) = io.fit_input_deltas(
42
- [TEST_UPSERT_DELTA],
43
- {"CPU": 1, "memory": 20000000},
44
- self.COMPACTION_AUDIT,
127
+ with pytest.raises(KeyError):
128
+ io.fit_input_deltas(
129
+ [],
130
+ {},
131
+ compaction_audit,
45
132
  None,
46
- ds,
47
- self.kwargs_for_local_deltacat_storage,
133
+ metastore,
134
+ main_deltacat_storage_kwargs,
48
135
  )
49
-
50
- self.assertIsNotNone(hash_bucket_count)
51
- self.assertTrue(1, len(delta_list))
52
- self.assertIsNotNone(high_watermark)
53
- self.assertFalse(require_multiple_rounds)
54
- self.assertIsNotNone(hash_bucket_count, self.COMPACTION_AUDIT.hash_bucket_count)
55
- self.assertIsNotNone(self.COMPACTION_AUDIT.input_file_count)
56
- self.assertIsNotNone(self.COMPACTION_AUDIT.input_size_bytes)
57
- self.assertIsNotNone(self.COMPACTION_AUDIT.total_cluster_memory_bytes)
58
-
59
- def test_when_hash_bucket_count_overridden(self):
60
- from deltacat.compute.compactor.utils import io
61
- import deltacat.tests.local_deltacat_storage as ds
62
-
63
- (
64
- delta_list,
65
- hash_bucket_count,
66
- high_watermark,
67
- require_multiple_rounds,
68
- ) = io.fit_input_deltas(
69
- [TEST_UPSERT_DELTA],
70
- {"CPU": 1, "memory": 20000000},
71
- self.COMPACTION_AUDIT,
72
- 20,
73
- ds,
74
- self.kwargs_for_local_deltacat_storage,
75
- )
76
-
77
- self.assertEqual(20, hash_bucket_count)
78
- self.assertEqual(1, len(delta_list))
79
- self.assertIsNotNone(high_watermark)
80
- self.assertFalse(require_multiple_rounds)
81
-
82
- def test_when_not_enough_memory_splits_manifest_entries(self):
83
- from deltacat.compute.compactor.utils import io
84
- import deltacat.tests.local_deltacat_storage as ds
85
-
86
- (
87
- delta_list,
88
- hash_bucket_count,
89
- high_watermark,
90
- require_multiple_rounds,
91
- ) = io.fit_input_deltas(
92
- [TEST_UPSERT_DELTA],
93
- {"CPU": 2, "memory": 10},
94
- self.COMPACTION_AUDIT,
95
- 20,
96
- ds,
97
- self.kwargs_for_local_deltacat_storage,
98
- )
99
-
100
- self.assertIsNotNone(hash_bucket_count)
101
- self.assertTrue(2, len(delta_list))
102
- self.assertIsNotNone(high_watermark)
103
- self.assertFalse(require_multiple_rounds)
104
-
105
- def test_when_no_input_deltas(self):
106
- from deltacat.compute.compactor.utils import io
107
- import deltacat.tests.local_deltacat_storage as ds
108
-
109
- with self.assertRaises(AssertionError):
110
- io.fit_input_deltas(
111
- [],
112
- {"CPU": 100, "memory": 20000.0},
113
- self.COMPACTION_AUDIT,
114
- None,
115
- ds,
116
- self.kwargs_for_local_deltacat_storage,
117
- )
118
-
119
- def test_when_cpu_resources_is_not_passed(self):
120
- from deltacat.compute.compactor.utils import io
121
- import deltacat.tests.local_deltacat_storage as ds
122
-
123
- with self.assertRaises(KeyError):
124
- io.fit_input_deltas(
125
- [],
126
- {},
127
- self.COMPACTION_AUDIT,
128
- None,
129
- ds,
130
- self.kwargs_for_local_deltacat_storage,
131
- )
@@ -0,0 +1,254 @@
1
+ from unittest.mock import Mock
2
+ from deltacat.compute.compactor.utils.round_completion_reader import (
3
+ read_round_completion_info,
4
+ )
5
+ from deltacat.tests.compute.test_util_common import get_test_partition_locator
6
+ from deltacat.compute.compactor import RoundCompletionInfo
7
+ from deltacat.compute.compactor.model.pyarrow_write_result import PyArrowWriteResult
8
+ from deltacat.storage.model.partition import Partition
9
+
10
+
11
class TestRoundCompletionInfoInPartition:
    """
    Tests for ``read_round_completion_info`` when the RoundCompletionInfo is
    carried on the destination partition.

    Storage is always a ``Mock``. Each test either hands the destination
    partition to the reader directly or has the mocked ``get_partition``
    return it (or ``None``).
    """

    @staticmethod
    def _build_round_completion_info(prev_source_partition_locator):
        """
        Build the RoundCompletionInfo fixture shared by these tests, varying
        only the previous source partition locator recorded on it.
        """
        write_result = PyArrowWriteResult.of(
            file_count=1, pyarrow_bytes=1000, file_bytes=1000, record_count=100
        )
        return RoundCompletionInfo.of(
            high_watermark=122,
            compacted_delta_locator=None,
            compacted_pyarrow_write_result=write_result,
            sort_keys_bit_width=12,
            prev_source_partition_locator=prev_source_partition_locator,
        )

    @staticmethod
    def _build_partition(locator, round_completion_info):
        """
        Build a partition at ``locator`` carrying ``round_completion_info``
        (which may be ``None``).
        """
        return Partition.of(
            locator=locator,
            content_types=None,
            compaction_round_completion_info=round_completion_info,
        )

    def test_read_round_completion_info_from_partition_with_matching_source(self):
        """
        A supplied destination partition whose RoundCompletionInfo records the
        same source partition locator yields that RoundCompletionInfo, without
        any storage lookup.
        """
        src = get_test_partition_locator("source")
        dest = get_test_partition_locator("destination")
        stored_rci = self._build_round_completion_info(src)
        dest_partition = self._build_partition(dest, stored_rci)
        storage = Mock()

        actual = read_round_completion_info(
            source_partition_locator=src,
            destination_partition_locator=dest,
            deltacat_storage=storage,
            deltacat_storage_kwargs={},
            destination_partition=dest_partition,
        )

        assert actual is not None
        assert actual == stored_rci
        assert actual.high_watermark == 122
        assert actual.sort_keys_bit_width == 12
        assert actual.prev_source_partition_locator.partition_id == src.partition_id

        # The partition was passed in, so storage must not have been queried.
        storage.get_partition.assert_not_called()

    def test_read_round_completion_info_from_partition_with_mismatched_source(self):
        """
        A supplied destination partition whose RoundCompletionInfo records a
        different source partition locator yields ``None``.
        """
        src = get_test_partition_locator("source")
        other_src = get_test_partition_locator("different_source")
        dest = get_test_partition_locator("destination")

        # The stored info points at `other_src`, not at the `src` we ask for.
        stored_rci = self._build_round_completion_info(other_src)
        dest_partition = self._build_partition(dest, stored_rci)
        storage = Mock()

        actual = read_round_completion_info(
            source_partition_locator=src,
            destination_partition_locator=dest,
            deltacat_storage=storage,
            deltacat_storage_kwargs={},
            destination_partition=dest_partition,
        )

        assert actual is None

        # The partition was passed in, so storage must not have been queried.
        storage.get_partition.assert_not_called()

    def test_read_round_completion_info_from_storage_when_partition_not_provided(self):
        """
        Without a supplied destination partition, the reader fetches it from
        storage and returns the RoundCompletionInfo found on it.
        """
        src = get_test_partition_locator("source")
        dest = get_test_partition_locator("destination")
        stored_rci = self._build_round_completion_info(src)
        dest_partition = self._build_partition(dest, stored_rci)

        storage = Mock()
        storage.get_partition.return_value = dest_partition

        actual = read_round_completion_info(
            source_partition_locator=src,
            destination_partition_locator=dest,
            deltacat_storage=storage,
            deltacat_storage_kwargs={"test_arg": "test_value"},
        )

        assert actual is not None
        assert actual == stored_rci
        assert actual.high_watermark == 122

        # The lookup uses the destination's stream locator and partition
        # values, and forwards the storage kwargs as keyword arguments.
        storage.get_partition.assert_called_once_with(
            dest.stream_locator,
            dest.partition_values,
            test_arg="test_value",
        )

    def test_read_round_completion_info_when_partition_not_found(self):
        """
        When storage has no destination partition (``get_partition`` returns
        ``None``), the reader returns ``None``.
        """
        src = get_test_partition_locator("source")
        dest = get_test_partition_locator("destination")

        storage = Mock()
        storage.get_partition.return_value = None

        actual = read_round_completion_info(
            source_partition_locator=src,
            destination_partition_locator=dest,
            deltacat_storage=storage,
            deltacat_storage_kwargs={},
        )

        assert actual is None
        storage.get_partition.assert_called_once()

    def test_read_round_completion_info_when_no_completion_info_in_partition(self):
        """
        When the destination partition exists in storage but carries no
        RoundCompletionInfo, the reader returns ``None``.
        """
        src = get_test_partition_locator("source")
        dest = get_test_partition_locator("destination")
        dest_partition = self._build_partition(dest, None)

        storage = Mock()
        storage.get_partition.return_value = dest_partition

        actual = read_round_completion_info(
            source_partition_locator=src,
            destination_partition_locator=dest,
            deltacat_storage=storage,
            deltacat_storage_kwargs={},
        )

        assert actual is None

    def test_read_with_missing_prev_source_partition_locator_returns_none(self):
        """
        A RoundCompletionInfo that records no previous source partition
        locator is not returned; the reader yields ``None``.
        """
        src = get_test_partition_locator("source")
        dest = get_test_partition_locator("destination")

        # No previous source partition locator is recorded on the stored info.
        stored_rci = self._build_round_completion_info(None)
        dest_partition = self._build_partition(dest, stored_rci)
        storage = Mock()

        actual = read_round_completion_info(
            source_partition_locator=src,
            destination_partition_locator=dest,
            deltacat_storage=storage,
            deltacat_storage_kwargs={},
            destination_partition=dest_partition,
        )

        assert actual is None