deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -20,11 +20,11 @@ from deltacat.utils.common import ReadKwargsProvider
20
20
 
21
21
  from deltacat.storage import (
22
22
  DeltaType,
23
+ EntryParams,
23
24
  )
24
25
  from deltacat.compute.compactor_v2.compaction_session import (
25
26
  compact_partition as compact_partition_v2,
26
27
  )
27
- from deltacat.storage import DeleteParameters
28
28
 
29
29
  from deltacat.compute.compactor.model.compactor_version import CompactorVersion
30
30
 
@@ -53,8 +53,8 @@ class BaseCompactorTestCase:
53
53
  Args:
54
54
  primary_keys: Set[str] - argument for the primary_keys parameter in compact_partition. Also needed for table/delta creation
55
55
  sort_keys: List[SortKey] - argument for the sort_keys parameter in compact_partition. Also needed for table/delta creation
56
- partition_keys_param: List[PartitionKey] - argument for the partition_keys parameter. Needed for table/delta creation
57
- partition_values_param: List[Optional[str]] - argument for the partition_valued parameter. Needed for table/delta creation
56
+ partition_keys: List[PartitionKey] - argument for the partition_keys parameter. Needed for table/delta creation
57
+ partition_values: List[Optional[str]] - argument for the partition_valued parameter. Needed for table/delta creation
58
58
  input_deltas: List[pa.Array] - argument required for delta creation during compact_partition test setup. Actual incoming deltas expressed as a PyArrow array (https://arrow.apache.org/docs/python/generated/pyarrow.array.html)
59
59
  input_deltas_delta_type: DeltaType - enumerated argument required for delta creation during compact_partition test setup. Available values are (DeltaType.APPEND, DeltaType.UPSERT, DeltaType.DELETE). DeltaType.APPEND is not supported by compactor v1 or v2
60
60
  expected_terminal_compact_partition_result: pa.Table - expected PyArrow table after compaction (i.e., the state of the table after applying all row UPDATES/DELETES/INSERTS)
@@ -62,7 +62,7 @@ class BaseCompactorTestCase:
62
62
  expected_terminal_exception_message: Optional[str] - expected exception message if present.
63
63
  do_create_placement_group: bool - toggles whether to create a placement group (https://docs.ray.io/en/latest/ray-core/scheduling/placement-group.html) or not
64
64
  records_per_compacted_file: int - argument for the records_per_compacted_file parameter in compact_partition
65
- hash_bucket_count_param: int - argument for the hash_bucket_count parameter in compact_partition
65
+ hash_bucket_count: int - argument for the hash_bucket_count parameter in compact_partition
66
66
  read_kwargs_provider: Optional[ReadKwargsProvider] - argument for read_kwargs_provider parameter in compact_partition. If None then no ReadKwargsProvider is provided to compact_partition_params
67
67
  drop_duplicates: bool - argument for drop_duplicates parameter in compact_partition. Only recognized by compactor v2.
68
68
  skip_enabled_compact_partition_drivers: List[CompactorVersion] - skip whatever enabled_compact_partition_drivers are included in this list
@@ -100,14 +100,7 @@ class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
100
100
  """
101
101
 
102
102
  is_inplace: bool
103
- add_late_deltas: Optional[
104
- List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]
105
- ]
106
-
107
-
108
- @dataclass(frozen=True)
109
- class NoRCFOutputCompactionTestCaseParams(BaseCompactorTestCase):
110
- pass
103
+ add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType, Optional[EntryParams]]]]
111
104
 
112
105
 
113
106
  def with_compactor_version_func_test_param(
@@ -137,7 +130,7 @@ def with_compactor_version_func_test_param(
137
130
 
138
131
 
139
132
  INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
140
- "1-incremental-pkstr-sknone-norcf": IncrementalCompactionTestCaseParams(
133
+ "1-incremental-pkstr-sknone-norci": IncrementalCompactionTestCaseParams(
141
134
  primary_keys={"pk_col_1"},
142
135
  sort_keys=ZERO_VALUED_SORT_KEY,
143
136
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
@@ -163,7 +156,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
163
156
  skip_enabled_compact_partition_drivers=None,
164
157
  assert_compaction_audit=assert_compaction_audit,
165
158
  ),
166
- "2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
159
+ "2-incremental-pkstr-skstr-norci": IncrementalCompactionTestCaseParams(
167
160
  primary_keys={"pk_col_1"},
168
161
  sort_keys=ZERO_VALUED_SORT_KEY,
169
162
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
@@ -192,11 +185,11 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
192
185
  skip_enabled_compact_partition_drivers=None,
193
186
  assert_compaction_audit=assert_compaction_audit,
194
187
  ),
195
- "3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
188
+ "3-incremental-pkstr-multiskstr-norci": IncrementalCompactionTestCaseParams(
196
189
  primary_keys={"pk_col_1"},
197
190
  sort_keys=[
198
- SortKey.of(key_name="sk_col_1"),
199
- SortKey.of(key_name="sk_col_2"),
191
+ SortKey.of(key=["sk_col_1"]),
192
+ SortKey.of(key=["sk_col_2"]),
200
193
  ],
201
194
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
202
195
  partition_values=["1"],
@@ -233,8 +226,8 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
233
226
  "4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
234
227
  primary_keys={"pk_col_1"},
235
228
  sort_keys=[
236
- SortKey.of(key_name="sk_col_1"),
237
- SortKey.of(key_name="sk_col_2"),
229
+ SortKey.of(key=["sk_col_1"]),
230
+ SortKey.of(key=["sk_col_2"]),
238
231
  ],
239
232
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
240
233
  partition_values=["1"],
@@ -269,7 +262,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
269
262
  ),
270
263
  "5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
271
264
  primary_keys={"pk_col_1"},
272
- sort_keys=[SortKey.of(key_name="sk_col_1")],
265
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
273
266
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
274
267
  partition_values=["1"],
275
268
  input_deltas=pa.Table.from_arrays(
@@ -301,7 +294,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
301
294
  ),
302
295
  "6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
303
296
  primary_keys={"pk_col_1"},
304
- sort_keys=[SortKey.of(key_name="sk_col_1")],
297
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
305
298
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
306
299
  partition_values=["1"],
307
300
  input_deltas=pa.Table.from_arrays(
@@ -333,7 +326,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
333
326
  ),
334
327
  "7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
335
328
  primary_keys={"pk_col_1"},
336
- sort_keys=[SortKey.of(key_name="sk_col_1")],
329
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
337
330
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
338
331
  partition_values=["1"],
339
332
  input_deltas=pa.Table.from_arrays(
@@ -365,7 +358,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
365
358
  ),
366
359
  "8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
367
360
  primary_keys={"pk_col_1", "pk_col_2"},
368
- sort_keys=[SortKey.of(key_name="sk_col_1")],
361
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
369
362
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
370
363
  partition_values=["1"],
371
364
  input_deltas=pa.Table.from_arrays(
@@ -399,7 +392,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
399
392
  ),
400
393
  "9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
401
394
  primary_keys={"pk_col_1"},
402
- sort_keys=[SortKey.of(key_name="sk_col_1")],
395
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
403
396
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
404
397
  partition_values=["1"],
405
398
  input_deltas=pa.Table.from_arrays(
@@ -431,7 +424,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
431
424
  ),
432
425
  "10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
433
426
  primary_keys={"pk_col_1"},
434
- sort_keys=[SortKey.of(key_name="sk_col_1")],
427
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
435
428
  partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
436
429
  partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
437
430
  input_deltas=pa.Table.from_arrays(
@@ -463,7 +456,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
463
456
  ),
464
457
  "11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
465
458
  primary_keys={"pk_col_1"},
466
- sort_keys=[SortKey.of(key_name="sk_col_1")],
459
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
467
460
  partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
468
461
  partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
469
462
  input_deltas=pa.Table.from_arrays(
@@ -495,7 +488,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
495
488
  ),
496
489
  "12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
497
490
  primary_keys={"pk_col_1"},
498
- sort_keys=[SortKey.of(key_name="sk_col_1")],
491
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
499
492
  partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
500
493
  partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
501
494
  input_deltas=pa.Table.from_arrays(
@@ -527,7 +520,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
527
520
  ),
528
521
  "13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
529
522
  primary_keys={"pk_col_1"},
530
- sort_keys=[SortKey.of(key_name="sk_col_1")],
523
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
531
524
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
532
525
  partition_values=["1"],
533
526
  input_deltas=pa.Table.from_arrays(
@@ -571,7 +564,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
571
564
  ),
572
565
  "14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
573
566
  primary_keys={"pk_col_1"},
574
- sort_keys=[SortKey.of(key_name="sk_col_1")],
567
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
575
568
  partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
576
569
  partition_values=["1"],
577
570
  input_deltas=pa.Table.from_arrays(
@@ -603,7 +596,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
603
596
  ),
604
597
  "15-incremental-empty-input-with-single-hash-bucket": IncrementalCompactionTestCaseParams(
605
598
  primary_keys={"pk_col_1"},
606
- sort_keys=[SortKey.of(key_name="sk_col_1")],
599
+ sort_keys=[SortKey.of(key=["sk_col_1"])],
607
600
  partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
608
601
  partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
609
602
  input_deltas=pa.Table.from_arrays(
@@ -5,7 +5,7 @@ from deltacat.compute.compactor.steps.repartition import repartition_range
5
5
  from deltacat.types.media import ContentType
6
6
  from deltacat.compute.compactor.model.repartition_result import RepartitionResult
7
7
  from deltacat.storage import (
8
- PartitionLocator,
8
+ Partition,
9
9
  )
10
10
 
11
11
  """
@@ -46,10 +46,10 @@ class TestRepartitionRange(unittest.TestCase):
46
46
  }
47
47
  ),
48
48
  ]
49
- self.destination_partition: PartitionLocator = MagicMock()
49
+ self.destination_partition: Partition = MagicMock()
50
50
  self.repartition_args = {"column": "last_updated", "ranges": [1678665487112747]}
51
51
  self.max_records_per_output_file = 2
52
- self.s3_table_writer_kwargs = {}
52
+ self.table_writer_kwargs = {}
53
53
  self.repartitioned_file_content_type = ContentType.PARQUET
54
54
  self.deltacat_storage = MagicMock()
55
55
  self.deltacat_storage_kwargs = MagicMock()
@@ -60,7 +60,7 @@ class TestRepartitionRange(unittest.TestCase):
60
60
  self.destination_partition,
61
61
  self.repartition_args,
62
62
  self.max_records_per_output_file,
63
- self.s3_table_writer_kwargs,
63
+ self.table_writer_kwargs,
64
64
  self.repartitioned_file_content_type,
65
65
  self.deltacat_storage,
66
66
  self.deltacat_storage_kwargs,
@@ -87,7 +87,7 @@ class TestRepartitionRange(unittest.TestCase):
87
87
  self.destination_partition,
88
88
  self.repartition_args,
89
89
  self.max_records_per_output_file,
90
- self.s3_table_writer_kwargs,
90
+ self.table_writer_kwargs,
91
91
  self.repartitioned_file_content_type,
92
92
  self.deltacat_storage,
93
93
  self.deltacat_storage_kwargs,
@@ -101,7 +101,7 @@ class TestRepartitionRange(unittest.TestCase):
101
101
  self.destination_partition,
102
102
  self.repartition_args,
103
103
  self.max_records_per_output_file,
104
- self.s3_table_writer_kwargs,
104
+ self.table_writer_kwargs,
105
105
  self.repartitioned_file_content_type,
106
106
  self.deltacat_storage,
107
107
  self.deltacat_storage_kwargs,
@@ -114,7 +114,7 @@ class TestRepartitionRange(unittest.TestCase):
114
114
  self.destination_partition,
115
115
  self.repartition_args,
116
116
  self.max_records_per_output_file,
117
- self.s3_table_writer_kwargs,
117
+ self.table_writer_kwargs,
118
118
  self.repartitioned_file_content_type,
119
119
  self.deltacat_storage,
120
120
  self.deltacat_storage_kwargs,
@@ -128,7 +128,7 @@ class TestRepartitionRange(unittest.TestCase):
128
128
  self.destination_partition,
129
129
  self.repartition_args,
130
130
  self.max_records_per_output_file,
131
- self.s3_table_writer_kwargs,
131
+ self.table_writer_kwargs,
132
132
  self.repartitioned_file_content_type,
133
133
  self.deltacat_storage,
134
134
  self.deltacat_storage_kwargs,
@@ -143,7 +143,7 @@ class TestRepartitionRange(unittest.TestCase):
143
143
  self.destination_partition,
144
144
  self.repartition_args,
145
145
  self.max_records_per_output_file,
146
- self.s3_table_writer_kwargs,
146
+ self.table_writer_kwargs,
147
147
  self.repartitioned_file_content_type,
148
148
  self.deltacat_storage,
149
149
  self.deltacat_storage_kwargs,
@@ -158,7 +158,7 @@ class TestRepartitionRange(unittest.TestCase):
158
158
  self.destination_partition,
159
159
  self.repartition_args,
160
160
  self.max_records_per_output_file,
161
- self.s3_table_writer_kwargs,
161
+ self.table_writer_kwargs,
162
162
  self.repartitioned_file_content_type,
163
163
  self.deltacat_storage,
164
164
  self.deltacat_storage_kwargs,
@@ -175,7 +175,7 @@ class TestRepartitionRange(unittest.TestCase):
175
175
  self.destination_partition,
176
176
  self.repartition_args,
177
177
  self.max_records_per_output_file,
178
- self.s3_table_writer_kwargs,
178
+ self.table_writer_kwargs,
179
179
  self.repartitioned_file_content_type,
180
180
  self.deltacat_storage,
181
181
  self.deltacat_storage_kwargs,
@@ -189,7 +189,7 @@ class TestRepartitionRange(unittest.TestCase):
189
189
  self.destination_partition,
190
190
  self.repartition_args,
191
191
  self.max_records_per_output_file,
192
- self.s3_table_writer_kwargs,
192
+ self.table_writer_kwargs,
193
193
  self.repartitioned_file_content_type,
194
194
  self.deltacat_storage,
195
195
  self.deltacat_storage_kwargs,
@@ -206,7 +206,7 @@ class TestRepartitionRange(unittest.TestCase):
206
206
  self.destination_partition,
207
207
  self.repartition_args,
208
208
  self.max_records_per_output_file,
209
- self.s3_table_writer_kwargs,
209
+ self.table_writer_kwargs,
210
210
  self.repartitioned_file_content_type,
211
211
  self.deltacat_storage,
212
212
  ),
@@ -233,7 +233,7 @@ class TestRepartitionRange(unittest.TestCase):
233
233
  self.destination_partition,
234
234
  self.repartition_args,
235
235
  self.max_records_per_output_file,
236
- self.s3_table_writer_kwargs,
236
+ self.table_writer_kwargs,
237
237
  self.repartitioned_file_content_type,
238
238
  self.deltacat_storage,
239
239
  self.deltacat_storage_kwargs,
@@ -1,133 +1,135 @@
1
- import unittest
1
+ import pytest
2
2
  from unittest import mock
3
- from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
4
- from typing import Any, Dict
5
-
6
- DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
7
- "db_file_path",
8
- "deltacat/tests/local_deltacat_storage/db_test.sqlite",
9
- )
10
3
 
4
+ from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
11
5
 
12
- class TestFitInputDeltas(unittest.TestCase):
13
- @classmethod
14
- def setUpClass(cls):
15
- cls.module_patcher = mock.patch.dict("sys.modules", {"ray": mock.MagicMock()})
16
- cls.module_patcher.start()
17
6
 
18
- from deltacat.compute.compactor.model.compaction_session_audit_info import (
19
- CompactionSessionAuditInfo,
7
+ @pytest.fixture(scope="module", autouse=True)
8
+ def mock_ray():
9
+ """Mock ray module for all tests in this module"""
10
+ module_patcher = mock.patch.dict("sys.modules", {"ray": mock.MagicMock()})
11
+ module_patcher.start()
12
+ yield
13
+ module_patcher.stop()
14
+
15
+
16
+ @pytest.fixture
17
+ def compaction_audit():
18
+ """Fixture for CompactionSessionAuditInfo"""
19
+ from deltacat.compute.compactor.model.compaction_session_audit_info import (
20
+ CompactionSessionAuditInfo,
21
+ )
22
+
23
+ return CompactionSessionAuditInfo("1.0", "2.3", "test")
24
+
25
+
26
+ def test_sanity(main_deltacat_storage_kwargs, compaction_audit):
27
+ from deltacat.compute.compactor.utils import io
28
+ from deltacat.storage import metastore
29
+
30
+ (
31
+ delta_list,
32
+ hash_bucket_count,
33
+ high_watermark,
34
+ require_multiple_rounds,
35
+ ) = io.fit_input_deltas(
36
+ [TEST_UPSERT_DELTA],
37
+ {"CPU": 1, "memory": 20000000},
38
+ compaction_audit,
39
+ None,
40
+ metastore,
41
+ main_deltacat_storage_kwargs,
42
+ )
43
+
44
+ assert hash_bucket_count is not None
45
+ assert len(delta_list) == 1
46
+ assert high_watermark is not None
47
+ assert require_multiple_rounds is False
48
+ assert compaction_audit.hash_bucket_count is not None
49
+ assert compaction_audit.input_file_count is not None
50
+ assert compaction_audit.input_size_bytes is not None
51
+ assert compaction_audit.total_cluster_memory_bytes is not None
52
+
53
+
54
+ def test_when_hash_bucket_count_overridden(
55
+ main_deltacat_storage_kwargs, compaction_audit
56
+ ):
57
+ from deltacat.compute.compactor.utils import io
58
+ from deltacat.storage import metastore
59
+
60
+ (
61
+ delta_list,
62
+ hash_bucket_count,
63
+ high_watermark,
64
+ require_multiple_rounds,
65
+ ) = io.fit_input_deltas(
66
+ [TEST_UPSERT_DELTA],
67
+ {"CPU": 1, "memory": 20000000},
68
+ compaction_audit,
69
+ 20,
70
+ metastore,
71
+ main_deltacat_storage_kwargs,
72
+ )
73
+
74
+ assert hash_bucket_count == 20
75
+ assert len(delta_list) == 1
76
+ assert high_watermark is not None
77
+ assert require_multiple_rounds is False
78
+
79
+
80
+ def test_when_not_enough_memory_splits_manifest_entries(
81
+ main_deltacat_storage_kwargs, compaction_audit
82
+ ):
83
+ from deltacat.compute.compactor.utils import io
84
+ from deltacat.storage import metastore
85
+
86
+ (
87
+ delta_list,
88
+ hash_bucket_count,
89
+ high_watermark,
90
+ require_multiple_rounds,
91
+ ) = io.fit_input_deltas(
92
+ [TEST_UPSERT_DELTA],
93
+ {"CPU": 2, "memory": 10},
94
+ compaction_audit,
95
+ 20,
96
+ metastore,
97
+ main_deltacat_storage_kwargs,
98
+ )
99
+
100
+ assert hash_bucket_count is not None
101
+ assert len(delta_list) == 2
102
+ assert high_watermark is not None
103
+ assert require_multiple_rounds is False
104
+
105
+
106
+ def test_when_no_input_deltas(main_deltacat_storage_kwargs, compaction_audit):
107
+ from deltacat.compute.compactor.utils import io
108
+ from deltacat.storage import metastore
109
+
110
+ with pytest.raises(AssertionError):
111
+ io.fit_input_deltas(
112
+ [],
113
+ {"CPU": 100, "memory": 20000.0},
114
+ compaction_audit,
115
+ None,
116
+ metastore,
117
+ main_deltacat_storage_kwargs,
20
118
  )
21
119
 
22
- cls.kwargs_for_local_deltacat_storage: Dict[str, Any] = {
23
- DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
24
- }
25
-
26
- cls.COMPACTION_AUDIT = CompactionSessionAuditInfo("1.0", "2.3", "test")
27
-
28
- super().setUpClass()
29
120
 
30
- @classmethod
31
- def tearDownClass(cls) -> None:
32
- cls.module_patcher.stop()
121
+ def test_when_cpu_resources_is_not_passed(
122
+ main_deltacat_storage_kwargs, compaction_audit
123
+ ):
124
+ from deltacat.compute.compactor.utils import io
125
+ from deltacat.storage import metastore
33
126
 
34
- def test_sanity(self):
35
- from deltacat.compute.compactor.utils import io
36
- import deltacat.tests.local_deltacat_storage as ds
37
-
38
- (
39
- delta_list,
40
- hash_bucket_count,
41
- high_watermark,
42
- require_multiple_rounds,
43
- ) = io.fit_input_deltas(
44
- [TEST_UPSERT_DELTA],
45
- {"CPU": 1, "memory": 20000000},
46
- self.COMPACTION_AUDIT,
127
+ with pytest.raises(KeyError):
128
+ io.fit_input_deltas(
129
+ [],
130
+ {},
131
+ compaction_audit,
47
132
  None,
48
- ds,
49
- self.kwargs_for_local_deltacat_storage,
50
- )
51
-
52
- self.assertIsNotNone(hash_bucket_count)
53
- self.assertTrue(1, len(delta_list))
54
- self.assertIsNotNone(high_watermark)
55
- self.assertFalse(require_multiple_rounds)
56
- self.assertIsNotNone(hash_bucket_count, self.COMPACTION_AUDIT.hash_bucket_count)
57
- self.assertIsNotNone(self.COMPACTION_AUDIT.input_file_count)
58
- self.assertIsNotNone(self.COMPACTION_AUDIT.input_size_bytes)
59
- self.assertIsNotNone(self.COMPACTION_AUDIT.total_cluster_memory_bytes)
60
-
61
- def test_when_hash_bucket_count_overridden(self):
62
- from deltacat.compute.compactor.utils import io
63
- import deltacat.tests.local_deltacat_storage as ds
64
-
65
- (
66
- delta_list,
67
- hash_bucket_count,
68
- high_watermark,
69
- require_multiple_rounds,
70
- ) = io.fit_input_deltas(
71
- [TEST_UPSERT_DELTA],
72
- {"CPU": 1, "memory": 20000000},
73
- self.COMPACTION_AUDIT,
74
- 20,
75
- ds,
76
- self.kwargs_for_local_deltacat_storage,
133
+ metastore,
134
+ main_deltacat_storage_kwargs,
77
135
  )
78
-
79
- self.assertEqual(20, hash_bucket_count)
80
- self.assertEqual(1, len(delta_list))
81
- self.assertIsNotNone(high_watermark)
82
- self.assertFalse(require_multiple_rounds)
83
-
84
- def test_when_not_enough_memory_splits_manifest_entries(self):
85
- from deltacat.compute.compactor.utils import io
86
- import deltacat.tests.local_deltacat_storage as ds
87
-
88
- (
89
- delta_list,
90
- hash_bucket_count,
91
- high_watermark,
92
- require_multiple_rounds,
93
- ) = io.fit_input_deltas(
94
- [TEST_UPSERT_DELTA],
95
- {"CPU": 2, "memory": 10},
96
- self.COMPACTION_AUDIT,
97
- 20,
98
- ds,
99
- self.kwargs_for_local_deltacat_storage,
100
- )
101
-
102
- self.assertIsNotNone(hash_bucket_count)
103
- self.assertTrue(2, len(delta_list))
104
- self.assertIsNotNone(high_watermark)
105
- self.assertFalse(require_multiple_rounds)
106
-
107
- def test_when_no_input_deltas(self):
108
- from deltacat.compute.compactor.utils import io
109
- import deltacat.tests.local_deltacat_storage as ds
110
-
111
- with self.assertRaises(AssertionError):
112
- io.fit_input_deltas(
113
- [],
114
- {"CPU": 100, "memory": 20000.0},
115
- self.COMPACTION_AUDIT,
116
- None,
117
- ds,
118
- self.kwargs_for_local_deltacat_storage,
119
- )
120
-
121
- def test_when_cpu_resources_is_not_passed(self):
122
- from deltacat.compute.compactor.utils import io
123
- import deltacat.tests.local_deltacat_storage as ds
124
-
125
- with self.assertRaises(KeyError):
126
- io.fit_input_deltas(
127
- [],
128
- {},
129
- self.COMPACTION_AUDIT,
130
- None,
131
- ds,
132
- self.kwargs_for_local_deltacat_storage,
133
- )