deltacat 1.1.22__tar.gz → 1.1.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. {deltacat-1.1.22/deltacat.egg-info → deltacat-1.1.24}/PKG-INFO +1 -1
  2. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/__init__.py +1 -1
  3. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/aws/s3u.py +2 -2
  4. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/utils/round_completion_file.py +1 -1
  5. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/private/compaction_utils.py +12 -0
  6. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/utils/content_type_params.py +6 -4
  7. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/resource_estimation/delta.py +16 -2
  8. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/file_object_store.py +16 -1
  9. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/memcached_object_store.py +45 -7
  10. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/object_store.py +14 -0
  11. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/redis_object_store.py +32 -4
  12. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/s3_object_store.py +17 -0
  13. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/resource_estimation/test_delta.py +50 -0
  14. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +92 -76
  15. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/test_compact_partition_rebase.py +88 -73
  16. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/io/test_file_object_store.py +44 -14
  17. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/io/test_memcached_object_store.py +40 -0
  18. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/io/test_redis_object_store.py +20 -0
  19. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/io/test_s3_object_store.py +9 -0
  20. {deltacat-1.1.22 → deltacat-1.1.24/deltacat.egg-info}/PKG-INFO +1 -1
  21. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat.egg-info/requires.txt +1 -1
  22. {deltacat-1.1.22 → deltacat-1.1.24}/setup.py +1 -1
  23. {deltacat-1.1.22 → deltacat-1.1.24}/LICENSE +0 -0
  24. {deltacat-1.1.22 → deltacat-1.1.24}/MANIFEST.in +0 -0
  25. {deltacat-1.1.22 → deltacat-1.1.24}/README.md +0 -0
  26. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/aws/__init__.py +0 -0
  27. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/aws/clients.py +0 -0
  28. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/aws/constants.py +0 -0
  29. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/aws/redshift/__init__.py +0 -0
  30. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/aws/redshift/model/__init__.py +0 -0
  31. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/aws/redshift/model/manifest.py +0 -0
  32. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/benchmarking/__init__.py +0 -0
  33. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  34. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/benchmarking/conftest.py +0 -0
  35. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/catalog/__init__.py +0 -0
  36. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
  37. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/catalog/delegate.py +0 -0
  38. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/catalog/interface.py +0 -0
  39. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/catalog/model/__init__.py +0 -0
  40. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/catalog/model/catalog.py +0 -0
  41. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/catalog/model/table_definition.py +0 -0
  42. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/__init__.py +0 -0
  43. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/__init__.py +0 -0
  44. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/compaction_session.py +0 -0
  45. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/__init__.py +0 -0
  46. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
  47. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
  48. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  49. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  50. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  51. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  52. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  53. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  54. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  55. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  56. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  57. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  58. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  59. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  60. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/repartition_session.py +0 -0
  61. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/steps/__init__.py +0 -0
  62. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  63. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  64. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/steps/materialize.py +0 -0
  65. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/steps/repartition.py +0 -0
  66. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/utils/__init__.py +0 -0
  67. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/utils/io.py +0 -0
  68. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  69. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  70. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  71. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/__init__.py +0 -0
  72. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
  73. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/constants.py +0 -0
  74. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  75. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  76. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  77. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  78. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  79. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  80. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  81. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
  82. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
  83. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  84. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  85. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
  86. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  87. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
  88. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  89. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
  90. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
  91. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  92. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  93. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  94. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/utils/io.py +0 -0
  95. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  96. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
  97. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
  98. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/merge_on_read/__init__.py +0 -0
  99. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/merge_on_read/daft.py +0 -0
  100. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
  101. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
  102. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
  103. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
  104. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/resource_estimation/__init__.py +0 -0
  105. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/resource_estimation/manifest.py +0 -0
  106. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/resource_estimation/model.py +0 -0
  107. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/resource_estimation/parquet.py +0 -0
  108. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/stats/__init__.py +0 -0
  109. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/stats/models/__init__.py +0 -0
  110. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  111. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/stats/models/delta_stats.py +0 -0
  112. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  113. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  114. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/stats/models/stats_result.py +0 -0
  115. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/compute/stats/types.py +0 -0
  116. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/constants.py +0 -0
  117. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/exceptions.py +0 -0
  118. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/__init__.py +0 -0
  119. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/aws/__init__.py +0 -0
  120. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/aws/redshift/__init__.py +0 -0
  121. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/dataset.py +0 -0
  122. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/ray_plasma_object_store.py +0 -0
  123. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/io/read_api.py +0 -0
  124. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/logs.py +0 -0
  125. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/__init__.py +0 -0
  126. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/interface.py +0 -0
  127. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/__init__.py +0 -0
  128. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/delete_parameters.py +0 -0
  129. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/delta.py +0 -0
  130. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/list_result.py +0 -0
  131. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/locator.py +0 -0
  132. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/namespace.py +0 -0
  133. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/partition.py +0 -0
  134. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/partition_spec.py +0 -0
  135. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/sort_key.py +0 -0
  136. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/stream.py +0 -0
  137. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/table.py +0 -0
  138. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/table_version.py +0 -0
  139. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/transform.py +0 -0
  140. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/storage/model/types.py +0 -0
  141. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/__init__.py +0 -0
  142. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/aws/__init__.py +0 -0
  143. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/aws/test_clients.py +0 -0
  144. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/aws/test_s3u.py +0 -0
  145. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/catalog/__init__.py +0 -0
  146. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
  147. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/__init__.py +0 -0
  148. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
  149. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
  150. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
  151. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
  152. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor/__init__.py +0 -0
  153. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
  154. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  155. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
  156. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  157. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -0
  158. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
  159. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
  160. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  161. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
  162. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  163. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/resource_estimation/__init__.py +0 -0
  164. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/resource_estimation/data/__init__.py +0 -0
  165. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
  166. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
  167. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
  168. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
  169. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/test_util_common.py +0 -0
  170. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/test_util_constant.py +0 -0
  171. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
  172. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/io/__init__.py +0 -0
  173. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
  174. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
  175. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
  176. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
  177. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/test_exceptions.py +0 -0
  178. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/test_logs.py +0 -0
  179. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/test_utils/__init__.py +0 -0
  180. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/test_utils/constants.py +0 -0
  181. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/test_utils/pyarrow.py +0 -0
  182. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/test_utils/storage.py +0 -0
  183. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/test_utils/utils.py +0 -0
  184. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/__init__.py +0 -0
  185. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/data/__init__.py +0 -0
  186. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
  187. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
  188. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
  189. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  190. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/test_daft.py +0 -0
  191. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/test_metrics.py +0 -0
  192. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/test_placement.py +0 -0
  193. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/test_pyarrow.py +0 -0
  194. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  195. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/tests/utils/test_resources.py +0 -0
  196. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/types/__init__.py +0 -0
  197. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/types/media.py +0 -0
  198. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/types/partial_download.py +0 -0
  199. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/types/tables.py +0 -0
  200. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/__init__.py +0 -0
  201. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/arguments.py +0 -0
  202. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/cloudpickle.py +0 -0
  203. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/common.py +0 -0
  204. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/daft.py +0 -0
  205. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/metrics.py +0 -0
  206. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/numpy.py +0 -0
  207. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/pandas.py +0 -0
  208. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/performance.py +0 -0
  209. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/placement.py +0 -0
  210. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/pyarrow.py +0 -0
  211. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/ray_utils/__init__.py +0 -0
  212. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/ray_utils/collections.py +0 -0
  213. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/ray_utils/concurrency.py +0 -0
  214. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/ray_utils/dataset.py +0 -0
  215. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/ray_utils/performance.py +0 -0
  216. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/ray_utils/runtime.py +0 -0
  217. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/resources.py +0 -0
  218. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/s3fs.py +0 -0
  219. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat/utils/schema.py +0 -0
  220. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat.egg-info/SOURCES.txt +0 -0
  221. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat.egg-info/dependency_links.txt +0 -0
  222. {deltacat-1.1.22 → deltacat-1.1.24}/deltacat.egg-info/top_level.txt +0 -0
  223. {deltacat-1.1.22 → deltacat-1.1.24}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.22
3
+ Version: 1.1.24
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.22"
47
+ __version__ = "1.1.24"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -291,7 +291,7 @@ def read_file(
291
291
  f"Retry download for: {s3_url} after receiving {type(e).__name__}"
292
292
  ) from e
293
293
  except BaseException as e:
294
- logger.warn(
294
+ logger.warning(
295
295
  f"Read has failed for {s3_url} and content_type={content_type} "
296
296
  f"and encoding={content_encoding}. Error: {e}",
297
297
  exc_info=True,
@@ -416,7 +416,7 @@ def upload_table(
416
416
  f"Retry upload for: {s3_url} after receiving {type(e).__name__}",
417
417
  ) from e
418
418
  except BaseException as e:
419
- logger.warn(
419
+ logger.warning(
420
420
  f"Upload has failed for {s3_url} and content_type={content_type}. Error: {e}",
421
421
  exc_info=True,
422
422
  )
@@ -63,7 +63,7 @@ def read_round_completion_file(
63
63
  logger.info(f"Read round completion info: {round_completion_info}")
64
64
  break
65
65
  else:
66
- logger.warn(f"Round completion file not present at {rcf_uri}")
66
+ logger.warning(f"Round completion file not present at {rcf_uri}")
67
67
 
68
68
  return round_completion_info
69
69
 
@@ -227,6 +227,7 @@ def _run_hash_and_merge(
227
227
  previous_compacted_delta_manifest: Optional[Manifest],
228
228
  compacted_partition: Partition,
229
229
  ) -> List[MergeResult]:
230
+ created_obj_ids = set()
230
231
  telemetry_time_hb = 0
231
232
  total_input_records_count = np.int64(0)
232
233
  total_hb_record_count = np.int64(0)
@@ -288,6 +289,7 @@ def _run_hash_and_merge(
288
289
  hb_result.hash_bucket_group_to_obj_id_tuple
289
290
  ):
290
291
  if object_id_size_tuple:
292
+ created_obj_ids.add(object_id_size_tuple[0])
291
293
  all_hash_group_idx_to_obj_id[hash_group_index].append(
292
294
  object_id_size_tuple[0],
293
295
  )
@@ -365,6 +367,16 @@ def _run_hash_and_merge(
365
367
  mutable_compaction_audit.set_telemetry_time_in_seconds(
366
368
  telemetry_this_round + previous_telemetry
367
369
  )
370
+ if params.num_rounds > 1:
371
+ logger.info(
372
+ f"Detected number of rounds to be {params.num_rounds}, "
373
+ f"preparing to delete {len(created_obj_ids)} objects from object store..."
374
+ )
375
+ params.object_store.delete_many(list(created_obj_ids))
376
+ else:
377
+ logger.info(
378
+ f"Detected number of rounds to be {params.num_rounds}, not cleaning up object store..."
379
+ )
368
380
 
369
381
  return merge_results
370
382
 
@@ -97,7 +97,7 @@ def append_content_type_params(
97
97
  max_parquet_meta_size_bytes: Optional[int] = MAX_PARQUET_METADATA_SIZE,
98
98
  deltacat_storage=unimplemented_deltacat_storage,
99
99
  deltacat_storage_kwargs: Optional[Dict[str, Any]] = {},
100
- ) -> None:
100
+ ) -> bool:
101
101
  """
102
102
  This operation appends content type params into the delta entry. Note
103
103
  that this operation can be time consuming, hence we cache it in a Ray actor.
@@ -105,7 +105,7 @@ def append_content_type_params(
105
105
 
106
106
  if not delta.meta:
107
107
  logger.warning(f"Delta with locator {delta.locator} doesn't contain meta.")
108
- return
108
+ return False
109
109
 
110
110
  entry_indices_to_download = []
111
111
  for entry_index, entry in enumerate(delta.manifest.entries):
@@ -120,7 +120,7 @@ def append_content_type_params(
120
120
  logger.info(
121
121
  f"No parquet type params to download for delta with locator {delta.locator}."
122
122
  )
123
- return None
123
+ return False
124
124
 
125
125
  ray_namespace = ray.get_runtime_context().namespace
126
126
  logger.info(
@@ -147,7 +147,7 @@ def append_content_type_params(
147
147
  f" {delta.locator} and digest {delta.locator.hexdigest()}."
148
148
  )
149
149
  delta.manifest = cached_value.manifest
150
- return
150
+ return True
151
151
  logger.info(
152
152
  f"Cache doesn't contain parquet meta for delta with locator {delta.locator}."
153
153
  )
@@ -215,3 +215,5 @@ def append_content_type_params(
215
215
  )
216
216
  ray.get(cache.put.remote(delta.locator.hexdigest(), delta))
217
217
  assert ray.get(cache.get.remote(delta.locator.hexdigest())) is not None
218
+
219
+ return True
@@ -42,7 +42,11 @@ def _estimate_resources_required_to_process_delta_using_previous_inflation(
42
42
  in_memory_size = (
43
43
  delta.meta.content_length * estimate_resources_params.previous_inflation
44
44
  )
45
- num_rows = int(in_memory_size / estimate_resources_params.average_record_size_bytes)
45
+ num_rows = 0
46
+ if estimate_resources_params.average_record_size_bytes is not None:
47
+ num_rows = int(
48
+ in_memory_size / estimate_resources_params.average_record_size_bytes
49
+ )
46
50
 
47
51
  return EstimatedResources.of(
48
52
  memory_bytes=in_memory_size,
@@ -68,6 +72,10 @@ def _estimate_resources_required_to_process_delta_using_type_params(
68
72
  ), "Number of rows can only be estimated for PYARROW_DOWNLOAD operation"
69
73
 
70
74
  if estimate_resources_params.parquet_to_pyarrow_inflation is None:
75
+ logger.debug(
76
+ "Could not estimate using type params as "
77
+ f"parquet_to_pyarrow_inflation is None for {delta.locator}"
78
+ )
71
79
  return None
72
80
 
73
81
  if not delta.manifest:
@@ -86,12 +94,18 @@ def _estimate_resources_required_to_process_delta_using_type_params(
86
94
  ),
87
95
  )
88
96
 
89
- append_content_type_params(
97
+ appended = append_content_type_params(
90
98
  delta=delta,
91
99
  deltacat_storage=deltacat_storage,
92
100
  deltacat_storage_kwargs=deltacat_storage_kwargs,
93
101
  )
94
102
 
103
+ if not appended:
104
+ logger.debug(
105
+ f"Could not append content type params for {delta.locator}, returning None"
106
+ )
107
+ return None
108
+
95
109
  in_memory_size = 0.0
96
110
  num_rows = 0
97
111
 
@@ -41,8 +41,23 @@ class FileObjectStore(IObjectStore):
41
41
  serialized = f.read()
42
42
  loaded = cloudpickle.loads(serialized)
43
43
  result.append(loaded)
44
- os.remove(ref)
45
44
  end = time.monotonic()
46
45
 
47
46
  logger.info(f"The total time taken to read all objects is: {end - start}")
48
47
  return result
48
+
49
+ def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
50
+ start = time.monotonic()
51
+ num_deleted = 0
52
+ for ref in refs:
53
+ try:
54
+ os.remove(ref)
55
+ num_deleted += 1
56
+ except Exception:
57
+ logger.warning(f"Failed to delete ref {ref}!", exc_info=True)
58
+ end = time.monotonic()
59
+
60
+ logger.info(
61
+ f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
62
+ )
63
+ return num_deleted == len(refs)
@@ -100,16 +100,10 @@ class MemcachedObjectStore(IObjectStore):
100
100
 
101
101
  def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
102
102
  result = []
103
- refs_per_ip = defaultdict(lambda: [])
103
+ refs_per_ip = self._get_refs_per_ip(refs)
104
104
  chunks_by_refs = defaultdict(lambda: [])
105
105
 
106
106
  start = time.monotonic()
107
- for ref in refs:
108
- uid, ip, chunk_count = ref.split(self.SEPARATOR)
109
- chunk_count = int(chunk_count)
110
- for chunk_index in range(chunk_count):
111
- current_ref = self._create_ref(uid, ip, chunk_index)
112
- refs_per_ip[ip].append(current_ref)
113
107
 
114
108
  total_ref_count = 0
115
109
  for (ip, current_refs) in refs_per_ip.items():
@@ -193,6 +187,39 @@ class MemcachedObjectStore(IObjectStore):
193
187
 
194
188
  return cloudpickle.loads(serialized)
195
189
 
190
+ def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
191
+ refs_per_ip = self._get_refs_per_ip(refs)
192
+ all_deleted = True
193
+
194
+ start = time.monotonic()
195
+
196
+ total_refs = 0
197
+ fully_deleted_refs = 0
198
+ for (ip, current_refs) in refs_per_ip.items():
199
+ client = self._get_client_by_ip(ip)
200
+ total_refs += len(current_refs)
201
+ try:
202
+ # always returns true
203
+ client.delete_many(current_refs, noreply=self.noreply)
204
+ fully_deleted_refs += len(current_refs)
205
+ except BaseException:
206
+ # if an exception is raised then all, some, or none of the keys may have been deleted
207
+ logger.warning(
208
+ f"Failed to fully delete refs: {current_refs}", exc_info=True
209
+ )
210
+ all_deleted = False
211
+
212
+ end = time.monotonic()
213
+
214
+ logger.info(
215
+ f"From {len(refs)} objects, found {total_refs} total chunk references, of which {fully_deleted_refs} were guaranteed to be successfully deleted."
216
+ )
217
+ logger.info(
218
+ f"The total time taken to attempt deleting {len(refs)} objects is: {end - start}"
219
+ )
220
+
221
+ return all_deleted
222
+
196
223
  def clear(self) -> bool:
197
224
  flushed = all(
198
225
  [
@@ -260,3 +287,14 @@ class MemcachedObjectStore(IObjectStore):
260
287
  self.current_ip = socket.gethostbyname(socket.gethostname())
261
288
 
262
289
  return self.current_ip
290
+
291
+ def _get_refs_per_ip(self, refs: List[Any]):
292
+ refs_per_ip = defaultdict(lambda: [])
293
+
294
+ for ref in refs:
295
+ uid, ip, chunk_count = ref.split(self.SEPARATOR)
296
+ chunk_count = int(chunk_count)
297
+ for chunk_index in range(chunk_count):
298
+ current_ref = self._create_ref(uid, ip, chunk_index)
299
+ refs_per_ip[ip].append(current_ref)
300
+ return refs_per_ip
@@ -43,6 +43,19 @@ class IObjectStore:
43
43
  or may not return ordered results.
44
44
  """
45
45
 
46
+ def delete(self, ref: Any, *args, **kwargs) -> bool:
47
+ """
48
+ Delete a single object from the object store.
49
+ """
50
+ return self.delete_many([ref])
51
+
52
+ def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
53
+ ...
54
+
55
+ """
56
+ Delete many objects from the object store.
57
+ """
58
+
46
59
  def clear(self, *args, **kwargs) -> bool:
47
60
  ...
48
61
 
@@ -52,6 +65,7 @@ class IObjectStore:
52
65
 
53
66
  def close(self, *args, **kwargs) -> None:
54
67
  ...
68
+
55
69
  """
56
70
  Closes all the active connections to object store without clearing
57
71
  the data in the object store.
@@ -56,12 +56,9 @@ class RedisObjectStore(IObjectStore):
56
56
 
57
57
  def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
58
58
  result = []
59
- uid_per_ip = defaultdict(lambda: [])
59
+ uid_per_ip = self._get_uids_per_ip(refs)
60
60
 
61
61
  start = time.monotonic()
62
- for ref in refs:
63
- uid, ip = ref.split(self.SEPARATOR)
64
- uid_per_ip[ip].append(uid)
65
62
 
66
63
  for (ip, uids) in uid_per_ip.items():
67
64
  client = self._get_client_by_ip(ip)
@@ -95,6 +92,29 @@ class RedisObjectStore(IObjectStore):
95
92
  serialized = client.get(uid)
96
93
  return cloudpickle.loads(serialized)
97
94
 
95
+ def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
96
+ uid_per_ip = self._get_uids_per_ip(refs)
97
+
98
+ start = time.monotonic()
99
+
100
+ num_deleted = 0
101
+ for (ip, uids) in uid_per_ip.items():
102
+ client = self._get_client_by_ip(ip)
103
+ num_keys_deleted = client.delete(*uids)
104
+ num_deleted += num_keys_deleted
105
+ if num_keys_deleted != len(uids):
106
+ logger.warning(
107
+ f"Failed to delete {len(uids) - num_keys_deleted} out of {len(uids)} uids: {uids}"
108
+ )
109
+
110
+ end = time.monotonic()
111
+
112
+ logger.info(
113
+ f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
114
+ )
115
+
116
+ return num_deleted == len(refs)
117
+
98
118
  def _get_client_by_ip(self, ip_address: str):
99
119
  if ip_address in self.client_cache:
100
120
  return self.client_cache[ip_address]
@@ -112,3 +132,11 @@ class RedisObjectStore(IObjectStore):
112
132
 
113
133
  def _create_ref(self, uid, ip):
114
134
  return f"{uid}{self.SEPARATOR}{ip}"
135
+
136
+ def _get_uids_per_ip(self, refs: List[Any]):
137
+ uid_per_ip = defaultdict(lambda: [])
138
+
139
+ for ref in refs:
140
+ uid, ip = ref.split(self.SEPARATOR)
141
+ uid_per_ip[ip].append(uid)
142
+ return uid_per_ip
@@ -42,3 +42,20 @@ class S3ObjectStore(IObjectStore):
42
42
 
43
43
  logger.info(f"The total time taken to read all objects is: {end - start}")
44
44
  return result
45
+
46
+ def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
47
+ start = time.monotonic()
48
+ num_deleted = 0
49
+ for ref in refs:
50
+ try:
51
+ s3_utils.delete_files_by_prefix(self.bucket, str(ref))
52
+ num_deleted += 1
53
+ except BaseException:
54
+ logger.warning(f"Failed to delete ref {ref}!", exc_info=True)
55
+ end = time.monotonic()
56
+
57
+ logger.info(
58
+ f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
59
+ )
60
+
61
+ return num_deleted == len(refs)
@@ -526,6 +526,30 @@ class TestEstimateResourcesRequiredToProcessDelta:
526
526
  == parquet_delta_with_manifest.meta.content_length
527
527
  )
528
528
 
529
+ def test_parquet_delta_when_default_v2_without_avg_record_size_and_sampling(
530
+ self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
531
+ ):
532
+ params = EstimateResourcesParams.of(
533
+ resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
534
+ previous_inflation=7,
535
+ parquet_to_pyarrow_inflation=1,
536
+ )
537
+
538
+ result = estimate_resources_required_to_process_delta(
539
+ delta=parquet_delta_with_manifest,
540
+ operation_type=OperationType.PYARROW_DOWNLOAD,
541
+ deltacat_storage=ds,
542
+ deltacat_storage_kwargs=local_deltacat_storage_kwargs,
543
+ estimate_resources_params=params,
544
+ )
545
+
546
+ assert parquet_delta_with_manifest.manifest is not None
547
+ assert result.memory_bytes is not None
548
+ assert (
549
+ result.statistics.on_disk_size_bytes
550
+ == parquet_delta_with_manifest.meta.content_length
551
+ )
552
+
529
553
  def test_parquet_delta_when_default_v2_and_files_to_sample_zero(
530
554
  self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
531
555
  ):
@@ -578,6 +602,32 @@ class TestEstimateResourcesRequiredToProcessDelta:
578
602
  == utsv_delta_with_manifest.meta.content_length
579
603
  )
580
604
 
605
+ def test_utsv_delta_when_default_v2_without_avg_record_size(
606
+ self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
607
+ ):
608
+ params = EstimateResourcesParams.of(
609
+ resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
610
+ previous_inflation=7,
611
+ average_record_size_bytes=None, # note
612
+ parquet_to_pyarrow_inflation=1,
613
+ )
614
+
615
+ result = estimate_resources_required_to_process_delta(
616
+ delta=utsv_delta_with_manifest,
617
+ operation_type=OperationType.PYARROW_DOWNLOAD,
618
+ deltacat_storage=ds,
619
+ deltacat_storage_kwargs=local_deltacat_storage_kwargs,
620
+ estimate_resources_params=params,
621
+ )
622
+
623
+ assert utsv_delta_with_manifest.manifest is not None
624
+ assert result.memory_bytes is not None
625
+ assert result.statistics.record_count == 0
626
+ assert (
627
+ result.statistics.on_disk_size_bytes
628
+ == utsv_delta_with_manifest.meta.content_length
629
+ )
630
+
581
631
  def test_parquet_delta_without_inflation_when_default_v2(
582
632
  self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
583
633
  ):
@@ -5,8 +5,9 @@ import pytest
5
5
  import boto3
6
6
  from boto3.resources.base import ServiceResource
7
7
  import pyarrow as pa
8
- from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
8
+ from deltacat.io.file_object_store import FileObjectStore
9
9
  from pytest_benchmark.fixture import BenchmarkFixture
10
+ import tempfile
10
11
 
11
12
  from deltacat.tests.compute.test_util_constant import (
12
13
  TEST_S3_RCF_BUCKET_NAME,
@@ -247,84 +248,99 @@ def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
247
248
  pgm = PlacementGroupManager(
248
249
  1, total_cpus, DEFAULT_WORKER_INSTANCE_CPUS, memory_per_bundle=4000000
249
250
  ).pgs[0]
250
- compact_partition_params = CompactPartitionParams.of(
251
- {
252
- "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
253
- "compacted_file_content_type": ContentType.PARQUET,
254
- "dd_max_parallelism_ratio": 1.0,
255
- "deltacat_storage": ds,
256
- "deltacat_storage_kwargs": ds_mock_kwargs,
257
- "destination_partition_locator": rebased_partition.locator,
258
- "hash_bucket_count": hash_bucket_count_param,
259
- "last_stream_position_to_compact": source_partition.stream_position,
260
- "list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
261
- "object_store": RayPlasmaObjectStore(),
262
- "pg_config": pgm,
263
- "primary_keys": primary_keys,
264
- "read_kwargs_provider": read_kwargs_provider_param,
265
- "rebase_source_partition_locator": source_partition.locator,
266
- "rebase_source_partition_high_watermark": rebased_partition.stream_position,
267
- "records_per_compacted_file": records_per_compacted_file_param,
268
- "s3_client_kwargs": {},
269
- "source_partition_locator": rebased_partition.locator,
270
- "sort_keys": sort_keys if sort_keys else None,
271
- "num_rounds": num_rounds_param,
272
- "drop_duplicates": drop_duplicates_param,
273
- "min_delta_bytes": 560,
274
- }
275
- )
276
- if expected_terminal_exception:
277
- with pytest.raises(expected_terminal_exception) as exc_info:
278
- benchmark(compact_partition_func, compact_partition_params)
279
- assert expected_terminal_exception_message in str(exc_info.value)
280
- return
281
- from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
282
- ExecutionCompactionResult,
283
- )
251
+ with tempfile.TemporaryDirectory() as test_dir:
252
+ compact_partition_params = CompactPartitionParams.of(
253
+ {
254
+ "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
255
+ "compacted_file_content_type": ContentType.PARQUET,
256
+ "dd_max_parallelism_ratio": 1.0,
257
+ "deltacat_storage": ds,
258
+ "deltacat_storage_kwargs": ds_mock_kwargs,
259
+ "destination_partition_locator": rebased_partition.locator,
260
+ "hash_bucket_count": hash_bucket_count_param,
261
+ "last_stream_position_to_compact": source_partition.stream_position,
262
+ "list_deltas_kwargs": {
263
+ **ds_mock_kwargs,
264
+ **{"equivalent_table_types": []},
265
+ },
266
+ "object_store": FileObjectStore(test_dir),
267
+ "pg_config": pgm,
268
+ "primary_keys": primary_keys,
269
+ "read_kwargs_provider": read_kwargs_provider_param,
270
+ "rebase_source_partition_locator": source_partition.locator,
271
+ "rebase_source_partition_high_watermark": rebased_partition.stream_position,
272
+ "records_per_compacted_file": records_per_compacted_file_param,
273
+ "s3_client_kwargs": {},
274
+ "source_partition_locator": rebased_partition.locator,
275
+ "sort_keys": sort_keys if sort_keys else None,
276
+ "num_rounds": num_rounds_param,
277
+ "drop_duplicates": drop_duplicates_param,
278
+ "min_delta_bytes": 560,
279
+ }
280
+ )
281
+ if expected_terminal_exception:
282
+ with pytest.raises(expected_terminal_exception) as exc_info:
283
+ benchmark(compact_partition_func, compact_partition_params)
284
+ assert expected_terminal_exception_message in str(exc_info.value)
285
+ return
286
+ from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
287
+ ExecutionCompactionResult,
288
+ )
284
289
 
285
- execute_compaction_result_spy = mocker.spy(ExecutionCompactionResult, "__init__")
290
+ execute_compaction_result_spy = mocker.spy(
291
+ ExecutionCompactionResult, "__init__"
292
+ )
293
+ object_store_delete_many_spy = mocker.spy(FileObjectStore, "delete_many")
286
294
 
287
- # execute
288
- rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
295
+ # execute
296
+ rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
289
297
 
290
- round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
291
- audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
292
- round_completion_info.compaction_audit_url
293
- )
298
+ round_completion_info: RoundCompletionInfo = get_rcf(
299
+ s3_resource, rcf_file_s3_uri
300
+ )
301
+ audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
302
+ round_completion_info.compaction_audit_url
303
+ )
294
304
 
295
- compaction_audit_obj: Dict[str, Any] = read_s3_contents(
296
- s3_resource, audit_bucket, audit_key
297
- )
298
- compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
299
- **compaction_audit_obj
300
- )
305
+ compaction_audit_obj: Dict[str, Any] = read_s3_contents(
306
+ s3_resource, audit_bucket, audit_key
307
+ )
308
+ compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
309
+ **compaction_audit_obj
310
+ )
301
311
 
302
- # Assert not in-place compacted
303
- assert (
304
- execute_compaction_result_spy.call_args.args[-1] is False
305
- ), "Table version erroneously marked as in-place compacted!"
306
- compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
307
- s3_resource, rcf_file_s3_uri
308
- )
309
- tables = ds.download_delta(
310
- compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
311
- )
312
- actual_rebase_compacted_table = pa.concat_tables(tables)
313
- # if no primary key is specified then sort by sort_key for consistent assertion
314
- sorting_cols: List[Any] = (
315
- [(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
316
- )
317
- rebase_expected_compact_partition_result = (
318
- rebase_expected_compact_partition_result.combine_chunks().sort_by(sorting_cols)
319
- )
320
- actual_rebase_compacted_table = (
321
- actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
322
- )
323
- assert actual_rebase_compacted_table.equals(
324
- rebase_expected_compact_partition_result
325
- ), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
312
+ # Assert not in-place compacted
313
+ assert (
314
+ execute_compaction_result_spy.call_args.args[-1] is False
315
+ ), "Table version erroneously marked as in-place compacted!"
316
+ compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
317
+ s3_resource, rcf_file_s3_uri
318
+ )
319
+ tables = ds.download_delta(
320
+ compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
321
+ )
322
+ actual_rebase_compacted_table = pa.concat_tables(tables)
323
+ # if no primary key is specified then sort by sort_key for consistent assertion
324
+ sorting_cols: List[Any] = (
325
+ [(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
326
+ )
327
+ rebase_expected_compact_partition_result = (
328
+ rebase_expected_compact_partition_result.combine_chunks().sort_by(
329
+ sorting_cols
330
+ )
331
+ )
332
+ actual_rebase_compacted_table = (
333
+ actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
334
+ )
335
+ assert actual_rebase_compacted_table.equals(
336
+ rebase_expected_compact_partition_result
337
+ ), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
326
338
 
327
- if assert_compaction_audit:
328
- if not assert_compaction_audit(compactor_version, compaction_audit):
329
- assert False, "Compaction audit assertion failed"
330
- return
339
+ if assert_compaction_audit:
340
+ if not assert_compaction_audit(compactor_version, compaction_audit):
341
+ assert False, "Compaction audit assertion failed"
342
+ assert os.listdir(test_dir) == []
343
+ assert (
344
+ object_store_delete_many_spy.call_count
345
+ ), "Object store was never cleaned up!"
346
+ return