deltacat 1.1.34__tar.gz → 1.1.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. {deltacat-1.1.34/deltacat.egg-info → deltacat-1.1.35}/PKG-INFO +1 -1
  2. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/__init__.py +1 -1
  3. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/constants.py +16 -1
  4. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/steps/merge.py +47 -1
  5. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +317 -0
  6. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/test_compact_partition_incremental.py +15 -0
  7. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +15 -0
  8. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/test_compact_partition_rebase.py +15 -0
  9. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +15 -0
  10. {deltacat-1.1.34 → deltacat-1.1.35/deltacat.egg-info}/PKG-INFO +1 -1
  11. {deltacat-1.1.34 → deltacat-1.1.35}/LICENSE +0 -0
  12. {deltacat-1.1.34 → deltacat-1.1.35}/MANIFEST.in +0 -0
  13. {deltacat-1.1.34 → deltacat-1.1.35}/README.md +0 -0
  14. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/aws/__init__.py +0 -0
  15. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/aws/clients.py +0 -0
  16. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/aws/constants.py +0 -0
  17. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/aws/redshift/__init__.py +0 -0
  18. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/aws/redshift/model/__init__.py +0 -0
  19. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/aws/redshift/model/manifest.py +0 -0
  20. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/aws/s3u.py +0 -0
  21. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/benchmarking/__init__.py +0 -0
  22. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  23. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/benchmarking/conftest.py +0 -0
  24. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/catalog/__init__.py +0 -0
  25. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
  26. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/catalog/delegate.py +0 -0
  27. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/catalog/interface.py +0 -0
  28. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/catalog/model/__init__.py +0 -0
  29. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/catalog/model/catalog.py +0 -0
  30. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/catalog/model/table_definition.py +0 -0
  31. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/__init__.py +0 -0
  32. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/__init__.py +0 -0
  33. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/compaction_session.py +0 -0
  34. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/__init__.py +0 -0
  35. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
  36. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
  37. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  38. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  39. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  40. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  41. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  42. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  43. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  44. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  45. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  46. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  47. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  48. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  49. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/repartition_session.py +0 -0
  50. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/steps/__init__.py +0 -0
  51. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  52. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  53. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/steps/materialize.py +0 -0
  54. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/steps/repartition.py +0 -0
  55. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/utils/__init__.py +0 -0
  56. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/utils/io.py +0 -0
  57. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  58. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/utils/round_completion_file.py +0 -0
  59. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  60. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  61. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/__init__.py +0 -0
  62. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
  63. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  64. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  65. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  66. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  67. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  68. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  69. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  70. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
  71. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
  72. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  73. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  74. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
  75. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  76. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
  77. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/private/compaction_utils.py +0 -0
  78. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  79. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
  80. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  81. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
  82. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  83. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  84. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/utils/io.py +0 -0
  85. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  86. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
  87. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
  88. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/merge_on_read/__init__.py +0 -0
  89. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/merge_on_read/daft.py +0 -0
  90. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
  91. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
  92. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
  93. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
  94. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/resource_estimation/__init__.py +0 -0
  95. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/resource_estimation/delta.py +0 -0
  96. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/resource_estimation/manifest.py +0 -0
  97. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/resource_estimation/model.py +0 -0
  98. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/resource_estimation/parquet.py +0 -0
  99. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/stats/__init__.py +0 -0
  100. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/stats/models/__init__.py +0 -0
  101. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  102. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/stats/models/delta_stats.py +0 -0
  103. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  104. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  105. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/stats/models/stats_result.py +0 -0
  106. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/compute/stats/types.py +0 -0
  107. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/constants.py +0 -0
  108. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/exceptions.py +0 -0
  109. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/__init__.py +0 -0
  110. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/aws/__init__.py +0 -0
  111. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/aws/redshift/__init__.py +0 -0
  112. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/dataset.py +0 -0
  113. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/file_object_store.py +0 -0
  114. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/memcached_object_store.py +0 -0
  115. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/object_store.py +0 -0
  116. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/ray_plasma_object_store.py +0 -0
  117. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/read_api.py +0 -0
  118. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/redis_object_store.py +0 -0
  119. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/io/s3_object_store.py +0 -0
  120. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/logs.py +0 -0
  121. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/__init__.py +0 -0
  122. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/interface.py +0 -0
  123. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/__init__.py +0 -0
  124. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/delete_parameters.py +0 -0
  125. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/delta.py +0 -0
  126. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/list_result.py +0 -0
  127. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/locator.py +0 -0
  128. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/namespace.py +0 -0
  129. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/partition.py +0 -0
  130. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/partition_spec.py +0 -0
  131. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/sort_key.py +0 -0
  132. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/stream.py +0 -0
  133. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/table.py +0 -0
  134. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/table_version.py +0 -0
  135. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/transform.py +0 -0
  136. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/storage/model/types.py +0 -0
  137. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/__init__.py +0 -0
  138. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/aws/__init__.py +0 -0
  139. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/aws/test_clients.py +0 -0
  140. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/aws/test_s3u.py +0 -0
  141. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/catalog/__init__.py +0 -0
  142. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
  143. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/__init__.py +0 -0
  144. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
  145. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
  146. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
  147. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
  148. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor/__init__.py +0 -0
  149. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
  150. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  151. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
  152. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  153. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -0
  154. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
  155. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  156. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
  157. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -0
  158. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -0
  159. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  160. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/resource_estimation/__init__.py +0 -0
  161. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/resource_estimation/data/__init__.py +0 -0
  162. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/resource_estimation/test_delta.py +0 -0
  163. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
  164. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
  165. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/test_util_common.py +0 -0
  166. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/test_util_constant.py +0 -0
  167. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
  168. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/io/__init__.py +0 -0
  169. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
  170. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/io/test_file_object_store.py +0 -0
  171. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/io/test_memcached_object_store.py +0 -0
  172. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
  173. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/io/test_redis_object_store.py +0 -0
  174. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/io/test_s3_object_store.py +0 -0
  175. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
  176. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
  177. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/test_exceptions.py +0 -0
  178. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/test_logs.py +0 -0
  179. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/test_utils/__init__.py +0 -0
  180. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/test_utils/constants.py +0 -0
  181. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/test_utils/pyarrow.py +0 -0
  182. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/test_utils/storage.py +0 -0
  183. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/test_utils/utils.py +0 -0
  184. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/__init__.py +0 -0
  185. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/data/__init__.py +0 -0
  186. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
  187. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
  188. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
  189. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  190. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/test_daft.py +0 -0
  191. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/test_metrics.py +0 -0
  192. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/test_placement.py +0 -0
  193. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/test_pyarrow.py +0 -0
  194. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  195. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/tests/utils/test_resources.py +0 -0
  196. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/types/__init__.py +0 -0
  197. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/types/media.py +0 -0
  198. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/types/partial_download.py +0 -0
  199. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/types/tables.py +0 -0
  200. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/__init__.py +0 -0
  201. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/arguments.py +0 -0
  202. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/cloudpickle.py +0 -0
  203. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/common.py +0 -0
  204. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/daft.py +0 -0
  205. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/metrics.py +0 -0
  206. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/numpy.py +0 -0
  207. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/pandas.py +0 -0
  208. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/performance.py +0 -0
  209. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/placement.py +0 -0
  210. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/pyarrow.py +0 -0
  211. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/ray_utils/__init__.py +0 -0
  212. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/ray_utils/collections.py +0 -0
  213. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/ray_utils/concurrency.py +0 -0
  214. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/ray_utils/dataset.py +0 -0
  215. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/ray_utils/performance.py +0 -0
  216. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/ray_utils/runtime.py +0 -0
  217. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/resources.py +0 -0
  218. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/s3fs.py +0 -0
  219. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat/utils/schema.py +0 -0
  220. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat.egg-info/SOURCES.txt +0 -0
  221. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat.egg-info/dependency_links.txt +0 -0
  222. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat.egg-info/requires.txt +0 -0
  223. {deltacat-1.1.34 → deltacat-1.1.35}/deltacat.egg-info/top_level.txt +0 -0
  224. {deltacat-1.1.34 → deltacat-1.1.35}/setup.cfg +0 -0
  225. {deltacat-1.1.34 → deltacat-1.1.35}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.34
3
+ Version: 1.1.35
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.34"
47
+ __version__ = "1.1.35"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -1,4 +1,4 @@
1
- from deltacat.utils.common import env_bool, env_integer
1
+ from deltacat.utils.common import env_bool, env_integer, env_string
2
2
 
3
3
  TOTAL_BYTES_IN_SHA1_HASH = 20
4
4
 
@@ -92,3 +92,18 @@ DEFAULT_NUM_ROUNDS = 1
92
92
  SHA1_HASHING_FOR_MEMORY_OPTIMIZATION_DISABLED = env_bool(
93
93
  "SHA1_HASHING_FOR_MEMORY_OPTIMIZATION_DISABLED", False
94
94
  )
95
+
96
+ # This env variable specifies whether to check bucketing spec
97
+ # compliance of the existing compacted table.
98
+ # PRINT_LOG: Enable logging if any partition is found
99
+ # to be non-compliant with the bucketing spec.
100
+ # ASSERT: Fail the job with ValidationError if the
101
+ # current compacted partition is found to be non-compliant
102
+ # with bucketing spec. Note, logging is implicitly enabled
103
+ # in this case.
104
+ BUCKETING_SPEC_COMPLIANCE_PROFILE = env_string(
105
+ "BUCKETING_SPEC_COMPLIANCE_PROFILE", None
106
+ )
107
+
108
+ BUCKETING_SPEC_COMPLIANCE_PRINT_LOG = "PRINT_LOG"
109
+ BUCKETING_SPEC_COMPLIANCE_ASSERT = "ASSERT"
@@ -32,6 +32,7 @@ from deltacat.utils.resources import (
32
32
  )
33
33
  from deltacat.compute.compactor_v2.utils.primary_key_index import (
34
34
  generate_pk_hash_column,
35
+ pk_digest_to_hash_bucket_index,
35
36
  )
36
37
  from deltacat.storage import (
37
38
  Delta,
@@ -47,6 +48,9 @@ from deltacat.compute.compactor_v2.constants import (
47
48
  MERGE_TIME_IN_SECONDS,
48
49
  MERGE_SUCCESS_COUNT,
49
50
  MERGE_FAILURE_COUNT,
51
+ BUCKETING_SPEC_COMPLIANCE_PROFILE,
52
+ BUCKETING_SPEC_COMPLIANCE_ASSERT,
53
+ BUCKETING_SPEC_COMPLIANCE_PRINT_LOG,
50
54
  )
51
55
  from deltacat.exceptions import (
52
56
  categorize_errors,
@@ -188,9 +192,34 @@ def _merge_tables(
188
192
  return final_table
189
193
 
190
194
 
195
+ def _validate_bucketing_spec_compliance(
196
+ table: pa.Table, rcf: RoundCompletionInfo, hb_index: int, primary_keys: List[str]
197
+ ) -> None:
198
+ pki_table = generate_pk_hash_column(
199
+ [table], primary_keys=primary_keys, requires_hash=True
200
+ )[0]
201
+ for index, hash_value in enumerate(sc.pk_hash_string_column_np(pki_table)):
202
+ hash_bucket = pk_digest_to_hash_bucket_index(hash_value, rcf.hash_bucket_count)
203
+ if hash_bucket != hb_index:
204
+ logger.info(
205
+ f"{rcf.compacted_delta_locator.namespace}.{rcf.compacted_delta_locator.table_name}"
206
+ f".{rcf.compacted_delta_locator.table_version}.{rcf.compacted_delta_locator.partition_id}"
207
+ f".{rcf.compacted_delta_locator.partition_values} has non-compliant bucketing spec. "
208
+ f"Expected hash bucket is {hb_index} but found {hash_bucket}."
209
+ )
210
+ if BUCKETING_SPEC_COMPLIANCE_PROFILE == BUCKETING_SPEC_COMPLIANCE_ASSERT:
211
+ raise AssertionError(
212
+ "Hash bucket drift detected. Expected hash bucket index"
213
+ f" to be {hb_index} but found {hash_bucket}"
214
+ )
215
+ # No further checks necessary
216
+ break
217
+
218
+
191
219
  def _download_compacted_table(
192
220
  hb_index: int,
193
221
  rcf: RoundCompletionInfo,
222
+ primary_keys: List[str],
194
223
  read_kwargs_provider: Optional[ReadKwargsProvider] = None,
195
224
  deltacat_storage=unimplemented_deltacat_storage,
196
225
  deltacat_storage_kwargs: Optional[dict] = None,
@@ -214,7 +243,23 @@ def _download_compacted_table(
214
243
 
215
244
  tables.append(table)
216
245
 
217
- return pa.concat_tables(tables)
246
+ compacted_table = pa.concat_tables(tables)
247
+ check_bucketing_spec = BUCKETING_SPEC_COMPLIANCE_PROFILE in [
248
+ BUCKETING_SPEC_COMPLIANCE_PRINT_LOG,
249
+ BUCKETING_SPEC_COMPLIANCE_ASSERT,
250
+ ]
251
+
252
+ logger.debug(
253
+ f"Value of BUCKETING_SPEC_COMPLIANCE_PROFILE, check_bucketing_spec:"
254
+ f" {BUCKETING_SPEC_COMPLIANCE_PROFILE}, {check_bucketing_spec}"
255
+ )
256
+
257
+ # Bucketing spec compliance isn't required without primary keys
258
+ if primary_keys and check_bucketing_spec:
259
+ _validate_bucketing_spec_compliance(
260
+ compacted_table, rcf, hb_index, primary_keys
261
+ )
262
+ return compacted_table
218
263
 
219
264
 
220
265
  def _copy_all_manifest_files_from_old_hash_buckets(
@@ -543,6 +588,7 @@ def _timed_merge(input: MergeInput) -> MergeResult:
543
588
  compacted_table = _download_compacted_table(
544
589
  hb_index=merge_file_group.hb_index,
545
590
  rcf=input.round_completion_info,
591
+ primary_keys=input.primary_keys,
546
592
  read_kwargs_provider=input.read_kwargs_provider,
547
593
  deltacat_storage=input.deltacat_storage,
548
594
  deltacat_storage_kwargs=input.deltacat_storage_kwargs,
@@ -4,9 +4,11 @@ import os
4
4
  import pyarrow as pa
5
5
  import pytest
6
6
  import boto3
7
+ import json
7
8
  from deltacat.compute.compactor.model.compaction_session_audit_info import (
8
9
  CompactionSessionAuditInfo,
9
10
  )
11
+ from deltacat.exceptions import ValidationError
10
12
  from boto3.resources.base import ServiceResource
11
13
  import deltacat.tests.local_deltacat_storage as ds
12
14
  from deltacat.types.media import ContentType
@@ -88,6 +90,17 @@ def disable_sha1(monkeypatch):
88
90
  )
89
91
 
90
92
 
93
+ @pytest.fixture(scope="function")
94
+ def enable_bucketing_spec_validation(monkeypatch):
95
+ import deltacat.compute.compactor_v2.steps.merge
96
+
97
+ monkeypatch.setattr(
98
+ deltacat.compute.compactor_v2.steps.merge,
99
+ "BUCKETING_SPEC_COMPLIANCE_PROFILE",
100
+ "ASSERT",
101
+ )
102
+
103
+
91
104
  class TestCompactionSession:
92
105
  """
93
106
  This class adds specific tests that aren't part of the parametrized test suite.
@@ -689,3 +702,307 @@ class TestCompactionSession:
689
702
  incremental_rcf.compacted_pyarrow_write_result.pyarrow_bytes >= 2300000000
690
703
  )
691
704
  assert incremental_rcf.compacted_pyarrow_write_result.records == 4
705
+
706
+ def test_compact_partition_when_bucket_spec_validation_fails(
707
+ self,
708
+ s3_resource,
709
+ local_deltacat_storage_kwargs,
710
+ enable_bucketing_spec_validation,
711
+ ):
712
+ """
713
+ A test case which asserts the bucketing spec validation throws an assertion error
714
+ when the validation has failed.
715
+ """
716
+
717
+ # setup
718
+ staged_source = stage_partition_from_file_paths(
719
+ self.NAMESPACE, ["source"], **local_deltacat_storage_kwargs
720
+ )
721
+
722
+ source_delta = commit_delta_to_staged_partition(
723
+ staged_source, [self.BACKFILL_FILE_PATH], **local_deltacat_storage_kwargs
724
+ )
725
+
726
+ staged_dest = stage_partition_from_file_paths(
727
+ self.NAMESPACE, ["destination"], **local_deltacat_storage_kwargs
728
+ )
729
+ dest_partition = ds.commit_partition(
730
+ staged_dest, **local_deltacat_storage_kwargs
731
+ )
732
+
733
+ # action
734
+ rcf_url = compact_partition(
735
+ CompactPartitionParams.of(
736
+ {
737
+ "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
738
+ "compacted_file_content_type": ContentType.PARQUET,
739
+ "dd_max_parallelism_ratio": 1.0,
740
+ "deltacat_storage": ds,
741
+ "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
742
+ "destination_partition_locator": dest_partition.locator,
743
+ "drop_duplicates": True,
744
+ "hash_bucket_count": 4,
745
+ "last_stream_position_to_compact": source_delta.stream_position,
746
+ "list_deltas_kwargs": {
747
+ **local_deltacat_storage_kwargs,
748
+ **{"equivalent_table_types": []},
749
+ },
750
+ "primary_keys": ["pk"],
751
+ "rebase_source_partition_locator": source_delta.partition_locator,
752
+ "rebase_source_partition_high_watermark": source_delta.stream_position,
753
+ "records_per_compacted_file": 1,
754
+ "s3_client_kwargs": {},
755
+ "source_partition_locator": source_delta.partition_locator,
756
+ }
757
+ )
758
+ )
759
+
760
+ backfill_rcf = get_rcf(s3_resource, rcf_url)
761
+ bucket, backfill_key1, backfill_key2 = rcf_url.strip("s3://").split("/")
762
+ # Move the records to different hash buckets to simulate a validation failure.
763
+ backfill_rcf["hbIndexToEntryRange"] = {"1": [0, 3]}
764
+ s3_resource.Bucket(bucket).put_object(
765
+ Key=f"{backfill_key1}/{backfill_key2}", Body=json.dumps(backfill_rcf)
766
+ )
767
+
768
+ # Now run an incremental compaction and verify if the previous RCF was read properly.
769
+ new_source_delta = commit_delta_to_partition(
770
+ source_delta.partition_locator,
771
+ [self.INCREMENTAL_FILE_PATH],
772
+ **local_deltacat_storage_kwargs,
773
+ )
774
+
775
+ new_destination_partition = ds.get_partition(
776
+ dest_partition.stream_locator, [], **local_deltacat_storage_kwargs
777
+ )
778
+
779
+ with pytest.raises(ValidationError) as excinfo:
780
+ compact_partition(
781
+ CompactPartitionParams.of(
782
+ {
783
+ "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
784
+ "compacted_file_content_type": ContentType.PARQUET,
785
+ "dd_max_parallelism_ratio": 1.0,
786
+ "deltacat_storage": ds,
787
+ "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
788
+ "destination_partition_locator": new_destination_partition.locator,
789
+ "drop_duplicates": True,
790
+ "hash_bucket_count": 4,
791
+ "last_stream_position_to_compact": new_source_delta.stream_position,
792
+ "list_deltas_kwargs": {
793
+ **local_deltacat_storage_kwargs,
794
+ **{"equivalent_table_types": []},
795
+ },
796
+ "primary_keys": ["pk"],
797
+ "rebase_source_partition_locator": None,
798
+ "rebase_source_partition_high_watermark": None,
799
+ "records_per_compacted_file": 4000,
800
+ "s3_client_kwargs": {},
801
+ "source_partition_locator": new_source_delta.partition_locator,
802
+ }
803
+ )
804
+ )
805
+
806
+ assert (
807
+ "Hash bucket drift detected. Expected hash bucket index to be 1 but found 0"
808
+ in str(excinfo.value)
809
+ )
810
+
811
+ def test_compact_partition_when_bucket_spec_validation_fails_but_env_variable_disabled(
812
+ self,
813
+ s3_resource,
814
+ local_deltacat_storage_kwargs,
815
+ ):
816
+ """
817
+ A test case which asserts even if bucketing spec validation fails, compaction doesn't
818
+ throw an error if the feature is not enabled.
819
+ """
820
+
821
+ # setup
822
+ staged_source = stage_partition_from_file_paths(
823
+ self.NAMESPACE, ["source"], **local_deltacat_storage_kwargs
824
+ )
825
+
826
+ source_delta = commit_delta_to_staged_partition(
827
+ staged_source, [self.BACKFILL_FILE_PATH], **local_deltacat_storage_kwargs
828
+ )
829
+
830
+ staged_dest = stage_partition_from_file_paths(
831
+ self.NAMESPACE, ["destination"], **local_deltacat_storage_kwargs
832
+ )
833
+ dest_partition = ds.commit_partition(
834
+ staged_dest, **local_deltacat_storage_kwargs
835
+ )
836
+
837
+ # action
838
+ rcf_url = compact_partition(
839
+ CompactPartitionParams.of(
840
+ {
841
+ "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
842
+ "compacted_file_content_type": ContentType.PARQUET,
843
+ "dd_max_parallelism_ratio": 1.0,
844
+ "deltacat_storage": ds,
845
+ "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
846
+ "destination_partition_locator": dest_partition.locator,
847
+ "drop_duplicates": True,
848
+ "hash_bucket_count": 4,
849
+ "last_stream_position_to_compact": source_delta.stream_position,
850
+ "list_deltas_kwargs": {
851
+ **local_deltacat_storage_kwargs,
852
+ **{"equivalent_table_types": []},
853
+ },
854
+ "primary_keys": ["pk"],
855
+ "rebase_source_partition_locator": source_delta.partition_locator,
856
+ "rebase_source_partition_high_watermark": source_delta.stream_position,
857
+ "records_per_compacted_file": 1,
858
+ "s3_client_kwargs": {},
859
+ "source_partition_locator": source_delta.partition_locator,
860
+ }
861
+ )
862
+ )
863
+
864
+ backfill_rcf = get_rcf(s3_resource, rcf_url)
865
+ bucket, backfill_key1, backfill_key2 = rcf_url.strip("s3://").split("/")
866
+ # Move the records to different hash buckets to simulate a validation failure.
867
+ backfill_rcf["hbIndexToEntryRange"] = {"1": [0, 3]}
868
+ s3_resource.Bucket(bucket).put_object(
869
+ Key=f"{backfill_key1}/{backfill_key2}", Body=json.dumps(backfill_rcf)
870
+ )
871
+
872
+ # Now run an incremental compaction and verify if the previous RCF was read properly.
873
+ new_source_delta = commit_delta_to_partition(
874
+ source_delta.partition_locator,
875
+ [self.INCREMENTAL_FILE_PATH],
876
+ **local_deltacat_storage_kwargs,
877
+ )
878
+
879
+ new_destination_partition = ds.get_partition(
880
+ dest_partition.stream_locator, [], **local_deltacat_storage_kwargs
881
+ )
882
+
883
+ new_rcf = compact_partition(
884
+ CompactPartitionParams.of(
885
+ {
886
+ "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
887
+ "compacted_file_content_type": ContentType.PARQUET,
888
+ "dd_max_parallelism_ratio": 1.0,
889
+ "deltacat_storage": ds,
890
+ "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
891
+ "destination_partition_locator": new_destination_partition.locator,
892
+ "drop_duplicates": True,
893
+ "hash_bucket_count": 4,
894
+ "last_stream_position_to_compact": new_source_delta.stream_position,
895
+ "list_deltas_kwargs": {
896
+ **local_deltacat_storage_kwargs,
897
+ **{"equivalent_table_types": []},
898
+ },
899
+ "primary_keys": ["pk"],
900
+ "rebase_source_partition_locator": None,
901
+ "rebase_source_partition_high_watermark": None,
902
+ "records_per_compacted_file": 4000,
903
+ "s3_client_kwargs": {},
904
+ "source_partition_locator": new_source_delta.partition_locator,
905
+ }
906
+ )
907
+ )
908
+
909
+ incremental_rcf = get_rcf(s3_resource, new_rcf)
910
+ assert incremental_rcf.hash_bucket_count == 4
911
+ assert len(incremental_rcf.hb_index_to_entry_range) == 2
912
+
913
+ def test_compact_partition_when_bucket_spec_validation_succeeds(
914
+ self,
915
+ s3_resource,
916
+ local_deltacat_storage_kwargs,
917
+ enable_bucketing_spec_validation,
918
+ ):
919
+ """
920
+ A test case which asserts the bucketing spec validation does not throw
921
+ an error when the validation succeeds.
922
+ """
923
+
924
+ # setup
925
+ staged_source = stage_partition_from_file_paths(
926
+ self.NAMESPACE, ["source"], **local_deltacat_storage_kwargs
927
+ )
928
+
929
+ source_delta = commit_delta_to_staged_partition(
930
+ staged_source, [self.BACKFILL_FILE_PATH], **local_deltacat_storage_kwargs
931
+ )
932
+
933
+ staged_dest = stage_partition_from_file_paths(
934
+ self.NAMESPACE, ["destination"], **local_deltacat_storage_kwargs
935
+ )
936
+ dest_partition = ds.commit_partition(
937
+ staged_dest, **local_deltacat_storage_kwargs
938
+ )
939
+
940
+ # action
941
+ rcf_url = compact_partition(
942
+ CompactPartitionParams.of(
943
+ {
944
+ "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
945
+ "compacted_file_content_type": ContentType.PARQUET,
946
+ "dd_max_parallelism_ratio": 1.0,
947
+ "deltacat_storage": ds,
948
+ "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
949
+ "destination_partition_locator": dest_partition.locator,
950
+ "drop_duplicates": True,
951
+ "hash_bucket_count": 4,
952
+ "last_stream_position_to_compact": source_delta.stream_position,
953
+ "list_deltas_kwargs": {
954
+ **local_deltacat_storage_kwargs,
955
+ **{"equivalent_table_types": []},
956
+ },
957
+ "primary_keys": ["pk"],
958
+ "rebase_source_partition_locator": source_delta.partition_locator,
959
+ "rebase_source_partition_high_watermark": source_delta.stream_position,
960
+ "records_per_compacted_file": 1,
961
+ "s3_client_kwargs": {},
962
+ "source_partition_locator": source_delta.partition_locator,
963
+ }
964
+ )
965
+ )
966
+
967
+ rcf = get_rcf(s3_resource, rcf_url)
968
+ assert rcf.hash_bucket_count == 4
969
+
970
+ # Now run an incremental compaction and verify if the previous RCF was read properly.
971
+ new_source_delta = commit_delta_to_partition(
972
+ source_delta.partition_locator,
973
+ [self.INCREMENTAL_FILE_PATH],
974
+ **local_deltacat_storage_kwargs,
975
+ )
976
+
977
+ new_destination_partition = ds.get_partition(
978
+ dest_partition.stream_locator, [], **local_deltacat_storage_kwargs
979
+ )
980
+
981
+ new_uri = compact_partition(
982
+ CompactPartitionParams.of(
983
+ {
984
+ "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
985
+ "compacted_file_content_type": ContentType.PARQUET,
986
+ "dd_max_parallelism_ratio": 1.0,
987
+ "deltacat_storage": ds,
988
+ "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
989
+ "destination_partition_locator": new_destination_partition.locator,
990
+ "drop_duplicates": True,
991
+ "hash_bucket_count": 4,
992
+ "last_stream_position_to_compact": new_source_delta.stream_position,
993
+ "list_deltas_kwargs": {
994
+ **local_deltacat_storage_kwargs,
995
+ **{"equivalent_table_types": []},
996
+ },
997
+ "primary_keys": ["pk"],
998
+ "rebase_source_partition_locator": None,
999
+ "rebase_source_partition_high_watermark": None,
1000
+ "records_per_compacted_file": 4000,
1001
+ "s3_client_kwargs": {},
1002
+ "source_partition_locator": new_source_delta.partition_locator,
1003
+ }
1004
+ )
1005
+ )
1006
+
1007
+ rcf = get_rcf(s3_resource, new_uri)
1008
+ assert rcf.hash_bucket_count == 4
@@ -119,6 +119,21 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
119
119
  os.remove(DATABASE_FILE_PATH_VALUE)
120
120
 
121
121
 
122
+ @pytest.fixture(autouse=True, scope="function")
123
+ def enable_bucketing_spec_validation(monkeypatch):
124
+ """
125
+ Enable the bucketing spec validation for all tests.
126
+ This will help catch hash bucket drift in testing.
127
+ """
128
+ import deltacat.compute.compactor_v2.steps.merge
129
+
130
+ monkeypatch.setattr(
131
+ deltacat.compute.compactor_v2.steps.merge,
132
+ "BUCKETING_SPEC_COMPLIANCE_PROFILE",
133
+ "ASSERT",
134
+ )
135
+
136
+
122
137
  @pytest.mark.parametrize(
123
138
  [
124
139
  "test_name",
@@ -114,6 +114,21 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
114
114
  os.remove(DATABASE_FILE_PATH_VALUE)
115
115
 
116
116
 
117
+ @pytest.fixture(autouse=True, scope="function")
118
+ def enable_bucketing_spec_validation(monkeypatch):
119
+ """
120
+ Enable the bucketing spec validation for all tests.
121
+ This will help catch hash bucket drift in testing.
122
+ """
123
+ import deltacat.compute.compactor_v2.steps.merge
124
+
125
+ monkeypatch.setattr(
126
+ deltacat.compute.compactor_v2.steps.merge,
127
+ "BUCKETING_SPEC_COMPLIANCE_PROFILE",
128
+ "ASSERT",
129
+ )
130
+
131
+
117
132
  @pytest.mark.parametrize(
118
133
  [
119
134
  "test_name",
@@ -114,6 +114,21 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
114
114
  os.remove(DATABASE_FILE_PATH_VALUE)
115
115
 
116
116
 
117
+ @pytest.fixture(autouse=True, scope="function")
118
+ def enable_bucketing_spec_validation(monkeypatch):
119
+ """
120
+ Enable the bucketing spec validation for all tests.
121
+ This will help catch hash bucket drift in testing.
122
+ """
123
+ import deltacat.compute.compactor_v2.steps.merge
124
+
125
+ monkeypatch.setattr(
126
+ deltacat.compute.compactor_v2.steps.merge,
127
+ "BUCKETING_SPEC_COMPLIANCE_PROFILE",
128
+ "ASSERT",
129
+ )
130
+
131
+
117
132
  @pytest.mark.parametrize(
118
133
  [
119
134
  "test_name",
@@ -118,6 +118,21 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
118
118
  os.remove(DATABASE_FILE_PATH_VALUE)
119
119
 
120
120
 
121
+ @pytest.fixture(autouse=True, scope="function")
122
+ def enable_bucketing_spec_validation(monkeypatch):
123
+ """
124
+ Enable the bucketing spec validation for all tests.
125
+ This will help catch hash bucket drift in testing.
126
+ """
127
+ import deltacat.compute.compactor_v2.steps.merge
128
+
129
+ monkeypatch.setattr(
130
+ deltacat.compute.compactor_v2.steps.merge,
131
+ "BUCKETING_SPEC_COMPLIANCE_PROFILE",
132
+ "ASSERT",
133
+ )
134
+
135
+
121
136
  @pytest.mark.parametrize(
122
137
  [
123
138
  "test_name",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.34
3
+ Version: 1.1.35
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
File without changes
File without changes
File without changes
File without changes