deltacat 1.1.17__tar.gz → 1.1.18__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (234)
  1. {deltacat-1.1.17/deltacat.egg-info → deltacat-1.1.18}/PKG-INFO +1 -1
  2. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/__init__.py +1 -1
  3. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/aws/constants.py +0 -1
  4. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/utils/primary_key_index.py +9 -4
  5. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/exceptions.py +2 -4
  6. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +77 -0
  7. deltacat-1.1.18/deltacat/tests/compute/compact_partition_rebase_test_cases.py +397 -0
  8. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +159 -0
  9. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/test_compact_partition_rebase.py +13 -4
  10. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -5
  11. {deltacat-1.1.17 → deltacat-1.1.18/deltacat.egg-info}/PKG-INFO +1 -1
  12. deltacat-1.1.17/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -89
  13. {deltacat-1.1.17 → deltacat-1.1.18}/LICENSE +0 -0
  14. {deltacat-1.1.17 → deltacat-1.1.18}/MANIFEST.in +0 -0
  15. {deltacat-1.1.17 → deltacat-1.1.18}/README.md +0 -0
  16. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/aws/__init__.py +0 -0
  17. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/aws/clients.py +0 -0
  18. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/aws/redshift/__init__.py +0 -0
  19. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/aws/redshift/model/__init__.py +0 -0
  20. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/aws/redshift/model/manifest.py +0 -0
  21. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/aws/s3u.py +0 -0
  22. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/benchmarking/__init__.py +0 -0
  23. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  24. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/benchmarking/conftest.py +0 -0
  25. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/catalog/__init__.py +0 -0
  26. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
  27. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/catalog/delegate.py +0 -0
  28. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/catalog/interface.py +0 -0
  29. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/catalog/model/__init__.py +0 -0
  30. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/catalog/model/catalog.py +0 -0
  31. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/catalog/model/table_definition.py +0 -0
  32. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/__init__.py +0 -0
  33. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/__init__.py +0 -0
  34. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/compaction_session.py +0 -0
  35. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/__init__.py +0 -0
  36. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
  37. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
  38. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  39. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  40. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  41. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  42. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  43. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  44. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  45. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  46. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  47. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  48. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  49. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  50. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/repartition_session.py +0 -0
  51. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/steps/__init__.py +0 -0
  52. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  53. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  54. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/steps/materialize.py +0 -0
  55. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/steps/repartition.py +0 -0
  56. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/utils/__init__.py +0 -0
  57. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/utils/io.py +0 -0
  58. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  59. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/utils/round_completion_file.py +0 -0
  60. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  61. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  62. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/__init__.py +0 -0
  63. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
  64. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/constants.py +0 -0
  65. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  66. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  67. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  68. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  69. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  70. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  71. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  72. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
  73. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
  74. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  75. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  76. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
  77. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  78. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
  79. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/private/compaction_utils.py +0 -0
  80. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  81. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
  82. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
  83. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  84. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
  85. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  86. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  87. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/utils/io.py +0 -0
  88. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  89. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
  90. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/merge_on_read/__init__.py +0 -0
  91. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/merge_on_read/daft.py +0 -0
  92. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
  93. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
  94. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
  95. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
  96. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/__init__.py +0 -0
  97. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/config/__init__.py +0 -0
  98. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/meta_stats.py +0 -0
  99. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/model/__init__.py +0 -0
  100. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/model/partition_stats_dict.py +0 -0
  101. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/model/stats_cluster_size_estimator.py +0 -0
  102. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/stats.py +0 -0
  103. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/utils/__init__.py +0 -0
  104. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/utils/constants.py +0 -0
  105. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/utils/io.py +0 -0
  106. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +0 -0
  107. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/metastats/utils/ray_utils.py +0 -0
  108. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/__init__.py +0 -0
  109. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/basic.py +0 -0
  110. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/models/__init__.py +0 -0
  111. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  112. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/models/delta_stats.py +0 -0
  113. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  114. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  115. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/models/stats_result.py +0 -0
  116. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/types.py +0 -0
  117. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/utils/__init__.py +0 -0
  118. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/utils/intervals.py +0 -0
  119. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/utils/io.py +0 -0
  120. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/compute/stats/utils/manifest_stats_file.py +0 -0
  121. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/constants.py +0 -0
  122. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/__init__.py +0 -0
  123. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/aws/__init__.py +0 -0
  124. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/aws/redshift/__init__.py +0 -0
  125. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/dataset.py +0 -0
  126. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/file_object_store.py +0 -0
  127. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/memcached_object_store.py +0 -0
  128. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/object_store.py +0 -0
  129. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/ray_plasma_object_store.py +0 -0
  130. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/read_api.py +0 -0
  131. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/redis_object_store.py +0 -0
  132. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/io/s3_object_store.py +0 -0
  133. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/logs.py +0 -0
  134. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/__init__.py +0 -0
  135. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/interface.py +0 -0
  136. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/__init__.py +0 -0
  137. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/delete_parameters.py +0 -0
  138. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/delta.py +0 -0
  139. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/list_result.py +0 -0
  140. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/locator.py +0 -0
  141. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/namespace.py +0 -0
  142. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/partition.py +0 -0
  143. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/partition_spec.py +0 -0
  144. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/sort_key.py +0 -0
  145. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/stream.py +0 -0
  146. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/table.py +0 -0
  147. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/table_version.py +0 -0
  148. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/transform.py +0 -0
  149. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/storage/model/types.py +0 -0
  150. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/__init__.py +0 -0
  151. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/aws/__init__.py +0 -0
  152. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/aws/test_clients.py +0 -0
  153. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/aws/test_s3u.py +0 -0
  154. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/catalog/__init__.py +0 -0
  155. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
  156. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/__init__.py +0 -0
  157. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
  158. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor/__init__.py +0 -0
  159. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
  160. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  161. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
  162. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  163. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -0
  164. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
  165. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
  166. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  167. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
  168. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  169. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
  170. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +0 -0
  171. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
  172. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
  173. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/test_util_common.py +0 -0
  174. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/compute/test_util_constant.py +0 -0
  175. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/io/__init__.py +0 -0
  176. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
  177. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/io/test_file_object_store.py +0 -0
  178. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/io/test_memcached_object_store.py +0 -0
  179. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
  180. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/io/test_redis_object_store.py +0 -0
  181. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/io/test_s3_object_store.py +0 -0
  182. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
  183. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
  184. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/stats/__init__.py +0 -0
  185. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/stats/test_intervals.py +0 -0
  186. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/test_exceptions.py +0 -0
  187. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/test_logs.py +0 -0
  188. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/test_utils/__init__.py +0 -0
  189. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/test_utils/constants.py +0 -0
  190. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/test_utils/pyarrow.py +0 -0
  191. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/test_utils/storage.py +0 -0
  192. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/test_utils/utils.py +0 -0
  193. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/__init__.py +0 -0
  194. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/data/__init__.py +0 -0
  195. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
  196. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
  197. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
  198. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  199. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/test_daft.py +0 -0
  200. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/test_metrics.py +0 -0
  201. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/test_placement.py +0 -0
  202. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/test_pyarrow.py +0 -0
  203. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  204. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/tests/utils/test_resources.py +0 -0
  205. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/types/__init__.py +0 -0
  206. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/types/media.py +0 -0
  207. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/types/partial_download.py +0 -0
  208. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/types/tables.py +0 -0
  209. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/__init__.py +0 -0
  210. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/arguments.py +0 -0
  211. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/cloudpickle.py +0 -0
  212. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/common.py +0 -0
  213. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/daft.py +0 -0
  214. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/metrics.py +0 -0
  215. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/numpy.py +0 -0
  216. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/pandas.py +0 -0
  217. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/performance.py +0 -0
  218. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/placement.py +0 -0
  219. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/pyarrow.py +0 -0
  220. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/ray_utils/__init__.py +0 -0
  221. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/ray_utils/collections.py +0 -0
  222. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/ray_utils/concurrency.py +0 -0
  223. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/ray_utils/dataset.py +0 -0
  224. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/ray_utils/performance.py +0 -0
  225. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/ray_utils/runtime.py +0 -0
  226. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/resources.py +0 -0
  227. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/s3fs.py +0 -0
  228. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat/utils/schema.py +0 -0
  229. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat.egg-info/SOURCES.txt +0 -0
  230. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat.egg-info/dependency_links.txt +0 -0
  231. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat.egg-info/requires.txt +0 -0
  232. {deltacat-1.1.17 → deltacat-1.1.18}/deltacat.egg-info/top_level.txt +0 -0
  233. {deltacat-1.1.17 → deltacat-1.1.18}/setup.cfg +0 -0
  234. {deltacat-1.1.17 → deltacat-1.1.18}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.17
3
+ Version: 1.1.18
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.17"
47
+ __version__ = "1.1.18"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -1,7 +1,6 @@
1
1
  import botocore
2
2
  from typing import Set
3
3
  from daft.exceptions import DaftTransientError
4
-
5
4
  from deltacat.utils.common import env_integer, env_string
6
5
 
7
6
 
@@ -27,8 +27,11 @@ def _append_sha1_hash_to_table(table: pa.Table, hash_column: pa.Array) -> pa.Tab
27
27
 
28
28
  result = []
29
29
  for hash_value in hash_column_np:
30
- assert hash_value is not None, f"Expected non-null primary key"
31
- result.append(hashlib.sha1(hash_value.encode("utf-8")).hexdigest())
30
+ if hash_value is None:
31
+ result.append(None)
32
+ logger.info("A primary key hash is null")
33
+ else:
34
+ result.append(hashlib.sha1(hash_value.encode("utf-8")).hexdigest())
32
35
 
33
36
  return sc.append_pk_hash_string_column(table, result)
34
37
 
@@ -191,7 +194,7 @@ def generate_pk_hash_column(
191
194
  pk_columns.append(sliced_string_cast(table[pk_name]))
192
195
 
193
196
  pk_columns.append(PK_DELIMITER)
194
- hash_column = pc.binary_join_element_wise(*pk_columns)
197
+ hash_column = pc.binary_join_element_wise(*pk_columns, null_handling="replace")
195
198
  return hash_column
196
199
 
197
200
  def _generate_uuid(table: pa.Table) -> pa.Array:
@@ -345,8 +348,10 @@ def hash_group_index_to_hash_bucket_indices(
345
348
  return range(hb_group, num_buckets, num_groups)
346
349
 
347
350
 
348
- def pk_digest_to_hash_bucket_index(digest: str, num_buckets: int) -> int:
351
+ def pk_digest_to_hash_bucket_index(digest: Optional[str], num_buckets: int) -> int:
349
352
  """
350
353
  Generates the hash bucket index from the given digest.
351
354
  """
355
+ if digest is None:
356
+ return 0
352
357
  return int(digest, 16) % num_buckets
@@ -299,7 +299,7 @@ def _categorize_tenacity_error(e: tenacity.RetryError):
299
299
  def _categorize_dependency_pyarrow_error(e: ArrowException):
300
300
  if isinstance(e, ArrowInvalid):
301
301
  raise DependencyPyarrowInvalidError(
302
- f"Pyarrow Invalid error occurred. Reason: {e}"
302
+ f"Pyarrow Invalid error occurred. {e}"
303
303
  ) from e
304
304
  elif isinstance(e, ArrowCapacityError):
305
305
  raise DependencyPyarrowCapacityError("Pyarrow Capacity error occurred.") from e
@@ -308,9 +308,7 @@ def _categorize_dependency_pyarrow_error(e: ArrowException):
308
308
 
309
309
 
310
310
  def _categorize_assertion_error(e: BaseException):
311
- raise ValidationError(
312
- f"One of the assertions in DeltaCAT has failed. Reason: {e}"
313
- ) from e
311
+ raise ValidationError(f"One of the assertions in DeltaCAT has failed. {e}") from e
314
312
 
315
313
 
316
314
  def _categorize_daft_error(e: DaftCoreException):
@@ -848,6 +848,83 @@ MULTIPLE_ROUNDS_TEST_CASES = {
848
848
  assert_compaction_audit=None,
849
849
  num_rounds=3,
850
850
  ),
851
+ # 4 input deltas (3 upsert, 1 delete delta), 2 rounds requested
852
+ # Expect to see a table that aggregates 10 records total
853
+ # (12 upserts - 2 deletes (null PK) = 10 records)
854
+ # (dropDuplicates = False)
855
+ "9-multiple-rounds-delete-deltas-with-null-pk": MultipleRoundsTestCaseParams(
856
+ primary_keys={"pk_col_1"},
857
+ sort_keys=ZERO_VALUED_SORT_KEY,
858
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
859
+ partition_values=["1"],
860
+ input_deltas=[
861
+ (
862
+ pa.Table.from_arrays(
863
+ [
864
+ pa.array([None, 11, 12, 13]),
865
+ pa.array(["a", "b", "c", "d"]),
866
+ ],
867
+ names=["pk_col_1", "col_1"],
868
+ ),
869
+ DeltaType.UPSERT,
870
+ None,
871
+ ),
872
+ (
873
+ pa.Table.from_arrays(
874
+ [
875
+ pa.array([14, 15, 16, 17]),
876
+ pa.array(["e", "f", "g", "h"]),
877
+ ],
878
+ names=["pk_col_1", "col_1"],
879
+ ),
880
+ DeltaType.UPSERT,
881
+ None,
882
+ ),
883
+ (
884
+ pa.Table.from_arrays(
885
+ [
886
+ pa.array([18, 19, 20, 21]),
887
+ pa.array(["i", "j", "k", "l"]),
888
+ ],
889
+ names=["pk_col_1", "col_1"],
890
+ ),
891
+ DeltaType.UPSERT,
892
+ None,
893
+ ),
894
+ (
895
+ pa.Table.from_arrays(
896
+ [pa.array([None, 11]), pa.array(["a", "b"])],
897
+ names=["pk_col_1", "col_1"],
898
+ ),
899
+ DeltaType.DELETE,
900
+ DeleteParameters.of(["pk_col_1", "col_1"]),
901
+ ),
902
+ ],
903
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
904
+ [
905
+ pa.array([i for i in range(12, 22)]),
906
+ pa.array(["c", "d", "e", "f", "g", "h", "i", "j", "k", "l"]),
907
+ ],
908
+ names=["pk_col_1", "col_1"],
909
+ ),
910
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
911
+ [
912
+ pa.array([i for i in range(12, 22)]),
913
+ pa.array(["c", "d", "e", "f", "g", "h", "i", "j", "k", "l"]),
914
+ ],
915
+ names=["pk_col_1", "col_1"],
916
+ ),
917
+ expected_terminal_exception=None,
918
+ expected_terminal_exception_message=None,
919
+ do_create_placement_group=False,
920
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
921
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
922
+ read_kwargs_provider=None,
923
+ drop_duplicates=False,
924
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
925
+ assert_compaction_audit=None,
926
+ num_rounds=2,
927
+ ),
851
928
  }
852
929
 
853
930
  MULTIPLE_ROUNDS_TEST_CASES = with_compactor_version_func_test_param(
@@ -0,0 +1,397 @@
1
+ import pyarrow as pa
2
+ from deltacat.tests.compute.test_util_common import (
3
+ PartitionKey,
4
+ PartitionKeyType,
5
+ )
6
+ from deltacat.tests.compute.test_util_constant import (
7
+ DEFAULT_MAX_RECORDS_PER_FILE,
8
+ DEFAULT_HASH_BUCKET_COUNT,
9
+ )
10
+ from dataclasses import dataclass
11
+
12
+
13
+ from deltacat.storage import (
14
+ DeltaType,
15
+ )
16
+
17
+ from deltacat.compute.compactor.model.compactor_version import CompactorVersion
18
+
19
+ from deltacat.storage.model.sort_key import SortKey
20
+
21
+ from deltacat.tests.compute.compact_partition_test_cases import (
22
+ BaseCompactorTestCase,
23
+ with_compactor_version_func_test_param,
24
+ )
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class RebaseCompactionTestCaseParams(BaseCompactorTestCase):
29
+ """
30
+ A pytest parameterized test case for the `compact_partition` function with rebase compaction.
31
+
32
+ Args:
33
+ * (inherited from CompactorTestCase): see CompactorTestCase docstring for details
34
+ rebase_expected_compact_partition_result: pa.Table - expected table after rebase compaction runs. An output that is asserted on in Rebase unit tests
35
+ """
36
+
37
+ rebase_expected_compact_partition_result: pa.Table
38
+
39
+
40
+ REBASE_TEST_CASES = {
41
+ "1-rebase-sanity": RebaseCompactionTestCaseParams(
42
+ primary_keys={"pk_col_1"},
43
+ sort_keys=[
44
+ SortKey.of(key_name="sk_col_1"),
45
+ SortKey.of(key_name="sk_col_2"),
46
+ ],
47
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
48
+ partition_values=["1"],
49
+ input_deltas=pa.Table.from_arrays(
50
+ [
51
+ pa.array([str(i) for i in range(10)]),
52
+ pa.array([i for i in range(0, 10)]),
53
+ pa.array(["foo"] * 10),
54
+ pa.array([i / 10 for i in range(10, 20)]),
55
+ ],
56
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
57
+ ),
58
+ input_deltas_delta_type=DeltaType.UPSERT,
59
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
60
+ [
61
+ pa.array([str(i) for i in range(10)]),
62
+ pa.array([i for i in range(0, 10)]),
63
+ pa.array(["foo"] * 10),
64
+ pa.array([i / 10 for i in range(10, 20)]),
65
+ ],
66
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
67
+ ),
68
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
69
+ [
70
+ pa.array([str(i) for i in range(10)]),
71
+ pa.array([i for i in range(20, 30)]),
72
+ pa.array(["foo"] * 10),
73
+ pa.array([i / 10 for i in range(40, 50)]),
74
+ ],
75
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
76
+ ),
77
+ expected_terminal_exception=None,
78
+ expected_terminal_exception_message=None,
79
+ do_create_placement_group=False,
80
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
81
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
82
+ read_kwargs_provider=None,
83
+ drop_duplicates=True,
84
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
85
+ assert_compaction_audit=None,
86
+ ),
87
+ "2-rebase-with-null-pk": RebaseCompactionTestCaseParams(
88
+ primary_keys={"pk_col_1"},
89
+ sort_keys=[
90
+ SortKey.of(key_name="sk_col_1"),
91
+ SortKey.of(key_name="sk_col_2"),
92
+ ],
93
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
94
+ partition_values=["1"],
95
+ input_deltas=pa.Table.from_arrays(
96
+ [
97
+ pa.array([1, 2, None, 2, None, 1]),
98
+ pa.array([1, 2, 3, 4, 5, 6]),
99
+ pa.array(["foo"] * 6),
100
+ pa.array([5, 6, 7, 8, 9, 10]),
101
+ ],
102
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
103
+ ),
104
+ input_deltas_delta_type=DeltaType.UPSERT,
105
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
106
+ [
107
+ pa.array([None, 1, 2]),
108
+ pa.array([5, 6, 4]),
109
+ pa.array(["foo"] * 3),
110
+ pa.array([9, 10, 8]),
111
+ ],
112
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
113
+ ),
114
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
115
+ [
116
+ pa.array([None, 1, 2]),
117
+ pa.array([5, 6, 4]),
118
+ pa.array(["foo"] * 3),
119
+ pa.array([7, 10, 8]),
120
+ ],
121
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
122
+ ),
123
+ expected_terminal_exception=None,
124
+ expected_terminal_exception_message=None,
125
+ do_create_placement_group=False,
126
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
127
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
128
+ read_kwargs_provider=None,
129
+ drop_duplicates=True,
130
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
131
+ assert_compaction_audit=None,
132
+ ),
133
+ "3-rebase-with-null-two-pk": RebaseCompactionTestCaseParams(
134
+ primary_keys={"pk_col_1", "pk_col_2"},
135
+ sort_keys=[
136
+ SortKey.of(key_name="sk_col_1"),
137
+ ],
138
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
139
+ partition_values=["1"],
140
+ input_deltas=pa.Table.from_arrays(
141
+ [
142
+ pa.array([1, 2, None, 2, None, 1, 5]),
143
+ pa.array([1, None, 3, None, None, 1, 5]),
144
+ pa.array(["foo"] * 7),
145
+ pa.array([5, 6, 7, 8, 9, 10, 11]),
146
+ ],
147
+ names=["pk_col_1", "pk_col_2", "sk_col_1", "col_1"],
148
+ ),
149
+ input_deltas_delta_type=DeltaType.UPSERT,
150
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
151
+ [
152
+ pa.array([1, 2, None, 5, None]),
153
+ pa.array([1, None, 3, 5, None]),
154
+ pa.array(["foo"] * 5),
155
+ pa.array([10, 8, 7, 11, 9]),
156
+ ],
157
+ names=["pk_col_1", "pk_col_2", "sk_col_1", "col_1"],
158
+ ),
159
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
160
+ [
161
+ pa.array([1, 2, None, 5, None]),
162
+ pa.array([1, None, 3, 5, None]),
163
+ pa.array(["foo"] * 5),
164
+ pa.array([10, 8, 7, 11, 9]),
165
+ ],
166
+ names=["pk_col_1", "pk_col_2", "sk_col_1", "col_1"],
167
+ ),
168
+ expected_terminal_exception=None,
169
+ expected_terminal_exception_message=None,
170
+ do_create_placement_group=False,
171
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
172
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
173
+ read_kwargs_provider=None,
174
+ drop_duplicates=True,
175
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
176
+ assert_compaction_audit=None,
177
+ ),
178
+ "4-rebase-with-null-multiple-pk-different-types": RebaseCompactionTestCaseParams(
179
+ primary_keys={"pk_col_1", "pk_col_2", "pk_col_3"},
180
+ sort_keys=[],
181
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
182
+ partition_values=["1"],
183
+ input_deltas=pa.Table.from_arrays(
184
+ [
185
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
186
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
187
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
188
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
189
+ ],
190
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
191
+ ),
192
+ input_deltas_delta_type=DeltaType.UPSERT,
193
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
194
+ [
195
+ pa.array([1, 2, None, 5, None, None]),
196
+ pa.array([1, None, 3, 5, None, None]),
197
+ pa.array(["a", "b", "c", "g", "e", None]),
198
+ pa.array([10, 8, 7, 11, 12, 14]),
199
+ ],
200
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
201
+ ),
202
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
203
+ [
204
+ pa.array([1, 2, None, 5, None, None]),
205
+ pa.array([1, None, 3, 5, None, None]),
206
+ pa.array(["a", "b", "c", "g", "e", None]),
207
+ pa.array([10, 8, 7, 11, 12, 14]),
208
+ ],
209
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
210
+ ),
211
+ expected_terminal_exception=None,
212
+ expected_terminal_exception_message=None,
213
+ do_create_placement_group=False,
214
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
215
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
216
+ read_kwargs_provider=None,
217
+ drop_duplicates=True,
218
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
219
+ assert_compaction_audit=None,
220
+ ),
221
+ "5-rebase-with-null-multiple-pk-one-hash-bucket": RebaseCompactionTestCaseParams(
222
+ primary_keys={"pk_col_1", "pk_col_2", "pk_col_3"},
223
+ sort_keys=[],
224
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
225
+ partition_values=["1"],
226
+ input_deltas=pa.Table.from_arrays(
227
+ [
228
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
229
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
230
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
231
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
232
+ ],
233
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
234
+ ),
235
+ input_deltas_delta_type=DeltaType.UPSERT,
236
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
237
+ [
238
+ pa.array([1, 2, None, 5, None, None]),
239
+ pa.array([1, None, 3, 5, None, None]),
240
+ pa.array(["a", "b", "c", "g", "e", None]),
241
+ pa.array([10, 8, 7, 11, 12, 14]),
242
+ ],
243
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
244
+ ),
245
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
246
+ [
247
+ pa.array([1, 2, None, 5, None, None]),
248
+ pa.array([1, None, 3, 5, None, None]),
249
+ pa.array(["a", "b", "c", "g", "e", None]),
250
+ pa.array([10, 8, 7, 11, 12, 14]),
251
+ ],
252
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
253
+ ),
254
+ expected_terminal_exception=None,
255
+ expected_terminal_exception_message=None,
256
+ do_create_placement_group=False,
257
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
258
+ hash_bucket_count=1,
259
+ read_kwargs_provider=None,
260
+ drop_duplicates=True,
261
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
262
+ assert_compaction_audit=None,
263
+ ),
264
+ "6-rebase-with-null-multiple-pk-drop-duplicates-false": RebaseCompactionTestCaseParams(
265
+ primary_keys={"pk_col_1", "pk_col_2", "pk_col_3"},
266
+ sort_keys=[],
267
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
268
+ partition_values=["1"],
269
+ input_deltas=pa.Table.from_arrays(
270
+ [
271
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
272
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
273
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
274
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
275
+ ],
276
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
277
+ ),
278
+ input_deltas_delta_type=DeltaType.UPSERT,
279
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
280
+ [
281
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
282
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
283
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
284
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
285
+ ],
286
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
287
+ ),
288
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
289
+ [
290
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
291
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
292
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
293
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
294
+ ],
295
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
296
+ ),
297
+ expected_terminal_exception=None,
298
+ expected_terminal_exception_message=None,
299
+ do_create_placement_group=False,
300
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
301
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
302
+ read_kwargs_provider=None,
303
+ drop_duplicates=False,
304
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
305
+ assert_compaction_audit=None,
306
+ ),
307
+ "7-rebase-drop-duplicates-false": RebaseCompactionTestCaseParams(
308
+ primary_keys={"pk_col_1"},
309
+ sort_keys=[
310
+ SortKey.of(key_name="sk_col_1"),
311
+ ],
312
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
313
+ partition_values=["1"],
314
+ input_deltas=pa.Table.from_arrays(
315
+ [
316
+ pa.array([1, 2, 2, 3, 3, 1]),
317
+ pa.array([1, 2, 3, 4, 5, 6]),
318
+ pa.array(["a", "b", "c", "b", "e", "a"]),
319
+ pa.array([5, 6, 7, 8, 9, 10]),
320
+ ],
321
+ names=["pk_col_1", "sk_col_1", "col_1", "col_2"],
322
+ ),
323
+ input_deltas_delta_type=DeltaType.UPSERT,
324
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
325
+ [
326
+ pa.array([1, 2, 2, 3, 3, 1]),
327
+ pa.array([1, 2, 3, 4, 5, 6]),
328
+ pa.array(["a", "b", "c", "b", "e", "a"]),
329
+ pa.array([5, 6, 7, 8, 9, 10]),
330
+ ],
331
+ names=["pk_col_1", "sk_col_1", "col_1", "col_2"],
332
+ ),
333
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
334
+ [
335
+ pa.array([1, 2, 2, 3, 3, 1]),
336
+ pa.array([1, 2, 3, 4, 5, 6]),
337
+ pa.array(["a", "b", "c", "b", "e", "a"]),
338
+ pa.array([5, 6, 7, 8, 9, 10]),
339
+ ],
340
+ names=["pk_col_1", "sk_col_1", "col_1", "col_2"],
341
+ ),
342
+ expected_terminal_exception=None,
343
+ expected_terminal_exception_message=None,
344
+ do_create_placement_group=False,
345
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
346
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
347
+ read_kwargs_provider=None,
348
+ drop_duplicates=False,
349
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
350
+ assert_compaction_audit=None,
351
+ ),
352
+ "8-rebase-with-with-null-pk-duplicates-false-hash-bucket-1": RebaseCompactionTestCaseParams(
353
+ primary_keys={"pk_col_1", "pk_col_2", "pk_col_3"},
354
+ sort_keys=[],
355
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
356
+ partition_values=["1"],
357
+ input_deltas=pa.Table.from_arrays(
358
+ [
359
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
360
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
361
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
362
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
363
+ ],
364
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
365
+ ),
366
+ input_deltas_delta_type=DeltaType.UPSERT,
367
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
368
+ [
369
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
370
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
371
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
372
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
373
+ ],
374
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
375
+ ),
376
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
377
+ [
378
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
379
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
380
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
381
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
382
+ ],
383
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
384
+ ),
385
+ expected_terminal_exception=None,
386
+ expected_terminal_exception_message=None,
387
+ do_create_placement_group=False,
388
+ records_per_compacted_file=1,
389
+ hash_bucket_count=1,
390
+ read_kwargs_provider=None,
391
+ drop_duplicates=False,
392
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
393
+ assert_compaction_audit=None,
394
+ ),
395
+ }
396
+
397
+ REBASE_TEST_CASES = with_compactor_version_func_test_param(REBASE_TEST_CASES)  # rebinds the name to the helper's return value; presumably expands each case per compactor version -- TODO confirm against the helper