deltacat 1.1.12__tar.gz → 1.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. {deltacat-1.1.12/deltacat.egg-info → deltacat-1.1.14}/PKG-INFO +1 -1
  2. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/__init__.py +1 -1
  3. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/aws/clients.py +1 -1
  4. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/compact_partition_params.py +5 -0
  5. deltacat-1.1.14/deltacat/compute/compactor_v2/compaction_session.py +220 -0
  6. deltacat-1.1.12/deltacat/compute/compactor_v2/compaction_session.py → deltacat-1.1.14/deltacat/compute/compactor_v2/private/compaction_utils.py +363 -343
  7. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/utils/task_options.py +0 -1
  8. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +1 -0
  9. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +32 -0
  10. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compact_partition_test_cases.py +19 -1
  11. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/test_compact_partition_incremental.py +13 -0
  12. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/test_compact_partition_rebase.py +34 -0
  13. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +12 -0
  14. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/test_util_common.py +101 -0
  15. deltacat-1.1.14/deltacat/utils/ray_utils/__init__.py +0 -0
  16. {deltacat-1.1.12 → deltacat-1.1.14/deltacat.egg-info}/PKG-INFO +1 -1
  17. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat.egg-info/SOURCES.txt +2 -0
  18. {deltacat-1.1.12 → deltacat-1.1.14}/LICENSE +0 -0
  19. {deltacat-1.1.12 → deltacat-1.1.14}/MANIFEST.in +0 -0
  20. {deltacat-1.1.12 → deltacat-1.1.14}/README.md +0 -0
  21. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/aws/__init__.py +0 -0
  22. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/aws/constants.py +0 -0
  23. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/aws/redshift/__init__.py +0 -0
  24. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/aws/redshift/model/__init__.py +0 -0
  25. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/aws/redshift/model/manifest.py +0 -0
  26. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/aws/s3u.py +0 -0
  27. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/benchmarking/__init__.py +0 -0
  28. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  29. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/benchmarking/conftest.py +0 -0
  30. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/catalog/__init__.py +0 -0
  31. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
  32. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/catalog/delegate.py +0 -0
  33. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/catalog/interface.py +0 -0
  34. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/catalog/model/__init__.py +0 -0
  35. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/catalog/model/catalog.py +0 -0
  36. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/catalog/model/table_definition.py +0 -0
  37. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/__init__.py +0 -0
  38. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/__init__.py +0 -0
  39. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/compaction_session.py +0 -0
  40. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/__init__.py +0 -0
  41. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
  42. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  43. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  44. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  45. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  46. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  47. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  48. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  49. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  50. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  51. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  52. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  53. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  54. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/repartition_session.py +0 -0
  55. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/steps/__init__.py +0 -0
  56. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  57. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  58. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/steps/materialize.py +0 -0
  59. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/steps/repartition.py +0 -0
  60. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/utils/__init__.py +0 -0
  61. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/utils/io.py +0 -0
  62. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  63. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/utils/round_completion_file.py +0 -0
  64. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  65. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  66. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/__init__.py +0 -0
  67. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/constants.py +0 -0
  68. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  69. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  70. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  71. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  72. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  73. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  74. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  75. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
  76. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
  77. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  78. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  79. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
  80. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  81. {deltacat-1.1.12/deltacat/compute/compactor_v2/steps → deltacat-1.1.14/deltacat/compute/compactor_v2/private}/__init__.py +0 -0
  82. {deltacat-1.1.12/deltacat/compute/compactor_v2/utils → deltacat-1.1.14/deltacat/compute/compactor_v2/steps}/__init__.py +0 -0
  83. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
  84. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
  85. {deltacat-1.1.12/deltacat/compute/merge_on_read/model → deltacat-1.1.14/deltacat/compute/compactor_v2/utils}/__init__.py +0 -0
  86. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
  87. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  88. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  89. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/utils/io.py +0 -0
  90. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  91. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
  92. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/merge_on_read/__init__.py +0 -0
  93. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/merge_on_read/daft.py +0 -0
  94. {deltacat-1.1.12/deltacat/compute/merge_on_read/utils → deltacat-1.1.14/deltacat/compute/merge_on_read/model}/__init__.py +0 -0
  95. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
  96. {deltacat-1.1.12/deltacat/compute/metastats → deltacat-1.1.14/deltacat/compute/merge_on_read/utils}/__init__.py +0 -0
  97. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
  98. {deltacat-1.1.12/deltacat/compute/metastats/config → deltacat-1.1.14/deltacat/compute/metastats}/__init__.py +0 -0
  99. {deltacat-1.1.12/deltacat/compute/metastats/model → deltacat-1.1.14/deltacat/compute/metastats/config}/__init__.py +0 -0
  100. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/metastats/meta_stats.py +0 -0
  101. {deltacat-1.1.12/deltacat/compute/metastats/utils → deltacat-1.1.14/deltacat/compute/metastats/model}/__init__.py +0 -0
  102. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/metastats/model/partition_stats_dict.py +0 -0
  103. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/metastats/model/stats_cluster_size_estimator.py +0 -0
  104. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/metastats/stats.py +0 -0
  105. {deltacat-1.1.12/deltacat/compute/stats → deltacat-1.1.14/deltacat/compute/metastats/utils}/__init__.py +0 -0
  106. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/metastats/utils/constants.py +0 -0
  107. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/metastats/utils/io.py +0 -0
  108. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +0 -0
  109. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/metastats/utils/ray_utils.py +0 -0
  110. {deltacat-1.1.12/deltacat/compute/stats/models → deltacat-1.1.14/deltacat/compute/stats}/__init__.py +0 -0
  111. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/basic.py +0 -0
  112. {deltacat-1.1.12/deltacat/compute/stats/utils → deltacat-1.1.14/deltacat/compute/stats/models}/__init__.py +0 -0
  113. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  114. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/models/delta_stats.py +0 -0
  115. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  116. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  117. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/models/stats_result.py +0 -0
  118. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/types.py +0 -0
  119. {deltacat-1.1.12/deltacat/io → deltacat-1.1.14/deltacat/compute/stats/utils}/__init__.py +0 -0
  120. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/utils/intervals.py +0 -0
  121. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/utils/io.py +0 -0
  122. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/compute/stats/utils/manifest_stats_file.py +0 -0
  123. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/constants.py +0 -0
  124. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/exceptions.py +0 -0
  125. {deltacat-1.1.12/deltacat/io/aws → deltacat-1.1.14/deltacat/io}/__init__.py +0 -0
  126. {deltacat-1.1.12/deltacat/io/aws/redshift → deltacat-1.1.14/deltacat/io/aws}/__init__.py +0 -0
  127. {deltacat-1.1.12/deltacat/storage/model → deltacat-1.1.14/deltacat/io/aws/redshift}/__init__.py +0 -0
  128. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/io/dataset.py +0 -0
  129. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/io/file_object_store.py +0 -0
  130. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/io/memcached_object_store.py +0 -0
  131. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/io/object_store.py +0 -0
  132. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/io/ray_plasma_object_store.py +0 -0
  133. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/io/read_api.py +0 -0
  134. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/io/redis_object_store.py +0 -0
  135. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/io/s3_object_store.py +0 -0
  136. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/logs.py +0 -0
  137. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/__init__.py +0 -0
  138. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/interface.py +0 -0
  139. {deltacat-1.1.12/deltacat/tests → deltacat-1.1.14/deltacat/storage/model}/__init__.py +0 -0
  140. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/delete_parameters.py +0 -0
  141. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/delta.py +0 -0
  142. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/list_result.py +0 -0
  143. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/locator.py +0 -0
  144. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/namespace.py +0 -0
  145. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/partition.py +0 -0
  146. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/partition_spec.py +0 -0
  147. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/sort_key.py +0 -0
  148. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/stream.py +0 -0
  149. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/table.py +0 -0
  150. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/table_version.py +0 -0
  151. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/transform.py +0 -0
  152. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/storage/model/types.py +0 -0
  153. {deltacat-1.1.12/deltacat/tests/aws → deltacat-1.1.14/deltacat/tests}/__init__.py +0 -0
  154. {deltacat-1.1.12/deltacat/tests/catalog → deltacat-1.1.14/deltacat/tests/aws}/__init__.py +0 -0
  155. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/aws/test_clients.py +0 -0
  156. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/aws/test_s3u.py +0 -0
  157. {deltacat-1.1.12/deltacat/tests/compute → deltacat-1.1.14/deltacat/tests/catalog}/__init__.py +0 -0
  158. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
  159. {deltacat-1.1.12/deltacat/tests/compute/compactor → deltacat-1.1.14/deltacat/tests/compute}/__init__.py +0 -0
  160. {deltacat-1.1.12/deltacat/tests/compute/compactor/steps → deltacat-1.1.14/deltacat/tests/compute/compactor}/__init__.py +0 -0
  161. {deltacat-1.1.12/deltacat/tests/compute/compactor/utils → deltacat-1.1.14/deltacat/tests/compute/compactor/steps}/__init__.py +0 -0
  162. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  163. {deltacat-1.1.12/deltacat/tests/compute/compactor_v2 → deltacat-1.1.14/deltacat/tests/compute/compactor/utils}/__init__.py +0 -0
  164. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  165. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -0
  166. {deltacat-1.1.12/deltacat/tests/compute/compactor_v2/utils → deltacat-1.1.14/deltacat/tests/compute/compactor_v2}/__init__.py +0 -0
  167. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
  168. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  169. {deltacat-1.1.12/deltacat/tests/io → deltacat-1.1.14/deltacat/tests/compute/compactor_v2/utils}/__init__.py +0 -0
  170. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  171. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
  172. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/test_util_constant.py +0 -0
  173. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
  174. {deltacat-1.1.12/deltacat/tests/stats → deltacat-1.1.14/deltacat/tests/io}/__init__.py +0 -0
  175. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
  176. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/io/test_file_object_store.py +0 -0
  177. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/io/test_memcached_object_store.py +0 -0
  178. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
  179. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/io/test_redis_object_store.py +0 -0
  180. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/io/test_s3_object_store.py +0 -0
  181. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
  182. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
  183. {deltacat-1.1.12/deltacat/tests/test_utils → deltacat-1.1.14/deltacat/tests/stats}/__init__.py +0 -0
  184. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/stats/test_intervals.py +0 -0
  185. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/test_exceptions.py +0 -0
  186. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/test_logs.py +0 -0
  187. {deltacat-1.1.12/deltacat/tests/utils → deltacat-1.1.14/deltacat/tests/test_utils}/__init__.py +0 -0
  188. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/test_utils/constants.py +0 -0
  189. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/test_utils/pyarrow.py +0 -0
  190. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/test_utils/storage.py +0 -0
  191. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/test_utils/utils.py +0 -0
  192. {deltacat-1.1.12/deltacat/tests/utils/data → deltacat-1.1.14/deltacat/tests/utils}/__init__.py +0 -0
  193. {deltacat-1.1.12/deltacat/tests/utils/ray_utils → deltacat-1.1.14/deltacat/tests/utils/data}/__init__.py +0 -0
  194. {deltacat-1.1.12/deltacat/types → deltacat-1.1.14/deltacat/tests/utils/ray_utils}/__init__.py +0 -0
  195. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
  196. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
  197. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  198. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/test_daft.py +0 -0
  199. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/test_metrics.py +0 -0
  200. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/test_placement.py +0 -0
  201. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/test_pyarrow.py +0 -0
  202. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  203. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/tests/utils/test_resources.py +0 -0
  204. {deltacat-1.1.12/deltacat/utils → deltacat-1.1.14/deltacat/types}/__init__.py +0 -0
  205. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/types/media.py +0 -0
  206. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/types/partial_download.py +0 -0
  207. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/types/tables.py +0 -0
  208. {deltacat-1.1.12/deltacat/utils/ray_utils → deltacat-1.1.14/deltacat/utils}/__init__.py +0 -0
  209. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/arguments.py +0 -0
  210. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/cloudpickle.py +0 -0
  211. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/common.py +0 -0
  212. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/daft.py +0 -0
  213. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/metrics.py +0 -0
  214. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/numpy.py +0 -0
  215. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/pandas.py +0 -0
  216. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/performance.py +0 -0
  217. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/placement.py +0 -0
  218. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/pyarrow.py +0 -0
  219. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/ray_utils/collections.py +0 -0
  220. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/ray_utils/concurrency.py +0 -0
  221. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/ray_utils/dataset.py +0 -0
  222. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/ray_utils/performance.py +0 -0
  223. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/ray_utils/runtime.py +0 -0
  224. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/resources.py +0 -0
  225. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/s3fs.py +0 -0
  226. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat/utils/schema.py +0 -0
  227. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat.egg-info/dependency_links.txt +0 -0
  228. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat.egg-info/requires.txt +0 -0
  229. {deltacat-1.1.12 → deltacat-1.1.14}/deltacat.egg-info/top_level.txt +0 -0
  230. {deltacat-1.1.12 → deltacat-1.1.14}/setup.cfg +0 -0
  231. {deltacat-1.1.12 → deltacat-1.1.14}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.12
3
+ Version: 1.1.14
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.12"
47
+ __version__ = "1.1.14"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -42,7 +42,7 @@ RETRYABLE_HTTP_STATUS_CODES = [
42
42
 
43
43
  boto_retry_wrapper = Retrying(
44
44
  wait=wait_random_exponential(multiplier=1, max=10),
45
- stop=stop_after_delay(60 * 5),
45
+ stop=stop_after_delay(60 * 10),
46
46
  # CredentialRetrievalError can still be thrown due to throttling, even if IMDS health checks succeed.
47
47
  retry=retry_if_exception_type(CredentialRetrievalError),
48
48
  )
@@ -185,6 +185,11 @@ class CompactPartitionParams(dict):
185
185
 
186
186
  @property
187
187
  def task_max_parallelism(self) -> int:
188
+ if self.pg_config:
189
+ cluster_resources = self.pg_config.resource
190
+ cluster_cpus = cluster_resources["CPU"]
191
+ self.task_max_parallelism = cluster_cpus
192
+ self["task_max_parallelism"] = self.task_max_parallelism
188
193
  return self["task_max_parallelism"]
189
194
 
190
195
  @task_max_parallelism.setter
@@ -0,0 +1,220 @@
1
+ import numpy as np
2
+ import importlib
3
+ from contextlib import nullcontext
4
+ import logging
5
+ import time
6
+ import ray
7
+
8
+ import deltacat
9
+ from deltacat.compute.compactor import (
10
+ PyArrowWriteResult,
11
+ RoundCompletionInfo,
12
+ )
13
+ from deltacat import logs
14
+ from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
15
+ ExecutionCompactionResult,
16
+ )
17
+ from deltacat.compute.compactor.model.compactor_version import CompactorVersion
18
+ from deltacat.compute.compactor.utils import round_completion_file as rcf
19
+ from deltacat.compute.compactor import DeltaAnnotated
20
+ from deltacat.compute.compactor_v2.deletes.delete_strategy import (
21
+ DeleteStrategy,
22
+ )
23
+ from deltacat.compute.compactor.model.materialize_result import MaterializeResult
24
+ from deltacat.compute.compactor_v2.model.merge_result import MergeResult
25
+ from deltacat.compute.compactor_v2.deletes.delete_file_envelope import (
26
+ DeleteFileEnvelope,
27
+ )
28
+ from deltacat.storage import (
29
+ Delta,
30
+ DeltaLocator,
31
+ Manifest,
32
+ Partition,
33
+ )
34
+ from deltacat.compute.compactor.model.compact_partition_params import (
35
+ CompactPartitionParams,
36
+ )
37
+ from deltacat.utils.resources import (
38
+ get_current_process_peak_memory_usage_in_bytes,
39
+ )
40
+ from deltacat.compute.compactor_v2.private.compaction_utils import (
41
+ _fetch_compaction_metadata,
42
+ _build_uniform_deltas,
43
+ _run_hash_and_merge,
44
+ _process_merge_results,
45
+ _upload_compaction_audit,
46
+ _write_new_round_completion_file,
47
+ _commit_compaction_result,
48
+ )
49
+ from deltacat.utils.metrics import metrics
50
+ from deltacat.compute.compactor.model.compaction_session_audit_info import (
51
+ CompactionSessionAuditInfo,
52
+ )
53
+
54
+ from typing import List, Optional
55
+ from deltacat.compute.compactor_v2.utils import io
56
+ from deltacat.exceptions import categorize_errors
57
+ from deltacat.compute.compactor_v2.constants import COMPACT_PARTITION_METRIC_PREFIX
58
+
59
+ if importlib.util.find_spec("memray"):
60
+ import memray
61
+
62
+
63
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
64
+
65
+
66
+ @metrics(prefix=COMPACT_PARTITION_METRIC_PREFIX)
67
+ @categorize_errors
68
+ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]:
69
+ assert (
70
+ params.hash_bucket_count is not None and params.hash_bucket_count >= 1
71
+ ), "hash_bucket_count is a required arg for compactor v2"
72
+
73
+ with memray.Tracker(
74
+ "compaction_partition.bin"
75
+ ) if params.enable_profiler else nullcontext():
76
+ execute_compaction_result: ExecutionCompactionResult = _execute_compaction(
77
+ params,
78
+ **kwargs,
79
+ )
80
+ _commit_compaction_result(params, execute_compaction_result)
81
+ return execute_compaction_result.round_completion_file_s3_url
82
+
83
+
84
+ def _execute_compaction(
85
+ params: CompactPartitionParams, **kwargs
86
+ ) -> ExecutionCompactionResult:
87
+ compaction_start_time: float = time.monotonic()
88
+ # Fetch round completion info for previously compacted partition, if it exists
89
+ fetch_compaction_metadata_result: tuple[
90
+ Optional[Manifest], Optional[RoundCompletionInfo]
91
+ ] = _fetch_compaction_metadata(params)
92
+ (
93
+ previous_compacted_delta_manifest,
94
+ round_completion_info,
95
+ ) = fetch_compaction_metadata_result
96
+ rcf_source_partition_locator: rcf.PartitionLocator = (
97
+ params.rebase_source_partition_locator or params.source_partition_locator
98
+ )
99
+
100
+ base_audit_url: str = rcf_source_partition_locator.path(
101
+ f"s3://{params.compaction_artifact_s3_bucket}/compaction-audit"
102
+ )
103
+ audit_url: str = f"{base_audit_url}.json"
104
+ logger.info(f"Compaction audit will be written to {audit_url}")
105
+ compaction_audit: CompactionSessionAuditInfo = (
106
+ CompactionSessionAuditInfo(deltacat.__version__, ray.__version__, audit_url)
107
+ .set_hash_bucket_count(params.hash_bucket_count)
108
+ .set_compactor_version(CompactorVersion.V2.value)
109
+ )
110
+
111
+ if params.pg_config:
112
+ logger.info(
113
+ "pg_config specified. Tasks will be scheduled in a placement group."
114
+ )
115
+ cluster_resources = params.pg_config.resource
116
+ cluster_memory = cluster_resources["memory"]
117
+ compaction_audit.set_total_cluster_memory_bytes(cluster_memory)
118
+ high_watermark = (
119
+ round_completion_info.high_watermark if round_completion_info else None
120
+ )
121
+ audit_url = compaction_audit.audit_url if compaction_audit else None
122
+ # discover and build uniform deltas
123
+ delta_discovery_start = time.monotonic()
124
+ input_deltas: List[Delta] = io.discover_deltas(
125
+ params.source_partition_locator,
126
+ params.last_stream_position_to_compact,
127
+ params.rebase_source_partition_locator,
128
+ params.rebase_source_partition_high_watermark,
129
+ high_watermark,
130
+ params.deltacat_storage,
131
+ params.deltacat_storage_kwargs,
132
+ params.list_deltas_kwargs,
133
+ )
134
+ if not input_deltas:
135
+ logger.info("No input deltas found to compact.")
136
+ return ExecutionCompactionResult(None, None, None, False)
137
+ build_uniform_deltas_result: tuple[
138
+ List[DeltaAnnotated], DeleteStrategy, List[DeleteFileEnvelope], Partition
139
+ ] = _build_uniform_deltas(
140
+ params, compaction_audit, input_deltas, delta_discovery_start
141
+ )
142
+ (
143
+ uniform_deltas,
144
+ delete_strategy,
145
+ delete_file_envelopes,
146
+ ) = build_uniform_deltas_result
147
+
148
+ # run merge
149
+ _run_hash_and_merge_result: tuple[
150
+ Optional[List[MergeResult]],
151
+ np.float64,
152
+ np.float64,
153
+ Partition,
154
+ ] = _run_hash_and_merge(
155
+ params,
156
+ uniform_deltas,
157
+ round_completion_info,
158
+ delete_strategy,
159
+ delete_file_envelopes,
160
+ compaction_audit,
161
+ previous_compacted_delta_manifest,
162
+ )
163
+ (
164
+ merge_results,
165
+ telemetry_time_hb,
166
+ telemetry_time_merge,
167
+ compacted_partition,
168
+ ) = _run_hash_and_merge_result
169
+ # process merge results
170
+ process_merge_results: tuple[
171
+ Delta, list[MaterializeResult], dict
172
+ ] = _process_merge_results(params, merge_results, compaction_audit)
173
+ merged_delta, mat_results, hb_id_to_entry_indices_range = process_merge_results
174
+ # Record information, logging, and return ExecutionCompactionResult
175
+ record_info_msg: str = f" Materialized records: {merged_delta.meta.record_count}"
176
+ logger.info(record_info_msg)
177
+ compacted_delta: Delta = params.deltacat_storage.commit_delta(
178
+ merged_delta,
179
+ properties=kwargs.get("properties", {}),
180
+ **params.deltacat_storage_kwargs,
181
+ )
182
+
183
+ logger.info(f"Committed compacted delta: {compacted_delta}")
184
+ compaction_end_time: float = time.monotonic()
185
+ compaction_audit.set_compaction_time_in_seconds(
186
+ compaction_end_time - compaction_start_time
187
+ )
188
+ new_compacted_delta_locator: DeltaLocator = DeltaLocator.of(
189
+ compacted_partition.locator,
190
+ compacted_delta.stream_position,
191
+ )
192
+ pyarrow_write_result: PyArrowWriteResult = PyArrowWriteResult.union(
193
+ [m.pyarrow_write_result for m in mat_results]
194
+ )
195
+
196
+ session_peak_memory = get_current_process_peak_memory_usage_in_bytes()
197
+ compaction_audit.set_peak_memory_used_bytes_by_compaction_session_process(
198
+ session_peak_memory
199
+ )
200
+
201
+ compaction_audit.save_round_completion_stats(
202
+ mat_results, telemetry_time_hb + telemetry_time_merge
203
+ )
204
+
205
+ _upload_compaction_audit(
206
+ params,
207
+ compaction_audit,
208
+ round_completion_info,
209
+ )
210
+ compaction_result: ExecutionCompactionResult = _write_new_round_completion_file(
211
+ params,
212
+ compaction_audit,
213
+ compacted_partition,
214
+ audit_url,
215
+ hb_id_to_entry_indices_range,
216
+ rcf_source_partition_locator,
217
+ new_compacted_delta_locator,
218
+ pyarrow_write_result,
219
+ )
220
+ return compaction_result