deltacat 1.1.0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. {deltacat-1.1.0 → deltacat-1.1.1}/PKG-INFO +1 -1
  2. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/__init__.py +1 -1
  3. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/compact_partition_params.py +24 -0
  4. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/compaction_session_audit_info.py +11 -0
  5. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/compaction_session.py +34 -6
  6. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +6 -0
  7. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/model/merge_input.py +6 -0
  8. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/steps/hash_bucket.py +2 -1
  9. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/steps/merge.py +26 -4
  10. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/utils/task_options.py +94 -8
  11. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/memcached_object_store.py +20 -0
  12. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/logs.py +29 -2
  13. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/test_compact_partition_params.py +5 -0
  14. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/io/test_memcached_object_store.py +19 -0
  15. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/resources.py +5 -3
  16. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat.egg-info/PKG-INFO +1 -1
  17. {deltacat-1.1.0 → deltacat-1.1.1}/MANIFEST.in +0 -0
  18. {deltacat-1.1.0 → deltacat-1.1.1}/README.md +0 -0
  19. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/aws/__init__.py +0 -0
  20. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/aws/clients.py +0 -0
  21. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/aws/constants.py +0 -0
  22. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/aws/redshift/__init__.py +0 -0
  23. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/aws/redshift/model/__init__.py +0 -0
  24. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/aws/redshift/model/manifest.py +0 -0
  25. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/aws/s3u.py +0 -0
  26. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/benchmarking/__init__.py +0 -0
  27. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  28. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/benchmarking/conftest.py +0 -0
  29. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/catalog/__init__.py +0 -0
  30. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
  31. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/catalog/delegate.py +0 -0
  32. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/catalog/interface.py +0 -0
  33. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/catalog/model/__init__.py +0 -0
  34. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/catalog/model/catalog.py +0 -0
  35. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/catalog/model/table_definition.py +0 -0
  36. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/__init__.py +0 -0
  37. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/__init__.py +0 -0
  38. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/compaction_session.py +0 -0
  39. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/__init__.py +0 -0
  40. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  41. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  42. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  43. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  44. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  45. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  46. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  47. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  48. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  49. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  50. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  51. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  52. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/repartition_session.py +0 -0
  53. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/steps/__init__.py +0 -0
  54. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  55. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  56. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/steps/materialize.py +0 -0
  57. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/steps/repartition.py +0 -0
  58. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/utils/__init__.py +0 -0
  59. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/utils/io.py +0 -0
  60. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  61. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/utils/round_completion_file.py +0 -0
  62. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  63. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  64. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/__init__.py +0 -0
  65. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/constants.py +0 -0
  66. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  67. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  68. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  69. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  70. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  71. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  72. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  73. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  74. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  75. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  76. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  77. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  78. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
  79. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  80. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  81. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/utils/io.py +0 -0
  82. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  83. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
  84. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/merge_on_read/__init__.py +0 -0
  85. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/merge_on_read/daft.py +0 -0
  86. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
  87. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
  88. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
  89. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
  90. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/__init__.py +0 -0
  91. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/config/__init__.py +0 -0
  92. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/meta_stats.py +0 -0
  93. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/model/__init__.py +0 -0
  94. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/model/partition_stats_dict.py +0 -0
  95. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/model/stats_cluster_size_estimator.py +0 -0
  96. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/stats.py +0 -0
  97. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/utils/__init__.py +0 -0
  98. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/utils/constants.py +0 -0
  99. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/utils/io.py +0 -0
  100. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +0 -0
  101. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/metastats/utils/ray_utils.py +0 -0
  102. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/__init__.py +0 -0
  103. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/basic.py +0 -0
  104. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/models/__init__.py +0 -0
  105. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  106. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/models/delta_stats.py +0 -0
  107. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  108. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  109. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/models/stats_result.py +0 -0
  110. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/types.py +0 -0
  111. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/utils/__init__.py +0 -0
  112. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/utils/intervals.py +0 -0
  113. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/utils/io.py +0 -0
  114. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/compute/stats/utils/manifest_stats_file.py +0 -0
  115. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/constants.py +0 -0
  116. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/exceptions.py +0 -0
  117. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/__init__.py +0 -0
  118. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/aws/__init__.py +0 -0
  119. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/aws/redshift/__init__.py +0 -0
  120. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/aws/redshift/redshift_datasource.py +0 -0
  121. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/dataset.py +0 -0
  122. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/file_object_store.py +0 -0
  123. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/object_store.py +0 -0
  124. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/ray_plasma_object_store.py +0 -0
  125. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/read_api.py +0 -0
  126. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/redis_object_store.py +0 -0
  127. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/io/s3_object_store.py +0 -0
  128. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/__init__.py +0 -0
  129. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/interface.py +0 -0
  130. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/__init__.py +0 -0
  131. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/delete_parameters.py +0 -0
  132. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/delta.py +0 -0
  133. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/list_result.py +0 -0
  134. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/locator.py +0 -0
  135. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/namespace.py +0 -0
  136. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/partition.py +0 -0
  137. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/sort_key.py +0 -0
  138. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/stream.py +0 -0
  139. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/table.py +0 -0
  140. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/table_version.py +0 -0
  141. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/storage/model/types.py +0 -0
  142. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/__init__.py +0 -0
  143. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/aws/__init__.py +0 -0
  144. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/aws/test_clients.py +0 -0
  145. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/catalog/__init__.py +0 -0
  146. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
  147. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/__init__.py +0 -0
  148. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
  149. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
  150. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor/__init__.py +0 -0
  151. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
  152. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  153. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
  154. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  155. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
  156. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
  157. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  158. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
  159. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  160. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
  161. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
  162. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/test_util_common.py +0 -0
  163. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/test_util_constant.py +0 -0
  164. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
  165. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/io/__init__.py +0 -0
  166. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
  167. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/io/test_file_object_store.py +0 -0
  168. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
  169. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/io/test_redis_object_store.py +0 -0
  170. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/io/test_s3_object_store.py +0 -0
  171. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
  172. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/stats/__init__.py +0 -0
  173. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/stats/test_intervals.py +0 -0
  174. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/test_utils/__init__.py +0 -0
  175. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/test_utils/constants.py +0 -0
  176. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/test_utils/pyarrow.py +0 -0
  177. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/test_utils/storage.py +0 -0
  178. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/test_utils/utils.py +0 -0
  179. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/utils/__init__.py +0 -0
  180. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/utils/data/__init__.py +0 -0
  181. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  182. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/utils/test_daft.py +0 -0
  183. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/utils/test_pyarrow.py +0 -0
  184. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  185. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/tests/utils/test_resources.py +0 -0
  186. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/types/__init__.py +0 -0
  187. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/types/media.py +0 -0
  188. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/types/partial_download.py +0 -0
  189. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/types/tables.py +0 -0
  190. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/__init__.py +0 -0
  191. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/arguments.py +0 -0
  192. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/cloudpickle.py +0 -0
  193. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/common.py +0 -0
  194. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/daft.py +0 -0
  195. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/metrics.py +0 -0
  196. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/numpy.py +0 -0
  197. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/pandas.py +0 -0
  198. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/performance.py +0 -0
  199. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/placement.py +0 -0
  200. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/pyarrow.py +0 -0
  201. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/ray_utils/__init__.py +0 -0
  202. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/ray_utils/collections.py +0 -0
  203. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/ray_utils/concurrency.py +0 -0
  204. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/ray_utils/dataset.py +0 -0
  205. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/ray_utils/performance.py +0 -0
  206. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/ray_utils/runtime.py +0 -0
  207. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/s3fs.py +0 -0
  208. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat/utils/schema.py +0 -0
  209. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat.egg-info/SOURCES.txt +0 -0
  210. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat.egg-info/dependency_links.txt +0 -0
  211. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat.egg-info/requires.txt +0 -0
  212. {deltacat-1.1.0 → deltacat-1.1.1}/deltacat.egg-info/top_level.txt +0 -0
  213. {deltacat-1.1.0 → deltacat-1.1.1}/setup.cfg +0 -0
  214. {deltacat-1.1.0 → deltacat-1.1.1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.0
3
+ Version: 1.1.1
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.0"
47
+ __version__ = "1.1.1"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -20,6 +20,7 @@ from deltacat.compute.compactor_v2.constants import (
20
20
  AVERAGE_RECORD_SIZE_BYTES,
21
21
  TASK_MAX_PARALLELISM,
22
22
  DROP_DUPLICATES,
23
+ TOTAL_MEMORY_BUFFER_PERCENTAGE,
23
24
  )
24
25
  from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
25
26
  from deltacat.compute.compactor.utils.sort_key import validate_sort_keys
@@ -85,12 +86,17 @@ class CompactPartitionParams(dict):
85
86
  result.average_record_size_bytes = params.get(
86
87
  "average_record_size_bytes", AVERAGE_RECORD_SIZE_BYTES
87
88
  )
89
+ result.total_memory_buffer_percentage = params.get(
90
+ "total_memory_buffer_percentage", TOTAL_MEMORY_BUFFER_PERCENTAGE
91
+ )
88
92
  result.hash_group_count = params.get(
89
93
  "hash_group_count", result.hash_bucket_count
90
94
  )
91
95
  result.drop_duplicates = params.get("drop_duplicates", DROP_DUPLICATES)
92
96
  result.ray_custom_resources = params.get("ray_custom_resources")
93
97
 
98
+ result.memory_logs_enabled = params.get("memory_logs_enabled", False)
99
+
94
100
  result.metrics_config = params.get("metrics_config")
95
101
 
96
102
  if not importlib.util.find_spec("memray"):
@@ -190,6 +196,16 @@ class CompactPartitionParams(dict):
190
196
  def average_record_size_bytes(self, average_record_size_bytes: float) -> None:
191
197
  self["average_record_size_bytes"] = average_record_size_bytes
192
198
 
199
+ @property
200
+ def total_memory_buffer_percentage(self) -> int:
201
+ return self["total_memory_buffer_percentage"]
202
+
203
+ @total_memory_buffer_percentage.setter
204
+ def total_memory_buffer_percentage(
205
+ self, total_memory_buffer_percentage: int
206
+ ) -> None:
207
+ self["total_memory_buffer_percentage"] = total_memory_buffer_percentage
208
+
193
209
  @property
194
210
  def min_files_in_batch(self) -> float:
195
211
  return self["min_files_in_batch"]
@@ -355,6 +371,14 @@ class CompactPartitionParams(dict):
355
371
  def sort_keys(self, keys: List[SortKey]) -> None:
356
372
  self["sort_keys"] = keys
357
373
 
374
+ @property
375
+ def memory_logs_enabled(self) -> bool:
376
+ return self.get("memory_logs_enabled")
377
+
378
+ @memory_logs_enabled.setter
379
+ def memory_logs_enabled(self, value: bool) -> None:
380
+ self["memory_logs_enabled"] = value
381
+
358
382
  @property
359
383
  def metrics_config(self) -> Optional[MetricsConfig]:
360
384
  return self.get("metrics_config")
@@ -84,6 +84,13 @@ class CompactionSessionAuditInfo(dict):
84
84
  """
85
85
  return self.get("recordsDeduped")
86
86
 
87
+ @property
88
+ def records_deleted(self) -> int:
89
+ """
90
+ The total count of deleted records in a compaction session if delete deltas are present.
91
+ """
92
+ return self.get("recordsDeleted")
93
+
87
94
  @property
88
95
  def input_size_bytes(self) -> float:
89
96
  """
@@ -461,6 +468,10 @@ class CompactionSessionAuditInfo(dict):
461
468
  self["recordsDeduped"] = records_deduped
462
469
  return self
463
470
 
471
+ def set_records_deleted(self, records_deleted: int) -> CompactionSessionAuditInfo:
472
+ self["recordsDeleted"] = records_deleted
473
+ return self
474
+
464
475
  def set_input_size_bytes(
465
476
  self, input_size_bytes: float
466
477
  ) -> CompactionSessionAuditInfo:
@@ -62,6 +62,7 @@ from deltacat.utils.resources import (
62
62
  from deltacat.compute.compactor_v2.utils.task_options import (
63
63
  hash_bucket_resource_options_provider,
64
64
  merge_resource_options_provider,
65
+ local_merge_resource_options_provider,
65
66
  )
66
67
  from deltacat.compute.compactor.model.compactor_version import CompactorVersion
67
68
 
@@ -258,8 +259,10 @@ def _execute_compaction(
258
259
  resource_amount_provider=hash_bucket_resource_options_provider,
259
260
  previous_inflation=params.previous_inflation,
260
261
  average_record_size_bytes=params.average_record_size_bytes,
262
+ total_memory_buffer_percentage=params.total_memory_buffer_percentage,
261
263
  primary_keys=params.primary_keys,
262
264
  ray_custom_resources=params.ray_custom_resources,
265
+ memory_logs_enabled=params.memory_logs_enabled,
263
266
  )
264
267
 
265
268
  total_input_records_count = np.int64(0)
@@ -275,7 +278,29 @@ def _execute_compaction(
275
278
  delete_strategy,
276
279
  delete_file_envelopes,
277
280
  )
278
- local_merge_result = ray.get(mg.merge.remote(local_merge_input))
281
+ estimated_da_bytes = (
282
+ compaction_audit.estimated_in_memory_size_bytes_during_discovery
283
+ )
284
+ estimated_num_records = sum(
285
+ [
286
+ entry.meta.record_count
287
+ for delta in uniform_deltas
288
+ for entry in delta.manifest.entries
289
+ ]
290
+ )
291
+ local_merge_options = local_merge_resource_options_provider(
292
+ estimated_da_size=estimated_da_bytes,
293
+ estimated_num_rows=estimated_num_records,
294
+ total_memory_buffer_percentage=params.total_memory_buffer_percentage,
295
+ round_completion_info=round_completion_info,
296
+ compacted_delta_manifest=previous_compacted_delta_manifest,
297
+ ray_custom_resources=params.ray_custom_resources,
298
+ primary_keys=params.primary_keys,
299
+ memory_logs_enabled=params.memory_logs_enabled,
300
+ )
301
+ local_merge_result = ray.get(
302
+ mg.merge.options(**local_merge_options).remote(local_merge_input)
303
+ )
279
304
  total_input_records_count += local_merge_result.input_record_count
280
305
  merge_results = [local_merge_result]
281
306
  merge_invoke_end = time.monotonic()
@@ -296,6 +321,7 @@ def _execute_compaction(
296
321
  object_store=params.object_store,
297
322
  deltacat_storage=params.deltacat_storage,
298
323
  deltacat_storage_kwargs=params.deltacat_storage_kwargs,
324
+ memory_logs_enabled=params.memory_logs_enabled,
299
325
  )
300
326
  }
301
327
 
@@ -382,12 +408,14 @@ def _execute_compaction(
382
408
  num_hash_groups=params.hash_group_count,
383
409
  hash_group_size_bytes=all_hash_group_idx_to_size_bytes,
384
410
  hash_group_num_rows=all_hash_group_idx_to_num_rows,
411
+ total_memory_buffer_percentage=params.total_memory_buffer_percentage,
385
412
  round_completion_info=round_completion_info,
386
413
  compacted_delta_manifest=previous_compacted_delta_manifest,
387
414
  primary_keys=params.primary_keys,
388
415
  deltacat_storage=params.deltacat_storage,
389
416
  deltacat_storage_kwargs=params.deltacat_storage_kwargs,
390
417
  ray_custom_resources=params.ray_custom_resources,
418
+ memory_logs_enabled=params.memory_logs_enabled,
391
419
  )
392
420
 
393
421
  def merge_input_provider(index, item):
@@ -417,6 +445,7 @@ def _execute_compaction(
417
445
  deltacat_storage_kwargs=params.deltacat_storage_kwargs,
418
446
  delete_strategy=delete_strategy,
419
447
  delete_file_envelopes=delete_file_envelopes,
448
+ memory_logs_enabled=params.memory_logs_enabled,
420
449
  )
421
450
  }
422
451
 
@@ -438,11 +467,11 @@ def _execute_compaction(
438
467
  merge_end = time.monotonic()
439
468
 
440
469
  total_dd_record_count = sum([ddr.deduped_record_count for ddr in merge_results])
441
- total_dropped_record_count = sum(
470
+ total_deleted_record_count = sum(
442
471
  [ddr.deleted_record_count for ddr in merge_results]
443
472
  )
444
473
  logger.info(
445
- f"Deduped {total_dd_record_count} records and dropped {total_dropped_record_count} records..."
474
+ f"Deduped {total_dd_record_count} records and deleted {total_deleted_record_count} records..."
446
475
  )
447
476
 
448
477
  compaction_audit.set_input_records(total_input_records_count.item())
@@ -456,7 +485,7 @@ def _execute_compaction(
456
485
  )
457
486
 
458
487
  compaction_audit.set_records_deduped(total_dd_record_count.item())
459
-
488
+ compaction_audit.set_records_deleted(total_deleted_record_count.item())
460
489
  mat_results = []
461
490
  for merge_result in merge_results:
462
491
  mat_results.extend(merge_result.materialize_results)
@@ -503,7 +532,7 @@ def _execute_compaction(
503
532
  record_info_msg = (
504
533
  f"Hash bucket records: {total_hb_record_count},"
505
534
  f" Deduped records: {total_dd_record_count}, "
506
- f" Dropped records: {total_dropped_record_count}, "
535
+ f" Deleted records: {total_deleted_record_count}, "
507
536
  f" Materialized records: {merged_delta.meta.record_count}"
508
537
  )
509
538
  logger.info(record_info_msg)
@@ -603,7 +632,6 @@ def _execute_compaction(
603
632
  f"partition-{params.source_partition_locator.partition_values},"
604
633
  f"compacted at: {params.last_stream_position_to_compact},"
605
634
  )
606
-
607
635
  return (
608
636
  compacted_partition,
609
637
  new_round_completion_info,
@@ -22,6 +22,7 @@ class HashBucketInput(Dict):
22
22
  object_store: Optional[IObjectStore] = None,
23
23
  deltacat_storage=unimplemented_deltacat_storage,
24
24
  deltacat_storage_kwargs: Optional[Dict[str, Any]] = None,
25
+ memory_logs_enabled: Optional[bool] = None,
25
26
  ) -> HashBucketInput:
26
27
 
27
28
  result = HashBucketInput()
@@ -36,6 +37,7 @@ class HashBucketInput(Dict):
36
37
  result["object_store"] = object_store
37
38
  result["deltacat_storage"] = deltacat_storage
38
39
  result["deltacat_storage_kwargs"] = deltacat_storage_kwargs or {}
40
+ result["memory_logs_enabled"] = memory_logs_enabled
39
41
 
40
42
  return result
41
43
 
@@ -82,3 +84,7 @@ class HashBucketInput(Dict):
82
84
  @property
83
85
  def deltacat_storage_kwargs(self) -> Optional[Dict[str, Any]]:
84
86
  return self.get("deltacat_storage_kwargs")
87
+
88
+ @property
89
+ def memory_logs_enabled(self) -> Optional[bool]:
90
+ return self.get("memory_logs_enabled")
@@ -46,6 +46,7 @@ class MergeInput(Dict):
46
46
  delete_file_envelopes: Optional[List] = None,
47
47
  deltacat_storage=unimplemented_deltacat_storage,
48
48
  deltacat_storage_kwargs: Optional[Dict[str, Any]] = None,
49
+ memory_logs_enabled: Optional[bool] = None,
49
50
  ) -> MergeInput:
50
51
 
51
52
  result = MergeInput()
@@ -67,6 +68,7 @@ class MergeInput(Dict):
67
68
  result["delete_strategy"] = delete_strategy
68
69
  result["deltacat_storage"] = deltacat_storage
69
70
  result["deltacat_storage_kwargs"] = deltacat_storage_kwargs or {}
71
+ result["memory_logs_enabled"] = memory_logs_enabled
70
72
  return result
71
73
 
72
74
  @property
@@ -133,6 +135,10 @@ class MergeInput(Dict):
133
135
  def deltacat_storage_kwargs(self) -> Optional[Dict[str, Any]]:
134
136
  return self.get("deltacat_storage_kwargs")
135
137
 
138
+ @property
139
+ def memory_logs_enabled(self) -> Optional[bool]:
140
+ return self.get("memory_logs_enabled")
141
+
136
142
  @property
137
143
  def delete_file_envelopes(
138
144
  self,
@@ -142,7 +142,8 @@ def hash_bucket(input: HashBucketInput) -> HashBucketResult:
142
142
  f"({process_util.max_memory/BYTES_PER_GIBIBYTE} GB)"
143
143
  )
144
144
 
145
- process_util.schedule_callback(log_peak_memory, 10)
145
+ if input.memory_logs_enabled:
146
+ process_util.schedule_callback(log_peak_memory, 10)
146
147
 
147
148
  hash_bucket_result, duration = timed_invocation(
148
149
  func=_timed_hash_bucket, input=input
@@ -12,6 +12,7 @@ from uuid import uuid4
12
12
  from deltacat import logs
13
13
  from typing import Callable, Iterator, List, Optional, Tuple
14
14
  from deltacat.compute.compactor_v2.model.merge_result import MergeResult
15
+ from deltacat.compute.compactor_v2.model.merge_file_group import MergeFileGroup
15
16
  from deltacat.compute.compactor.model.materialize_result import MaterializeResult
16
17
  from deltacat.compute.compactor.model.pyarrow_write_result import PyArrowWriteResult
17
18
  from deltacat.compute.compactor import RoundCompletionInfo, DeltaFileEnvelope
@@ -269,6 +270,24 @@ def _has_previous_compacted_table(input: MergeInput, hb_idx: int) -> bool:
269
270
  )
270
271
 
271
272
 
273
+ def _can_copy_by_reference(
274
+ has_delete: bool, merge_file_group: MergeFileGroup, input: MergeInput
275
+ ) -> bool:
276
+ """
277
+ Can copy by reference only if there are no deletes to merge in
278
+ and previous compacted stream id matches that of new stream
279
+ """
280
+ return (
281
+ not has_delete
282
+ and not merge_file_group.dfe_groups
283
+ and input.round_completion_info is not None
284
+ and (
285
+ input.write_to_partition.stream_id
286
+ == input.round_completion_info.compacted_delta_locator.stream_id
287
+ )
288
+ )
289
+
290
+
272
291
  def _flatten_dfe_list(
273
292
  df_envelopes_list: List[List[DeltaFileEnvelope]],
274
293
  ) -> List[DeltaFileEnvelope]:
@@ -349,7 +368,7 @@ def _compact_tables(
349
368
  1. The compacted PyArrow table.
350
369
  2. The total number of records in the incremental data.
351
370
  3. The total number of deduplicated records.
352
- 4. The total number of dropped records due to DELETE operations.
371
+ 4. The total number of deleted records due to DELETE operations.
353
372
  """
354
373
  df_envelopes: List[DeltaFileEnvelope] = _flatten_dfe_list(dfe_list)
355
374
  delete_file_envelopes = input.delete_file_envelopes or []
@@ -479,10 +498,12 @@ def _timed_merge(input: MergeInput) -> MergeResult:
479
498
  assert (
480
499
  input.delete_strategy is not None
481
500
  ), "Merge input missing delete_strategy"
482
- if not has_delete and not merge_file_group.dfe_groups:
483
- # Can copy by reference only if there are no deletes to merge in
501
+ if _can_copy_by_reference(
502
+ has_delete=has_delete, merge_file_group=merge_file_group, input=input
503
+ ):
484
504
  hb_index_copy_by_ref_ids.append(merge_file_group.hb_index)
485
505
  continue
506
+
486
507
  if _has_previous_compacted_table(input, merge_file_group.hb_index):
487
508
  compacted_table = _download_compacted_table(
488
509
  hb_index=merge_file_group.hb_index,
@@ -548,7 +569,8 @@ def merge(input: MergeInput) -> MergeResult:
548
569
  f"({process_util.max_memory/BYTES_PER_GIBIBYTE} GB)"
549
570
  )
550
571
 
551
- process_util.schedule_callback(log_peak_memory, 10)
572
+ if input.memory_logs_enabled:
573
+ process_util.schedule_callback(log_peak_memory, 10)
552
574
 
553
575
  merge_result, duration = timed_invocation(func=_timed_merge, input=input)
554
576
 
@@ -1,7 +1,10 @@
1
1
  import botocore
2
2
  import logging
3
- from typing import Dict, Optional, List, Tuple
3
+ from typing import Dict, Optional, List, Tuple, Any
4
4
  from deltacat import logs
5
+ from deltacat.compute.compactor_v2.model.merge_file_group import (
6
+ LocalMergeFileGroupsProvider,
7
+ )
5
8
  from deltacat.types.media import ContentEncoding, ContentType
6
9
  from deltacat.types.partial_download import PartialParquetParameters
7
10
  from deltacat.storage import (
@@ -15,7 +18,6 @@ from deltacat.compute.compactor_v2.utils.primary_key_index import (
15
18
  hash_group_index_to_hash_bucket_indices,
16
19
  )
17
20
  from deltacat.compute.compactor_v2.constants import (
18
- TOTAL_MEMORY_BUFFER_PERCENTAGE,
19
21
  PARQUET_TO_PYARROW_INFLATION,
20
22
  )
21
23
 
@@ -133,8 +135,10 @@ def hash_bucket_resource_options_provider(
133
135
  item: DeltaAnnotated,
134
136
  previous_inflation: float,
135
137
  average_record_size_bytes: float,
138
+ total_memory_buffer_percentage: int,
136
139
  primary_keys: List[str] = None,
137
140
  ray_custom_resources: Optional[Dict] = None,
141
+ memory_logs_enabled: Optional[bool] = None,
138
142
  **kwargs,
139
143
  ) -> Dict:
140
144
  debug_memory_params = {"hash_bucket_task_index": index}
@@ -189,10 +193,11 @@ def hash_bucket_resource_options_provider(
189
193
  debug_memory_params["average_record_size_bytes"] = average_record_size_bytes
190
194
 
191
195
  # Consider buffer
192
- total_memory = total_memory * (1 + TOTAL_MEMORY_BUFFER_PERCENTAGE / 100.0)
196
+ total_memory = total_memory * (1 + total_memory_buffer_percentage / 100.0)
193
197
  debug_memory_params["total_memory_with_buffer"] = total_memory
194
- logger.debug(
195
- f"[Hash bucket task {index}]: Params used for calculating hash bucketing memory: {debug_memory_params}"
198
+ logger.debug_conditional(
199
+ f"[Hash bucket task {index}]: Params used for calculating hash bucketing memory: {debug_memory_params}",
200
+ memory_logs_enabled,
196
201
  )
197
202
 
198
203
  return get_task_options(0.01, total_memory, ray_custom_resources)
@@ -204,12 +209,14 @@ def merge_resource_options_provider(
204
209
  num_hash_groups: int,
205
210
  hash_group_size_bytes: Dict[int, int],
206
211
  hash_group_num_rows: Dict[int, int],
212
+ total_memory_buffer_percentage: int,
207
213
  round_completion_info: Optional[RoundCompletionInfo] = None,
208
214
  compacted_delta_manifest: Optional[Manifest] = None,
209
215
  ray_custom_resources: Optional[Dict] = None,
210
216
  primary_keys: Optional[List[str]] = None,
211
217
  deltacat_storage=unimplemented_deltacat_storage,
212
218
  deltacat_storage_kwargs: Optional[Dict] = {},
219
+ memory_logs_enabled: Optional[bool] = None,
213
220
  **kwargs,
214
221
  ) -> Dict:
215
222
  debug_memory_params = {"merge_task_index": index}
@@ -224,6 +231,84 @@ def merge_resource_options_provider(
224
231
  pk_size_bytes = data_size
225
232
  incremental_index_array_size = num_rows * 4
226
233
 
234
+ return get_merge_task_options(
235
+ index,
236
+ hb_group_idx,
237
+ data_size,
238
+ pk_size_bytes,
239
+ num_rows,
240
+ num_hash_groups,
241
+ total_memory_buffer_percentage,
242
+ incremental_index_array_size,
243
+ debug_memory_params,
244
+ ray_custom_resources,
245
+ round_completion_info=round_completion_info,
246
+ compacted_delta_manifest=compacted_delta_manifest,
247
+ primary_keys=primary_keys,
248
+ deltacat_storage=deltacat_storage,
249
+ deltacat_storage_kwargs=deltacat_storage_kwargs,
250
+ memory_logs_enabled=memory_logs_enabled,
251
+ )
252
+
253
+
254
+ def local_merge_resource_options_provider(
255
+ estimated_da_size: float,
256
+ estimated_num_rows: int,
257
+ total_memory_buffer_percentage: int,
258
+ round_completion_info: Optional[RoundCompletionInfo] = None,
259
+ compacted_delta_manifest: Optional[Manifest] = None,
260
+ ray_custom_resources: Optional[Dict] = None,
261
+ primary_keys: Optional[List[str]] = None,
262
+ deltacat_storage=unimplemented_deltacat_storage,
263
+ deltacat_storage_kwargs: Optional[Dict] = {},
264
+ memory_logs_enabled: Optional[bool] = None,
265
+ **kwargs,
266
+ ) -> Dict:
267
+ index = hb_group_idx = LocalMergeFileGroupsProvider.LOCAL_HASH_BUCKET_INDEX
268
+ debug_memory_params = {"merge_task_index": index}
269
+
270
+ # upper bound for pk size of incremental
271
+ pk_size_bytes = estimated_da_size
272
+ incremental_index_array_size = estimated_num_rows * 4
273
+
274
+ return get_merge_task_options(
275
+ index=index,
276
+ hb_group_idx=hb_group_idx,
277
+ data_size=estimated_da_size,
278
+ pk_size_bytes=pk_size_bytes,
279
+ num_rows=estimated_num_rows,
280
+ num_hash_groups=1,
281
+ incremental_index_array_size=incremental_index_array_size,
282
+ total_memory_buffer_percentage=total_memory_buffer_percentage,
283
+ debug_memory_params=debug_memory_params,
284
+ ray_custom_resources=ray_custom_resources,
285
+ round_completion_info=round_completion_info,
286
+ compacted_delta_manifest=compacted_delta_manifest,
287
+ primary_keys=primary_keys,
288
+ deltacat_storage=deltacat_storage,
289
+ deltacat_storage_kwargs=deltacat_storage_kwargs,
290
+ memory_logs_enabled=memory_logs_enabled,
291
+ )
292
+
293
+
294
+ def get_merge_task_options(
295
+ index: int,
296
+ hb_group_idx: int,
297
+ data_size: float,
298
+ pk_size_bytes: float,
299
+ num_rows: int,
300
+ num_hash_groups: int,
301
+ total_memory_buffer_percentage: int,
302
+ incremental_index_array_size: int,
303
+ debug_memory_params: Dict[str, Any],
304
+ ray_custom_resources: Optional[Dict],
305
+ round_completion_info: Optional[RoundCompletionInfo] = None,
306
+ compacted_delta_manifest: Optional[Manifest] = None,
307
+ primary_keys: Optional[List[str]] = None,
308
+ deltacat_storage=unimplemented_deltacat_storage,
309
+ deltacat_storage_kwargs: Optional[Dict] = {},
310
+ memory_logs_enabled: Optional[bool] = None,
311
+ ) -> Dict[str, Any]:
227
312
  if (
228
313
  round_completion_info
229
314
  and compacted_delta_manifest
@@ -296,10 +381,11 @@ def merge_resource_options_provider(
296
381
  debug_memory_params["incremental_index_array_size"] = incremental_index_array_size
297
382
  debug_memory_params["total_memory"] = total_memory
298
383
 
299
- total_memory = total_memory * (1 + TOTAL_MEMORY_BUFFER_PERCENTAGE / 100.0)
384
+ total_memory = total_memory * (1 + total_memory_buffer_percentage / 100.0)
300
385
  debug_memory_params["total_memory_with_buffer"] = total_memory
301
- logger.debug(
302
- f"[Merge task {index}]: Params used for calculating merge memory: {debug_memory_params}"
386
+ logger.debug_conditional(
387
+ f"[Merge task {index}]: Params used for calculating merge memory: {debug_memory_params}",
388
+ memory_logs_enabled,
303
389
  )
304
390
 
305
391
  return get_task_options(0.01, total_memory, ray_custom_resources)
@@ -181,15 +181,35 @@ class MemcachedObjectStore(IObjectStore):
181
181
  for chunk_index in range(chunk_count):
182
182
  ref = self._create_ref(uid, ip, chunk_index)
183
183
  chunk = client.get(ref)
184
+ if chunk is None:
185
+ raise ValueError(
186
+ f"Expected uid: {uid}, chunk index: {chunk_index} from client ip: {ip}"
187
+ f" to be non-empty."
188
+ )
184
189
  serialized.extend(chunk)
185
190
 
186
191
  return cloudpickle.loads(serialized)
187
192
 
193
+ def clear(self) -> bool:
194
+ flushed = all(
195
+ [
196
+ self._get_client_by_ip(ip).flush_all(noreply=False)
197
+ for ip in self.storage_node_ips
198
+ ]
199
+ )
200
+ self.client_cache.clear()
201
+
202
+ if flushed:
203
+ logger.info("Successfully cleared cache contents.")
204
+
205
+ return flushed
206
+
188
207
  def close(self) -> None:
189
208
  for client in self.client_cache.values():
190
209
  client.close()
191
210
 
192
211
  self.client_cache.clear()
212
+ logger.info("Successfully closed object store clients.")
193
213
 
194
214
  def _create_ref(self, uid, ip, chunk_index) -> str:
195
215
  return f"{uid}{self.SEPARATOR}{ip}{self.SEPARATOR}{chunk_index}"
@@ -2,7 +2,7 @@ import logging
2
2
  import os
3
3
  import pathlib
4
4
  from logging import FileHandler, Handler, Logger, LoggerAdapter, handlers
5
- from typing import Union
5
+ from typing import Any, Dict, Optional, Union
6
6
 
7
7
  import ray
8
8
  from ray.runtime_context import RuntimeContext
@@ -26,7 +26,32 @@ DEFAULT_MAX_BYTES_PER_LOG = 2 ^ 20 * 256 # 256 MiB
26
26
  DEFAULT_BACKUP_COUNT = 0
27
27
 
28
28
 
29
- class RayRuntimeContextLoggerAdapter(logging.LoggerAdapter):
29
+ class DeltaCATLoggerAdapter(logging.LoggerAdapter):
30
+ """
31
+ Logger Adapter class with additional functionality
32
+ """
33
+
34
+ def __init__(self, logger: Logger, extra: Optional[Dict[str, Any]] = {}):
35
+ super().__init__(logger, extra)
36
+
37
+ def debug_conditional(self, msg, do_print: bool, *args, **kwargs):
38
+ if do_print:
39
+ self.debug(msg, *args, **kwargs)
40
+
41
+ def info_conditional(self, msg, do_print: bool, *args, **kwargs):
42
+ if do_print:
43
+ self.info(msg, *args, **kwargs)
44
+
45
+ def warning_conditional(self, msg, do_print: bool, *args, **kwargs):
46
+ if do_print:
47
+ self.warning(msg, *args, **kwargs)
48
+
49
+ def error_conditional(self, msg, do_print: bool, *args, **kwargs):
50
+ if do_print:
51
+ self.error(msg, *args, **kwargs)
52
+
53
+
54
+ class RayRuntimeContextLoggerAdapter(DeltaCATLoggerAdapter):
30
55
  """
31
56
  Logger Adapter for injecting Ray Runtime Context into logging messages.
32
57
  """
@@ -147,6 +172,8 @@ def _configure_logger(
147
172
  ray_runtime_ctx = ray.get_runtime_context()
148
173
  if ray_runtime_ctx.worker.connected:
149
174
  logger = RayRuntimeContextLoggerAdapter(logger, ray_runtime_ctx)
175
+ else:
176
+ logger = DeltaCATLoggerAdapter(logger)
150
177
 
151
178
  return logger
152
179
 
@@ -72,6 +72,7 @@ class TestCompactPartitionParams(unittest.TestCase):
72
72
  "partitionValues": [],
73
73
  "partitionId": "79612ea39ac5493eae925abe60767d42",
74
74
  },
75
+ "memory_logs_enabled": True,
75
76
  "metrics_config": MetricsConfig("us-east-1", MetricsTarget.CLOUDWATCH_EMF),
76
77
  }
77
78
 
@@ -135,6 +136,10 @@ class TestCompactPartitionParams(unittest.TestCase):
135
136
  json.loads(serialized_params)["destination_partition_locator"]
136
137
  == params.destination_partition_locator
137
138
  )
139
+ assert (
140
+ json.loads(serialized_params)["memory_logs_enabled"]
141
+ == params.memory_logs_enabled
142
+ )
138
143
  assert (
139
144
  json.loads(serialized_params)["metrics_config"]["metrics_target"]
140
145
  == params.metrics_config.metrics_target
@@ -25,6 +25,10 @@ class MockPyMemcacheClient:
25
25
  def get(self, key, *args, **kwargs):
26
26
  return self.store.get(key)
27
27
 
28
+ def flush_all(self, *args, **kwargs):
29
+ for key, value in self.store.items():
30
+ self.store[key] = None
31
+
28
32
 
29
33
  class TestMemcachedObjectStore(unittest.TestCase):
30
34
 
@@ -192,3 +196,18 @@ class TestMemcachedObjectStore(unittest.TestCase):
192
196
  # assert
193
197
  result = self.object_store.get(ref)
194
198
  self.assertEqual(result, self.TEST_VALUE_LARGE)
199
+
200
+ @mock.patch("deltacat.io.memcached_object_store.Client")
201
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
202
+ def test_clear_sanity(self, mock_retrying_client, mock_client):
203
+ # setup
204
+ mock_client.return_value = MockPyMemcacheClient()
205
+ mock_retrying_client.return_value = mock_client.return_value
206
+
207
+ # action
208
+ ref = self.object_store.put(self.TEST_VALUE_LARGE)
209
+ self.object_store.clear()
210
+
211
+ # assert
212
+ with self.assertRaises(ValueError):
213
+ self.object_store.get(ref)
@@ -36,13 +36,15 @@ class ClusterUtilization:
36
36
  used_resources[key] = cluster_resources[key] - available_resources[key]
37
37
 
38
38
  self.total_memory_bytes = cluster_resources.get("memory")
39
- self.used_memory_bytes = used_resources.get("memory")
39
+ self.used_memory_bytes = used_resources.get("memory", 0.0)
40
40
  self.total_cpu = cluster_resources.get("CPU")
41
- self.used_cpu = used_resources.get("CPU")
41
+ self.used_cpu = used_resources.get("CPU", 0)
42
42
  self.total_object_store_memory_bytes = cluster_resources.get(
43
43
  "object_store_memory"
44
44
  )
45
- self.used_object_store_memory_bytes = used_resources.get("object_store_memory")
45
+ self.used_object_store_memory_bytes = used_resources.get(
46
+ "object_store_memory", 0.0
47
+ )
46
48
  self.used_memory_percent = (
47
49
  self.used_memory_bytes / self.total_memory_bytes
48
50
  ) * 100
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.0
3
+ Version: 1.1.1
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
File without changes