deltacat 1.1.33__tar.gz → 1.1.34__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. {deltacat-1.1.33/deltacat.egg-info → deltacat-1.1.34}/PKG-INFO +1 -1
  2. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/__init__.py +1 -1
  3. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/utils/content_type_params.py +17 -0
  4. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/utils/io.py +1 -1
  5. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/resource_estimation/delta.py +19 -1
  6. deltacat-1.1.34/deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +253 -0
  7. {deltacat-1.1.33 → deltacat-1.1.34/deltacat.egg-info}/PKG-INFO +1 -1
  8. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat.egg-info/SOURCES.txt +1 -0
  9. {deltacat-1.1.33 → deltacat-1.1.34}/LICENSE +0 -0
  10. {deltacat-1.1.33 → deltacat-1.1.34}/MANIFEST.in +0 -0
  11. {deltacat-1.1.33 → deltacat-1.1.34}/README.md +0 -0
  12. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/aws/__init__.py +0 -0
  13. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/aws/clients.py +0 -0
  14. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/aws/constants.py +0 -0
  15. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/aws/redshift/__init__.py +0 -0
  16. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/aws/redshift/model/__init__.py +0 -0
  17. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/aws/redshift/model/manifest.py +0 -0
  18. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/aws/s3u.py +0 -0
  19. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/benchmarking/__init__.py +0 -0
  20. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  21. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/benchmarking/conftest.py +0 -0
  22. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/catalog/__init__.py +0 -0
  23. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
  24. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/catalog/delegate.py +0 -0
  25. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/catalog/interface.py +0 -0
  26. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/catalog/model/__init__.py +0 -0
  27. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/catalog/model/catalog.py +0 -0
  28. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/catalog/model/table_definition.py +0 -0
  29. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/__init__.py +0 -0
  30. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/__init__.py +0 -0
  31. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/compaction_session.py +0 -0
  32. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/__init__.py +0 -0
  33. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
  34. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
  35. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  36. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  37. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  38. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  39. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  40. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  41. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  42. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  43. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  44. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  45. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  46. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  47. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/repartition_session.py +0 -0
  48. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/steps/__init__.py +0 -0
  49. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  50. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  51. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/steps/materialize.py +0 -0
  52. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/steps/repartition.py +0 -0
  53. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/utils/__init__.py +0 -0
  54. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/utils/io.py +0 -0
  55. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  56. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/utils/round_completion_file.py +0 -0
  57. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  58. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  59. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/__init__.py +0 -0
  60. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
  61. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/constants.py +0 -0
  62. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  63. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  64. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  65. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  66. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  67. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  68. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  69. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
  70. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
  71. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  72. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  73. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
  74. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  75. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
  76. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/private/compaction_utils.py +0 -0
  77. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  78. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
  79. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
  80. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  81. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  82. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  83. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  84. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
  85. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
  86. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/merge_on_read/__init__.py +0 -0
  87. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/merge_on_read/daft.py +0 -0
  88. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
  89. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
  90. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
  91. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
  92. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/resource_estimation/__init__.py +0 -0
  93. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/resource_estimation/manifest.py +0 -0
  94. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/resource_estimation/model.py +0 -0
  95. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/resource_estimation/parquet.py +0 -0
  96. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/stats/__init__.py +0 -0
  97. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/stats/models/__init__.py +0 -0
  98. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  99. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/stats/models/delta_stats.py +0 -0
  100. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  101. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  102. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/stats/models/stats_result.py +0 -0
  103. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/compute/stats/types.py +0 -0
  104. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/constants.py +0 -0
  105. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/exceptions.py +0 -0
  106. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/__init__.py +0 -0
  107. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/aws/__init__.py +0 -0
  108. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/aws/redshift/__init__.py +0 -0
  109. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/dataset.py +0 -0
  110. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/file_object_store.py +0 -0
  111. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/memcached_object_store.py +0 -0
  112. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/object_store.py +0 -0
  113. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/ray_plasma_object_store.py +0 -0
  114. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/read_api.py +0 -0
  115. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/redis_object_store.py +0 -0
  116. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/io/s3_object_store.py +0 -0
  117. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/logs.py +0 -0
  118. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/__init__.py +0 -0
  119. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/interface.py +0 -0
  120. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/__init__.py +0 -0
  121. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/delete_parameters.py +0 -0
  122. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/delta.py +0 -0
  123. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/list_result.py +0 -0
  124. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/locator.py +0 -0
  125. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/namespace.py +0 -0
  126. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/partition.py +0 -0
  127. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/partition_spec.py +0 -0
  128. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/sort_key.py +0 -0
  129. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/stream.py +0 -0
  130. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/table.py +0 -0
  131. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/table_version.py +0 -0
  132. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/transform.py +0 -0
  133. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/storage/model/types.py +0 -0
  134. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/__init__.py +0 -0
  135. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/aws/__init__.py +0 -0
  136. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/aws/test_clients.py +0 -0
  137. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/aws/test_s3u.py +0 -0
  138. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/catalog/__init__.py +0 -0
  139. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
  140. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/__init__.py +0 -0
  141. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
  142. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
  143. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
  144. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
  145. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor/__init__.py +0 -0
  146. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
  147. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  148. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
  149. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  150. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -0
  151. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
  152. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
  153. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  154. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
  155. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -0
  156. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  157. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/resource_estimation/__init__.py +0 -0
  158. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/resource_estimation/data/__init__.py +0 -0
  159. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/resource_estimation/test_delta.py +0 -0
  160. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
  161. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
  162. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +0 -0
  163. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
  164. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/test_compact_partition_rebase.py +0 -0
  165. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
  166. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/test_util_common.py +0 -0
  167. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/test_util_constant.py +0 -0
  168. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
  169. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/io/__init__.py +0 -0
  170. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
  171. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/io/test_file_object_store.py +0 -0
  172. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/io/test_memcached_object_store.py +0 -0
  173. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
  174. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/io/test_redis_object_store.py +0 -0
  175. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/io/test_s3_object_store.py +0 -0
  176. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
  177. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
  178. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/test_exceptions.py +0 -0
  179. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/test_logs.py +0 -0
  180. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/test_utils/__init__.py +0 -0
  181. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/test_utils/constants.py +0 -0
  182. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/test_utils/pyarrow.py +0 -0
  183. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/test_utils/storage.py +0 -0
  184. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/test_utils/utils.py +0 -0
  185. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/__init__.py +0 -0
  186. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/data/__init__.py +0 -0
  187. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
  188. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
  189. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
  190. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  191. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/test_daft.py +0 -0
  192. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/test_metrics.py +0 -0
  193. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/test_placement.py +0 -0
  194. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/test_pyarrow.py +0 -0
  195. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  196. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/tests/utils/test_resources.py +0 -0
  197. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/types/__init__.py +0 -0
  198. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/types/media.py +0 -0
  199. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/types/partial_download.py +0 -0
  200. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/types/tables.py +0 -0
  201. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/__init__.py +0 -0
  202. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/arguments.py +0 -0
  203. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/cloudpickle.py +0 -0
  204. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/common.py +0 -0
  205. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/daft.py +0 -0
  206. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/metrics.py +0 -0
  207. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/numpy.py +0 -0
  208. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/pandas.py +0 -0
  209. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/performance.py +0 -0
  210. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/placement.py +0 -0
  211. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/pyarrow.py +0 -0
  212. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/ray_utils/__init__.py +0 -0
  213. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/ray_utils/collections.py +0 -0
  214. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/ray_utils/concurrency.py +0 -0
  215. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/ray_utils/dataset.py +0 -0
  216. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/ray_utils/performance.py +0 -0
  217. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/ray_utils/runtime.py +0 -0
  218. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/resources.py +0 -0
  219. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/s3fs.py +0 -0
  220. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat/utils/schema.py +0 -0
  221. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat.egg-info/dependency_links.txt +0 -0
  222. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat.egg-info/requires.txt +0 -0
  223. {deltacat-1.1.33 → deltacat-1.1.34}/deltacat.egg-info/top_level.txt +0 -0
  224. {deltacat-1.1.33 → deltacat-1.1.34}/setup.cfg +0 -0
  225. {deltacat-1.1.33 → deltacat-1.1.34}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.33
3
+ Version: 1.1.34
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.33"
47
+ __version__ = "1.1.34"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -5,6 +5,7 @@ from deltacat.compute.compactor_v2.constants import (
5
5
  TASK_MAX_PARALLELISM,
6
6
  MAX_PARQUET_METADATA_SIZE,
7
7
  )
8
+ from deltacat.utils.common import ReadKwargsProvider
8
9
  from deltacat.utils.ray_utils.concurrency import invoke_parallel
9
10
  from deltacat import logs
10
11
  from deltacat.storage import (
@@ -75,11 +76,21 @@ def _download_parquet_metadata_for_manifest_entry(
75
76
  entry_index: int,
76
77
  deltacat_storage: unimplemented_deltacat_storage,
77
78
  deltacat_storage_kwargs: Optional[Dict[Any, Any]] = {},
79
+ file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
78
80
  ) -> Dict[str, Any]:
81
+ logger.info(
82
+ f"Downloading the parquet metadata for Delta with locator {delta.locator} and entry_index: {entry_index}"
83
+ )
84
+ if "file_reader_kwargs_provider" in deltacat_storage_kwargs:
85
+ logger.info(
86
+ "'file_reader_kwargs_provider' is also present in deltacat_storage_kwargs. Removing to prevent multiple values for keyword argument"
87
+ )
88
+ deltacat_storage_kwargs.pop("file_reader_kwargs_provider")
79
89
  pq_file = deltacat_storage.download_delta_manifest_entry(
80
90
  delta,
81
91
  entry_index=entry_index,
82
92
  table_type=TableType.PYARROW_PARQUET,
93
+ file_reader_kwargs_provider=file_reader_kwargs_provider,
83
94
  **deltacat_storage_kwargs,
84
95
  )
85
96
 
@@ -97,11 +108,15 @@ def append_content_type_params(
97
108
  max_parquet_meta_size_bytes: Optional[int] = MAX_PARQUET_METADATA_SIZE,
98
109
  deltacat_storage=unimplemented_deltacat_storage,
99
110
  deltacat_storage_kwargs: Optional[Dict[str, Any]] = {},
111
+ file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
100
112
  ) -> bool:
101
113
  """
102
114
  This operation appends content type params into the delta entry. Note
103
115
  that this operation can be time consuming, hence we cache it in a Ray actor.
104
116
  """
117
+ logger.info(
118
+ f"Appending the content type params for Delta with locator {delta.locator}..."
119
+ )
105
120
 
106
121
  if not delta.meta:
107
122
  logger.warning(f"Delta with locator {delta.locator} doesn't contain meta.")
@@ -159,6 +174,7 @@ def append_content_type_params(
159
174
 
160
175
  def input_provider(index, item) -> Dict:
161
176
  return {
177
+ "file_reader_kwargs_provider": file_reader_kwargs_provider,
162
178
  "deltacat_storage_kwargs": deltacat_storage_kwargs,
163
179
  "deltacat_storage": deltacat_storage,
164
180
  "delta": delta,
@@ -168,6 +184,7 @@ def append_content_type_params(
168
184
  logger.info(
169
185
  f"Downloading parquet meta for {len(entry_indices_to_download)} manifest entries..."
170
186
  )
187
+
171
188
  pq_files_promise = invoke_parallel(
172
189
  entry_indices_to_download,
173
190
  ray_task=_download_parquet_metadata_for_manifest_entry,
@@ -101,7 +101,6 @@ def create_uniform_input_deltas(
101
101
  delta_manifest_entries_count = 0
102
102
  estimated_da_bytes = 0
103
103
  input_da_list = []
104
-
105
104
  for delta in input_deltas:
106
105
  if (
107
106
  compact_partition_params.enable_input_split
@@ -118,6 +117,7 @@ def create_uniform_input_deltas(
118
117
  deltacat_storage_kwargs=deltacat_storage_kwargs,
119
118
  task_max_parallelism=compact_partition_params.task_max_parallelism,
120
119
  max_parquet_meta_size_bytes=compact_partition_params.max_parquet_meta_size_bytes,
120
+ file_reader_kwargs_provider=compact_partition_params.read_kwargs_provider,
121
121
  )
122
122
 
123
123
  manifest_entries = delta.manifest.entries
@@ -93,11 +93,29 @@ def _estimate_resources_required_to_process_delta_using_type_params(
93
93
  on_disk_size_bytes=delta.meta.content_length,
94
94
  ),
95
95
  )
96
-
96
+ file_reader_kwargs_provider = kwargs.get(
97
+ "file_reader_kwargs_provider"
98
+ ) or deltacat_storage_kwargs.get("file_reader_kwargs_provider")
99
+
100
+ """
101
+ NOTE: The file_reader_kwargs_provider parameter can be passed in two ways:
102
+ 1. Nested within deltacat_storage_kwargs during resource estimation
103
+ 2. As a top-level attribute of CompactPartitionsParams during compaction
104
+
105
+ This creates an inconsistent parameter path between resource estimation and compaction flows.
106
+ As a long-term solution, this should be unified to use a single consistent path (either always
107
+ nested in deltacat_storage_kwargs or always as a top-level parameter).
108
+
109
+ For now, this implementation handles the resource estimation case by:
110
+ 1. First checking for file_reader_kwargs_provider as a direct kwarg
111
+ 2. Falling back to deltacat_storage_kwargs if not found
112
+ This approach maintains backward compatibility by not modifying the DELTA_RESOURCE_ESTIMATION_FUNCTIONS signatures.
113
+ """
97
114
  appended = append_content_type_params(
98
115
  delta=delta,
99
116
  deltacat_storage=deltacat_storage,
100
117
  deltacat_storage_kwargs=deltacat_storage_kwargs,
118
+ file_reader_kwargs_provider=file_reader_kwargs_provider,
101
119
  )
102
120
 
103
121
  if not appended:
@@ -0,0 +1,253 @@
1
+ import ray
2
+ from typing import Dict, Any
3
+ from deltacat.types.media import ContentType
4
+ import pyarrow as pa
5
+
6
+ import pytest
7
+ import deltacat.tests.local_deltacat_storage as ds
8
+ import os
9
+ from deltacat.tests.test_utils.pyarrow import (
10
+ stage_partition_from_file_paths,
11
+ commit_delta_to_staged_partition,
12
+ )
13
+ from deltacat.utils.pyarrow import (
14
+ ReadKwargsProviderPyArrowCsvPureUtf8,
15
+ ReadKwargsProviderPyArrowSchemaOverride,
16
+ )
17
+
18
+ DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
19
+ "db_file_path",
20
+ "deltacat/tests/local_deltacat_storage/db_test.sqlite",
21
+ )
22
+
23
+
24
+ class TestContentTypeParams:
25
+ TEST_NAMESPACE = "test_content_type_params"
26
+ TEST_ENTRY_INDEX = 0
27
+ DEDUPE_BASE_COMPACTED_TABLE_STRING_PK = "deltacat/tests/compute/compactor_v2/steps/data/dedupe_base_compacted_table_string_pk.csv"
28
+ DEDUPE_NO_DUPLICATION_STRING_PK = "deltacat/tests/compute/compactor_v2/steps/data/dedupe_table_no_duplication_string_pk.csv"
29
+
30
+ @pytest.fixture(scope="module", autouse=True)
31
+ def setup_ray_cluster(self):
32
+ ray.init(local_mode=True, ignore_reinit_error=True)
33
+ yield
34
+ ray.shutdown()
35
+
36
+ @pytest.fixture(scope="function")
37
+ def local_deltacat_storage_kwargs(self, request: pytest.FixtureRequest):
38
+ # see deltacat/tests/local_deltacat_storage/README.md for documentation
39
+ kwargs_for_local_deltacat_storage: Dict[str, Any] = {
40
+ DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
41
+ }
42
+ yield kwargs_for_local_deltacat_storage
43
+ if os.path.exists(DATABASE_FILE_PATH_VALUE):
44
+ os.remove(DATABASE_FILE_PATH_VALUE)
45
+
46
+ def test__download_parquet_metadata_for_manifest_entry_sanity(
47
+ self, local_deltacat_storage_kwargs
48
+ ):
49
+ from deltacat.compute.compactor_v2.utils.content_type_params import (
50
+ _download_parquet_metadata_for_manifest_entry,
51
+ )
52
+ from deltacat.types.partial_download import PartialParquetParameters
53
+
54
+ partition = stage_partition_from_file_paths(
55
+ self.TEST_NAMESPACE,
56
+ [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
57
+ **local_deltacat_storage_kwargs,
58
+ )
59
+ test_delta = commit_delta_to_staged_partition(
60
+ partition,
61
+ [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
62
+ **local_deltacat_storage_kwargs,
63
+ )
64
+ test_entry_index = 0
65
+ obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
66
+ test_delta, test_entry_index, ds, local_deltacat_storage_kwargs
67
+ )
68
+ parquet_metadata = ray.get(obj_ref)
69
+ partial_parquet_params = parquet_metadata["partial_parquet_params"]
70
+
71
+ # validate
72
+ assert isinstance(parquet_metadata, dict)
73
+ assert "entry_index" in parquet_metadata
74
+ assert "partial_parquet_params" in parquet_metadata
75
+ assert parquet_metadata["entry_index"] == test_entry_index
76
+ assert isinstance(partial_parquet_params, PartialParquetParameters)
77
+
78
+ assert partial_parquet_params.row_groups_to_download == [0]
79
+ assert partial_parquet_params.num_row_groups == 1
80
+ assert partial_parquet_params.num_rows == 8
81
+ assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
82
+ assert partial_parquet_params.in_memory_size_bytes > 0
83
+
84
+ pq_metadata = partial_parquet_params.pq_metadata
85
+ assert pq_metadata.num_columns == 2
86
+ assert pq_metadata.num_rows == 8
87
+ assert pq_metadata.num_row_groups == 1
88
+ assert pq_metadata.format_version == "2.6"
89
+
90
+ assert (
91
+ test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
92
+ == ContentType.PARQUET.value
93
+ )
94
+
95
+ @pytest.mark.parametrize(
96
+ "read_kwargs_provider,expected_values",
97
+ [
98
+ (
99
+ ReadKwargsProviderPyArrowCsvPureUtf8(),
100
+ {
101
+ "num_rows": 6,
102
+ "num_columns": 2,
103
+ "num_row_groups": 1,
104
+ "format_version": "2.6",
105
+ "column_types": [pa.string(), pa.string()],
106
+ },
107
+ ),
108
+ (
109
+ ReadKwargsProviderPyArrowSchemaOverride(
110
+ schema=pa.schema(
111
+ [
112
+ ("id", pa.string()),
113
+ ("value", pa.int64()),
114
+ ]
115
+ )
116
+ ),
117
+ {
118
+ "num_rows": 6,
119
+ "num_columns": 2,
120
+ "num_row_groups": 1,
121
+ "format_version": "2.6",
122
+ "column_types": [pa.string(), pa.int64()],
123
+ },
124
+ ),
125
+ (
126
+ ReadKwargsProviderPyArrowSchemaOverride(
127
+ schema=None,
128
+ pq_coerce_int96_timestamp_unit="ms",
129
+ parquet_reader_type="daft",
130
+ ),
131
+ {
132
+ "num_rows": 6,
133
+ "num_columns": 2,
134
+ "num_row_groups": 1,
135
+ "format_version": "2.6",
136
+ "column_types": None, # Will use default type inference
137
+ },
138
+ ),
139
+ ],
140
+ )
141
+ def test__download_parquet_metadata_for_manifest_entry_with_read_kwargs_provider(
142
+ self, read_kwargs_provider, expected_values, local_deltacat_storage_kwargs
143
+ ):
144
+ from deltacat.compute.compactor_v2.utils.content_type_params import (
145
+ _download_parquet_metadata_for_manifest_entry,
146
+ )
147
+
148
+ partition = stage_partition_from_file_paths(
149
+ self.TEST_NAMESPACE,
150
+ [self.DEDUPE_NO_DUPLICATION_STRING_PK],
151
+ **local_deltacat_storage_kwargs,
152
+ )
153
+ test_delta = commit_delta_to_staged_partition(
154
+ partition,
155
+ [self.DEDUPE_NO_DUPLICATION_STRING_PK],
156
+ **local_deltacat_storage_kwargs,
157
+ )
158
+ test_entry_index = 0
159
+ read_kwargs_provider = ReadKwargsProviderPyArrowCsvPureUtf8
160
+ obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
161
+ test_delta,
162
+ test_entry_index,
163
+ ds,
164
+ local_deltacat_storage_kwargs,
165
+ read_kwargs_provider,
166
+ )
167
+ parquet_metadata = ray.get(obj_ref)
168
+ partial_parquet_params = parquet_metadata["partial_parquet_params"]
169
+
170
+ # validate
171
+ assert isinstance(parquet_metadata, dict)
172
+ assert "entry_index" in parquet_metadata
173
+ assert "partial_parquet_params" in parquet_metadata
174
+ assert parquet_metadata["entry_index"] == self.TEST_ENTRY_INDEX
175
+
176
+ assert partial_parquet_params.row_groups_to_download == [0]
177
+ assert (
178
+ partial_parquet_params.num_row_groups == expected_values["num_row_groups"]
179
+ )
180
+ assert partial_parquet_params.num_rows == expected_values["num_rows"]
181
+ assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
182
+ assert partial_parquet_params.in_memory_size_bytes > 0
183
+
184
+ pq_metadata = partial_parquet_params.pq_metadata
185
+ assert pq_metadata.num_columns == expected_values["num_columns"]
186
+ assert pq_metadata.num_rows == expected_values["num_rows"]
187
+ assert pq_metadata.num_row_groups == expected_values["num_row_groups"]
188
+ assert pq_metadata.format_version == expected_values["format_version"]
189
+
190
+ assert (
191
+ test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
192
+ == ContentType.PARQUET.value
193
+ )
194
+
195
+ def test_download_parquet_metadata_for_manifest_entry_file_reader_kwargs_present_top_level_and_deltacat_storage_kwarg(
196
+ self, local_deltacat_storage_kwargs, caplog
197
+ ):
198
+ from deltacat.compute.compactor_v2.utils.content_type_params import (
199
+ _download_parquet_metadata_for_manifest_entry,
200
+ )
201
+ from deltacat.types.partial_download import PartialParquetParameters
202
+
203
+ test_file_reader_kwargs_provider = ReadKwargsProviderPyArrowCsvPureUtf8()
204
+
205
+ local_deltacat_storage_kwargs[
206
+ "file_reader_kwargs_provider"
207
+ ] = ReadKwargsProviderPyArrowCsvPureUtf8()
208
+
209
+ partition = stage_partition_from_file_paths(
210
+ self.TEST_NAMESPACE,
211
+ [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
212
+ **local_deltacat_storage_kwargs,
213
+ )
214
+ test_delta = commit_delta_to_staged_partition(
215
+ partition,
216
+ [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
217
+ **local_deltacat_storage_kwargs,
218
+ )
219
+
220
+ test_entry_index = 0
221
+ obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
222
+ test_delta,
223
+ test_entry_index,
224
+ ds,
225
+ local_deltacat_storage_kwargs,
226
+ test_file_reader_kwargs_provider,
227
+ )
228
+ parquet_metadata = ray.get(obj_ref)
229
+ partial_parquet_params = parquet_metadata["partial_parquet_params"]
230
+
231
+ # validate
232
+ assert isinstance(parquet_metadata, dict)
233
+ assert "entry_index" in parquet_metadata
234
+ assert "partial_parquet_params" in parquet_metadata
235
+ assert parquet_metadata["entry_index"] == test_entry_index
236
+ assert isinstance(partial_parquet_params, PartialParquetParameters)
237
+
238
+ assert partial_parquet_params.row_groups_to_download == [0]
239
+ assert partial_parquet_params.num_row_groups == 1
240
+ assert partial_parquet_params.num_rows == 8
241
+ assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
242
+ assert partial_parquet_params.in_memory_size_bytes > 0
243
+
244
+ pq_metadata = partial_parquet_params.pq_metadata
245
+ assert pq_metadata.num_columns == 2
246
+ assert pq_metadata.num_rows == 8
247
+ assert pq_metadata.num_row_groups == 1
248
+ assert pq_metadata.format_version == "2.6"
249
+
250
+ assert (
251
+ test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
252
+ == ContentType.PARQUET.value
253
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.33
3
+ Version: 1.1.34
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -164,6 +164,7 @@ deltacat/tests/compute/compactor_v2/__init__.py
164
164
  deltacat/tests/compute/compactor_v2/test_compaction_session.py
165
165
  deltacat/tests/compute/compactor_v2/test_hashlib.py
166
166
  deltacat/tests/compute/compactor_v2/utils/__init__.py
167
+ deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py
167
168
  deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py
168
169
  deltacat/tests/compute/compactor_v2/utils/test_task_options.py
169
170
  deltacat/tests/compute/resource_estimation/__init__.py
File without changes
File without changes
File without changes
File without changes