deltacat 1.1.9__tar.gz → 1.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. deltacat-1.1.11/PKG-INFO +50 -0
  2. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/__init__.py +1 -1
  3. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/redshift/model/manifest.py +16 -0
  4. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/s3u.py +19 -13
  5. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/compaction_session.py +5 -1
  6. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/repartition_session.py +1 -0
  7. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/round_completion_file.py +39 -9
  8. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/compaction_session.py +15 -11
  9. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/constants.py +3 -0
  10. deltacat-1.1.9/deltacat/compute/compactor_v2/model/compaction_session.py → deltacat-1.1.11/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +1 -2
  11. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/primary_key_index.py +1 -1
  12. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/exceptions.py +5 -2
  13. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/dataset.py +5 -17
  14. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/__init__.py +24 -0
  15. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/interface.py +42 -6
  16. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/delta.py +23 -3
  17. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/partition.py +6 -7
  18. deltacat-1.1.11/deltacat/storage/model/partition_spec.py +71 -0
  19. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/stream.py +38 -1
  20. deltacat-1.1.11/deltacat/storage/model/transform.py +127 -0
  21. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/aws/test_s3u.py +2 -0
  22. deltacat-1.1.11/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +231 -0
  23. deltacat-1.1.11/deltacat/tests/compute/compactor_v2/test_compaction_session.py +255 -0
  24. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_compact_partition_rebase.py +1 -1
  25. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_util_common.py +19 -4
  26. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/local_deltacat_storage/__init__.py +83 -19
  27. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/pyarrow.py +4 -1
  28. deltacat-1.1.11/deltacat/tests/utils/ray_utils/test_dataset.py +66 -0
  29. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/numpy.py +3 -3
  30. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/pandas.py +3 -3
  31. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/pyarrow.py +3 -3
  32. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/dataset.py +7 -7
  33. deltacat-1.1.11/deltacat.egg-info/PKG-INFO +50 -0
  34. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat.egg-info/SOURCES.txt +5 -2
  35. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat.egg-info/requires.txt +2 -2
  36. {deltacat-1.1.9 → deltacat-1.1.11}/setup.py +4 -4
  37. deltacat-1.1.9/PKG-INFO +0 -47
  38. deltacat-1.1.9/deltacat/io/aws/redshift/redshift_datasource.py +0 -578
  39. deltacat-1.1.9/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -90
  40. deltacat-1.1.9/deltacat.egg-info/PKG-INFO +0 -47
  41. {deltacat-1.1.9 → deltacat-1.1.11}/LICENSE +0 -0
  42. {deltacat-1.1.9 → deltacat-1.1.11}/MANIFEST.in +0 -0
  43. {deltacat-1.1.9 → deltacat-1.1.11}/README.md +0 -0
  44. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/__init__.py +0 -0
  45. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/clients.py +0 -0
  46. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/constants.py +0 -0
  47. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/redshift/__init__.py +0 -0
  48. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/aws/redshift/model/__init__.py +0 -0
  49. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/benchmarking/__init__.py +0 -0
  50. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  51. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/benchmarking/conftest.py +0 -0
  52. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/__init__.py +0 -0
  53. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/default_catalog_impl/__init__.py +0 -0
  54. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/delegate.py +0 -0
  55. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/interface.py +0 -0
  56. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/model/__init__.py +0 -0
  57. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/model/catalog.py +0 -0
  58. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/catalog/model/table_definition.py +0 -0
  59. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/__init__.py +0 -0
  60. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/__init__.py +0 -0
  61. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/__init__.py +0 -0
  62. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
  63. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
  64. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  65. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  66. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  67. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  68. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  69. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  70. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  71. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  72. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  73. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  74. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  75. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  76. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/__init__.py +0 -0
  77. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  78. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  79. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/materialize.py +0 -0
  80. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/steps/repartition.py +0 -0
  81. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/__init__.py +0 -0
  82. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/io.py +0 -0
  83. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  84. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  85. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  86. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/__init__.py +0 -0
  87. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  88. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  89. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  90. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  91. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  92. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  93. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  94. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
  95. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  96. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  97. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
  98. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  99. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  100. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
  101. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
  102. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  103. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
  104. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  105. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  106. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/io.py +0 -0
  107. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  108. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
  109. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/__init__.py +0 -0
  110. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/daft.py +0 -0
  111. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
  112. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
  113. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
  114. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
  115. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/__init__.py +0 -0
  116. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/config/__init__.py +0 -0
  117. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/meta_stats.py +0 -0
  118. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/model/__init__.py +0 -0
  119. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/model/partition_stats_dict.py +0 -0
  120. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/model/stats_cluster_size_estimator.py +0 -0
  121. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/stats.py +0 -0
  122. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/__init__.py +0 -0
  123. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/constants.py +0 -0
  124. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/io.py +0 -0
  125. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +0 -0
  126. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/metastats/utils/ray_utils.py +0 -0
  127. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/__init__.py +0 -0
  128. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/basic.py +0 -0
  129. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/__init__.py +0 -0
  130. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  131. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/delta_stats.py +0 -0
  132. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  133. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  134. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/models/stats_result.py +0 -0
  135. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/types.py +0 -0
  136. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/utils/__init__.py +0 -0
  137. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/utils/intervals.py +0 -0
  138. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/utils/io.py +0 -0
  139. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/compute/stats/utils/manifest_stats_file.py +0 -0
  140. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/constants.py +0 -0
  141. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/__init__.py +0 -0
  142. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/aws/__init__.py +0 -0
  143. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/aws/redshift/__init__.py +0 -0
  144. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/file_object_store.py +0 -0
  145. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/memcached_object_store.py +0 -0
  146. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/object_store.py +0 -0
  147. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/ray_plasma_object_store.py +0 -0
  148. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/read_api.py +0 -0
  149. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/redis_object_store.py +0 -0
  150. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/io/s3_object_store.py +0 -0
  151. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/logs.py +0 -0
  152. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/__init__.py +0 -0
  153. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/delete_parameters.py +0 -0
  154. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/list_result.py +0 -0
  155. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/locator.py +0 -0
  156. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/namespace.py +0 -0
  157. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/sort_key.py +0 -0
  158. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/table.py +0 -0
  159. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/table_version.py +0 -0
  160. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/storage/model/types.py +0 -0
  161. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/__init__.py +0 -0
  162. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/aws/__init__.py +0 -0
  163. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/aws/test_clients.py +0 -0
  164. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/catalog/__init__.py +0 -0
  165. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
  166. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/__init__.py +0 -0
  167. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
  168. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
  169. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
  170. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/__init__.py +0 -0
  171. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
  172. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  173. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
  174. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  175. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
  176. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  177. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
  178. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  179. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
  180. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
  181. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
  182. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_util_constant.py +0 -0
  183. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
  184. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/__init__.py +0 -0
  185. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_cloudpickle_bug_fix.py +0 -0
  186. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_file_object_store.py +0 -0
  187. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_memcached_object_store.py +0 -0
  188. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_ray_plasma_object_store.py +0 -0
  189. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_redis_object_store.py +0 -0
  190. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/io/test_s3_object_store.py +0 -0
  191. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
  192. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/stats/__init__.py +0 -0
  193. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/stats/test_intervals.py +0 -0
  194. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_exceptions.py +0 -0
  195. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_logs.py +0 -0
  196. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/__init__.py +0 -0
  197. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/constants.py +0 -0
  198. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/storage.py +0 -0
  199. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/test_utils/utils.py +0 -0
  200. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/__init__.py +0 -0
  201. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/data/__init__.py +0 -0
  202. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
  203. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
  204. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  205. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_daft.py +0 -0
  206. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_metrics.py +0 -0
  207. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_placement.py +0 -0
  208. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_pyarrow.py +0 -0
  209. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  210. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/tests/utils/test_resources.py +0 -0
  211. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/types/__init__.py +0 -0
  212. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/types/media.py +0 -0
  213. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/types/partial_download.py +0 -0
  214. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/types/tables.py +0 -0
  215. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/__init__.py +0 -0
  216. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/arguments.py +0 -0
  217. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/cloudpickle.py +0 -0
  218. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/common.py +0 -0
  219. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/daft.py +0 -0
  220. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/metrics.py +0 -0
  221. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/performance.py +0 -0
  222. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/placement.py +0 -0
  223. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/__init__.py +0 -0
  224. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/collections.py +0 -0
  225. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/concurrency.py +0 -0
  226. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/performance.py +0 -0
  227. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/ray_utils/runtime.py +0 -0
  228. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/resources.py +0 -0
  229. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/s3fs.py +0 -0
  230. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat/utils/schema.py +0 -0
  231. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat.egg-info/dependency_links.txt +0 -0
  232. {deltacat-1.1.9 → deltacat-1.1.11}/deltacat.egg-info/top_level.txt +0 -0
  233. {deltacat-1.1.9 → deltacat-1.1.11}/setup.cfg +0 -0
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.1
2
+ Name: deltacat
3
+ Version: 1.1.11
4
+ Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
+ Home-page: https://github.com/ray-project/deltacat
6
+ Author: Ray Team
7
+ License: UNKNOWN
8
+ Platform: UNKNOWN
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Operating System :: OS Independent
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+
19
+ # DeltaCAT
20
+
21
+ DeltaCAT is a Pythonic Data Catalog powered by Ray.
22
+
23
+ Its data storage model allows you to define and manage fast, scalable,
24
+ ACID-compliant data catalogs through git-like stage/commit APIs, and has been
25
+ used to successfully host exabyte-scale enterprise data lakes.
26
+
27
+ DeltaCAT uses the Ray distributed compute framework together with Apache Arrow
28
+ for common table management tasks, including petabyte-scale
29
+ change-data-capture, data consistency checks, and table repair.
30
+
31
+ ## Getting Started
32
+
33
+ ### Install
34
+
35
+ ```
36
+ pip install deltacat
37
+ ```
38
+
39
+ ### Running Tests
40
+
41
+ ```
42
+ pip3 install virtualenv
43
+ virtualenv test_env
44
+ source test_env/bin/activate
45
+ pip3 install -r requirements.txt
46
+
47
+ pytest
48
+ ```
49
+
50
+
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.9"
47
+ __version__ = "1.1.11"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -99,6 +99,8 @@ class Manifest(dict):
99
99
  total_source_content_length = 0
100
100
  content_type = None
101
101
  content_encoding = None
102
+ partition_values_set = set()
103
+ partition_values = None
102
104
  if entries:
103
105
  content_type = entries[0].meta.content_type
104
106
  content_encoding = entries[0].meta.content_encoding
@@ -127,6 +129,12 @@ class Manifest(dict):
127
129
  total_record_count += meta.record_count or 0
128
130
  total_content_length += meta.content_length or 0
129
131
  total_source_content_length += meta.source_content_length or 0
132
+ if len(partition_values_set) <= 1:
133
+ partition_values_set.add(entry.meta.partition_values)
134
+
135
+ if len(partition_values_set) == 1:
136
+ partition_values = partition_values_set.pop()
137
+
130
138
  meta = ManifestMeta.of(
131
139
  total_record_count,
132
140
  total_content_length,
@@ -134,6 +142,7 @@ class Manifest(dict):
134
142
  content_encoding,
135
143
  total_source_content_length,
136
144
  entry_type=entry_type,
145
+ partition_values=partition_values,
137
146
  )
138
147
  manifest = Manifest._build_manifest(meta, entries, author, uuid, entry_type)
139
148
  return manifest
@@ -185,6 +194,7 @@ class ManifestMeta(dict):
185
194
  credentials: Optional[Dict[str, str]] = None,
186
195
  content_type_parameters: Optional[List[Dict[str, str]]] = None,
187
196
  entry_type: Optional[EntryType] = None,
197
+ partition_values: Optional[List[str]] = None,
188
198
  ) -> ManifestMeta:
189
199
  manifest_meta = ManifestMeta()
190
200
  if record_count is not None:
@@ -203,6 +213,8 @@ class ManifestMeta(dict):
203
213
  manifest_meta["credentials"] = credentials
204
214
  if entry_type is not None:
205
215
  manifest_meta["entry_type"] = entry_type.value
216
+ if partition_values is not None:
217
+ manifest_meta["partition_values"] = partition_values
206
218
  return manifest_meta
207
219
 
208
220
  @property
@@ -244,6 +256,10 @@ class ManifestMeta(dict):
244
256
  return EntryType(self["entry_type"])
245
257
  return val
246
258
 
259
+ @property
260
+ def partition_values(self) -> Optional[List[str]]:
261
+ return self.get("partition_values")
262
+
247
263
 
248
264
  class ManifestAuthor(dict):
249
265
  @staticmethod
@@ -21,7 +21,7 @@ from boto3.resources.base import ServiceResource
21
21
  from botocore.client import BaseClient
22
22
  from botocore.exceptions import ClientError
23
23
  from ray.data.block import Block, BlockAccessor, BlockMetadata
24
- from ray.data.datasource import BlockWritePathProvider
24
+ from ray.data.datasource import FilenameProvider
25
25
  from ray.types import ObjectRef
26
26
  from tenacity import (
27
27
  Retrying,
@@ -70,9 +70,6 @@ from deltacat.exceptions import categorize_errors
70
70
 
71
71
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
72
72
 
73
- # TODO(raghumdani): refactor redshift datasource to reuse the
74
- # same module for writing output files.
75
-
76
73
 
77
74
  class CapturedBlockWritePaths:
78
75
  def __init__(self):
@@ -100,12 +97,15 @@ class CapturedBlockWritePaths:
100
97
  return self._block_refs
101
98
 
102
99
 
103
- class UuidBlockWritePathProvider(BlockWritePathProvider):
100
+ class UuidBlockWritePathProvider(FilenameProvider):
104
101
  """Block write path provider implementation that writes each
105
102
  dataset block out to a file of the form: {base_path}/{uuid}
106
103
  """
107
104
 
108
- def __init__(self, capture_object: CapturedBlockWritePaths):
105
+ def __init__(
106
+ self, capture_object: CapturedBlockWritePaths, base_path: Optional[str] = None
107
+ ):
108
+ self.base_path = base_path
109
109
  self.write_paths: List[str] = []
110
110
  self.block_refs: List[ObjectRef[Block]] = []
111
111
  self.capture_object = capture_object
@@ -117,6 +117,19 @@ class UuidBlockWritePathProvider(BlockWritePathProvider):
117
117
  self.block_refs,
118
118
  )
119
119
 
120
+ def get_filename_for_block(
121
+ self, block: Any, task_index: int, block_index: int
122
+ ) -> str:
123
+ if self.base_path is None:
124
+ raise ValueError(
125
+ "Base path must be provided to UuidBlockWritePathProvider",
126
+ )
127
+ return self._get_write_path_for_block(
128
+ base_path=self.base_path,
129
+ block=block,
130
+ block_index=block_index,
131
+ )
132
+
120
133
  def _get_write_path_for_block(
121
134
  self,
122
135
  base_path: str,
@@ -143,13 +156,6 @@ class UuidBlockWritePathProvider(BlockWritePathProvider):
143
156
  block_index: Optional[int] = None,
144
157
  file_format: Optional[str] = None,
145
158
  ) -> str:
146
- """
147
- TODO: BlockWritePathProvider is deprecated as of Ray version 2.20.0. Please use FilenameProvider.
148
- See: https://docs.ray.io/en/master/data/api/doc/ray.data.datasource.FilenameProvider.html
149
- Also See: https://github.com/ray-project/deltacat/issues/299
150
-
151
- Hence, this class only works with Ray version 2.20.0 or lower when used in Ray Dataset.
152
- """
153
159
  return self._get_write_path_for_block(
154
160
  base_path,
155
161
  filesystem=filesystem,
@@ -193,6 +193,7 @@ def compact_partition(
193
193
  round_completion_file_s3_url = rcf.write_round_completion_file(
194
194
  compaction_artifact_s3_bucket,
195
195
  new_rcf_partition_locator,
196
+ partition.locator,
196
197
  new_rci,
197
198
  **s3_client_kwargs,
198
199
  )
@@ -312,7 +313,10 @@ def _execute_compaction_round(
312
313
  round_completion_info = None
313
314
  if not rebase_source_partition_locator:
314
315
  round_completion_info = rcf.read_round_completion_file(
315
- compaction_artifact_s3_bucket, source_partition_locator, **s3_client_kwargs
316
+ compaction_artifact_s3_bucket,
317
+ source_partition_locator,
318
+ destination_partition_locator,
319
+ **s3_client_kwargs,
316
320
  )
317
321
  if not round_completion_info:
318
322
  logger.info(
@@ -177,6 +177,7 @@ def repartition(
177
177
  s3_client_kwargs = {}
178
178
 
179
179
  return rcf.write_round_completion_file(
180
+ None,
180
181
  None,
181
182
  None,
182
183
  repartition_completion_info,
@@ -12,10 +12,17 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
12
12
 
13
13
 
14
14
  def get_round_completion_file_s3_url(
15
- bucket: str, source_partition_locator: PartitionLocator
15
+ bucket: str,
16
+ source_partition_locator: PartitionLocator,
17
+ destination_partition_locator: Optional[PartitionLocator] = None,
16
18
  ) -> str:
17
19
 
18
20
  base_url = source_partition_locator.path(f"s3://{bucket}")
21
+ if destination_partition_locator:
22
+ base_url = destination_partition_locator.path(
23
+ f"s3://{bucket}/{source_partition_locator.hexdigest()}"
24
+ )
25
+
19
26
  return f"{base_url}.json"
20
27
 
21
28
 
@@ -23,20 +30,41 @@ def get_round_completion_file_s3_url(
23
30
  def read_round_completion_file(
24
31
  bucket: str,
25
32
  source_partition_locator: PartitionLocator,
33
+ destination_partition_locator: Optional[PartitionLocator] = None,
26
34
  **s3_client_kwargs: Optional[Dict[str, Any]],
27
35
  ) -> RoundCompletionInfo:
28
36
 
29
- round_completion_file_url = get_round_completion_file_s3_url(
37
+ all_uris = []
38
+ if destination_partition_locator:
39
+ round_completion_file_url_with_destination = get_round_completion_file_s3_url(
40
+ bucket,
41
+ source_partition_locator,
42
+ destination_partition_locator,
43
+ )
44
+ all_uris.append(round_completion_file_url_with_destination)
45
+
46
+ # Note: we read from RCF at two different URI for backward
47
+ # compatibility reasons.
48
+ round_completion_file_url_prev = get_round_completion_file_s3_url(
30
49
  bucket,
31
50
  source_partition_locator,
32
51
  )
33
- logger.info(f"reading round completion file from: {round_completion_file_url}")
52
+
53
+ all_uris.append(round_completion_file_url_prev)
54
+
34
55
  round_completion_info = None
35
- result = s3_utils.download(round_completion_file_url, False, **s3_client_kwargs)
36
- if result:
37
- json_str = result["Body"].read().decode("utf-8")
38
- round_completion_info = RoundCompletionInfo(json.loads(json_str))
39
- logger.info(f"read round completion info: {round_completion_info}")
56
+
57
+ for rcf_uri in all_uris:
58
+ logger.info(f"Reading round completion file from: {rcf_uri}")
59
+ result = s3_utils.download(rcf_uri, False, **s3_client_kwargs)
60
+ if result:
61
+ json_str = result["Body"].read().decode("utf-8")
62
+ round_completion_info = RoundCompletionInfo(json.loads(json_str))
63
+ logger.info(f"Read round completion info: {round_completion_info}")
64
+ break
65
+ else:
66
+ logger.warn(f"Round completion file not present at {rcf_uri}")
67
+
40
68
  return round_completion_info
41
69
 
42
70
 
@@ -44,8 +72,9 @@ def read_round_completion_file(
44
72
  def write_round_completion_file(
45
73
  bucket: Optional[str],
46
74
  source_partition_locator: Optional[PartitionLocator],
75
+ destination_partition_locator: Optional[PartitionLocator],
47
76
  round_completion_info: RoundCompletionInfo,
48
- completion_file_s3_url: str = None,
77
+ completion_file_s3_url: Optional[str] = None,
49
78
  **s3_client_kwargs: Optional[Dict[str, Any]],
50
79
  ) -> str:
51
80
  if bucket is None and completion_file_s3_url is None:
@@ -56,6 +85,7 @@ def write_round_completion_file(
56
85
  completion_file_s3_url = get_round_completion_file_s3_url(
57
86
  bucket,
58
87
  source_partition_locator,
88
+ destination_partition_locator,
59
89
  )
60
90
  logger.info(f"writing round completion file to: {completion_file_s3_url}")
61
91
  s3_utils.upload(
@@ -24,7 +24,7 @@ from deltacat.compute.compactor import (
24
24
  )
25
25
  from deltacat.compute.compactor_v2.model.merge_result import MergeResult
26
26
  from deltacat.compute.compactor_v2.model.hash_bucket_result import HashBucketResult
27
- from deltacat.compute.compactor_v2.model.compaction_session import (
27
+ from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
28
28
  ExecutionCompactionResult,
29
29
  )
30
30
  from deltacat.compute.compactor.model.materialize_result import MaterializeResult
@@ -78,6 +78,7 @@ from deltacat.compute.compactor_v2.utils.task_options import (
78
78
  )
79
79
  from deltacat.compute.compactor.model.compactor_version import CompactorVersion
80
80
  from deltacat.exceptions import categorize_errors
81
+ from deltacat.compute.compactor_v2.constants import COMPACT_PARTITION_METRIC_PREFIX
81
82
 
82
83
  if importlib.util.find_spec("memray"):
83
84
  import memray
@@ -86,7 +87,7 @@ if importlib.util.find_spec("memray"):
86
87
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
87
88
 
88
89
 
89
- @metrics
90
+ @metrics(prefix=COMPACT_PARTITION_METRIC_PREFIX)
90
91
  @categorize_errors
91
92
  def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]:
92
93
  assert (
@@ -109,7 +110,6 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
109
110
  f"Partition-{params.source_partition_locator} -> "
110
111
  f"{compaction_session_type} Compaction session data processing completed"
111
112
  )
112
- round_completion_file_s3_url: Optional[str] = None
113
113
  if execute_compaction_result.new_compacted_partition:
114
114
  previous_partition: Optional[Partition] = None
115
115
  if execute_compaction_result.is_inplace_compacted:
@@ -131,19 +131,13 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
131
131
  **params.deltacat_storage_kwargs,
132
132
  )
133
133
  logger.info(f"Committed compacted partition: {committed_partition}")
134
- round_completion_file_s3_url = rcf.write_round_completion_file(
135
- params.compaction_artifact_s3_bucket,
136
- execute_compaction_result.new_round_completion_file_partition_locator,
137
- execute_compaction_result.new_round_completion_info,
138
- **params.s3_client_kwargs,
139
- )
140
134
  else:
141
135
  logger.warning("No new partition was committed during compaction.")
142
136
 
143
137
  logger.info(
144
138
  f"Completed compaction session for: {params.source_partition_locator}"
145
139
  )
146
- return round_completion_file_s3_url
140
+ return execute_compaction_result.round_completion_file_s3_url
147
141
 
148
142
 
149
143
  def _execute_compaction(
@@ -188,6 +182,7 @@ def _execute_compaction(
188
182
  round_completion_info = rcf.read_round_completion_file(
189
183
  params.compaction_artifact_s3_bucket,
190
184
  params.source_partition_locator,
185
+ params.destination_partition_locator,
191
186
  **params.s3_client_kwargs,
192
187
  )
193
188
  if not round_completion_info:
@@ -684,9 +679,18 @@ def _execute_compaction(
684
679
  f"and rcf source partition_id of {rcf_source_partition_locator.partition_id}."
685
680
  )
686
681
  rcf_source_partition_locator = compacted_partition.locator
682
+
683
+ round_completion_file_s3_url = rcf.write_round_completion_file(
684
+ params.compaction_artifact_s3_bucket,
685
+ rcf_source_partition_locator,
686
+ compacted_partition.locator,
687
+ new_round_completion_info,
688
+ **params.s3_client_kwargs,
689
+ )
690
+
687
691
  return ExecutionCompactionResult(
688
692
  compacted_partition,
689
693
  new_round_completion_info,
690
- rcf_source_partition_locator,
694
+ round_completion_file_s3_url,
691
695
  is_inplace_compacted,
692
696
  )
@@ -68,3 +68,6 @@ DISCOVER_DELTAS_METRIC_PREFIX = "discover_deltas"
68
68
 
69
69
  # Metric prefix for prepare deletes
70
70
  PREPARE_DELETES_METRIC_PREFIX = "prepare_deletes"
71
+
72
+ # Metric prefix for compact partition method
73
+ COMPACT_PARTITION_METRIC_PREFIX = "compact_partition"
@@ -2,7 +2,6 @@ from dataclasses import dataclass, fields
2
2
 
3
3
  from deltacat.storage import (
4
4
  Partition,
5
- PartitionLocator,
6
5
  )
7
6
  from deltacat.compute.compactor import (
8
7
  RoundCompletionInfo,
@@ -14,7 +13,7 @@ from typing import Optional
14
13
  class ExecutionCompactionResult:
15
14
  new_compacted_partition: Optional[Partition]
16
15
  new_round_completion_info: Optional[RoundCompletionInfo]
17
- new_round_completion_file_partition_locator: Optional[PartitionLocator]
16
+ round_completion_file_s3_url: Optional[str]
18
17
  is_inplace_compacted: bool
19
18
 
20
19
  def __iter__(self):
@@ -162,7 +162,7 @@ def group_by_pk_hash_bucket(
162
162
  len(new_tables) == 1
163
163
  ), f"Expected only 1 table in the result but found {len(new_tables)}"
164
164
 
165
- table = generate_pk_hash_column([table], primary_keys, requires_hash=True)[0]
165
+ table = new_tables[0]
166
166
 
167
167
  # group hash bucket record indices
168
168
  result = group_record_indices_by_hash_bucket(
@@ -213,11 +213,14 @@ def categorize_errors(func: Callable):
213
213
  except BaseException as e:
214
214
  deltacat_storage = None
215
215
  deltacat_storage_kwargs = {}
216
+ all_args = args
216
217
  if kwargs:
217
218
  deltacat_storage = kwargs.get(DELTACAT_STORAGE_PARAM)
218
219
  deltacat_storage_kwargs = kwargs.get(DELTACAT_STORAGE_KWARGS_PARAM, {})
219
- if not deltacat_storage and args:
220
- for arg in args:
220
+ all_args = all_args + tuple(kwargs.values())
221
+
222
+ if not deltacat_storage and all_args:
223
+ for arg in all_args:
221
224
  if (
222
225
  isinstance(arg, dict)
223
226
  and arg.get(DELTACAT_STORAGE_PARAM) is not None
@@ -6,9 +6,6 @@ from typing import Any, Callable, Dict, Optional, TypeVar, Union, cast
6
6
  import pyarrow as pa
7
7
  import s3fs
8
8
  from ray.data import Dataset
9
- from ray.data.datasource import BlockWritePathProvider, DefaultBlockWritePathProvider
10
-
11
- from deltacat.io.aws.redshift.redshift_datasource import RedshiftDatasource
12
9
 
13
10
  T = TypeVar("T")
14
11
 
@@ -27,7 +24,6 @@ class DeltacatDataset(Dataset[T]):
27
24
  filesystem: Optional[Union[pa.fs.FileSystem, s3fs.S3FileSystem]] = None,
28
25
  try_create_dir: bool = True,
29
26
  arrow_open_stream_args: Optional[Dict[str, Any]] = None,
30
- block_path_provider: BlockWritePathProvider = DefaultBlockWritePathProvider(),
31
27
  arrow_parquet_args_fn: Callable[[], Dict[str, Any]] = lambda: {},
32
28
  **arrow_parquet_args,
33
29
  ) -> None:
@@ -59,9 +55,8 @@ class DeltacatDataset(Dataset[T]):
59
55
  if True. Does nothing if all directories already exist.
60
56
  arrow_open_stream_args: kwargs passed to
61
57
  pyarrow.fs.FileSystem.open_output_stream
62
- block_path_provider: BlockWritePathProvider implementation
63
- to write each dataset block to a custom output path. Uses
64
- DefaultBlockWritePathProvider if None.
58
+ filename_provider: FilenameProvider implementation
59
+ to write each dataset block to a custom output path.
65
60
  arrow_parquet_args_fn: Callable that returns a dictionary of write
66
61
  arguments to use when writing each block to a file. Overrides
67
62
  any duplicate keys from arrow_parquet_args. This should be used
@@ -72,14 +67,7 @@ class DeltacatDataset(Dataset[T]):
72
67
  pyarrow.parquet.write_table(), which is used to write out each
73
68
  block to a file.
74
69
  """
75
- self.write_datasource(
76
- RedshiftDatasource(),
77
- path=path,
78
- dataset_uuid=self._uuid,
79
- filesystem=filesystem,
80
- try_create_dir=try_create_dir,
81
- open_stream_args=arrow_open_stream_args,
82
- block_path_provider=block_path_provider,
83
- write_args_fn=arrow_parquet_args_fn,
84
- **arrow_parquet_args,
70
+ raise NotImplementedError(
71
+ "Writing to Redshift is not yet supported. "
72
+ "Please use DeltacatDataset.write_parquet() instead."
85
73
  )
@@ -14,6 +14,20 @@ from deltacat.storage.model.stream import Stream, StreamLocator
14
14
  from deltacat.storage.model.table import Table, TableLocator
15
15
  from deltacat.storage.model.table_version import TableVersion, TableVersionLocator
16
16
  from deltacat.storage.model.delete_parameters import DeleteParameters
17
+ from deltacat.storage.model.partition_spec import (
18
+ PartitionFilter,
19
+ PartitionValues,
20
+ DeltaPartitionSpec,
21
+ StreamPartitionSpec,
22
+ )
23
+ from deltacat.storage.model.transform import (
24
+ Transform,
25
+ TransformName,
26
+ TransformParameters,
27
+ BucketingStrategy,
28
+ BucketTransformParameters,
29
+ IdentityTransformParameters,
30
+ )
17
31
 
18
32
  from deltacat.storage.model.types import (
19
33
  CommitState,
@@ -56,4 +70,14 @@ __all__ = [
56
70
  "TableVersionLocator",
57
71
  "SortKey",
58
72
  "SortOrder",
73
+ "PartitionFilter",
74
+ "PartitionValues",
75
+ "DeltaPartitionSpec",
76
+ "StreamPartitionSpec",
77
+ "Transform",
78
+ "TransformName",
79
+ "TransformParameters",
80
+ "BucketingStrategy",
81
+ "BucketTransformParameters",
82
+ "IdentityTransformParameters",
59
83
  ]