deltacat 2.0.0b3__tar.gz → 2.0.0b7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (362) hide show
  1. {deltacat-2.0.0b3/deltacat.egg-info → deltacat-2.0.0b7}/PKG-INFO +1 -1
  2. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/__init__.py +1 -1
  3. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/iceberg/impl.py +15 -2
  4. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/iceberg/overrides.py +12 -14
  5. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/main/impl.py +1 -1
  6. deltacat-2.0.0b7/deltacat/daft/daft_scan.py +111 -0
  7. deltacat-2.0.0b7/deltacat/daft/model.py +258 -0
  8. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/iceberg/iceberg_bucket_writer.py +64 -17
  9. deltacat-2.0.0b7/deltacat/experimental/daft/__init__.py +4 -0
  10. deltacat-2.0.0b7/deltacat/experimental/daft/daft_catalog.py +229 -0
  11. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/manifest.py +49 -0
  12. deltacat-2.0.0b7/deltacat/tests/storage/model/test_manifest.py +129 -0
  13. deltacat-2.0.0b7/deltacat/utils/__init__.py +0 -0
  14. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/daft.py +2 -5
  15. deltacat-2.0.0b7/deltacat/utils/ray_utils/__init__.py +0 -0
  16. {deltacat-2.0.0b3 → deltacat-2.0.0b7/deltacat.egg-info}/PKG-INFO +1 -1
  17. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat.egg-info/SOURCES.txt +7 -0
  18. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat.egg-info/requires.txt +2 -2
  19. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/setup.py +3 -2
  20. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/LICENSE +0 -0
  21. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/MANIFEST.in +0 -0
  22. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/README.md +0 -0
  23. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/annotations.py +0 -0
  24. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/api.py +0 -0
  25. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/aws/__init__.py +0 -0
  26. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/aws/clients.py +0 -0
  27. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/aws/constants.py +0 -0
  28. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/aws/s3u.py +0 -0
  29. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/__init__.py +0 -0
  30. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/benchmark_engine.py +0 -0
  31. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  32. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/benchmark_report.py +0 -0
  33. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/benchmark_suite.py +0 -0
  34. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/conftest.py +0 -0
  35. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/data/__init__.py +0 -0
  36. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/data/random_row_generator.py +0 -0
  37. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/data/row_generator.py +0 -0
  38. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/benchmarking/test_benchmark_pipeline.py +0 -0
  39. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/__init__.py +0 -0
  40. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/delegate.py +0 -0
  41. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/iceberg/__init__.py +0 -0
  42. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/iceberg/iceberg_catalog_config.py +0 -0
  43. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/interface.py +0 -0
  44. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/main/__init__.py +0 -0
  45. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/model/__init__.py +0 -0
  46. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/model/catalog.py +0 -0
  47. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/model/properties.py +0 -0
  48. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/catalog/model/table_definition.py +0 -0
  49. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/__init__.py +0 -0
  50. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/__init__.py +0 -0
  51. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/compaction_session.py +0 -0
  52. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/__init__.py +0 -0
  53. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
  54. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
  55. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  56. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  57. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  58. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  59. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  60. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  61. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  62. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  63. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  64. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  65. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  66. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  67. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/repartition_session.py +0 -0
  68. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/__init__.py +0 -0
  69. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  70. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  71. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/materialize.py +0 -0
  72. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/steps/repartition.py +0 -0
  73. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/__init__.py +0 -0
  74. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/io.py +0 -0
  75. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  76. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/round_completion_file.py +0 -0
  77. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  78. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  79. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/__init__.py +0 -0
  80. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
  81. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/constants.py +0 -0
  82. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  83. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  84. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  85. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  86. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  87. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  88. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  89. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
  90. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
  91. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  92. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  93. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
  94. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  95. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
  96. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/private/compaction_utils.py +0 -0
  97. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  98. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
  99. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/steps/merge.py +0 -0
  100. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  101. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
  102. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  103. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  104. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/io.py +0 -0
  105. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  106. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
  107. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
  108. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/__init__.py +0 -0
  109. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/constants.py +0 -0
  110. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/converter_session.py +0 -0
  111. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/model/__init__.py +0 -0
  112. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/model/convert_input.py +0 -0
  113. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/model/convert_input_files.py +0 -0
  114. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/model/converter_session_params.py +0 -0
  115. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  116. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/pyiceberg/catalog.py +0 -0
  117. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/pyiceberg/overrides.py +0 -0
  118. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +0 -0
  119. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/steps/__init__.py +0 -0
  120. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/steps/convert.py +0 -0
  121. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/steps/dedupe.py +0 -0
  122. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/__init__.py +0 -0
  123. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/convert_task_options.py +0 -0
  124. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/converter_session_utils.py +0 -0
  125. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/iceberg_columns.py +0 -0
  126. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/io.py +0 -0
  127. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/converter/utils/s3u.py +0 -0
  128. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/__init__.py +0 -0
  129. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/daft.py +0 -0
  130. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/model/__init__.py +0 -0
  131. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -0
  132. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/utils/__init__.py +0 -0
  133. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/merge_on_read/utils/delta.py +0 -0
  134. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/__init__.py +0 -0
  135. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/delta.py +0 -0
  136. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/manifest.py +0 -0
  137. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/model.py +0 -0
  138. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/resource_estimation/parquet.py +0 -0
  139. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/__init__.py +0 -0
  140. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/__init__.py +0 -0
  141. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  142. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/delta_stats.py +0 -0
  143. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  144. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  145. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/models/stats_result.py +0 -0
  146. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/compute/stats/types.py +0 -0
  147. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/constants.py +0 -0
  148. {deltacat-2.0.0b3/deltacat/examples → deltacat-2.0.0b7/deltacat/daft}/__init__.py +0 -0
  149. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/env.py +0 -0
  150. {deltacat-2.0.0b3/deltacat/examples/common → deltacat-2.0.0b7/deltacat/examples}/__init__.py +0 -0
  151. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/basic_logging.py +0 -0
  152. {deltacat-2.0.0b3/deltacat/examples/iceberg → deltacat-2.0.0b7/deltacat/examples/common}/__init__.py +0 -0
  153. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/common/fixtures.py +0 -0
  154. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/hello_world.py +0 -0
  155. {deltacat-2.0.0b3/deltacat/io → deltacat-2.0.0b7/deltacat/examples/iceberg}/__init__.py +0 -0
  156. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/examples/iceberg/iceberg_reader.py +0 -0
  157. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/exceptions.py +0 -0
  158. {deltacat-2.0.0b3/deltacat/storage/iceberg → deltacat-2.0.0b7/deltacat/experimental}/__init__.py +0 -0
  159. {deltacat-2.0.0b3/deltacat/storage/main → deltacat-2.0.0b7/deltacat/io}/__init__.py +0 -0
  160. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/file_object_store.py +0 -0
  161. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/memcached_object_store.py +0 -0
  162. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/object_store.py +0 -0
  163. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/ray_plasma_object_store.py +0 -0
  164. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/redis_object_store.py +0 -0
  165. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/io/s3_object_store.py +0 -0
  166. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/logs.py +0 -0
  167. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/__init__.py +0 -0
  168. {deltacat-2.0.0b3/deltacat/storage/model → deltacat-2.0.0b7/deltacat/storage/iceberg}/__init__.py +0 -0
  169. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/iceberg/iceberg_scan_planner.py +0 -0
  170. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/iceberg/impl.py +0 -0
  171. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/iceberg/model.py +0 -0
  172. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/interface.py +0 -0
  173. {deltacat-2.0.0b3/deltacat/storage/model/scan → deltacat-2.0.0b7/deltacat/storage/main}/__init__.py +0 -0
  174. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/main/impl.py +0 -0
  175. {deltacat-2.0.0b3/deltacat/storage/rivulet/arrow → deltacat-2.0.0b7/deltacat/storage/model}/__init__.py +0 -0
  176. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/delta.py +0 -0
  177. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/interop.py +0 -0
  178. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/list_result.py +0 -0
  179. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/locator.py +0 -0
  180. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/metafile.py +0 -0
  181. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/namespace.py +0 -0
  182. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/partition.py +0 -0
  183. {deltacat-2.0.0b3/deltacat/storage/rivulet/fs → deltacat-2.0.0b7/deltacat/storage/model/scan}/__init__.py +0 -0
  184. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/scan/push_down.py +0 -0
  185. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/scan/scan_plan.py +0 -0
  186. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/scan/scan_task.py +0 -0
  187. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/schema.py +0 -0
  188. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/shard.py +0 -0
  189. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/sort_key.py +0 -0
  190. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/stream.py +0 -0
  191. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/table.py +0 -0
  192. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/table_version.py +0 -0
  193. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/transaction.py +0 -0
  194. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/transform.py +0 -0
  195. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/model/types.py +0 -0
  196. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/__init__.py +0 -0
  197. {deltacat-2.0.0b3/deltacat/storage/rivulet/metastore → deltacat-2.0.0b7/deltacat/storage/rivulet/arrow}/__init__.py +0 -0
  198. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/arrow/serializer.py +0 -0
  199. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/dataset.py +0 -0
  200. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/dataset_executor.py +0 -0
  201. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/feather/__init__.py +0 -0
  202. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/feather/file_reader.py +0 -0
  203. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/feather/serializer.py +0 -0
  204. {deltacat-2.0.0b3/deltacat/storage/rivulet/reader → deltacat-2.0.0b7/deltacat/storage/rivulet/fs}/__init__.py +0 -0
  205. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/fs/file_provider.py +0 -0
  206. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/fs/file_store.py +0 -0
  207. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/fs/input_file.py +0 -0
  208. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/fs/output_file.py +0 -0
  209. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/logical_plan.py +0 -0
  210. {deltacat-2.0.0b3/deltacat/storage/rivulet/schema → deltacat-2.0.0b7/deltacat/storage/rivulet/metastore}/__init__.py +0 -0
  211. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/metastore/delta.py +0 -0
  212. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/metastore/json_sst.py +0 -0
  213. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/metastore/sst.py +0 -0
  214. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/metastore/sst_interval_tree.py +0 -0
  215. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/mvp/Table.py +0 -0
  216. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/mvp/__init__.py +0 -0
  217. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/parquet/__init__.py +0 -0
  218. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  219. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/parquet/file_reader.py +0 -0
  220. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/parquet/serializer.py +0 -0
  221. {deltacat-2.0.0b3/deltacat/storage/rivulet/writer → deltacat-2.0.0b7/deltacat/storage/rivulet/reader}/__init__.py +0 -0
  222. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/block_scanner.py +0 -0
  223. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/data_reader.py +0 -0
  224. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/data_scan.py +0 -0
  225. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/dataset_metastore.py +0 -0
  226. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/dataset_reader.py +0 -0
  227. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/pyarrow_data_reader.py +0 -0
  228. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/query_expression.py +0 -0
  229. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/reader/reader_type_registrar.py +0 -0
  230. {deltacat-2.0.0b3/deltacat/storage/util → deltacat-2.0.0b7/deltacat/storage/rivulet/schema}/__init__.py +0 -0
  231. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/schema/datatype.py +0 -0
  232. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/schema/schema.py +0 -0
  233. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/serializer.py +0 -0
  234. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/serializer_factory.py +0 -0
  235. {deltacat-2.0.0b3/deltacat/tests → deltacat-2.0.0b7/deltacat/storage/rivulet/writer}/__init__.py +0 -0
  236. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/writer/dataset_writer.py +0 -0
  237. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/rivulet/writer/memtable_dataset_writer.py +0 -0
  238. {deltacat-2.0.0b3/deltacat/tests/aws → deltacat-2.0.0b7/deltacat/storage/util}/__init__.py +0 -0
  239. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/storage/util/scan_planner.py +0 -0
  240. {deltacat-2.0.0b3/deltacat/tests/catalog → deltacat-2.0.0b7/deltacat/tests}/__init__.py +0 -0
  241. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/__init__.py +0 -0
  242. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_cloudpickle_bug_fix.py +0 -0
  243. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_file_object_store.py +0 -0
  244. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_memcached_object_store.py +0 -0
  245. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_ray_plasma_object_store.py +0 -0
  246. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_redis_object_store.py +0 -0
  247. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/_io/test_s3_object_store.py +0 -0
  248. {deltacat-2.0.0b3/deltacat/tests/compute → deltacat-2.0.0b7/deltacat/tests/aws}/__init__.py +0 -0
  249. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/aws/test_clients.py +0 -0
  250. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/aws/test_s3u.py +0 -0
  251. {deltacat-2.0.0b3/deltacat/tests/compute/compactor → deltacat-2.0.0b7/deltacat/tests/catalog}/__init__.py +0 -0
  252. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/catalog/test_catalogs.py +0 -0
  253. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/catalog/test_default_catalog_impl.py +0 -0
  254. {deltacat-2.0.0b3/deltacat/tests/compute/compactor/steps → deltacat-2.0.0b7/deltacat/tests/compute}/__init__.py +0 -0
  255. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
  256. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
  257. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
  258. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
  259. {deltacat-2.0.0b3/deltacat/tests/compute/compactor/utils → deltacat-2.0.0b7/deltacat/tests/compute/compactor}/__init__.py +0 -0
  260. {deltacat-2.0.0b3/deltacat/tests/compute/compactor_v2 → deltacat-2.0.0b7/deltacat/tests/compute/compactor/steps}/__init__.py +0 -0
  261. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  262. {deltacat-2.0.0b3/deltacat/tests/compute/compactor_v2 → deltacat-2.0.0b7/deltacat/tests/compute/compactor}/utils/__init__.py +0 -0
  263. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  264. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -0
  265. {deltacat-2.0.0b3/deltacat/tests/compute/converter → deltacat-2.0.0b7/deltacat/tests/compute/compactor_v2}/__init__.py +0 -0
  266. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -0
  267. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  268. {deltacat-2.0.0b3/deltacat/tests/compute/resource_estimation → deltacat-2.0.0b7/deltacat/tests/compute/compactor_v2/utils}/__init__.py +0 -0
  269. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  270. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/conftest.py +0 -0
  271. {deltacat-2.0.0b3/deltacat/tests/compute/resource_estimation/data → deltacat-2.0.0b7/deltacat/tests/compute/converter}/__init__.py +0 -0
  272. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/converter/conftest.py +0 -0
  273. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/converter/test_convert_session.py +0 -0
  274. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/converter/utils.py +0 -0
  275. {deltacat-2.0.0b3/deltacat/tests/storage → deltacat-2.0.0b7/deltacat/tests/compute/resource_estimation}/__init__.py +0 -0
  276. {deltacat-2.0.0b3/deltacat/tests/storage/main → deltacat-2.0.0b7/deltacat/tests/compute/resource_estimation/data}/__init__.py +0 -0
  277. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/resource_estimation/test_delta.py +0 -0
  278. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
  279. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
  280. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +0 -0
  281. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
  282. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_rebase.py +0 -0
  283. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
  284. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_util_common.py +0 -0
  285. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_util_constant.py +0 -0
  286. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -0
  287. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/local_deltacat_storage/__init__.py +0 -0
  288. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/local_deltacat_storage/exceptions.py +0 -0
  289. {deltacat-2.0.0b3/deltacat/tests/storage/model → deltacat-2.0.0b7/deltacat/tests/storage}/__init__.py +0 -0
  290. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/conftest.py +0 -0
  291. {deltacat-2.0.0b3/deltacat/tests/storage/rivulet → deltacat-2.0.0b7/deltacat/tests/storage/main}/__init__.py +0 -0
  292. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/main/test_main_storage.py +0 -0
  293. {deltacat-2.0.0b3/deltacat/tests/storage/rivulet/fs → deltacat-2.0.0b7/deltacat/tests/storage/model}/__init__.py +0 -0
  294. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_delete_parameters.py +0 -0
  295. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_metafile_io.py +0 -0
  296. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_schema.py +0 -0
  297. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_shard.py +0 -0
  298. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_table_version.py +0 -0
  299. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/model/test_transaction.py +0 -0
  300. {deltacat-2.0.0b3/deltacat/tests/storage/rivulet/schema → deltacat-2.0.0b7/deltacat/tests/storage/rivulet}/__init__.py +0 -0
  301. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/conftest.py +0 -0
  302. {deltacat-2.0.0b3/deltacat/tests/storage/rivulet/writer → deltacat-2.0.0b7/deltacat/tests/storage/rivulet/fs}/__init__.py +0 -0
  303. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +0 -0
  304. {deltacat-2.0.0b3/deltacat/tests/test_utils → deltacat-2.0.0b7/deltacat/tests/storage/rivulet/schema}/__init__.py +0 -0
  305. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/schema/test_schema.py +0 -0
  306. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/test_dataset.py +0 -0
  307. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/test_manifest.py +0 -0
  308. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/test_sst_interval_tree.py +0 -0
  309. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/test_utils.py +0 -0
  310. {deltacat-2.0.0b3/deltacat/tests/utils → deltacat-2.0.0b7/deltacat/tests/storage/rivulet/writer}/__init__.py +0 -0
  311. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +0 -0
  312. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +0 -0
  313. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +0 -0
  314. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_deltacat_api.py +0 -0
  315. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_exceptions.py +0 -0
  316. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_logs.py +0 -0
  317. {deltacat-2.0.0b3/deltacat/tests/utils/data → deltacat-2.0.0b7/deltacat/tests/test_utils}/__init__.py +0 -0
  318. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/constants.py +0 -0
  319. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/filesystem.py +0 -0
  320. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/message_pack_utils.py +0 -0
  321. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/pyarrow.py +0 -0
  322. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/storage.py +0 -0
  323. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/test_utils/utils.py +0 -0
  324. {deltacat-2.0.0b3/deltacat/tests/utils/ray_utils → deltacat-2.0.0b7/deltacat/tests/utils}/__init__.py +0 -0
  325. {deltacat-2.0.0b3/deltacat/types → deltacat-2.0.0b7/deltacat/tests/utils/data}/__init__.py +0 -0
  326. {deltacat-2.0.0b3/deltacat/utils → deltacat-2.0.0b7/deltacat/tests/utils/ray_utils}/__init__.py +0 -0
  327. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
  328. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
  329. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  330. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_daft.py +0 -0
  331. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_metrics.py +0 -0
  332. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_placement.py +0 -0
  333. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_pyarrow.py +0 -0
  334. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  335. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/tests/utils/test_resources.py +0 -0
  336. {deltacat-2.0.0b3/deltacat/utils/ray_utils → deltacat-2.0.0b7/deltacat/types}/__init__.py +0 -0
  337. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/types/media.py +0 -0
  338. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/types/partial_download.py +0 -0
  339. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/types/tables.py +0 -0
  340. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/arguments.py +0 -0
  341. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/cloudpickle.py +0 -0
  342. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/common.py +0 -0
  343. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/export.py +0 -0
  344. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/filesystem.py +0 -0
  345. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/metafile_locator.py +0 -0
  346. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/metrics.py +0 -0
  347. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/numpy.py +0 -0
  348. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/pandas.py +0 -0
  349. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/performance.py +0 -0
  350. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/placement.py +0 -0
  351. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/pyarrow.py +0 -0
  352. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/collections.py +0 -0
  353. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/concurrency.py +0 -0
  354. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/dataset.py +0 -0
  355. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/performance.py +0 -0
  356. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/ray_utils/runtime.py +0 -0
  357. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/resources.py +0 -0
  358. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/s3fs.py +0 -0
  359. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat/utils/schema.py +0 -0
  360. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat.egg-info/dependency_links.txt +0 -0
  361. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/deltacat.egg-info/top_level.txt +0 -0
  362. {deltacat-2.0.0b3 → deltacat-2.0.0b7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 2.0.0b3
3
+ Version: 2.0.0b7
4
4
  Summary: A portable, scalable, fast, and Pythonic Data Lakehouse for AI.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -67,7 +67,7 @@ if importlib.util.find_spec("pyiceberg") is not None:
67
67
 
68
68
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
69
69
 
70
- __version__ = "2.0.0b3"
70
+ __version__ = "2.0.0b7"
71
71
 
72
72
 
73
73
  __all__ = [
@@ -2,10 +2,13 @@ import logging
2
2
 
3
3
  from typing import Any, Dict, List, Optional, Union
4
4
 
5
- from daft import DataFrame
5
+ from daft import DataFrame, context
6
+ from daft.daft import ScanOperatorHandle, StorageConfig
7
+ from daft.logical.builder import LogicalPlanBuilder
6
8
 
7
9
  from deltacat import logs
8
10
  from deltacat.catalog.model.table_definition import TableDefinition
11
+ from deltacat.daft.daft_scan import DeltaCatScanOperator
9
12
  from deltacat.exceptions import TableAlreadyExistsError
10
13
  from deltacat.storage.iceberg.iceberg_scan_planner import IcebergScanPlanner
11
14
  from deltacat.storage.iceberg.model import PartitionSchemeMapper, SchemaMapper
@@ -144,7 +147,17 @@ def read_table(
144
147
  table: str, *args, namespace: Optional[str] = None, **kwargs
145
148
  ) -> DistributedDataset:
146
149
  """Read a table into a distributed dataset."""
147
- raise NotImplementedError("read_table not implemented")
150
+ # TODO: more proper IO configuration
151
+ io_config = context.get_context().daft_planning_config.default_io_config
152
+ multithreaded_io = context.get_context().get_or_create_runner().name != "ray"
153
+
154
+ storage_config = StorageConfig(multithreaded_io, io_config)
155
+
156
+ dc_table = get_table(name=table, namespace=namespace, **kwargs)
157
+ dc_scan_operator = DeltaCatScanOperator(dc_table, storage_config)
158
+ handle = ScanOperatorHandle.from_python_scan_operator(dc_scan_operator)
159
+ builder = LogicalPlanBuilder.from_tabular_scan(scan_operator=handle)
160
+ return DataFrame(builder)
148
161
 
149
162
 
150
163
  def alter_table(
@@ -5,12 +5,11 @@ from typing import Iterator, List
5
5
  from pyarrow.fs import FileSystem
6
6
 
7
7
  from pyiceberg.io.pyarrow import (
8
- fill_parquet_file_metadata,
8
+ data_file_statistics_from_parquet_metadata,
9
9
  compute_statistics_plan,
10
10
  parquet_path_to_id_mapping,
11
11
  )
12
- from pyiceberg.table import Table, _MergingSnapshotProducer
13
- from pyiceberg.table.snapshots import Operation
12
+ from pyiceberg.table import Table
14
13
  from pyiceberg.manifest import DataFile, DataFileContent, FileFormat
15
14
  from pyiceberg.types import StructType, NestedField, IntegerType
16
15
  from pyiceberg.typedef import Record
@@ -24,11 +23,10 @@ def append(table: Table, paths: List[str]) -> None:
24
23
  # raise ValueError("Cannot write to tables with a sort-order")
25
24
 
26
25
  data_files = write_file(table, paths)
27
- merge = _MergingSnapshotProducer(operation=Operation.APPEND, table=table)
28
- for data_file in data_files:
29
- merge.append_data_file(data_file)
30
-
31
- merge.commit()
26
+ with table.transaction() as txn:
27
+ with txn.update_snapshot().fast_append() as snapshot_update:
28
+ for data_file in data_files:
29
+ snapshot_update.append_data_file(data_file)
32
30
 
33
31
 
34
32
  def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
@@ -41,6 +39,11 @@ def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
41
39
  fs_path = fs_tuple[1]
42
40
  with fs.open_input_file(fs_path) as native_file:
43
41
  parquet_metadata = pq.read_metadata(native_file)
42
+ statistics = data_file_statistics_from_parquet_metadata(
43
+ parquet_metadata=parquet_metadata,
44
+ stats_columns=compute_statistics_plan(table.schema(), table.properties),
45
+ parquet_column_mapping=parquet_path_to_id_mapping(table.schema()),
46
+ )
44
47
  data_file = DataFile(
45
48
  content=DataFileContent.DATA,
46
49
  file_path=file_path,
@@ -63,12 +66,7 @@ def write_file(table: Table, paths: Iterator[str]) -> Iterator[DataFile]:
63
66
  spec_id=table.spec().spec_id,
64
67
  equality_ids=None,
65
68
  key_metadata=None,
66
- )
67
- fill_parquet_file_metadata(
68
- data_file=data_file,
69
- parquet_metadata=parquet_metadata,
70
- stats_columns=compute_statistics_plan(table.schema(), table.properties),
71
- parquet_column_mapping=parquet_path_to_id_mapping(table.schema()),
69
+ **statistics.to_serialized_dict(),
72
70
  )
73
71
  data_files.append(data_file)
74
72
  return data_files
@@ -709,7 +709,7 @@ def _get_deltas_from_partition_filter(
709
709
 
710
710
  def _get_storage(**kwargs):
711
711
  """
712
- Returns the implementation of `deltacat.storage.interface` to use with this catalog.
712
+ Returns the implementation of `deltacat.storage.interface` to use with this catalog
713
713
 
714
714
  This is configured in the `CatalogProperties` stored during initialization and passed through `delegate.py`
715
715
  """
@@ -0,0 +1,111 @@
1
+ from typing import Iterator
2
+
3
+ from daft import Schema
4
+ from daft.daft import (
5
+ StorageConfig,
6
+ PartitionField,
7
+ Pushdowns,
8
+ ScanTask,
9
+ FileFormatConfig,
10
+ ParquetSourceConfig,
11
+ )
12
+ from daft.io.scan import ScanOperator
13
+
14
+ from deltacat.catalog.model.table_definition import TableDefinition
15
+ from deltacat.daft.model import DaftPartitionKeyMapper
16
+
17
+
18
+ class DeltaCatScanOperator(ScanOperator):
19
+ def __init__(self, table: TableDefinition, storage_config: StorageConfig) -> None:
20
+ super().__init__()
21
+ self.table = table
22
+ self._schema = self._infer_schema()
23
+ self.partition_keys = self._infer_partition_keys()
24
+ self.storage_config = storage_config
25
+
26
+ def schema(self) -> Schema:
27
+ return self._schema
28
+
29
+ def name(self) -> str:
30
+ return "DeltaCatScanOperator"
31
+
32
+ def display_name(self) -> str:
33
+ return f"DeltaCATScanOperator({self.table.table.namespace}.{self.table.table.table_name})"
34
+
35
+ def partitioning_keys(self) -> list[PartitionField]:
36
+ return self.partition_keys
37
+
38
+ def multiline_display(self) -> list[str]:
39
+ return [
40
+ self.display_name(),
41
+ f"Schema = {self._schema}",
42
+ f"Partitioning keys = {self.partitioning_keys}",
43
+ f"Storage config = {self.storage_config}",
44
+ ]
45
+
46
+ def to_scan_tasks(self, pushdowns: Pushdowns) -> Iterator[ScanTask]:
47
+ # TODO: implement pushdown predicate on DeltaCAT
48
+ dc_scan_plan = self.table.create_scan_plan()
49
+ scan_tasks = []
50
+ file_format_config = FileFormatConfig.from_parquet_config(
51
+ # maybe this: ParquetSourceConfig(field_id_mapping=self._field_id_mapping)
52
+ ParquetSourceConfig()
53
+ )
54
+ for dc_scan_task in dc_scan_plan.scan_tasks:
55
+ for data_file in dc_scan_task.data_files():
56
+ st = ScanTask.catalog_scan_task(
57
+ file=data_file.file_path,
58
+ file_format=file_format_config,
59
+ schema=self._schema._schema,
60
+ storage_config=self.storage_config,
61
+ pushdowns=pushdowns,
62
+ )
63
+ scan_tasks.append(st)
64
+ return iter(scan_tasks)
65
+
66
+ def can_absorb_filter(self) -> bool:
67
+ return False
68
+
69
+ def can_absorb_limit(self) -> bool:
70
+ return False
71
+
72
+ def can_absorb_select(self) -> bool:
73
+ return True
74
+
75
+ def _infer_schema(self) -> Schema:
76
+
77
+ if not (
78
+ self.table and self.table.table_version and self.table.table_version.schema
79
+ ):
80
+ raise RuntimeError(
81
+ f"Failed to infer schema for DeltaCAT Table "
82
+ f"{self.table.table.namespace}.{self.table.table.table_name}"
83
+ )
84
+
85
+ return Schema.from_pyarrow_schema(self.table.table_version.schema.arrow)
86
+
87
+ def _infer_partition_keys(self) -> list[PartitionField]:
88
+ if not (
89
+ self.table
90
+ and self.table.table_version
91
+ and self.table.table_version.partition_scheme
92
+ and self.table.table_version.schema
93
+ ):
94
+ raise RuntimeError(
95
+ f"Failed to infer partition keys for DeltaCAT Table "
96
+ f"{self.table.table.namespace}.{self.table.table.table_name}"
97
+ )
98
+
99
+ schema = self.table.table_version.schema
100
+ partition_keys = self.table.table_version.partition_scheme.keys
101
+ if not partition_keys:
102
+ return []
103
+
104
+ partition_fields = []
105
+ for key in partition_keys:
106
+ field = DaftPartitionKeyMapper.unmap(key, schema)
107
+ # Assert that the returned value is not None.
108
+ assert field is not None, f"Unmapping failed for key {key}"
109
+ partition_fields.append(field)
110
+
111
+ return partition_fields
@@ -0,0 +1,258 @@
1
+ from typing import Optional
2
+
3
+ import pyarrow as pa
4
+ from pyarrow import Field as PaField
5
+ from daft import Schema as DaftSchema, DataType
6
+ from daft.daft import (
7
+ PartitionField as DaftPartitionField,
8
+ PartitionTransform as DaftTransform,
9
+ )
10
+ from daft.logical.schema import Field as DaftField
11
+ from daft.io.scan import make_partition_field
12
+
13
+ from deltacat.storage.model.schema import Schema
14
+ from deltacat.storage.model.interop import ModelMapper
15
+ from deltacat.storage.model.partition import PartitionKey
16
+ from deltacat.storage.model.transform import (
17
+ BucketingStrategy,
18
+ Transform,
19
+ BucketTransform,
20
+ HourTransform,
21
+ DayTransform,
22
+ MonthTransform,
23
+ YearTransform,
24
+ IdentityTransform,
25
+ TruncateTransform,
26
+ )
27
+
28
+
29
+ class DaftFieldMapper(ModelMapper[DaftField, PaField]):
30
+ @staticmethod
31
+ def map(
32
+ obj: Optional[DaftField],
33
+ **kwargs,
34
+ ) -> Optional[PaField]:
35
+ """Convert Daft Field to PyArrow Field.
36
+
37
+ Args:
38
+ obj: The Daft Field to convert
39
+ **kwargs: Additional arguments
40
+
41
+ Returns:
42
+ Converted PyArrow Field object
43
+ """
44
+ if obj is None:
45
+ return None
46
+
47
+ return pa.field(
48
+ name=obj.name,
49
+ type=obj.dtype.to_arrow_dtype(),
50
+ )
51
+
52
+ @staticmethod
53
+ def unmap(
54
+ obj: Optional[PaField],
55
+ **kwargs,
56
+ ) -> Optional[DaftField]:
57
+ """Convert PyArrow Field to Daft Field.
58
+
59
+ Args:
60
+ obj: The PyArrow Field to convert
61
+ **kwargs: Additional arguments
62
+
63
+ Returns:
64
+ Converted Daft Field object
65
+ """
66
+ if obj is None:
67
+ return None
68
+
69
+ return DaftField.create(
70
+ name=obj.name,
71
+ dtype=DataType.from_arrow_type(obj.type), # type: ignore
72
+ )
73
+
74
+
75
+ class DaftTransformMapper(ModelMapper[DaftTransform, Transform]):
76
+ @staticmethod
77
+ def map(
78
+ obj: Optional[DaftTransform],
79
+ **kwargs,
80
+ ) -> Optional[Transform]:
81
+ """Convert DaftTransform to DeltaCAT Transform.
82
+
83
+ Args:
84
+ obj: The DaftTransform to convert
85
+ **kwargs: Additional arguments
86
+
87
+ Returns:
88
+ Converted Transform object
89
+ """
90
+
91
+ # daft.PartitionTransform doesn't have a Python interface for accessing its attributes,
92
+ # thus conversion is not possible.
93
+ # TODO: request Daft to expose Python friendly interface for daft.PartitionTransform
94
+ raise NotImplementedError(
95
+ "Converting transform from Daft to DeltaCAT is not supported"
96
+ )
97
+
98
+ @staticmethod
99
+ def unmap(
100
+ obj: Optional[Transform],
101
+ **kwargs,
102
+ ) -> Optional[DaftTransform]:
103
+ """Convert DeltaCAT Transform to DaftTransform.
104
+
105
+ Args:
106
+ obj: The Transform to convert
107
+ **kwargs: Additional arguments
108
+
109
+ Returns:
110
+ Converted DaftTransform object
111
+ """
112
+ if obj is None:
113
+ return None
114
+
115
+ # Map DeltaCAT transforms to Daft transforms using isinstance
116
+
117
+ if isinstance(obj, IdentityTransform):
118
+ return DaftTransform.identity()
119
+ elif isinstance(obj, HourTransform):
120
+ return DaftTransform.hour()
121
+ elif isinstance(obj, DayTransform):
122
+ return DaftTransform.day()
123
+ elif isinstance(obj, MonthTransform):
124
+ return DaftTransform.month()
125
+ elif isinstance(obj, YearTransform):
126
+ return DaftTransform.year()
127
+ elif isinstance(obj, BucketTransform):
128
+ if obj.parameters.bucketing_strategy == BucketingStrategy.ICEBERG:
129
+ return DaftTransform.iceberg_bucket(obj.parameters.num_buckets)
130
+ else:
131
+ raise ValueError(
132
+ f"Unsupported Bucketing Strategy: {obj.parameters.bucketing_strategy}"
133
+ )
134
+ elif isinstance(obj, TruncateTransform):
135
+ return DaftTransform.iceberg_truncate(obj.parameters.width)
136
+
137
+ raise ValueError(f"Unsupported Transform: {obj}")
138
+
139
+
140
+ class DaftPartitionKeyMapper(ModelMapper[DaftPartitionField, PartitionKey]):
141
+ @staticmethod
142
+ def map(
143
+ obj: Optional[DaftPartitionField],
144
+ schema: Optional[DaftSchema] = None,
145
+ **kwargs,
146
+ ) -> Optional[PartitionKey]:
147
+ """Convert DaftPartitionField to PartitionKey.
148
+
149
+ Args:
150
+ obj: The DaftPartitionField to convert
151
+ schema: The Daft schema containing field information
152
+ **kwargs: Additional arguments
153
+
154
+ Returns:
155
+ Converted PartitionKey object
156
+ """
157
+ # Daft PartitionField only exposes 1 attribute `field` which is not enough
158
+ # to convert to DeltaCAT PartitionKey
159
+ # TODO: request Daft to expose more Python friendly interface for PartitionField
160
+ raise NotImplementedError(
161
+ f"Converting Daft PartitionField to DeltaCAT PartitionKey is not supported"
162
+ )
163
+
164
+ @staticmethod
165
+ def unmap(
166
+ obj: Optional[PartitionKey],
167
+ schema: Optional[Schema] = None,
168
+ **kwargs,
169
+ ) -> Optional[DaftPartitionField]:
170
+ """Convert PartitionKey to DaftPartitionField.
171
+
172
+ Args:
173
+ obj: The DeltaCAT PartitionKey to convert
174
+ schema: The Schema containing field information
175
+ **kwargs: Additional arguments
176
+
177
+ Returns:
178
+ Converted DaftPartitionField object
179
+ """
180
+ if obj is None:
181
+ return None
182
+ if obj.name is None:
183
+ raise ValueError("Name is required for PartitionKey conversion")
184
+ if not schema:
185
+ raise ValueError("Schema is required for PartitionKey conversion")
186
+ if len(obj.key) < 1:
187
+ raise ValueError(
188
+ f"At least 1 PartitionKey FieldLocator is expected, instead got {len(obj.key)}. FieldLocators: {obj.key}."
189
+ )
190
+
191
+ # Get the source field from schema - FieldLocator in PartitionKey.key points to the source field of partition field
192
+ dc_source_field = schema.field(obj.key[0]).arrow
193
+ daft_source_field = DaftFieldMapper.unmap(obj=dc_source_field)
194
+ # Convert transform if present
195
+ daft_transform = DaftTransformMapper.unmap(obj.transform)
196
+ daft_partition_field = DaftPartitionKeyMapper.get_daft_partition_field(
197
+ partition_field_name=obj.name,
198
+ daft_source_field=daft_source_field,
199
+ dc_transform=obj.transform,
200
+ )
201
+
202
+ # Create DaftPartitionField
203
+ return make_partition_field(
204
+ field=daft_partition_field,
205
+ source_field=daft_source_field,
206
+ transform=daft_transform,
207
+ )
208
+
209
+ @staticmethod
210
+ def get_daft_partition_field(
211
+ partition_field_name: str,
212
+ daft_source_field: Optional[DaftField],
213
+ # TODO: replace DeltaCAT transform with Daft Transform for uniformity
214
+ # We cannot use Daft Transform here because Daft Transform doesn't have a Python interface for us to
215
+ # access its attributes.
216
+ # TODO: request Daft to provide a more Python-friendly interface for Daft Transform
217
+ dc_transform: Optional[Transform],
218
+ ) -> DaftField:
219
+ """Generate Daft Partition Field given partition field name, source field and transform.
220
+ Partition field type is inferred using source field type and transform.
221
+
222
+ Args:
223
+ partition_field_name (str): the specified result field name
224
+ daft_source_field (DaftField): the source field of the partition field
225
+ dc_transform (Transform): DeltaCAT transform applied on the source field to create partition field
226
+
227
+ Returns:
228
+ DaftField: Daft Field representing the partition field
229
+ """
230
+ if daft_source_field is None:
231
+ raise ValueError("Source field is required for PartitionField conversion")
232
+ if dc_transform is None:
233
+ raise ValueError("Transform is required for PartitionField conversion")
234
+
235
+ result_type = None
236
+ # Below type conversion logic references Daft - Iceberg conversion logic:
237
+ # https://github.com/Eventual-Inc/Daft/blob/7f2e9b5fb50fdfe858be17572f132b37dd6e5ab2/daft/iceberg/iceberg_scan.py#L61-L85
238
+ if isinstance(dc_transform, IdentityTransform):
239
+ result_type = daft_source_field.dtype
240
+ elif isinstance(dc_transform, YearTransform):
241
+ result_type = DataType.int32()
242
+ elif isinstance(dc_transform, MonthTransform):
243
+ result_type = DataType.int32()
244
+ elif isinstance(dc_transform, DayTransform):
245
+ result_type = DataType.int32()
246
+ elif isinstance(dc_transform, HourTransform):
247
+ result_type = DataType.int32()
248
+ elif isinstance(dc_transform, BucketTransform):
249
+ result_type = DataType.int32()
250
+ elif isinstance(dc_transform, TruncateTransform):
251
+ result_type = daft_source_field.dtype
252
+ else:
253
+ raise ValueError(f"Unsupported transform: {dc_transform}")
254
+
255
+ return DaftField.create(
256
+ name=partition_field_name,
257
+ dtype=result_type,
258
+ )
@@ -1,11 +1,15 @@
1
1
  import os
2
2
  import logging
3
3
 
4
+ import uuid
4
5
  import daft
6
+ from pyiceberg.catalog import CatalogType
7
+
5
8
  import deltacat as dc
6
9
 
7
10
  from deltacat import logs
8
11
  from deltacat import IcebergCatalog
12
+ from deltacat.catalog.iceberg import IcebergCatalogConfig
9
13
  from deltacat.examples.common.fixtures import (
10
14
  store_cli_args_in_os_environ,
11
15
  )
@@ -30,6 +34,24 @@ driver_logger = logs.configure_application_logger(logging.getLogger(__name__))
30
34
 
31
35
 
32
36
  def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
37
+ """
38
+ This is an e2e example that
39
+ 1. creates a DeltaCAT Table (backed by an Iceberg Table) in Glue
40
+ 2. writes data into the DeltaCAT Table
41
+ 3. reads data from the DeltaCAT Table using Daft
42
+
43
+ To run the script:
44
+ 1. prepare an AWS Account
45
+ 1. prepare an S3 location where the data will be written to, which will be used in Step 3.
46
+ 2. prepare an IAM Role that has access to the S3 location and Glue
47
+ 2. retrieve the IAM Role AWS Credential and cache locally in ~/.aws/credentials
48
+ 3. run below command to execute the example
49
+ ```
50
+ make venv && source venv/bin/activate
51
+ python -m deltacat.examples.iceberg.iceberg_bucket_writer --warehouse=s3://<YOUR_S3_LOCATION>
52
+ ```
53
+
54
+ """
33
55
  # create any runtime environment required to run the example
34
56
  runtime_env = create_ray_runtime_environment()
35
57
 
@@ -38,6 +60,7 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
38
60
  # Only the `iceberg` data catalog is provided so it will become the default.
39
61
  # If initializing multiple catalogs, use the `default_catalog_name` param
40
62
  # to specify which catalog should be the default.
63
+
41
64
  dc.init(
42
65
  catalogs={
43
66
  # the name of the DeltaCAT catalog is "iceberg"
@@ -49,11 +72,13 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
49
72
  name="example-iceberg-catalog",
50
73
  # for additional properties see:
51
74
  # https://py.iceberg.apache.org/configuration/
52
- properties={
53
- "type": "glue",
54
- "region_name": "us-east-1",
55
- "warehouse": warehouse,
56
- },
75
+ config=IcebergCatalogConfig(
76
+ type=CatalogType.GLUE,
77
+ properties={
78
+ "warehouse": warehouse,
79
+ "region_name": "us-east-1",
80
+ },
81
+ ),
57
82
  )
58
83
  },
59
84
  # pass the runtime environment into ray.init()
@@ -89,10 +114,10 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
89
114
  }
90
115
  )
91
116
 
92
- # write to a table named `test_namespace.test_table_bucketed`
117
+ # write to a table named `test_namespace.test_table_bucketed-<SUFFIX>`
93
118
  # we don't need to specify which catalog to create this table in since
94
119
  # only the "iceberg" catalog is available
95
- table_name = "test_table_bucketed"
120
+ table_name = f"test_table_bucketed-{uuid.uuid4().hex[:8]}"
96
121
  namespace = "test_namespace"
97
122
  print(f"Creating Glue Table: {namespace}.{table_name}")
98
123
  dc.write_to_table(
@@ -106,9 +131,40 @@ def run(warehouse="s3://my-bucket/my/key/prefix", **kwargs):
106
131
  )
107
132
 
108
133
  print(f"Getting Glue Table: {namespace}.{table_name}")
109
- table_definition = dc.get_table(table_name, namespace)
134
+ table_definition = dc.get_table(name=table_name, namespace=namespace)
110
135
  print(f"Retrieved Glue Table: {table_definition}")
111
136
 
137
+ # Read Data from DeltaCAT Table (backed by Iceberg) using Daft
138
+ daft_dataframe = dc.read_table(table=table_name, namespace=namespace)
139
+
140
+ daft_dataframe.where(df["bid"] > 200.0).show()
141
+ # Expected result:
142
+ # ╭────────┬─────────┬─────────╮
143
+ # │ symbol ┆ bid ┆ ask │
144
+ # │ --- ┆ --- ┆ --- │
145
+ # │ Utf8 ┆ Float64 ┆ Float64 │
146
+ # ╞════════╪═════════╪═════════╡
147
+ # │ meta ┆ 392.03 ┆ 392.09 │
148
+ # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
149
+ # │ msft ┆ 403.25 ┆ 403.27 │
150
+ # ╰────────┴─────────┴─────────╯
151
+
152
+ daft_dataframe.select("symbol").show()
153
+ # Expected result:
154
+ # ╭────────╮
155
+ # │ symbol │
156
+ # │ --- │
157
+ # │ Utf8 │
158
+ # ╞════════╡
159
+ # │ meta │
160
+ # ├╌╌╌╌╌╌╌╌┤
161
+ # │ amzn │
162
+ # ├╌╌╌╌╌╌╌╌┤
163
+ # │ goog │
164
+ # ├╌╌╌╌╌╌╌╌┤
165
+ # │ msft │
166
+ # ╰────────╯
167
+
112
168
 
113
169
  if __name__ == "__main__":
114
170
  example_script_args = [
@@ -121,15 +177,6 @@ if __name__ == "__main__":
121
177
  "type": str,
122
178
  },
123
179
  ),
124
- (
125
- [
126
- "--STAGE",
127
- ],
128
- {
129
- "help": "Example runtime environment stage (e.g. dev, alpha, beta, prod).",
130
- "type": str,
131
- },
132
- ),
133
180
  ]
134
181
 
135
182
  # store any CLI args in the runtime environment
@@ -0,0 +1,4 @@
1
+ """Daft integration package for DeltaCAT.
2
+
3
+ This package provides integration between DeltaCAT and Daft.
4
+ """