deltacat 2.0.0.post2__tar.gz → 2.0.0.post3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (454)
  1. {deltacat-2.0.0.post2/deltacat.egg-info → deltacat-2.0.0.post3}/PKG-INFO +254 -53
  2. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/README.md +253 -52
  3. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/__init__.py +10 -3
  4. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/api.py +83 -15
  5. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/__init__.py +6 -0
  6. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/delegate.py +170 -3
  7. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/interface.py +35 -2
  8. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/main/impl.py +125 -97
  9. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/model/catalog.py +150 -35
  10. deltacat-2.0.0.post3/deltacat/catalog/model/properties.py +333 -0
  11. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/private/compaction_utils.py +8 -2
  12. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/steps/merge.py +9 -7
  13. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/converter_session.py +15 -10
  14. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +7 -5
  15. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/io.py +22 -3
  16. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/janitor.py +38 -15
  17. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/constants.py +11 -0
  18. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/bootstrap.py +3 -1
  19. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/explorer.py +0 -1
  20. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/utils/common.py +0 -1
  21. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +0 -1
  22. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/exceptions.py +15 -0
  23. deltacat-2.0.0.post3/deltacat/experimental/compatibility/backfill_transaction_partitions.py +513 -0
  24. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/converter_agent/table_monitor.py +2 -3
  25. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/daft/daft_catalog.py +1 -0
  26. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +7 -2
  27. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/interface.py +6 -7
  28. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/main/impl.py +209 -121
  29. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/delta.py +22 -8
  30. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/manifest.py +81 -9
  31. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/metafile.py +113 -30
  32. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/namespace.py +11 -3
  33. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/partition.py +19 -3
  34. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/stream.py +10 -3
  35. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/table.py +10 -3
  36. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/table_version.py +10 -3
  37. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/transaction.py +259 -108
  38. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/types.py +1 -0
  39. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +57 -6
  40. deltacat-2.0.0.post3/deltacat/tests/catalog/model/test_properties_transaction_migration.py +232 -0
  41. deltacat-2.0.0.post3/deltacat/tests/catalog/test_catalogs.py +651 -0
  42. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/test_default_catalog_impl.py +1184 -39
  43. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -18
  44. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/converter/test_convert_session.py +2 -2
  45. deltacat-2.0.0.post3/deltacat/tests/compute/converter/test_converter_commit_conflict_resolution.py +626 -0
  46. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_janitor.py +60 -38
  47. deltacat-2.0.0.post3/deltacat/tests/conftest.py +56 -0
  48. deltacat-2.0.0.post3/deltacat/tests/experimental/compatibility/test_backfill_transaction_partitions.py +477 -0
  49. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/main/test_main_storage.py +17 -8
  50. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_metafile_io.py +142 -18
  51. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_transaction_history.py +128 -68
  52. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_deltacat_api.py +334 -25
  53. deltacat-2.0.0.post3/deltacat/tests/utils/test_filesystem.py +3319 -0
  54. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/types/media.py +0 -4
  55. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/types/tables.py +111 -113
  56. deltacat-2.0.0.post3/deltacat/utils/filesystem.py +1590 -0
  57. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/url.py +89 -18
  58. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3/deltacat.egg-info}/PKG-INFO +254 -53
  59. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat.egg-info/SOURCES.txt +5 -2
  60. deltacat-2.0.0.post2/deltacat/catalog/model/properties.py +0 -155
  61. deltacat-2.0.0.post2/deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +0 -201
  62. deltacat-2.0.0.post2/deltacat/tests/catalog/test_catalogs.py +0 -321
  63. deltacat-2.0.0.post2/deltacat/tests/conftest.py +0 -25
  64. deltacat-2.0.0.post2/deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +0 -582
  65. deltacat-2.0.0.post2/deltacat/utils/filesystem.py +0 -450
  66. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/LICENSE +0 -0
  67. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/MANIFEST.in +0 -0
  68. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/annotations.py +0 -0
  69. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/aws/__init__.py +0 -0
  70. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/aws/clients.py +0 -0
  71. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/aws/constants.py +0 -0
  72. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/aws/s3u.py +0 -0
  73. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/__init__.py +0 -0
  74. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/benchmark_engine.py +0 -0
  75. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/benchmark_parquet_reads.py +0 -0
  76. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/benchmark_report.py +0 -0
  77. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/benchmark_suite.py +0 -0
  78. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/conftest.py +0 -0
  79. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/data/__init__.py +0 -0
  80. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/data/random_row_generator.py +0 -0
  81. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/data/row_generator.py +0 -0
  82. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/benchmarking/test_benchmark_pipeline.py +0 -0
  83. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/main/__init__.py +0 -0
  84. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/model/__init__.py +0 -0
  85. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/catalog/model/table_definition.py +0 -0
  86. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/__init__.py +0 -0
  87. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/__init__.py +0 -0
  88. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/compaction_session.py +0 -0
  89. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/__init__.py +0 -0
  90. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/compact_partition_params.py +0 -0
  91. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/compaction_session_audit_info.py +0 -0
  92. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/compactor_version.py +0 -0
  93. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/dedupe_result.py +0 -0
  94. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/delta_annotated.py +0 -0
  95. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/delta_file_envelope.py +0 -0
  96. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/delta_file_locator.py +0 -0
  97. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/hash_bucket_result.py +0 -0
  98. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/materialize_result.py +0 -0
  99. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/primary_key_index.py +0 -0
  100. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/pyarrow_write_result.py +0 -0
  101. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/repartition_result.py +0 -0
  102. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/round_completion_info.py +0 -0
  103. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/model/table_object_store.py +0 -0
  104. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/repartition_session.py +0 -0
  105. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/__init__.py +0 -0
  106. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/dedupe.py +0 -0
  107. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/hash_bucket.py +0 -0
  108. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/materialize.py +0 -0
  109. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/steps/repartition.py +0 -0
  110. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/__init__.py +0 -0
  111. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/io.py +0 -0
  112. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/primary_key_index.py +0 -0
  113. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/round_completion_reader.py +0 -0
  114. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/sort_key.py +0 -0
  115. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor/utils/system_columns.py +0 -0
  116. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/__init__.py +0 -0
  117. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/compaction_session.py +0 -0
  118. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/constants.py +0 -0
  119. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  120. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +0 -0
  121. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/delete_strategy.py +0 -0
  122. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +0 -0
  123. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/model.py +0 -0
  124. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/deletes/utils.py +0 -0
  125. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/__init__.py +0 -0
  126. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -0
  127. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/hash_bucket_input.py +0 -0
  128. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/hash_bucket_result.py +0 -0
  129. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/merge_file_group.py +0 -0
  130. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/merge_input.py +0 -0
  131. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/model/merge_result.py +0 -0
  132. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/private/__init__.py +0 -0
  133. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  134. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/steps/hash_bucket.py +0 -0
  135. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  136. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/content_type_params.py +0 -0
  137. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/dedupe.py +0 -0
  138. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/delta.py +0 -0
  139. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/io.py +0 -0
  140. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/merge.py +0 -0
  141. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/primary_key_index.py +0 -0
  142. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/compactor_v2/utils/task_options.py +0 -0
  143. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/__init__.py +0 -0
  144. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/constants.py +0 -0
  145. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/__init__.py +0 -0
  146. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/convert_input.py +0 -0
  147. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/convert_input_files.py +0 -0
  148. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/convert_result.py +0 -0
  149. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/model/converter_session_params.py +0 -0
  150. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  151. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/pyiceberg/catalog.py +0 -0
  152. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/pyiceberg/overrides.py +0 -0
  153. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/steps/__init__.py +0 -0
  154. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/steps/convert.py +0 -0
  155. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/steps/dedupe.py +0 -0
  156. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/__init__.py +0 -0
  157. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/convert_task_options.py +0 -0
  158. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/converter_session_utils.py +0 -0
  159. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/iceberg_columns.py +0 -0
  160. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/converter/utils/s3u.py +0 -0
  161. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/jobs/__init__.py +0 -0
  162. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/jobs/client.py +0 -0
  163. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/__init__.py +0 -0
  164. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/delta.py +0 -0
  165. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/manifest.py +0 -0
  166. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/model.py +0 -0
  167. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/resource_estimation/parquet.py +0 -0
  168. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/__init__.py +0 -0
  169. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/__init__.py +0 -0
  170. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/delta_column_stats.py +0 -0
  171. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/delta_stats.py +0 -0
  172. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/delta_stats_cache_result.py +0 -0
  173. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/manifest_entry_stats.py +0 -0
  174. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/models/stats_result.py +0 -0
  175. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/compute/stats/types.py +0 -0
  176. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/__init__.py +0 -0
  177. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/__init__.py +0 -0
  178. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/schema/__init__.py +0 -0
  179. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  180. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/schema/inference/generate_type_mappings.py +0 -0
  181. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +0 -0
  182. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/env.py +0 -0
  183. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/__init__.py +0 -0
  184. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/basic_logging.py +0 -0
  185. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/__init__.py +0 -0
  186. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/aws/__init__.py +0 -0
  187. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/compactor.py +0 -0
  188. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/gcp/__init__.py +0 -0
  189. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/job_runner.py +0 -0
  190. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/compactor/utils/__init__.py +0 -0
  191. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/__init__.py +0 -0
  192. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/__init__.py +0 -0
  193. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  194. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  195. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/app.py +0 -0
  196. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/main.py +0 -0
  197. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +0 -0
  198. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +0 -0
  199. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +0 -0
  200. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +0 -0
  201. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/experimental/iceberg/iceberg_reader.py +0 -0
  202. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/hello_world.py +0 -0
  203. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/__init__.py +0 -0
  204. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/aws/__init__.py +0 -0
  205. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/gcp/__init__.py +0 -0
  206. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/indexer.py +0 -0
  207. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/examples/indexer/job_runner.py +0 -0
  208. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/__init__.py +0 -0
  209. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/__init__.py +0 -0
  210. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/iceberg/__init__.py +0 -0
  211. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +0 -0
  212. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/iceberg/impl.py +0 -0
  213. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/catalog/iceberg/overrides.py +0 -0
  214. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/compatibility/__init__.py +0 -0
  215. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/converter_agent/__init__.py +0 -0
  216. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  217. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/converter_agent/beam/managed.py +0 -0
  218. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/daft/__init__.py +0 -0
  219. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/__init__.py +0 -0
  220. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/__init__.py +0 -0
  221. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +0 -0
  222. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/impl.py +0 -0
  223. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/model.py +0 -0
  224. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/iceberg/visitor.py +0 -0
  225. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/__init__.py +0 -0
  226. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  227. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/arrow/serializer.py +0 -0
  228. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/dataset.py +0 -0
  229. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/dataset_executor.py +0 -0
  230. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/feather/__init__.py +0 -0
  231. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/feather/file_reader.py +0 -0
  232. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/feather/serializer.py +0 -0
  233. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  234. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/file_provider.py +0 -0
  235. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/file_store.py +0 -0
  236. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/input_file.py +0 -0
  237. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/fs/output_file.py +0 -0
  238. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/logical_plan.py +0 -0
  239. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  240. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/delta.py +0 -0
  241. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/json_sst.py +0 -0
  242. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/sst.py +0 -0
  243. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +0 -0
  244. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/mvp/Table.py +0 -0
  245. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/mvp/__init__.py +0 -0
  246. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/parquet/__init__.py +0 -0
  247. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  248. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/parquet/file_reader.py +0 -0
  249. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/parquet/serializer.py +0 -0
  250. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  251. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/block_scanner.py +0 -0
  252. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/data_reader.py +0 -0
  253. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/data_scan.py +0 -0
  254. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/dataset_reader.py +0 -0
  255. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +0 -0
  256. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/query_expression.py +0 -0
  257. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +0 -0
  258. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  259. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/schema/datatype.py +0 -0
  260. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/schema/schema.py +0 -0
  261. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/serializer.py +0 -0
  262. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/serializer_factory.py +0 -0
  263. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  264. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/shard/range_shard.py +0 -0
  265. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  266. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/writer/dataset_writer.py +0 -0
  267. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +0 -0
  268. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/__init__.py +0 -0
  269. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/dataset/__init__.py +0 -0
  270. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/dataset/deltacat_dataset.py +0 -0
  271. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/datasink/__init__.py +0 -0
  272. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/datasink/deltacat_datasink.py +0 -0
  273. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/datasource/__init__.py +0 -0
  274. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/datasource/deltacat_datasource.py +0 -0
  275. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/file_object_store.py +0 -0
  276. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/memcached_object_store.py +0 -0
  277. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/object_store.py +0 -0
  278. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/ray_plasma_object_store.py +0 -0
  279. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/reader/__init__.py +0 -0
  280. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/reader/deltacat_read_api.py +0 -0
  281. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/redis_object_store.py +0 -0
  282. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/io/s3_object_store.py +0 -0
  283. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/logs.py +0 -0
  284. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/__init__.py +0 -0
  285. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/main/__init__.py +0 -0
  286. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/__init__.py +0 -0
  287. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/expression/__init__.py +0 -0
  288. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/expression/expression.py +0 -0
  289. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/expression/visitor.py +0 -0
  290. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/interop.py +0 -0
  291. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/list_result.py +0 -0
  292. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/locator.py +0 -0
  293. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/scan/__init__.py +0 -0
  294. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/scan/push_down.py +0 -0
  295. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/scan/scan_plan.py +0 -0
  296. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/scan/scan_task.py +0 -0
  297. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/schema.py +0 -0
  298. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/shard.py +0 -0
  299. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/sort_key.py +0 -0
  300. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/model/transform.py +0 -0
  301. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/util/__init__.py +0 -0
  302. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/storage/util/scan_planner.py +0 -0
  303. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/__init__.py +0 -0
  304. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/__init__.py +0 -0
  305. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/reader/__init__.py +0 -0
  306. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  307. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_cloudpickle_bug_fix.py +0 -0
  308. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_file_object_store.py +0 -0
  309. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_memcached_object_store.py +0 -0
  310. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_ray_plasma_object_store.py +0 -0
  311. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_redis_object_store.py +0 -0
  312. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/_io/test_s3_object_store.py +0 -0
  313. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/aws/__init__.py +0 -0
  314. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/aws/test_clients.py +0 -0
  315. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/aws/test_s3u.py +0 -0
  316. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/__init__.py +0 -0
  317. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/data/__init__.py +0 -0
  318. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/main/__init__.py +0 -0
  319. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +0 -0
  320. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/model/__init__.py +0 -0
  321. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/catalog/model/test_table_definition.py +0 -0
  322. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/__init__.py +0 -0
  323. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +0 -0
  324. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compact_partition_rebase_test_cases.py +0 -0
  325. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +0 -0
  326. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compact_partition_test_cases.py +0 -0
  327. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/__init__.py +0 -0
  328. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/steps/__init__.py +0 -0
  329. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/steps/test_repartition.py +0 -0
  330. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/utils/__init__.py +0 -0
  331. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/utils/test_io.py +0 -0
  332. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +0 -0
  333. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/__init__.py +0 -0
  334. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/test_hashlib.py +0 -0
  335. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/utils/__init__.py +0 -0
  336. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -0
  337. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -0
  338. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -0
  339. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/conftest.py +0 -0
  340. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/converter/__init__.py +0 -0
  341. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/converter/conftest.py +0 -0
  342. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/converter/utils.py +0 -0
  343. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/resource_estimation/__init__.py +0 -0
  344. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/resource_estimation/data/__init__.py +0 -0
  345. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/resource_estimation/test_delta.py +0 -0
  346. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/resource_estimation/test_manifest.py +0 -0
  347. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_incremental.py +0 -0
  348. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_multiple_rounds.py +0 -0
  349. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_params.py +0 -0
  350. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_rebase.py +0 -0
  351. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +0 -0
  352. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_util_common.py +0 -0
  353. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/compute/test_util_constant.py +0 -0
  354. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/daft/__init__.py +0 -0
  355. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/daft/test_model.py +0 -0
  356. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/__init__.py +0 -0
  357. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/catalog/__init__.py +0 -0
  358. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  359. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +0 -0
  360. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/compatibility/__init__.py +0 -0
  361. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/daft/__init__.py +0 -0
  362. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +0 -0
  363. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/__init__.py +0 -0
  364. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  365. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/conftest.py +0 -0
  366. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  367. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +0 -0
  368. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  369. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +0 -0
  370. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +0 -0
  371. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +0 -0
  372. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  373. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +0 -0
  374. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  375. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +0 -0
  376. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/test_dataset.py +0 -0
  377. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/test_manifest.py +0 -0
  378. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +0 -0
  379. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/test_utils.py +0 -0
  380. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  381. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +0 -0
  382. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +0 -0
  383. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +0 -0
  384. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/__init__.py +0 -0
  385. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/main/__init__.py +0 -0
  386. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/__init__.py +0 -0
  387. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_delete_parameters.py +0 -0
  388. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_expression.py +0 -0
  389. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_manifest.py +0 -0
  390. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_partition_scheme.py +0 -0
  391. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_schema.py +0 -0
  392. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_schema_update.py +0 -0
  393. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_shard.py +0 -0
  394. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_sort_scheme.py +0 -0
  395. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_table_version.py +0 -0
  396. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/storage/model/test_transaction.py +0 -0
  397. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_exceptions.py +0 -0
  398. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_logs.py +0 -0
  399. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/__init__.py +0 -0
  400. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/constants.py +0 -0
  401. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/filesystem.py +0 -0
  402. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/message_pack_utils.py +0 -0
  403. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/pyarrow.py +0 -0
  404. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/storage.py +0 -0
  405. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/test_utils/utils.py +0 -0
  406. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/types/__init__.py +0 -0
  407. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/types/test_tables.py +0 -0
  408. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/__init__.py +0 -0
  409. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/data/__init__.py +0 -0
  410. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/exceptions.py +0 -0
  411. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/main_deltacat_storage_mock.py +0 -0
  412. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/ray_utils/__init__.py +0 -0
  413. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/ray_utils/test_concurrency.py +0 -0
  414. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/ray_utils/test_dataset.py +0 -0
  415. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_cloudpickle.py +0 -0
  416. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_daft.py +0 -0
  417. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_metrics.py +0 -0
  418. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_numpy.py +0 -0
  419. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_pandas.py +0 -0
  420. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_placement.py +0 -0
  421. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_polars.py +0 -0
  422. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_pyarrow.py +0 -0
  423. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_record_batch_tables.py +0 -0
  424. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/tests/utils/test_resources.py +0 -0
  425. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/types/__init__.py +0 -0
  426. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/types/partial_download.py +0 -0
  427. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/__init__.py +0 -0
  428. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/arguments.py +0 -0
  429. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/cloudpickle.py +0 -0
  430. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/common.py +0 -0
  431. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/daft.py +0 -0
  432. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/export.py +0 -0
  433. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/metafile_locator.py +0 -0
  434. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/metrics.py +0 -0
  435. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/numpy.py +0 -0
  436. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/pandas.py +0 -0
  437. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/performance.py +0 -0
  438. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/placement.py +0 -0
  439. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/polars.py +0 -0
  440. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/pyarrow.py +0 -0
  441. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/__init__.py +0 -0
  442. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/collections.py +0 -0
  443. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/concurrency.py +0 -0
  444. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/dataset.py +0 -0
  445. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/performance.py +0 -0
  446. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/ray_utils/runtime.py +0 -0
  447. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/reader_compatibility_mapping.py +0 -0
  448. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/resources.py +0 -0
  449. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat/utils/schema.py +0 -0
  450. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat.egg-info/dependency_links.txt +0 -0
  451. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat.egg-info/requires.txt +0 -0
  452. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/deltacat.egg-info/top_level.txt +0 -0
  453. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/setup.cfg +0 -0
  454. {deltacat-2.0.0.post2 → deltacat-2.0.0.post3}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deltacat
3
- Version: 2.0.0.post2
3
+ Version: 2.0.0.post3
4
4
  Summary: DeltaCAT is a portable Pythonic Data Lakehouse powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -53,24 +53,20 @@ Dynamic: summary
53
53
  <img src="https://github.com/ray-project/deltacat/raw/2.0/media/deltacat-logo-alpha-750.png" alt="deltacat logo" style="width:55%; height:auto; text-align: center;">
54
54
  </p>
55
55
 
56
- DeltaCAT is a portable Pythonic Data Lakehouse powered by [Ray](https://github.com/ray-project/ray). It lets you define and manage
57
- fast, scalable, ACID-compliant multimodal data lakes, and has been used to [successfully manage exabyte-scale enterprise
58
- data lakes](https://aws.amazon.com/blogs/opensource/amazons-exabyte-scale-migration-from-apache-spark-to-ray-on-amazon-ec2/).
56
+ DeltaCAT is a portable Multimodal Lakehouse powered by [Ray](https://github.com/ray-project/ray), [Apache Arrow](https://github.com/apache/arrow), and [Daft](https://github.com/Eventual-Inc/Daft). It lets you create ACID-compliant multimodal data lakes [that efficiently scale to manage exabytes of production data](https://aws.amazon.com/blogs/opensource/amazons-exabyte-scale-migration-from-apache-spark-to-ray-on-amazon-ec2/).
59
57
 
60
- It provides data lake level transactions & time travel, fast schema evolution for feature enrichment, zero-copy multimodal file processing, schemaless dataset management, and transparent dataset optimization. It runs locally for rapid development or in the cloud for production workloads.
58
+ It provides data lake level transactions & time travel, zero-copy schema evolution, zero-copy multimodal file processing (image, audio, video, text, etc.), and transparent dataset optimization. It runs locally for rapid development or in the cloud for production workloads. It runs on any filesystem for easy setup and sharing - no external catalog services, lock managers, or key value stores required.
61
59
 
62
- It uses the Ray distributed compute framework together with [Apache Arrow](https://github.com/apache/arrow) and
63
- [Daft](https://github.com/Eventual-Inc/Daft) to efficiently scale common table management tasks, like petabyte-scale
64
- merge-on-read and copy-on-write operations.
60
+
61
+ ## Overview
65
62
 
66
63
  DeltaCAT provides the following high-level components:
67
- 1. [**Catalog**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/catalog/interface.py): High-level APIs to create, discover, organize, share, and manage datasets.
68
- 2. [**Compute**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/compute/): Distributed data management procedures to read, write, and optimize datasets.
69
- 3. [**Storage**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/storage/): In-memory and on-disk multimodal dataset formats.
64
+ 1. [**Catalog**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/catalog/interface.py): Pythonic APIs to discover, read, write, and manage datasets.
65
+ 2. [**Compute**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/compute/): Distributed data management procedures that automatically optimize your datasets.
66
+ 3. [**Storage**](https://github.com/ray-project/deltacat/tree/2.0/deltacat/storage/): A portable multimodal data lake format useable with any filesystem.
70
67
  4. **Sync** (in development): Synchronize DeltaCAT datasets to data warehouses and other table formats.
71
68
 
72
- ## Overview
73
- DeltaCAT's **Catalog**, **Compute**, and **Storage** layers work together to bring ACID-compliant data management to any Ray application. These components automate data indexing, change management, dataset read/write optimization, schema evolution, and other common data management tasks across any set of data files readable by Ray Data, Daft, Pandas, Polars, PyArrow, or NumPy.
69
+ DeltaCAT's **Catalog**, **Compute**, and **Storage** layers work together to bring ACID-compliant data management to any Ray application. These components automate data indexing, change management, dataset read/write optimization, schema evolution, and other common data management tasks across any set of data files readable by [Pandas](https://github.com/pandas-dev/pandas), [NumPy](https://github.com/numpy/numpy), [Polars](https://github.com/pola-rs/polars), [PyArrow](https://arrow.apache.org/docs/python/index.html), [Ray Data](https://docs.ray.io/en/latest/data/data.html), and [Daft](https://docs.daft.ai/en/stable/api/dataframe/).
74
70
 
75
71
  <p align="center">
76
72
  <img src="https://github.com/ray-project/deltacat/raw/2.0/media/deltacat-tech-overview.png" alt="deltacat tech overview" style="width:100%; height:auto; text-align: center;">
@@ -81,7 +77,8 @@ Data consumers that prefer to stay within the ecosystem of Pythonic data managem
81
77
  ## Getting Started
82
78
  DeltaCAT applications run anywhere that Ray runs, including your local laptop, cloud computing cluster, or on-premise cluster.
83
79
 
84
- DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of data files. A **Catalog** can be thought of as a named data lake containing a set of **Tables**. It provides a root location (e.g., a local file path or S3 Bucket) to store table information, and can be rooted in any [PyArrow-compatible Filesystem](https://arrow.apache.org/docs/python/filesystems.html). **Tables** can be created, read, and written using the `dc.write` and `dc.read` APIs.
80
+ DeltaCAT lets you manage **Tables** across one or more **Catalogs**. A **Table** can be thought of as a named collection of data files. A **Catalog** can be thought of as a named data lake that contains a set of **Tables**. A **Catalog** provides a root location (e.g., a local file path or S3 Bucket) to store information about all your **Tables**, and can be rooted in any [PyArrow-compatible Filesystem](https://arrow.apache.org/docs/python/filesystems.html). **Tables** can be created, read, and written using the `dc.write` and `dc.read` APIs.
81
+
85
82
 
86
83
  ### Quick Start
87
84
 
@@ -114,7 +111,7 @@ dc.write(data, "users")
114
111
  daft_df = dc.read("users") # Returns Daft DataFrame (default)
115
112
  daft_df.show() # Materialize and print the DataFrame
116
113
 
117
- # Append more data and add a new column.
114
+ # Add more data and add a new column.
118
115
  # Compaction and zero-copy schema evolution are handled automatically.
119
116
  data = pd.DataFrame({
120
117
  "id": [4, 5, 6],
@@ -131,13 +128,13 @@ daft_df.select("name", "age", "city").show()
131
128
  ```
132
129
 
133
130
  ### Core Concepts
134
- DeltaCAT can do much more than just append data to tables and read it back again. Expand the sections below to see examples of other core DeltaCAT concepts and APIs.
131
+ DeltaCAT can do much more than just add data to tables and read it back again. Expand the sections below to see examples of other core DeltaCAT concepts and APIs.
135
132
 
136
133
  <details>
137
134
 
138
135
  <summary><span style="font-size: 1.25em; font-weight: bold;">Idempotent Writes</span></summary>
139
136
 
140
- If you run the quick start example repeatedly from the same working directory, you'll notice that the table it writes to just keeps growing larger. This is because DeltaCAT always **appends** table data by default. One way to prevent this perpetual table growth and make the example idempotent is to use the **REPLACE** write mode if the table already exists:
137
+ If you run the quick start example repeatedly from the same working directory, you'll notice that the table it writes to just keeps growing larger. This is because DeltaCAT always **adds** table data by default. One way to prevent this perpetual table growth and make the example idempotent is to use the **REPLACE** write mode if the table already exists:
141
138
 
142
139
  ```python
143
140
  import deltacat as dc
@@ -171,7 +168,7 @@ dc.write(data, "users", mode=write_mode)
171
168
  daft_df = dc.read("users") # Returns Daft DataFrame (default)
172
169
  daft_df.show() # Materialize and print the DataFrame
173
170
 
174
- # Explicitly append more data and add a new column.
171
+ # Explicitly add more data and add a new column.
175
172
  # Compaction and schema evolution are handled automatically.
176
173
  data = pd.DataFrame({
177
174
  "id": [4, 5, 6],
@@ -179,7 +176,7 @@ data = pd.DataFrame({
179
176
  "age": [2, 12, 4],
180
177
  "city": ["Hollywood", "Gloucester", "San Francisco"]
181
178
  })
182
- dc.write(data, "users", mode=dc.TableWriteMode.APPEND)
179
+ dc.write(data, "users", mode=dc.TableWriteMode.ADD)
183
180
 
184
181
  # Read the full table back into a Daft DataFrame.
185
182
  daft_df = dc.read("users")
@@ -223,7 +220,7 @@ dc.write(data, "users", mode=dc.TableWriteMode.CREATE)
223
220
  daft_df = dc.read("users") # Returns Daft DataFrame (default)
224
221
  daft_df.show() # Materialize and print the DataFrame
225
222
 
226
- # Explicitly append more data and add a new column.
223
+ # Explicitly add more data and add a new column.
227
224
  # Compaction and schema evolution are handled automatically.
228
225
  data = pd.DataFrame({
229
226
  "id": [4, 5, 6],
@@ -231,7 +228,7 @@ data = pd.DataFrame({
231
228
  "age": [2, 12, 4],
232
229
  "city": ["Hollywood", "Gloucester", "San Francisco"]
233
230
  })
234
- dc.write(data, "users", mode=dc.TableWriteMode.APPEND)
231
+ dc.write(data, "users", mode=dc.TableWriteMode.ADD)
235
232
 
236
233
  # Read the full table back into a Daft DataFrame.
237
234
  daft_df = dc.read("users")
@@ -243,9 +240,117 @@ assert dc.dataset_length(daft_df) == 6
243
240
 
244
241
  </details>
245
242
 
243
+
246
244
  <details>
247
245
 
248
- <summary><span style="font-size: 1.25em; font-weight: bold;">Multi-Format Data Processing</span></summary>
246
+ <summary><span style="font-size: 1.25em; font-weight: bold;">Ordered Writes</span></summary>
247
+ DeltaCAT writes are unordered by default, which means that the order in which data is read back isn't guaranteed to match the order in which it was written. While this is useful for preventing conflicts between concurrent writers, you can also use the **APPEND** write mode to preserve write order and raise explicit concurrency conflicts between parallel writers:
248
+
249
+ ```python
250
+ import deltacat as dc
251
+ import pandas as pd
252
+
253
+ # Initialize DeltaCAT with a default local catalog.
254
+ # Ray will be initialized automatically.
255
+ # Catalog files will be stored in .deltacat/ in the current working directory.
256
+ dc.init_local()
257
+
258
+ # Create data to write.
259
+ data = pd.DataFrame({
260
+ "id": [1, 2],
261
+ "name": ["Cheshire", "Dinah"],
262
+ "age": [3, 7]
263
+ })
264
+
265
+ # Derive a DeltaCAT schema for the data.
266
+ schema = dc.Schema.of(dc.dataset_schema(data))
267
+
268
+ # Create an empty table to hold ordered user data.
269
+ if not dc.table_exists("users_ordered"):
270
+ dc.create_table("users_ordered", schema=schema)
271
+
272
+ # Write the first ordered delta to the table.
273
+ dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
274
+
275
+ # Write the second ordered delta to the table.
276
+ data = pd.DataFrame({
277
+ "id": [3, 4],
278
+ "name": ["Felix", "Tom"],
279
+ "age": [2, 12],
280
+ "city": ["Hollywood", "Gloucester"]
281
+ })
282
+ dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
283
+
284
+ # Write the third ordered delta to the table.
285
+ data = pd.DataFrame({
286
+ "id": [5, 6],
287
+ "name": ["Simpkin", "Delta"],
288
+ "age": [12, 4],
289
+ "city": ["San Francisco", "San Francisco"]
290
+ })
291
+ dc.write(data, "users_ordered", mode=dc.TableWriteMode.APPEND)
292
+
293
+ # Read the data back as a Pandas DataFrame, and ensure that the
294
+ # order of the records returned matches the order they were written.
295
+ pandas_df = dc.read("users_ordered", read_as=dc.DatasetType.PANDAS)
296
+ print(pandas_df)
297
+ ```
298
+
299
+ </details>
300
+
301
+ <details>
302
+
303
+ <summary><span style="font-size: 1.25em; font-weight: bold;">Schemaless Tables</span></summary>
304
+ Tables created automatically via `dc.write` have a schema inferred from the data written by default. However, if you create an empty table without providing a schema, it defaults to schemaless. Writes to schemaless tables are more efficient and flexible, since they simply track the location and basic metadata associated with the data files written to the table. However, if you know that a unified schema can be derived for your schemaless data, then you can still read it back as a structured dataset:
305
+
306
+ ```python
307
+ import deltacat as dc
308
+ import pandas as pd
309
+
310
+ # Initialize DeltaCAT with a default local catalog.
311
+ # Ray will be initialized automatically.
312
+ # Catalog files will be stored in .deltacat/ in the current working directory.
313
+ dc.init_local()
314
+
315
+ # Create data to write.
316
+ data = pd.DataFrame({
317
+ "id": [1, 2],
318
+ "name": ["Cheshire", "Dinah"],
319
+ "age": [3, 7]
320
+ })
321
+
322
+ # Create an empty schemaless table to hold ordered user data.
323
+ if not dc.table_exists("users_schemaless"):
324
+ dc.create_table("users_schemaless")
325
+
326
+ # Write the first ordered delta to the table.
327
+ dc.write(data, "users_schemaless", mode=dc.TableWriteMode.APPEND)
328
+
329
+ # Write the second ordered delta to the table.
330
+ data = pd.DataFrame({
331
+ "id": [3, 4],
332
+ "name": ["Felix", "Tom"],
333
+ "age": [2, 12],
334
+ "city": ["Hollywood", "Gloucester"]
335
+ })
336
+ dc.write(data, "users_schemaless", mode=dc.TableWriteMode.APPEND)
337
+
338
+ # Read back the file manifest of the schemaless table.
339
+ # Notice that file paths, sizes, etc. are returned instead of the dataframes written.
340
+ manifest_df = dc.read("users_schemaless", read_as=dc.DatasetType.PANDAS)
341
+ print(manifest_df)
342
+
343
+ # Use from_manifest_table to convert the manifest table to a structured dataset.
344
+ structured_daft_df = dc.from_manifest_table(manifest_df)
345
+ structured_daft_df.show()
346
+ ```
347
+
348
+ </details>
349
+
350
+
351
+ <details>
352
+
353
+ <summary><span style="font-size: 1.25em; font-weight: bold;">Working Across Dataset and File Types</span></summary>
249
354
 
250
355
  DeltaCAT natively supports a variety of open dataset and file formats already integrated with Ray and Arrow. You can use `dc.read` to read tables back as a Daft DataFrame, Ray Dataset, Pandas DataFrame, PyArrow Table, Polars DataFrame, NumPy Array, or list of PyArrow ParquetFile objects:
251
356
 
@@ -600,6 +705,10 @@ order_data = pd.DataFrame({
600
705
  "product_id": [101, 102, 103],
601
706
  "quantity": [2, 1, 2]
602
707
  })
708
+ # Create identity, inventory, and sales namespaces
709
+ dc.create_namespace("identity")
710
+ dc.create_namespace("inventory")
711
+ dc.create_namespace("sales")
603
712
 
604
713
  # Write tables to different namespaces to organize them by domain
605
714
  dc.write(user_data, "users", namespace="identity")
@@ -625,7 +734,10 @@ finance_users = pd.DataFrame({
625
734
  "preferred_payment_method": ["credit", "cash", "paypal"]
626
735
  })
627
736
 
737
+ dc.create_namespace("marketing")
628
738
  dc.write(marketing_users, "users", namespace="marketing")
739
+
740
+ dc.create_namespace("finance")
629
741
  dc.write(finance_users, "users", namespace="finance")
630
742
 
631
743
  # Each namespace maintains its own "users" table with different schemas
@@ -671,6 +783,7 @@ product_data = pd.DataFrame({
671
783
  })
672
784
 
673
785
  # The product catalog can be created independently.
786
+ dc.create_namespace("inventory")
674
787
  dc.write(product_data, "catalog", namespace="inventory")
675
788
 
676
789
  print(f"\n=== Initial Product Data ===")
@@ -697,7 +810,9 @@ finance_schema = dc.Schema.of([
697
810
  # Create user identities and user finance data within a single transaction.
698
811
  # Since transactions are atomic, this prevents accounting discrepancies.
699
812
  with dc.transaction():
813
+ dc.create_namespace("identity")
700
814
  dc.write(user_data, "users", namespace="identity")
815
+ dc.create_namespace("finance")
701
816
  dc.write(initial_finance, "users", namespace="finance", schema=finance_schema)
702
817
 
703
818
  print(f"\n=== Initial User Data ===")
@@ -716,6 +831,7 @@ new_orders = pd.DataFrame({
716
831
  # Process new orders and update lifetime payment totals within a single transaction.
717
832
  with dc.transaction():
718
833
  # Step 1: Write the new orders
834
+ dc.create_namespace("sales")
719
835
  dc.write(new_orders, "transactions", namespace="sales")
720
836
 
721
837
  # Step 2: Read back transactions and products to compute actual totals
@@ -731,6 +847,7 @@ with dc.transaction():
731
847
  finance_updates.columns = ["user_id", "lifetime_payments"]
732
848
 
733
849
  # Step 4: Write the computed totals
850
+ dc.create_namespace("finance")
734
851
  dc.write(finance_updates, "users", namespace="finance", mode=dc.TableWriteMode.MERGE)
735
852
 
736
853
  # Verify that orders and lifetime payments are kept in sync.
@@ -760,16 +877,14 @@ import tempfile
760
877
  from decimal import Decimal
761
878
 
762
879
  # Initialize catalogs with separate names and catalog roots.
763
- dc.init(catalogs={
764
- "staging": dc.Catalog(config=dc.CatalogProperties(
765
- root=tempfile.mkdtemp(), # Use temporary directory for staging
766
- filesystem=pa.fs.LocalFileSystem()
767
- )),
768
- "prod": dc.Catalog(config=dc.CatalogProperties(
769
- root="s3://example/deltacat/", # Use S3 for prod
770
- filesystem=pa.fs.S3FileSystem()
771
- ))
772
- })
880
+ dc.init(
881
+ catalogs={
882
+ # Use temporary directory for staging
883
+ "staging": dc.Catalog(dc.CatalogProperties(tempfile.mkdtemp())),
884
+ # Use S3 for prod
885
+ "prod": dc.Catalog(dc.CatalogProperties("s3://example/deltacat"))
886
+ }
887
+ )
773
888
 
774
889
  # Create a PyArrow table with decimal256 data
775
890
  decimal_table = pa.table({
@@ -817,6 +932,92 @@ print(dc.read("financial_data", catalog="prod", read_as=dc.DatasetType.PANDAS))
817
932
 
818
933
  </details>
819
934
 
935
+ <details>
936
+
937
+ <summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Sharing & Portability</span></summary>
938
+
939
+ DeltaCAT catalogs are self-contained directories on a filesystem, so you can easily share your data lake with others. A local catalog on your laptop can be compressed and sent anywhere. A cloud catalog in S3, GCS, or Azure Blob Storage can be shared via URL. The read/write permissions of your catalog are the read/write permissions of your filesystem.
940
+
941
+ For example, you can zip up your local catalog and upload it to S3 via:
942
+ ```bash
943
+ # zip a local catalog
944
+ zip -r catalog.zip .deltacat/
945
+
946
+ # copy the catalog to a cloud bucket
947
+ aws s3 cp catalog.zip s3://my-bucket/catalog.zip
948
+ ```
949
+
950
+ The person you shared it with can retrieve and decompress it via:
951
+ ```bash
952
+ # copy the cloud catalog to local disk
953
+ aws s3 cp s3://my-bucket/catalog.zip .
954
+
955
+ # unzip the catalog to a local directory
956
+ unzip catalog.zip -d .deltacat_copy/
957
+ ```
958
+
959
+ And then initialize it together with any other catalogs they're working with:
960
+ ```python
961
+ import deltacat as dc
962
+
963
+ # Initialize catalogs with separate names and catalog roots.
964
+ dc.init(
965
+ catalogs={
966
+ "original": dc.Catalog(dc.CatalogProperties(".deltacat")),
967
+ "copy": dc.Catalog(dc.CatalogProperties(".deltacat_copy")),
968
+ "prod_aws": dc.Catalog(dc.CatalogProperties("s3://prod/deltacat")),
969
+ "prod_gcp": dc.Catalog(dc.CatalogProperties("gs://prod/deltacat")),
970
+ "prod_azure": dc.Catalog(dc.CatalogProperties("az://prod/deltacat")),
971
+ }
972
+ )
973
+
974
+ # List all namespaces in the original catalog
975
+ namespaces = dc.list("dc://original")
976
+ print([namespace.name for namespace in namespaces])
977
+
978
+ # List all namespaces in the copy catalog
979
+ namespaces = dc.list("dc://copy")
980
+ print([namespace.name for namespace in namespaces])
981
+
982
+ # List all tables in the default namespace of the original catalog
983
+ tables = dc.list("dc://original/default")
984
+ print([table.name for table in tables])
985
+
986
+ # List all tables in the default namespace of the copy catalog
987
+ tables = dc.list("dc://copy/default")
988
+ print([table.name for table in tables])
989
+ ```
990
+
991
+ `dc.copy` can also be used to copy namespaces and tables between catalogs:
992
+ ```python
993
+ # Copy the "default" namespace from the original local catalog over to the "myspace" namespace in the copy catalog
994
+ dc.copy("dc://original/default", "dc://copy/default/myspace")
995
+
996
+ # By default, no tables are copied from the source namespace to the destination
997
+ tables = dc.list("dc://copy/myspace")
998
+ print(f"{len(tables)} tables in myspace.")
999
+
1000
+ # Copy the "users" table from the original local catalog over to "local_users" in the prod_aws catalog
1001
+ dc.copy("dc://original/default/users", "dc://prod_aws/default/local_users")
1002
+
1003
+ # Read the copied table back
1004
+ df = dc.read("local_users", catalog="prod_aws")
1005
+ df.show()
1006
+
1007
+ # We can also copy all tables in the default namespace using **
1008
+ dc.copy("dc://original/default/**", "dc://copy/default/myspace")
1009
+ tables = dc.list("dc://copy/myspace")
1010
+ print(f"{len(tables)} tables in myspace.")
1011
+
1012
+ # Or we can copy all namespaces from the original catalog using *
1013
+ dc.copy("dc://original/*", "dc://copy")
1014
+ namespaces = dc.list("dc://copy")
1015
+ print([namespace.name for namespace in namespaces])
1016
+ ```
1017
+
1018
+ </details>
1019
+
1020
+
820
1021
  <details>
821
1022
 
822
1023
  <summary><span style="font-size: 1.25em; font-weight: bold;">Data Lake Level Time Travel</span></summary>
@@ -858,10 +1059,10 @@ initial_finance = pd.DataFrame({
858
1059
 
859
1060
  # Write initial state atomically with a commit message
860
1061
  with dc.transaction(commit_message="Initial data load: users, products, orders, and finance"):
861
- dc.write(initial_users, "users", namespace="identity")
862
- dc.write(initial_products, "catalog", namespace="inventory")
863
- dc.write(initial_orders, "transactions", namespace="sales")
864
- dc.write(initial_finance, "users", namespace="finance")
1062
+ dc.write(initial_users, "users", namespace="identity", auto_create_namespace=True)
1063
+ dc.write(initial_products, "catalog", namespace="inventory", auto_create_namespace=True)
1064
+ dc.write(initial_orders, "transactions", namespace="sales", auto_create_namespace=True)
1065
+ dc.write(initial_finance, "users", namespace="finance", auto_create_namespace=True)
865
1066
 
866
1067
  # Sleep briefly to ensure transaction timestamp separation
867
1068
  time.sleep(0.1)
@@ -1077,7 +1278,7 @@ daft_docs = daft_docs.with_column("content", daft_docs["path"].url.download().de
1077
1278
  # Capture basic feedback sentiment analysis in a parallel multi-table transaction
1078
1279
  with dc.transaction():
1079
1280
  # Write the full customer feedback to a new "documents" table.
1080
- dc.write(daft_docs, "documents", namespace="analysis")
1281
+ dc.write(daft_docs, "documents")
1081
1282
 
1082
1283
  # Define a UDF to analyze customer feedback sentiment.
1083
1284
  @daft.udf(return_dtype=daft.DataType.struct({
@@ -1114,14 +1315,14 @@ with dc.transaction():
1114
1315
  dc.Field.of(pa.field("confidence", pa.float64())),
1115
1316
  dc.Field.of(pa.field("model_version", pa.large_string())),
1116
1317
  ])
1117
- dc.write(daft_results, "insights", namespace="analysis", schema=initial_schema)
1318
+ dc.write(daft_results, "insights", schema=initial_schema)
1118
1319
 
1119
1320
  # Write to a new audit trail table.
1120
1321
  audit_df = pd.DataFrame([{
1121
1322
  "version": "v1.0",
1122
1323
  "docs_processed": dc.dataset_length(daft_docs),
1123
1324
  }])
1124
- dc.write(audit_df, "audit", namespace="analysis")
1325
+ dc.write(audit_df, "audit")
1125
1326
 
1126
1327
  print("=== V1.0: Customer feedback sentiment analysis processing complete! ===")
1127
1328
 
@@ -1162,9 +1363,9 @@ with dc.transaction():
1162
1363
  )
1163
1364
 
1164
1365
  # Merge new V2.0 insights into the existing V1.0 insights table.
1165
- dc.write(daft_emotions, "insights", namespace="analysis")
1366
+ dc.write(daft_emotions, "insights")
1166
1367
  audit_df = pd.DataFrame([{"version": "v2.0", "docs_processed": dc.dataset_length(daft_docs)}])
1167
- dc.write(audit_df, "audit", namespace="analysis")
1368
+ dc.write(audit_df, "audit")
1168
1369
 
1169
1370
  print("=== V2.0: Customer feedback emotion analysis processing complete! ===")
1170
1371
 
@@ -1176,7 +1377,7 @@ time.sleep(0.1)
1176
1377
  # Generate customer service responses based on emotion analysis results.
1177
1378
  with dc.transaction():
1178
1379
  # First, read the current insights table with emotion analysis
1179
- current_insights = dc.read("insights", namespace="analysis")
1380
+ current_insights = dc.read("insights")
1180
1381
 
1181
1382
  # Define a UDF to generate customer service responses based on analysis results.
1182
1383
  @daft.udf(return_dtype=daft.DataType.struct({
@@ -1223,39 +1424,39 @@ with dc.transaction():
1223
1424
  )
1224
1425
  # Merge new V3.0 responses into the existing V2.0 insights table.
1225
1426
  # The new response columns are automatically joined by document ID.
1226
- dc.write(daft_responses, "insights", namespace="analysis")
1427
+ dc.write(daft_responses, "insights")
1227
1428
  audit_df = pd.DataFrame([{"version": "v3.0", "docs_processed": dc.dataset_length(current_insights)}])
1228
- dc.write(audit_df, "audit", namespace="analysis")
1429
+ dc.write(audit_df, "audit")
1229
1430
 
1230
1431
  print("=== V3.0: Customer service response generation processing complete! ===")
1231
1432
 
1232
1433
  print("\n=== Time Travel Comparison of all Versions ===")
1233
1434
  with dc.transaction(as_of=checkpoint_v1):
1234
1435
  print(f"== V1.0 Insights (sentiment) ==")
1235
- print(dc.read("insights", namespace="analysis").show())
1436
+ print(dc.read("insights").show())
1236
1437
  print(f"== V1.0 Audit ==")
1237
- print(dc.read("audit", namespace="analysis").show())
1438
+ print(dc.read("audit").show())
1238
1439
 
1239
1440
  with dc.transaction(as_of=checkpoint_v2):
1240
1441
  print(f"== V2.0 Insights (emotion) ==")
1241
- print(dc.read("insights", namespace="analysis").show())
1442
+ print(dc.read("insights").show())
1242
1443
  print(f"== V2.0 Audit ==")
1243
- print(dc.read("audit", namespace="analysis").show())
1444
+ print(dc.read("audit").show())
1244
1445
 
1245
- v3_results = dc.read("insights", namespace="analysis")
1446
+ v3_results = dc.read("insights")
1246
1447
  print(f"== V3.0 Insights (customer service response) ==")
1247
- print(dc.read("insights", namespace="analysis").show())
1448
+ print(dc.read("insights").show())
1248
1449
  print(f"== V3.0 Audit ==")
1249
- print(dc.read("audit", namespace="analysis").show())
1450
+ print(dc.read("audit").show())
1250
1451
  ```
1251
1452
 
1252
1453
  </details>
1253
1454
 
1254
1455
  ## Runtime Environment Requirements
1255
1456
 
1256
- DeltaCAT's transaction system assumes that the host machine provides strong system clock accuracy guarantees, and that the filesystem hosting the catalog root directory offers strong consistency.
1457
+ DeltaCAT's transaction system assumes that the host machine provides strong system clock accuracy guarantees, and that the filesystem hosting the catalog root directory offers strong read-after-write consistency.
1257
1458
 
1258
- Taken together, these requirements make DeltaCAT suitable for production use on most major cloud computing hosts (e.g., EC2, GCE, Azure VMs) and storage systems (e.g., S3, GCS, Azure Blob Storage), but local laptops should typically be limited to testing/experimental purposes.
1459
+ Taken together, these requirements make DeltaCAT suitable for production use on most major cloud computing hosts (e.g., EC2, GCE, Azure VMs) and storage systems (e.g., S3, GCS, Azure Blob Storage), but local laptops should typically be limited to testing/experimental purposes (e.g., due to potential system clock drift).
1259
1460
 
1260
1461
  ## Additional Resources
1261
1462
  ### Table Documentation