deltacat 2.0.0b9__py3-none-any.whl → 2.0.0b11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. deltacat/__init__.py +41 -16
  2. deltacat/api.py +478 -123
  3. deltacat/aws/s3u.py +2 -2
  4. deltacat/benchmarking/benchmark_engine.py +4 -2
  5. deltacat/benchmarking/conftest.py +1 -1
  6. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  7. deltacat/catalog/__init__.py +62 -5
  8. deltacat/catalog/main/impl.py +26 -10
  9. deltacat/catalog/model/catalog.py +165 -109
  10. deltacat/catalog/model/properties.py +25 -24
  11. deltacat/compute/__init__.py +14 -0
  12. deltacat/compute/converter/constants.py +5 -0
  13. deltacat/compute/converter/converter_session.py +78 -36
  14. deltacat/compute/converter/model/convert_input.py +24 -4
  15. deltacat/compute/converter/model/convert_result.py +61 -0
  16. deltacat/compute/converter/model/converter_session_params.py +52 -10
  17. deltacat/compute/converter/pyiceberg/overrides.py +181 -62
  18. deltacat/compute/converter/steps/convert.py +84 -36
  19. deltacat/compute/converter/steps/dedupe.py +25 -4
  20. deltacat/compute/converter/utils/convert_task_options.py +42 -13
  21. deltacat/compute/converter/utils/iceberg_columns.py +5 -0
  22. deltacat/compute/converter/utils/io.py +82 -11
  23. deltacat/compute/converter/utils/s3u.py +13 -4
  24. deltacat/compute/jobs/client.py +406 -0
  25. deltacat/constants.py +5 -6
  26. deltacat/env.py +10 -0
  27. deltacat/examples/basic_logging.py +6 -6
  28. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
  29. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  30. deltacat/examples/hello_world.py +4 -2
  31. deltacat/examples/indexer/indexer.py +163 -0
  32. deltacat/examples/indexer/job_runner.py +198 -0
  33. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  34. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  35. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +27 -9
  36. deltacat/{storage → experimental/storage}/iceberg/iceberg_scan_planner.py +1 -1
  37. deltacat/{storage → experimental/storage}/iceberg/impl.py +1 -1
  38. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  39. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  40. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -9
  41. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  42. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  43. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  44. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  45. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  46. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  47. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  48. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  49. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -1
  50. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  51. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  52. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  53. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  54. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  55. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  56. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  57. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  58. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  59. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +4 -4
  60. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  61. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  62. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  63. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  64. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  65. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  66. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  67. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  68. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  69. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  70. deltacat/io/__init__.py +13 -0
  71. deltacat/io/dataset/__init__.py +0 -0
  72. deltacat/io/dataset/deltacat_dataset.py +91 -0
  73. deltacat/io/datasink/__init__.py +0 -0
  74. deltacat/io/datasink/deltacat_datasink.py +207 -0
  75. deltacat/io/datasource/__init__.py +0 -0
  76. deltacat/io/datasource/deltacat_datasource.py +580 -0
  77. deltacat/io/reader/__init__.py +0 -0
  78. deltacat/io/reader/deltacat_read_api.py +172 -0
  79. deltacat/storage/__init__.py +2 -0
  80. deltacat/storage/model/expression/__init__.py +47 -0
  81. deltacat/storage/model/expression/expression.py +656 -0
  82. deltacat/storage/model/expression/visitor.py +248 -0
  83. deltacat/storage/model/metafile.py +74 -42
  84. deltacat/storage/model/scan/push_down.py +32 -5
  85. deltacat/storage/model/shard.py +6 -2
  86. deltacat/storage/model/types.py +5 -3
  87. deltacat/tests/_io/reader/__init__.py +0 -0
  88. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  89. deltacat/tests/catalog/data/__init__.py +0 -0
  90. deltacat/tests/catalog/main/__init__.py +0 -0
  91. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  92. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +436 -0
  93. deltacat/tests/catalog/model/__init__.py +0 -0
  94. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  95. deltacat/tests/catalog/test_catalogs.py +52 -98
  96. deltacat/tests/catalog/test_default_catalog_impl.py +1 -2
  97. deltacat/tests/compute/converter/test_convert_session.py +209 -46
  98. deltacat/tests/daft/__init__.py +0 -0
  99. deltacat/tests/daft/test_model.py +97 -0
  100. deltacat/tests/experimental/__init__.py +0 -0
  101. deltacat/tests/experimental/catalog/__init__.py +0 -0
  102. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  103. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  104. deltacat/tests/experimental/daft/__init__.py +0 -0
  105. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  106. deltacat/tests/experimental/storage/__init__.py +0 -0
  107. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  108. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  109. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  110. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  111. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  112. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  113. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  114. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  115. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  116. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  117. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  118. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  119. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  120. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  121. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  122. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  123. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  124. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  125. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  126. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  127. deltacat/tests/local_deltacat_storage/__init__.py +1 -0
  128. deltacat/tests/storage/model/test_expression.py +327 -0
  129. deltacat/tests/storage/model/test_shard.py +3 -1
  130. deltacat/tests/test_deltacat_api.py +50 -9
  131. deltacat/types/media.py +141 -43
  132. deltacat/types/tables.py +35 -7
  133. deltacat/utils/daft.py +531 -5
  134. deltacat/utils/export.py +3 -1
  135. deltacat/utils/filesystem.py +39 -9
  136. deltacat/utils/polars.py +128 -0
  137. deltacat/utils/pyarrow.py +151 -15
  138. deltacat/utils/ray_utils/concurrency.py +1 -1
  139. deltacat/utils/ray_utils/runtime.py +56 -4
  140. deltacat/utils/url.py +1284 -0
  141. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/METADATA +11 -9
  142. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/RECORD +168 -123
  143. deltacat/catalog/iceberg/__init__.py +0 -4
  144. deltacat/daft/daft_scan.py +0 -111
  145. deltacat/daft/model.py +0 -258
  146. deltacat/examples/common/fixtures.py +0 -15
  147. deltacat/storage/rivulet/__init__.py +0 -11
  148. deltacat/storage/rivulet/feather/__init__.py +0 -5
  149. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  150. /deltacat/{daft → compute/jobs}/__init__.py +0 -0
  151. /deltacat/examples/{common → experimental}/__init__.py +0 -0
  152. /deltacat/examples/{iceberg → experimental/iceberg}/__init__.py +0 -0
  153. /deltacat/{storage/iceberg → examples/indexer}/__init__.py +0 -0
  154. /deltacat/{storage/rivulet/arrow → examples/indexer/aws}/__init__.py +0 -0
  155. /deltacat/{storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  156. /deltacat/{storage/rivulet/metastore → experimental/catalog}/__init__.py +0 -0
  157. /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
  158. /deltacat/{storage/rivulet/reader → experimental/storage}/__init__.py +0 -0
  159. /deltacat/{storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
  160. /deltacat/{storage → experimental/storage}/iceberg/model.py +0 -0
  161. /deltacat/{storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
  162. /deltacat/{tests/storage/rivulet → experimental/storage/rivulet/fs}/__init__.py +0 -0
  163. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  164. /deltacat/{tests/storage/rivulet/fs → experimental/storage/rivulet/metastore}/__init__.py +0 -0
  165. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  166. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  167. /deltacat/{storage → experimental/storage}/rivulet/parquet/data_reader.py +0 -0
  168. /deltacat/{tests/storage/rivulet/schema → experimental/storage/rivulet/reader}/__init__.py +0 -0
  169. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  170. /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/schema}/__init__.py +0 -0
  171. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  172. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  173. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/LICENSE +0 -0
  174. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/WHEEL +0 -0
  175. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/top_level.txt +0 -0
deltacat/__init__.py CHANGED
@@ -5,9 +5,10 @@ import deltacat.logs # noqa: F401
5
5
  from deltacat.api import (
6
6
  copy,
7
7
  get,
8
+ list,
8
9
  put,
9
10
  )
10
- from deltacat.catalog.delegate import (
11
+ from deltacat.catalog import ( # noqa: F401
11
12
  alter_namespace,
12
13
  alter_table,
13
14
  create_namespace,
@@ -26,17 +27,24 @@ from deltacat.catalog.delegate import (
26
27
  table_exists,
27
28
  truncate_table,
28
29
  write_to_table,
29
- )
30
- from deltacat.catalog.model.catalog import ( # noqa: F401
31
- Catalog,
32
- Catalogs,
33
- is_initialized,
34
30
  init,
31
+ is_initialized,
32
+ clear_catalogs,
35
33
  get_catalog,
34
+ get_catalog_properties,
35
+ pop_catalog,
36
36
  put_catalog,
37
+ raise_if_not_initialized,
38
+ Catalog,
39
+ CatalogProperties,
40
+ TableDefinition,
41
+ )
42
+ from deltacat.compute import (
43
+ job_client,
44
+ local_job_client,
37
45
  )
38
- from deltacat.catalog.model.table_definition import TableDefinition
39
46
  from deltacat.storage import (
47
+ Dataset,
40
48
  DistributedDataset,
41
49
  Field,
42
50
  LifecycleState,
@@ -53,13 +61,21 @@ from deltacat.storage import (
53
61
  SortScheme,
54
62
  NullOrder,
55
63
  )
56
- from deltacat.storage.rivulet import Dataset, Datatype
57
- from deltacat.types.media import ContentEncoding, ContentType, TableType
64
+ from deltacat.types.media import (
65
+ ContentEncoding,
66
+ ContentType,
67
+ DatasetType,
68
+ DatastoreType,
69
+ )
70
+
58
71
  from deltacat.types.tables import TableWriteMode
72
+ from deltacat.utils.url import DeltaCatUrl
59
73
 
60
74
  __iceberg__ = []
61
75
  if importlib.util.find_spec("pyiceberg") is not None:
62
- from deltacat.catalog.iceberg import impl as IcebergCatalog # noqa: F401
76
+ from deltacat.experimental.catalog.iceberg import ( # noqa: F401
77
+ impl as IcebergCatalog,
78
+ )
63
79
 
64
80
  __iceberg__ = [
65
81
  "IcebergCatalog",
@@ -67,13 +83,16 @@ if importlib.util.find_spec("pyiceberg") is not None:
67
83
 
68
84
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
69
85
 
70
- __version__ = "2.0.0b9"
86
+ __version__ = "2.0.0b11"
71
87
 
72
88
 
73
89
  __all__ = [
74
90
  "__version__",
91
+ "job_client",
92
+ "local_job_client",
75
93
  "copy",
76
94
  "get",
95
+ "list",
77
96
  "put",
78
97
  "alter_table",
79
98
  "create_table",
@@ -93,16 +112,23 @@ __all__ = [
93
112
  "default_namespace",
94
113
  "write_to_table",
95
114
  "read_table",
115
+ "init",
116
+ "is_initialized",
117
+ "clear_catalogs",
96
118
  "get_catalog",
119
+ "get_catalog_properties",
120
+ "pop_catalog",
97
121
  "put_catalog",
98
- "is_initialized",
99
- "init",
122
+ "raise_if_not_initialized",
100
123
  "Catalog",
124
+ "CatalogProperties",
101
125
  "ContentType",
102
126
  "ContentEncoding",
103
- "DistributedDataset",
104
127
  "Dataset",
105
- "Datatype",
128
+ "DatasetType",
129
+ "DatastoreType",
130
+ "DeltaCatUrl",
131
+ "DistributedDataset",
106
132
  "Field",
107
133
  "LifecycleState",
108
134
  "ListResult",
@@ -118,7 +144,6 @@ __all__ = [
118
144
  "SortOrder",
119
145
  "SortScheme",
120
146
  "TableDefinition",
121
- "TableType",
122
147
  "TableWriteMode",
123
148
  ]
124
149