deltacat 2.0.0b9__py3-none-any.whl → 2.0.0b11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +41 -16
- deltacat/api.py +478 -123
- deltacat/aws/s3u.py +2 -2
- deltacat/benchmarking/benchmark_engine.py +4 -2
- deltacat/benchmarking/conftest.py +1 -1
- deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
- deltacat/catalog/__init__.py +62 -5
- deltacat/catalog/main/impl.py +26 -10
- deltacat/catalog/model/catalog.py +165 -109
- deltacat/catalog/model/properties.py +25 -24
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/converter/constants.py +5 -0
- deltacat/compute/converter/converter_session.py +78 -36
- deltacat/compute/converter/model/convert_input.py +24 -4
- deltacat/compute/converter/model/convert_result.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +52 -10
- deltacat/compute/converter/pyiceberg/overrides.py +181 -62
- deltacat/compute/converter/steps/convert.py +84 -36
- deltacat/compute/converter/steps/dedupe.py +25 -4
- deltacat/compute/converter/utils/convert_task_options.py +42 -13
- deltacat/compute/converter/utils/iceberg_columns.py +5 -0
- deltacat/compute/converter/utils/io.py +82 -11
- deltacat/compute/converter/utils/s3u.py +13 -4
- deltacat/compute/jobs/client.py +406 -0
- deltacat/constants.py +5 -6
- deltacat/env.py +10 -0
- deltacat/examples/basic_logging.py +6 -6
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
- deltacat/examples/hello_world.py +4 -2
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
- deltacat/{catalog → experimental/catalog}/iceberg/impl.py +27 -9
- deltacat/{storage → experimental/storage}/iceberg/iceberg_scan_planner.py +1 -1
- deltacat/{storage → experimental/storage}/iceberg/impl.py +1 -1
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
- deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -9
- deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
- deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
- deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
- deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
- deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +4 -4
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
- deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
- deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +580 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/storage/__init__.py +2 -0
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/metafile.py +74 -42
- deltacat/storage/model/scan/push_down.py +32 -5
- deltacat/storage/model/shard.py +6 -2
- deltacat/storage/model/types.py +5 -3
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +436 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +52 -98
- deltacat/tests/catalog/test_default_catalog_impl.py +1 -2
- deltacat/tests/compute/converter/test_convert_session.py +209 -46
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +0 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
- deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
- deltacat/tests/local_deltacat_storage/__init__.py +1 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_shard.py +3 -1
- deltacat/tests/test_deltacat_api.py +50 -9
- deltacat/types/media.py +141 -43
- deltacat/types/tables.py +35 -7
- deltacat/utils/daft.py +531 -5
- deltacat/utils/export.py +3 -1
- deltacat/utils/filesystem.py +39 -9
- deltacat/utils/polars.py +128 -0
- deltacat/utils/pyarrow.py +151 -15
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/url.py +1284 -0
- {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/METADATA +11 -9
- {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/RECORD +168 -123
- deltacat/catalog/iceberg/__init__.py +0 -4
- deltacat/daft/daft_scan.py +0 -111
- deltacat/daft/model.py +0 -258
- deltacat/examples/common/fixtures.py +0 -15
- deltacat/storage/rivulet/__init__.py +0 -11
- deltacat/storage/rivulet/feather/__init__.py +0 -5
- deltacat/storage/rivulet/parquet/__init__.py +0 -5
- /deltacat/{daft → compute/jobs}/__init__.py +0 -0
- /deltacat/examples/{common → experimental}/__init__.py +0 -0
- /deltacat/examples/{iceberg → experimental/iceberg}/__init__.py +0 -0
- /deltacat/{storage/iceberg → examples/indexer}/__init__.py +0 -0
- /deltacat/{storage/rivulet/arrow → examples/indexer/aws}/__init__.py +0 -0
- /deltacat/{storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
- /deltacat/{storage/rivulet/metastore → experimental/catalog}/__init__.py +0 -0
- /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
- /deltacat/{storage/rivulet/reader → experimental/storage}/__init__.py +0 -0
- /deltacat/{storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/iceberg/model.py +0 -0
- /deltacat/{storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet → experimental/storage/rivulet/fs}/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
- /deltacat/{tests/storage/rivulet/fs → experimental/storage/rivulet/metastore}/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/parquet/data_reader.py +0 -0
- /deltacat/{tests/storage/rivulet/schema → experimental/storage/rivulet/reader}/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
- /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/schema}/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/LICENSE +0 -0
- {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/WHEEL +0 -0
- {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
@@ -5,9 +5,10 @@ import deltacat.logs # noqa: F401
|
|
5
5
|
from deltacat.api import (
|
6
6
|
copy,
|
7
7
|
get,
|
8
|
+
list,
|
8
9
|
put,
|
9
10
|
)
|
10
|
-
from deltacat.catalog.delegate import (
|
11
|
+
from deltacat.catalog import ( # noqa: F401
|
11
12
|
alter_namespace,
|
12
13
|
alter_table,
|
13
14
|
create_namespace,
|
@@ -26,17 +27,24 @@ from deltacat.catalog.delegate import (
|
|
26
27
|
table_exists,
|
27
28
|
truncate_table,
|
28
29
|
write_to_table,
|
29
|
-
)
|
30
|
-
from deltacat.catalog.model.catalog import ( # noqa: F401
|
31
|
-
Catalog,
|
32
|
-
Catalogs,
|
33
|
-
is_initialized,
|
34
30
|
init,
|
31
|
+
is_initialized,
|
32
|
+
clear_catalogs,
|
35
33
|
get_catalog,
|
34
|
+
get_catalog_properties,
|
35
|
+
pop_catalog,
|
36
36
|
put_catalog,
|
37
|
+
raise_if_not_initialized,
|
38
|
+
Catalog,
|
39
|
+
CatalogProperties,
|
40
|
+
TableDefinition,
|
41
|
+
)
|
42
|
+
from deltacat.compute import (
|
43
|
+
job_client,
|
44
|
+
local_job_client,
|
37
45
|
)
|
38
|
-
from deltacat.catalog.model.table_definition import TableDefinition
|
39
46
|
from deltacat.storage import (
|
47
|
+
Dataset,
|
40
48
|
DistributedDataset,
|
41
49
|
Field,
|
42
50
|
LifecycleState,
|
@@ -53,13 +61,21 @@ from deltacat.storage import (
|
|
53
61
|
SortScheme,
|
54
62
|
NullOrder,
|
55
63
|
)
|
56
|
-
from deltacat.
|
57
|
-
|
64
|
+
from deltacat.types.media import (
|
65
|
+
ContentEncoding,
|
66
|
+
ContentType,
|
67
|
+
DatasetType,
|
68
|
+
DatastoreType,
|
69
|
+
)
|
70
|
+
|
58
71
|
from deltacat.types.tables import TableWriteMode
|
72
|
+
from deltacat.utils.url import DeltaCatUrl
|
59
73
|
|
60
74
|
__iceberg__ = []
|
61
75
|
if importlib.util.find_spec("pyiceberg") is not None:
|
62
|
-
from deltacat.catalog.iceberg import
|
76
|
+
from deltacat.experimental.catalog.iceberg import ( # noqa: F401
|
77
|
+
impl as IcebergCatalog,
|
78
|
+
)
|
63
79
|
|
64
80
|
__iceberg__ = [
|
65
81
|
"IcebergCatalog",
|
@@ -67,13 +83,16 @@ if importlib.util.find_spec("pyiceberg") is not None:
|
|
67
83
|
|
68
84
|
deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
|
69
85
|
|
70
|
-
__version__ = "2.0.0b9"
|
86
|
+
__version__ = "2.0.0b11"
|
71
87
|
|
72
88
|
|
73
89
|
__all__ = [
|
74
90
|
"__version__",
|
91
|
+
"job_client",
|
92
|
+
"local_job_client",
|
75
93
|
"copy",
|
76
94
|
"get",
|
95
|
+
"list",
|
77
96
|
"put",
|
78
97
|
"alter_table",
|
79
98
|
"create_table",
|
@@ -93,16 +112,23 @@ __all__ = [
|
|
93
112
|
"default_namespace",
|
94
113
|
"write_to_table",
|
95
114
|
"read_table",
|
115
|
+
"init",
|
116
|
+
"is_initialized",
|
117
|
+
"clear_catalogs",
|
96
118
|
"get_catalog",
|
119
|
+
"get_catalog_properties",
|
120
|
+
"pop_catalog",
|
97
121
|
"put_catalog",
|
98
|
-
"
|
99
|
-
"init",
|
122
|
+
"raise_if_not_initialized",
|
100
123
|
"Catalog",
|
124
|
+
"CatalogProperties",
|
101
125
|
"ContentType",
|
102
126
|
"ContentEncoding",
|
103
|
-
"DistributedDataset",
|
104
127
|
"Dataset",
|
105
|
-
"
|
128
|
+
"DatasetType",
|
129
|
+
"DatastoreType",
|
130
|
+
"DeltaCatUrl",
|
131
|
+
"DistributedDataset",
|
106
132
|
"Field",
|
107
133
|
"LifecycleState",
|
108
134
|
"ListResult",
|
@@ -118,7 +144,6 @@ __all__ = [
|
|
118
144
|
"SortOrder",
|
119
145
|
"SortScheme",
|
120
146
|
"TableDefinition",
|
121
|
-
"TableType",
|
122
147
|
"TableWriteMode",
|
123
148
|
]
|
124
149
|
|