deltacat 2.0.0b7__py3-none-any.whl → 2.0.0b10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +27 -6
- deltacat/api.py +478 -123
- deltacat/aws/s3u.py +2 -2
- deltacat/benchmarking/conftest.py +1 -1
- deltacat/catalog/main/impl.py +12 -6
- deltacat/catalog/model/catalog.py +65 -47
- deltacat/catalog/model/properties.py +1 -3
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/converter/constants.py +5 -0
- deltacat/compute/converter/converter_session.py +78 -36
- deltacat/compute/converter/model/convert_input.py +24 -4
- deltacat/compute/converter/model/convert_result.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +52 -10
- deltacat/compute/converter/pyiceberg/overrides.py +181 -62
- deltacat/compute/converter/steps/convert.py +84 -36
- deltacat/compute/converter/steps/dedupe.py +25 -4
- deltacat/compute/converter/utils/convert_task_options.py +42 -13
- deltacat/compute/converter/utils/iceberg_columns.py +5 -0
- deltacat/compute/converter/utils/io.py +82 -11
- deltacat/compute/converter/utils/s3u.py +13 -4
- deltacat/compute/jobs/__init__.py +0 -0
- deltacat/compute/jobs/client.py +404 -0
- deltacat/constants.py +4 -4
- deltacat/daft/daft_scan.py +7 -3
- deltacat/daft/translator.py +126 -0
- deltacat/examples/basic_logging.py +5 -3
- deltacat/examples/hello_world.py +4 -2
- deltacat/examples/indexer/__init__.py +0 -0
- deltacat/examples/indexer/aws/__init__.py +0 -0
- deltacat/examples/indexer/gcp/__init__.py +0 -0
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +199 -0
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +580 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/storage/__init__.py +2 -0
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/metafile.py +74 -42
- deltacat/storage/model/scan/push_down.py +32 -5
- deltacat/storage/model/types.py +5 -3
- deltacat/storage/rivulet/__init__.py +4 -4
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/compute/converter/test_convert_session.py +209 -46
- deltacat/tests/local_deltacat_storage/__init__.py +1 -0
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +2 -1
- deltacat/tests/storage/rivulet/test_dataset.py +1 -1
- deltacat/tests/storage/rivulet/test_manifest.py +1 -1
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +1 -1
- deltacat/tests/test_deltacat_api.py +50 -9
- deltacat/types/media.py +141 -43
- deltacat/types/tables.py +35 -7
- deltacat/utils/daft.py +2 -2
- deltacat/utils/filesystem.py +39 -9
- deltacat/utils/polars.py +128 -0
- deltacat/utils/pyarrow.py +151 -15
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/url.py +1284 -0
- {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/METADATA +9 -6
- {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/RECORD +73 -48
- {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/LICENSE +0 -0
- {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/WHEEL +0 -0
- {deltacat-2.0.0b7.dist-info → deltacat-2.0.0b10.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
@@ -5,6 +5,7 @@ import deltacat.logs # noqa: F401
|
|
5
5
|
from deltacat.api import (
|
6
6
|
copy,
|
7
7
|
get,
|
8
|
+
list,
|
8
9
|
put,
|
9
10
|
)
|
10
11
|
from deltacat.catalog.delegate import (
|
@@ -30,13 +31,19 @@ from deltacat.catalog.delegate import (
|
|
30
31
|
from deltacat.catalog.model.catalog import ( # noqa: F401
|
31
32
|
Catalog,
|
32
33
|
Catalogs,
|
34
|
+
raise_if_not_initialized,
|
33
35
|
is_initialized,
|
34
36
|
init,
|
35
37
|
get_catalog,
|
36
38
|
put_catalog,
|
37
39
|
)
|
38
40
|
from deltacat.catalog.model.table_definition import TableDefinition
|
41
|
+
from deltacat.compute import (
|
42
|
+
job_client,
|
43
|
+
local_job_client,
|
44
|
+
)
|
39
45
|
from deltacat.storage import (
|
46
|
+
Dataset,
|
40
47
|
DistributedDataset,
|
41
48
|
Field,
|
42
49
|
LifecycleState,
|
@@ -53,9 +60,16 @@ from deltacat.storage import (
|
|
53
60
|
SortScheme,
|
54
61
|
NullOrder,
|
55
62
|
)
|
56
|
-
from deltacat.storage.rivulet import Dataset, Datatype
|
57
|
-
from deltacat.types.media import
|
63
|
+
from deltacat.storage.rivulet import Dataset as RivDataset, Datatype as RivDatatype
|
64
|
+
from deltacat.types.media import (
|
65
|
+
ContentEncoding,
|
66
|
+
ContentType,
|
67
|
+
DatasetType,
|
68
|
+
DatastoreType,
|
69
|
+
)
|
70
|
+
|
58
71
|
from deltacat.types.tables import TableWriteMode
|
72
|
+
from deltacat.utils.url import DeltaCatUrl
|
59
73
|
|
60
74
|
__iceberg__ = []
|
61
75
|
if importlib.util.find_spec("pyiceberg") is not None:
|
@@ -67,13 +81,16 @@ if importlib.util.find_spec("pyiceberg") is not None:
|
|
67
81
|
|
68
82
|
deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
|
69
83
|
|
70
|
-
__version__ = "2.0.0b7"
|
84
|
+
__version__ = "2.0.0b10"
|
71
85
|
|
72
86
|
|
73
87
|
__all__ = [
|
74
88
|
"__version__",
|
89
|
+
"job_client",
|
90
|
+
"local_job_client",
|
75
91
|
"copy",
|
76
92
|
"get",
|
93
|
+
"list",
|
77
94
|
"put",
|
78
95
|
"alter_table",
|
79
96
|
"create_table",
|
@@ -95,14 +112,19 @@ __all__ = [
|
|
95
112
|
"read_table",
|
96
113
|
"get_catalog",
|
97
114
|
"put_catalog",
|
115
|
+
"raise_if_not_initialized",
|
98
116
|
"is_initialized",
|
99
117
|
"init",
|
100
118
|
"Catalog",
|
101
119
|
"ContentType",
|
102
120
|
"ContentEncoding",
|
103
|
-
"DistributedDataset",
|
104
121
|
"Dataset",
|
105
|
-
"
|
122
|
+
"DatasetType",
|
123
|
+
"DatastoreType",
|
124
|
+
"DeltaCatUrl",
|
125
|
+
"DistributedDataset",
|
126
|
+
"RivDataset",
|
127
|
+
"RivDatatype",
|
106
128
|
"Field",
|
107
129
|
"LifecycleState",
|
108
130
|
"ListResult",
|
@@ -118,7 +140,6 @@ __all__ = [
|
|
118
140
|
"SortOrder",
|
119
141
|
"SortScheme",
|
120
142
|
"TableDefinition",
|
121
|
-
"TableType",
|
122
143
|
"TableWriteMode",
|
123
144
|
]
|
124
145
|
|