deltacat 0.2.10__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/s3u.py +250 -111
- deltacat/catalog/default_catalog_impl/__init__.py +369 -0
- deltacat/compute/compactor_v2/compaction_session.py +175 -152
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +6 -0
- deltacat/compute/compactor_v2/model/merge_file_group.py +213 -0
- deltacat/compute/compactor_v2/model/merge_input.py +8 -24
- deltacat/compute/compactor_v2/model/merge_result.py +1 -0
- deltacat/compute/compactor_v2/steps/hash_bucket.py +4 -56
- deltacat/compute/compactor_v2/steps/merge.py +106 -171
- deltacat/compute/compactor_v2/utils/delta.py +97 -0
- deltacat/compute/compactor_v2/utils/merge.py +126 -0
- deltacat/compute/compactor_v2/utils/task_options.py +16 -4
- deltacat/compute/merge_on_read/__init__.py +4 -0
- deltacat/compute/merge_on_read/daft.py +40 -0
- deltacat/compute/merge_on_read/model/__init__.py +0 -0
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +66 -0
- deltacat/compute/merge_on_read/utils/__init__.py +0 -0
- deltacat/compute/merge_on_read/utils/delta.py +42 -0
- deltacat/storage/interface.py +10 -2
- deltacat/storage/model/types.py +3 -11
- deltacat/tests/catalog/__init__.py +0 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +98 -0
- deltacat/tests/compute/compact_partition_test_cases.py +126 -1
- deltacat/tests/compute/test_compact_partition_incremental.py +4 -1
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +9 -2
- deltacat/tests/local_deltacat_storage/__init__.py +19 -2
- deltacat/tests/test_utils/pyarrow.py +33 -14
- deltacat/tests/utils/test_daft.py +42 -2
- deltacat/types/media.py +5 -0
- deltacat/types/tables.py +7 -1
- deltacat/utils/daft.py +78 -13
- {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/METADATA +2 -2
- {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/RECORD +37 -25
- {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/LICENSE +0 -0
- {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/WHEEL +0 -0
- {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/top_level.txt +0 -0
@@ -32,6 +32,7 @@ from deltacat.tests.compute.compact_partition_test_cases import (
|
|
32
32
|
REBASE_THEN_INCREMENTAL_TEST_CASES,
|
33
33
|
)
|
34
34
|
from typing import Any, Callable, Dict, List, Optional, Set
|
35
|
+
from deltacat.types.media import StorageType
|
35
36
|
|
36
37
|
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
37
38
|
"db_file_path",
|
@@ -272,7 +273,9 @@ def test_compact_partition_rebase_then_incremental(
|
|
272
273
|
compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
|
273
274
|
setup_s3_resource, rcf_file_s3_uri
|
274
275
|
)
|
275
|
-
tables = ds.download_delta(
|
276
|
+
tables = ds.download_delta(
|
277
|
+
compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
|
278
|
+
)
|
276
279
|
actual_rebase_compacted_table = pa.concat_tables(tables)
|
277
280
|
# if no primary key is specified then sort by sort_key for consistent assertion
|
278
281
|
sorting_cols: List[Any] = (
|
@@ -341,7 +344,11 @@ def test_compact_partition_rebase_then_incremental(
|
|
341
344
|
**compaction_audit_obj
|
342
345
|
)
|
343
346
|
|
344
|
-
tables = ds.download_delta(
|
347
|
+
tables = ds.download_delta(
|
348
|
+
compacted_delta_locator_incremental,
|
349
|
+
storage_type=StorageType.LOCAL,
|
350
|
+
**ds_mock_kwargs,
|
351
|
+
)
|
345
352
|
actual_compacted_table = pa.concat_tables(tables)
|
346
353
|
expected_terminal_compact_partition_result = (
|
347
354
|
expected_terminal_compact_partition_result.combine_chunks().sort_by(
|
@@ -1,10 +1,12 @@
|
|
1
1
|
from typing import Any, Callable, Dict, List, Optional, Set, Union, Tuple
|
2
2
|
|
3
3
|
import pyarrow as pa
|
4
|
+
import daft
|
4
5
|
import json
|
5
6
|
import sqlite3
|
6
7
|
from sqlite3 import Cursor, Connection
|
7
8
|
import uuid
|
9
|
+
import ray
|
8
10
|
import io
|
9
11
|
|
10
12
|
from deltacat.tests.test_utils.storage import create_empty_delta
|
@@ -38,7 +40,13 @@ from deltacat.storage import (
|
|
38
40
|
ManifestEntry,
|
39
41
|
ManifestEntryList,
|
40
42
|
)
|
41
|
-
from deltacat.types.media import
|
43
|
+
from deltacat.types.media import (
|
44
|
+
ContentType,
|
45
|
+
StorageType,
|
46
|
+
TableType,
|
47
|
+
ContentEncoding,
|
48
|
+
DistributedDatasetType,
|
49
|
+
)
|
42
50
|
from deltacat.utils.common import ReadKwargsProvider
|
43
51
|
|
44
52
|
SQLITE_CUR_ARG = "sqlite3_cur"
|
@@ -337,9 +345,10 @@ def download_delta(
|
|
337
345
|
columns: Optional[List[str]] = None,
|
338
346
|
file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
|
339
347
|
ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
|
348
|
+
distributed_dataset_type: DistributedDatasetType = DistributedDatasetType.RAY_DATASET,
|
340
349
|
*args,
|
341
350
|
**kwargs,
|
342
|
-
) -> Union[LocalDataset, DistributedDataset]:
|
351
|
+
) -> Union[LocalDataset, DistributedDataset]: # type: ignore
|
343
352
|
result = []
|
344
353
|
manifest = get_delta_manifest(delta_like, *args, **kwargs)
|
345
354
|
|
@@ -356,6 +365,14 @@ def download_delta(
|
|
356
365
|
)
|
357
366
|
)
|
358
367
|
|
368
|
+
if storage_type == StorageType.DISTRIBUTED:
|
369
|
+
if distributed_dataset_type is DistributedDatasetType.DAFT:
|
370
|
+
return daft.from_arrow(result)
|
371
|
+
elif distributed_dataset_type is DistributedDatasetType.RAY_DATASET:
|
372
|
+
return ray.data.from_arrow(result)
|
373
|
+
else:
|
374
|
+
raise ValueError(f"Dataset type {distributed_dataset_type} not supported!")
|
375
|
+
|
359
376
|
return result
|
360
377
|
|
361
378
|
|
@@ -1,14 +1,18 @@
|
|
1
|
-
from typing import List
|
1
|
+
from typing import List, Optional
|
2
2
|
import pyarrow as pa
|
3
|
-
from deltacat.storage import Delta, Partition
|
3
|
+
from deltacat.storage import Delta, Partition, PartitionLocator
|
4
4
|
import deltacat.tests.local_deltacat_storage as ds
|
5
5
|
|
6
6
|
|
7
7
|
def create_delta_from_csv_file(
|
8
|
-
namespace: str,
|
8
|
+
namespace: str,
|
9
|
+
file_paths: List[str],
|
10
|
+
table_name: Optional[str] = None,
|
11
|
+
*args,
|
12
|
+
**kwargs
|
9
13
|
) -> Delta:
|
10
14
|
staged_partition = stage_partition_from_file_paths(
|
11
|
-
namespace, file_paths, *args, **kwargs
|
15
|
+
namespace, file_paths, *args, table_name=table_name, **kwargs
|
12
16
|
)
|
13
17
|
|
14
18
|
committed_delta = commit_delta_to_staged_partition(
|
@@ -19,10 +23,15 @@ def create_delta_from_csv_file(
|
|
19
23
|
|
20
24
|
|
21
25
|
def stage_partition_from_file_paths(
|
22
|
-
namespace: str,
|
26
|
+
namespace: str,
|
27
|
+
file_paths: List[str],
|
28
|
+
table_name: Optional[str] = None,
|
29
|
+
*args,
|
30
|
+
**kwargs
|
23
31
|
) -> Partition:
|
24
32
|
ds.create_namespace(namespace, {}, **kwargs)
|
25
|
-
table_name
|
33
|
+
if table_name is None:
|
34
|
+
table_name = "-".join(file_paths).replace("/", "_")
|
26
35
|
ds.create_table_version(namespace, table_name, "1", **kwargs)
|
27
36
|
stream = ds.get_stream(namespace, table_name, "1", **kwargs)
|
28
37
|
staged_partition = ds.stage_partition(stream, [], **kwargs)
|
@@ -31,19 +40,29 @@ def stage_partition_from_file_paths(
|
|
31
40
|
|
32
41
|
def commit_delta_to_staged_partition(
|
33
42
|
staged_partition, file_paths: List[str], *args, **kwargs
|
43
|
+
) -> Delta:
|
44
|
+
committed_delta = commit_delta_to_partition(
|
45
|
+
staged_partition, file_paths=file_paths, *args, **kwargs
|
46
|
+
)
|
47
|
+
ds.commit_partition(staged_partition, **kwargs)
|
48
|
+
return committed_delta
|
49
|
+
|
50
|
+
|
51
|
+
def commit_delta_to_partition(
|
52
|
+
partition: Partition, file_paths: List[str], *args, **kwargs
|
34
53
|
) -> Delta:
|
35
54
|
tables = []
|
36
55
|
|
56
|
+
if isinstance(partition, PartitionLocator):
|
57
|
+
partition = ds.get_partition(
|
58
|
+
partition.stream_locator, partition.partition_values, *args, **kwargs
|
59
|
+
)
|
60
|
+
|
37
61
|
for file_path in file_paths:
|
38
62
|
table = pa.csv.read_csv(file_path)
|
39
63
|
tables.append(table)
|
40
|
-
deltas = []
|
41
64
|
|
42
|
-
|
43
|
-
|
44
|
-
deltas.append(delta)
|
65
|
+
table = pa.concat_tables(tables)
|
66
|
+
staged_delta = ds.stage_delta(table, partition, **kwargs)
|
45
67
|
|
46
|
-
|
47
|
-
committed_delta = ds.commit_delta(merged_delta, **kwargs)
|
48
|
-
ds.commit_partition(staged_partition, **kwargs)
|
49
|
-
return committed_delta
|
68
|
+
return ds.commit_delta(staged_delta, **kwargs)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import unittest
|
2
2
|
from deltacat.types.media import ContentEncoding, ContentType
|
3
|
-
from deltacat.utils.daft import daft_s3_file_to_table
|
3
|
+
from deltacat.utils.daft import daft_s3_file_to_table, s3_files_to_dataframe
|
4
4
|
|
5
5
|
from deltacat.utils.pyarrow import ReadKwargsProviderPyArrowSchemaOverride
|
6
6
|
from deltacat.types.partial_download import PartialParquetParameters
|
@@ -9,7 +9,7 @@ import pyarrow as pa
|
|
9
9
|
from pyarrow import parquet as pq
|
10
10
|
|
11
11
|
|
12
|
-
class
|
12
|
+
class TestDaftS3FileToTable(unittest.TestCase):
|
13
13
|
MVP_PATH = "deltacat/tests/utils/data/mvp.parquet"
|
14
14
|
|
15
15
|
def test_read_from_s3_all_columns(self):
|
@@ -121,5 +121,45 @@ class TestDaftParquetReader(unittest.TestCase):
|
|
121
121
|
self.assertEqual(table.num_rows, 10)
|
122
122
|
|
123
123
|
|
124
|
+
class TestDaftS3FilesToDataFrame(unittest.TestCase):
|
125
|
+
MVP_PATH = "deltacat/tests/utils/data/mvp.parquet"
|
126
|
+
|
127
|
+
def test_read_from_s3_all_columns(self):
|
128
|
+
df = s3_files_to_dataframe(
|
129
|
+
uris=[self.MVP_PATH],
|
130
|
+
content_encoding=ContentEncoding.IDENTITY.value,
|
131
|
+
content_type=ContentType.PARQUET.value,
|
132
|
+
ray_init_options={"local_mode": True},
|
133
|
+
)
|
134
|
+
|
135
|
+
table = df.to_arrow()
|
136
|
+
self.assertEqual(table.schema.names, ["a", "b"])
|
137
|
+
self.assertEqual(table.num_rows, 100)
|
138
|
+
|
139
|
+
def test_does_not_read_from_s3_if_not_materialized(self):
|
140
|
+
df = s3_files_to_dataframe(
|
141
|
+
uris=[self.MVP_PATH],
|
142
|
+
content_encoding=ContentEncoding.IDENTITY.value,
|
143
|
+
content_type=ContentType.PARQUET.value,
|
144
|
+
ray_init_options={"local_mode": True},
|
145
|
+
)
|
146
|
+
|
147
|
+
self.assertRaises(RuntimeError, lambda: len(df))
|
148
|
+
df.collect()
|
149
|
+
self.assertEqual(len(df), 100)
|
150
|
+
|
151
|
+
def test_raises_error_if_not_supported_content_type(self):
|
152
|
+
|
153
|
+
self.assertRaises(
|
154
|
+
AssertionError,
|
155
|
+
lambda: s3_files_to_dataframe(
|
156
|
+
uris=[self.MVP_PATH],
|
157
|
+
content_encoding=ContentEncoding.IDENTITY.value,
|
158
|
+
content_type=ContentType.UNESCAPED_TSV.value,
|
159
|
+
ray_init_options={"local_mode": True},
|
160
|
+
),
|
161
|
+
)
|
162
|
+
|
163
|
+
|
124
164
|
if __name__ == "__main__":
|
125
165
|
unittest.main()
|
deltacat/types/media.py
CHANGED
deltacat/types/tables.py
CHANGED
@@ -15,10 +15,11 @@ from ray.data.read_api import (
|
|
15
15
|
)
|
16
16
|
|
17
17
|
import deltacat.storage as dcs
|
18
|
-
from deltacat.types.media import TableType
|
18
|
+
from deltacat.types.media import TableType, DistributedDatasetType
|
19
19
|
from deltacat.utils import numpy as np_utils
|
20
20
|
from deltacat.utils import pandas as pd_utils
|
21
21
|
from deltacat.utils import pyarrow as pa_utils
|
22
|
+
from deltacat.utils import daft as daft_utils
|
22
23
|
from deltacat.utils.ray_utils import dataset as ds_utils
|
23
24
|
|
24
25
|
TABLE_TYPE_TO_READER_FUNC: Dict[int, Callable] = {
|
@@ -78,6 +79,11 @@ TABLE_TYPE_TO_DATASET_CREATE_FUNC_REFS: Dict[str, Callable] = {
|
|
78
79
|
}
|
79
80
|
|
80
81
|
|
82
|
+
DISTRIBUTED_DATASET_TYPE_TO_READER_FUNC: Dict[int, Callable] = {
|
83
|
+
DistributedDatasetType.DAFT.value: daft_utils.s3_files_to_dataframe
|
84
|
+
}
|
85
|
+
|
86
|
+
|
81
87
|
class TableWriteMode(str, Enum):
|
82
88
|
"""
|
83
89
|
Enum controlling how a given dataset will be written to a table.
|
deltacat/utils/daft.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
import logging
|
2
|
-
from typing import Optional, List
|
3
|
-
|
2
|
+
from typing import Optional, List, Any, Dict, Callable
|
3
|
+
import daft
|
4
|
+
import ray
|
4
5
|
from daft.table import read_parquet_into_pyarrow
|
5
|
-
from daft import TimeUnit
|
6
|
+
from daft import TimeUnit, DataFrame
|
6
7
|
from daft.io import IOConfig, S3Config
|
7
8
|
import pyarrow as pa
|
8
9
|
|
@@ -22,6 +23,66 @@ from deltacat.types.partial_download import (
|
|
22
23
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
23
24
|
|
24
25
|
|
26
|
+
def s3_files_to_dataframe(
|
27
|
+
uris: List[str],
|
28
|
+
content_type: str,
|
29
|
+
content_encoding: str,
|
30
|
+
column_names: Optional[List[str]] = None,
|
31
|
+
include_columns: Optional[List[str]] = None,
|
32
|
+
read_func_kwargs_provider: Optional[ReadKwargsProvider] = None,
|
33
|
+
ray_options_provider: Optional[Callable[[int, Any], Dict[str, Any]]] = None,
|
34
|
+
s3_client_kwargs: Optional[Any] = None,
|
35
|
+
ray_init_options: Optional[Dict[str, Any]] = None,
|
36
|
+
) -> DataFrame:
|
37
|
+
|
38
|
+
if ray_init_options is None:
|
39
|
+
ray_init_options = {}
|
40
|
+
|
41
|
+
assert (
|
42
|
+
content_type == ContentType.PARQUET.value
|
43
|
+
), f"daft native reader currently only supports parquet, got {content_type}"
|
44
|
+
|
45
|
+
assert (
|
46
|
+
content_encoding == ContentEncoding.IDENTITY.value
|
47
|
+
), f"daft native reader currently only supports identity encoding, got {content_encoding}"
|
48
|
+
|
49
|
+
if not ray.is_initialized():
|
50
|
+
ray.init(address="auto", ignore_reinit_error=True, **ray_init_options)
|
51
|
+
|
52
|
+
daft.context.set_runner_ray(noop_if_initialized=True)
|
53
|
+
|
54
|
+
if s3_client_kwargs is None:
|
55
|
+
s3_client_kwargs = {}
|
56
|
+
|
57
|
+
kwargs = {}
|
58
|
+
if read_func_kwargs_provider is not None:
|
59
|
+
kwargs = read_func_kwargs_provider(content_type, kwargs)
|
60
|
+
|
61
|
+
# TODO(raghumdani): pass in coerce_int96_timestamp arg
|
62
|
+
# https://github.com/Eventual-Inc/Daft/issues/1894
|
63
|
+
|
64
|
+
io_config = _get_s3_io_config(s3_client_kwargs=s3_client_kwargs)
|
65
|
+
|
66
|
+
logger.debug(
|
67
|
+
f"Preparing to read S3 object from {len(uris)} files into daft dataframe"
|
68
|
+
)
|
69
|
+
|
70
|
+
df, latency = timed_invocation(
|
71
|
+
daft.read_parquet, path=uris, io_config=io_config, use_native_downloader=True
|
72
|
+
)
|
73
|
+
|
74
|
+
logger.debug(f"Time to create daft dataframe from {len(uris)} files is {latency}s")
|
75
|
+
|
76
|
+
columns_to_read = include_columns or column_names
|
77
|
+
|
78
|
+
logger.debug(f"Taking columns {columns_to_read} from the daft df.")
|
79
|
+
|
80
|
+
if columns_to_read:
|
81
|
+
return df.select(*columns_to_read)
|
82
|
+
else:
|
83
|
+
return df
|
84
|
+
|
85
|
+
|
25
86
|
def daft_s3_file_to_table(
|
26
87
|
s3_url: str,
|
27
88
|
content_type: str,
|
@@ -55,16 +116,7 @@ def daft_s3_file_to_table(
|
|
55
116
|
):
|
56
117
|
row_groups = partial_file_download_params.row_groups_to_download
|
57
118
|
|
58
|
-
io_config =
|
59
|
-
s3=S3Config(
|
60
|
-
key_id=s3_client_kwargs.get("aws_access_key_id"),
|
61
|
-
access_key=s3_client_kwargs.get("aws_secret_access_key"),
|
62
|
-
session_token=s3_client_kwargs.get("aws_session_token"),
|
63
|
-
retry_mode="adaptive",
|
64
|
-
num_tries=BOTO_MAX_RETRIES,
|
65
|
-
max_connections=DAFT_MAX_S3_CONNECTIONS_PER_FILE,
|
66
|
-
)
|
67
|
-
)
|
119
|
+
io_config = _get_s3_io_config(s3_client_kwargs=s3_client_kwargs)
|
68
120
|
|
69
121
|
logger.debug(f"Preparing to read S3 object from {s3_url} into daft table")
|
70
122
|
|
@@ -95,3 +147,16 @@ def daft_s3_file_to_table(
|
|
95
147
|
return coerce_pyarrow_table_to_schema(pa_table, input_schema)
|
96
148
|
else:
|
97
149
|
return pa_table
|
150
|
+
|
151
|
+
|
152
|
+
def _get_s3_io_config(s3_client_kwargs) -> IOConfig:
|
153
|
+
return IOConfig(
|
154
|
+
s3=S3Config(
|
155
|
+
key_id=s3_client_kwargs.get("aws_access_key_id"),
|
156
|
+
access_key=s3_client_kwargs.get("aws_secret_access_key"),
|
157
|
+
session_token=s3_client_kwargs.get("aws_session_token"),
|
158
|
+
retry_mode="adaptive",
|
159
|
+
num_tries=BOTO_MAX_RETRIES,
|
160
|
+
max_connections=DAFT_MAX_S3_CONNECTIONS_PER_FILE,
|
161
|
+
)
|
162
|
+
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: deltacat
|
3
|
-
Version: 0.
|
3
|
+
Version: 1.0.0
|
4
4
|
Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
|
5
5
|
Home-page: https://github.com/ray-project/deltacat
|
6
6
|
Author: Ray Team
|
@@ -27,7 +27,7 @@ Requires-Dist: tenacity ==8.1.0
|
|
27
27
|
Requires-Dist: typing-extensions ==4.4.0
|
28
28
|
Requires-Dist: pymemcache ==4.0.0
|
29
29
|
Requires-Dist: redis ==4.6.0
|
30
|
-
Requires-Dist: getdaft ==0.2.
|
30
|
+
Requires-Dist: getdaft ==0.2.16
|
31
31
|
Requires-Dist: schedule ==1.2.0
|
32
32
|
|
33
33
|
# DeltaCAT
|
@@ -1,11 +1,11 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=hul5a8v-ltL75fkLOfQBj_i2OoY5tKMy-yO3oTZzikA,1777
|
2
2
|
deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
|
3
3
|
deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
|
4
4
|
deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
|
5
5
|
deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
deltacat/aws/clients.py,sha256=wWiqXyZPWXezdEbhQ7DLwEVnYV6KiitqzBc5B4UAwc0,6184
|
7
7
|
deltacat/aws/constants.py,sha256=luXWMO_8eatq8f9NlFjNM7q362j77JwzTM2BEVS_8-8,353
|
8
|
-
deltacat/aws/s3u.py,sha256=
|
8
|
+
deltacat/aws/s3u.py,sha256=aK1_pyfipd9Jq1ZiaOC-gszyIIfc1TSbch6YmuZmjt0,23878
|
9
9
|
deltacat/aws/redshift/__init__.py,sha256=7SvjG-dqox8zZUhFicTsUvpG5vXYDl_QQ3ohlHOgTKc,342
|
10
10
|
deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
deltacat/aws/redshift/model/manifest.py,sha256=ThgpdwzaWz493Zz9e8HSWwuxEheA1nDuypM3pe4vozk,12987
|
@@ -15,6 +15,7 @@ deltacat/benchmarking/conftest.py,sha256=6M9NJ71vnOpeMxG-Ly9UWRsgZmky5-1GTuoRD-O
|
|
15
15
|
deltacat/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
deltacat/catalog/delegate.py,sha256=lVPPG4bBxoNt0IFufKIXc7H2Nhvb6QNox-WfXxaNxuc,8745
|
17
17
|
deltacat/catalog/interface.py,sha256=On5hsbznnIKsvCcm5C1N3OS3ar3v6q7pAUkLWMb6PtY,6556
|
18
|
+
deltacat/catalog/default_catalog_impl/__init__.py,sha256=ted1_sA2Y3ljJjb9hRuWUCiUlCen-HAW8TxoV4g4IW0,12677
|
18
19
|
deltacat/catalog/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
20
|
deltacat/catalog/model/catalog.py,sha256=-Ho7a3rV1hiOS9cSRCAor9AtXV9nJn9t_MDVql9pIxo,2212
|
20
21
|
deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJnhjTZ6KjybYlhE,727
|
@@ -48,22 +49,31 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
|
|
48
49
|
deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
|
49
50
|
deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
|
50
51
|
deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
|
-
deltacat/compute/compactor_v2/compaction_session.py,sha256=
|
52
|
+
deltacat/compute/compactor_v2/compaction_session.py,sha256=QVWuI5edQORqhBkccM5waEXMOzg_6756-eT4YqFxkD4,21932
|
52
53
|
deltacat/compute/compactor_v2/constants.py,sha256=yZgzFD59wiXbXiTVgYPWRodZGpngiSBNFB2jmoZ4fps,1471
|
53
54
|
deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
54
|
-
deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=
|
55
|
+
deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=HS1BLgNCjQoqe7EuM9baQWE6U4BFqU2PVEWFEF725VE,2815
|
55
56
|
deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
|
56
|
-
deltacat/compute/compactor_v2/model/
|
57
|
-
deltacat/compute/compactor_v2/model/
|
57
|
+
deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViVO1SVljCj6f0B3MfB3hqtGm2S0s,7410
|
58
|
+
deltacat/compute/compactor_v2/model/merge_input.py,sha256=xiUsA2jY1tQRGc84Q2SygijF5N47x3WqFjcy6sJwEIs,4511
|
59
|
+
deltacat/compute/compactor_v2/model/merge_result.py,sha256=R9PV-0NGJ9GaSSeVqbIEhiqBolshPP4m7_eCb0yJR8g,401
|
58
60
|
deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
59
|
-
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=
|
60
|
-
deltacat/compute/compactor_v2/steps/merge.py,sha256=
|
61
|
+
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=2RfkVh-0mccgONa4atQFmNfPk668oIZr8TthwTUyh-k,6295
|
62
|
+
deltacat/compute/compactor_v2/steps/merge.py,sha256=UR6HbPUNxEVtvsGiibRYmzq0_nT9Lq6AbEa6e71KaNQ,14496
|
61
63
|
deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
62
64
|
deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=rNKZisxGrLQOkwX8eHUQiFoTR1V-E66pMqWigtrs618,2156
|
63
65
|
deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQl8C5lBEr8gbNfbsw,1932
|
66
|
+
deltacat/compute/compactor_v2/utils/delta.py,sha256=73wET1zqQyYR8eaO59XyKYVn-lqshPrScSiiFnHRUj4,3373
|
64
67
|
deltacat/compute/compactor_v2/utils/io.py,sha256=jgIfwrfH2mTFUx1M0TgwZGGfrS4IXjP1PmqwaQmNAJM,5092
|
68
|
+
deltacat/compute/compactor_v2/utils/merge.py,sha256=tcHlaHHeEN1F_rrAadS_TPWX719uGKjE8yTeVwEwsTk,4843
|
65
69
|
deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=MAscmL35WfwN7Is72aFlD_cGhxtZgjRwwR5kS9Yn2uU,11393
|
66
|
-
deltacat/compute/compactor_v2/utils/task_options.py,sha256=
|
70
|
+
deltacat/compute/compactor_v2/utils/task_options.py,sha256=Ndhff9F_zff6zX3mw4AztHkvSBgR4O8SgMUgwq3rvyM,10601
|
71
|
+
deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
|
72
|
+
deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
|
73
|
+
deltacat/compute/merge_on_read/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
|
+
deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtLnsY987Ulx9n20oAydfPq3Zd3Y9ocbTI,2035
|
75
|
+
deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
76
|
+
deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
|
67
77
|
deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
68
78
|
deltacat/compute/metastats/meta_stats.py,sha256=78hN3aN5wLHUFJsZXuv2JLeqA35HZ8mLUWJDMslMj5Q,18731
|
69
79
|
deltacat/compute/metastats/stats.py,sha256=8iUiSXOAjqiEeNP5RIb5gvhykBgpNHD5IKkB8zsPR0E,7363
|
@@ -102,7 +112,7 @@ deltacat/io/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
102
112
|
deltacat/io/aws/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
103
113
|
deltacat/io/aws/redshift/redshift_datasource.py,sha256=X183O4tgBqtaZOSFmMFvp-9mv8NX5kGvRvX0eoSX8rA,22599
|
104
114
|
deltacat/storage/__init__.py,sha256=B2cjcqySR6g0LBK83H0nOVojYNurbhp5kq3n8pfkzcA,1466
|
105
|
-
deltacat/storage/interface.py,sha256=
|
115
|
+
deltacat/storage/interface.py,sha256=BsB8YnsB-RdUMJSmHj6ef-qBOj5yqWoU9b0HKzQT8OM,21569
|
106
116
|
deltacat/storage/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
107
117
|
deltacat/storage/model/delta.py,sha256=2qhCiEWmrybV0mfBTKHi9VKlNTqJ6wkE5ssq4ozMMQQ,13757
|
108
118
|
deltacat/storage/model/list_result.py,sha256=FgD6oYeKo0EPe8z7jC8T4pAFjBOuBwd4axxGrnYyBG4,2466
|
@@ -113,15 +123,17 @@ deltacat/storage/model/sort_key.py,sha256=SPIxJfI_o7fbp1s3ZKMyX9x7_jK8UZapaVnKSA
|
|
113
123
|
deltacat/storage/model/stream.py,sha256=XZ-c4EQR89NWydEOEG5GCaT8zST10OmjLZBKHZPdrzA,7738
|
114
124
|
deltacat/storage/model/table.py,sha256=IOu1ZOrdRkVDB-FOxYMRvnNf5TukIDfbdHWTqHYN_OY,4225
|
115
125
|
deltacat/storage/model/table_version.py,sha256=cOM9dN-YB_Hhi4h1CzFbldC5qRkm4C1rQ3rpKIZzCNs,7413
|
116
|
-
deltacat/storage/model/types.py,sha256=
|
126
|
+
deltacat/storage/model/types.py,sha256=hj7MmjjVmKT-R9sMUulOWG-FByGZKKaYXNnOWW32mP0,1608
|
117
127
|
deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
118
128
|
deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
119
129
|
deltacat/tests/aws/test_clients.py,sha256=23GMWfz27WWBDXSqphG9mfputsyS7j3I5P_HRk4YoKE,3790
|
130
|
+
deltacat/tests/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
131
|
+
deltacat/tests/catalog/test_default_catalog_impl.py,sha256=9srCU5yQ159oZ9_PoJ_mWMzVUW5bKV0mnmPJc5zKCQQ,3125
|
120
132
|
deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
121
|
-
deltacat/tests/compute/compact_partition_test_cases.py,sha256=
|
122
|
-
deltacat/tests/compute/test_compact_partition_incremental.py,sha256
|
133
|
+
deltacat/tests/compute/compact_partition_test_cases.py,sha256=nvFIEQWvRjpi5yaYL6KIGKgBUjKqYE_elCcc3GHHUlU,53188
|
134
|
+
deltacat/tests/compute/test_compact_partition_incremental.py,sha256=F02ia0zvapQmSJ48KGdwhtjaUrIFQmH77CKNLINUsWQ,10102
|
123
135
|
deltacat/tests/compute/test_compact_partition_params.py,sha256=MIzIcBscwFA1W-cfTTxVx0zcgbrs8D4bI9Hy4TF5eRo,8322
|
124
|
-
deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256
|
136
|
+
deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=s17wb7ub0v_1qLhIzLaHZ68P2xdGilQlV8ibqj57PCc,13372
|
125
137
|
deltacat/tests/compute/test_util_common.py,sha256=Skz0ZfHzidArZhIzRDHOYt-5uGBwx6MRfKZpeBnzh9w,6055
|
126
138
|
deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
|
127
139
|
deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=5yP285lY539CP1UuyYe8Kz14CnBUpE1kZJZjxBAaXew,6530
|
@@ -142,30 +154,30 @@ deltacat/tests/io/test_memcached_object_store.py,sha256=gUVYycPkNpq9XxotdJwFZ2HO
|
|
142
154
|
deltacat/tests/io/test_ray_plasma_object_store.py,sha256=-wJZP6lRtEOogR25wjEiIBGz_lpvWVihwlZ5GqandZU,1911
|
143
155
|
deltacat/tests/io/test_redis_object_store.py,sha256=sZrXrYjkw8u_XrvFilhBbLc8PPnZiuMKa1_Bt9ka5qs,3838
|
144
156
|
deltacat/tests/io/test_s3_object_store.py,sha256=4b7PYEfQJnYGUz6fcLFWVVyRHTlH_yd8CIaCv9l33Gg,1900
|
145
|
-
deltacat/tests/local_deltacat_storage/__init__.py,sha256=
|
157
|
+
deltacat/tests/local_deltacat_storage/__init__.py,sha256=UGf8W9y2gDgOTR2Uj9qWCqe7CXu0GTl1hygoSFEKe80,35341
|
146
158
|
deltacat/tests/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
147
159
|
deltacat/tests/stats/test_intervals.py,sha256=S92DgkALQ1WmbLWcxtvS7RlVGvL-XoPJKUUbkdn9_CQ,1955
|
148
160
|
deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
149
161
|
deltacat/tests/test_utils/constants.py,sha256=zgqFmfIE5ZCtDw4NF-Y4ZEEnaPUP5nDY5768WPod0Fc,208
|
150
|
-
deltacat/tests/test_utils/pyarrow.py,sha256=
|
162
|
+
deltacat/tests/test_utils/pyarrow.py,sha256=nIvS01vJbUMeJ_CoNEmylL5Ca-r9X8M3NlbV0OBnDOs,2000
|
151
163
|
deltacat/tests/test_utils/storage.py,sha256=93GEn4A5WbMHWk0Ec4Bd7RxeHoSEnBfSarfWhKOSNtM,972
|
152
164
|
deltacat/tests/test_utils/utils.py,sha256=a32qEwcSSd1lvRi0aJJ4ZLnc1ZyXmoQF_K95zaQRk2M,455
|
153
165
|
deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
154
166
|
deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iyzjiJ7-Rcl0o,1506
|
155
|
-
deltacat/tests/utils/test_daft.py,sha256=
|
167
|
+
deltacat/tests/utils/test_daft.py,sha256=Xal84zR42rXsWQI3lImdDYWOzewomKmhmiUQ59m67V0,6488
|
156
168
|
deltacat/tests/utils/test_pyarrow.py,sha256=eZAuYp9MUf8lmpIilH57JkURuNsTGZ3IAGC4Gm5hdrM,17307
|
157
169
|
deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
|
158
170
|
deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
|
159
171
|
deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
160
172
|
deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
161
|
-
deltacat/types/media.py,sha256=
|
173
|
+
deltacat/types/media.py,sha256=7_QRU6NbjmJk0GLAn_Km6ja8RE5G3V8jvLfUXqnjnqU,2320
|
162
174
|
deltacat/types/partial_download.py,sha256=9BJ5b0DHyWWeV7wMZjOfYoeH_iil_bjZ9b_WMpUzvHs,2516
|
163
|
-
deltacat/types/tables.py,sha256=
|
175
|
+
deltacat/types/tables.py,sha256=HLm-xI5xCuKTeGbaErspni3DeSVC_aRe2TYFLd6b_mA,4409
|
164
176
|
deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
165
177
|
deltacat/utils/arguments.py,sha256=5y1Xz4HSAD8M8Jt83i6gOEKoYjy_fMQe1V43IhIE4hY,1191
|
166
178
|
deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
|
167
179
|
deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
|
168
|
-
deltacat/utils/daft.py,sha256=
|
180
|
+
deltacat/utils/daft.py,sha256=UlbJpWsxZQ1pC0RVks5lsVJk3Bq5pa0Rw2lKMc9Gz4Q,5441
|
169
181
|
deltacat/utils/metrics.py,sha256=Ob-RXGoNnfTMRXaNbSHoqW8y-n8KfRA9nLuo9AvsReI,6201
|
170
182
|
deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
|
171
183
|
deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
|
@@ -181,8 +193,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
|
|
181
193
|
deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
|
182
194
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
183
195
|
deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
|
184
|
-
deltacat-0.
|
185
|
-
deltacat-0.
|
186
|
-
deltacat-0.
|
187
|
-
deltacat-0.
|
188
|
-
deltacat-0.
|
196
|
+
deltacat-1.0.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
197
|
+
deltacat-1.0.0.dist-info/METADATA,sha256=Jtp8Vk3_u99IPtRxgJ59xSXCWk3GPF2Uk4tX9x-tUcM,1780
|
198
|
+
deltacat-1.0.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
199
|
+
deltacat-1.0.0.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
200
|
+
deltacat-1.0.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|