deltacat 0.2.10__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. deltacat/__init__.py +1 -1
  2. deltacat/aws/s3u.py +250 -111
  3. deltacat/catalog/default_catalog_impl/__init__.py +369 -0
  4. deltacat/compute/compactor_v2/compaction_session.py +175 -152
  5. deltacat/compute/compactor_v2/model/hash_bucket_input.py +6 -0
  6. deltacat/compute/compactor_v2/model/merge_file_group.py +213 -0
  7. deltacat/compute/compactor_v2/model/merge_input.py +8 -24
  8. deltacat/compute/compactor_v2/model/merge_result.py +1 -0
  9. deltacat/compute/compactor_v2/steps/hash_bucket.py +4 -56
  10. deltacat/compute/compactor_v2/steps/merge.py +106 -171
  11. deltacat/compute/compactor_v2/utils/delta.py +97 -0
  12. deltacat/compute/compactor_v2/utils/merge.py +126 -0
  13. deltacat/compute/compactor_v2/utils/task_options.py +16 -4
  14. deltacat/compute/merge_on_read/__init__.py +4 -0
  15. deltacat/compute/merge_on_read/daft.py +40 -0
  16. deltacat/compute/merge_on_read/model/__init__.py +0 -0
  17. deltacat/compute/merge_on_read/model/merge_on_read_params.py +66 -0
  18. deltacat/compute/merge_on_read/utils/__init__.py +0 -0
  19. deltacat/compute/merge_on_read/utils/delta.py +42 -0
  20. deltacat/storage/interface.py +10 -2
  21. deltacat/storage/model/types.py +3 -11
  22. deltacat/tests/catalog/__init__.py +0 -0
  23. deltacat/tests/catalog/test_default_catalog_impl.py +98 -0
  24. deltacat/tests/compute/compact_partition_test_cases.py +126 -1
  25. deltacat/tests/compute/test_compact_partition_incremental.py +4 -1
  26. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +9 -2
  27. deltacat/tests/local_deltacat_storage/__init__.py +19 -2
  28. deltacat/tests/test_utils/pyarrow.py +33 -14
  29. deltacat/tests/utils/test_daft.py +42 -2
  30. deltacat/types/media.py +5 -0
  31. deltacat/types/tables.py +7 -1
  32. deltacat/utils/daft.py +78 -13
  33. {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/METADATA +2 -2
  34. {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/RECORD +37 -25
  35. {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/LICENSE +0 -0
  36. {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/WHEEL +0 -0
  37. {deltacat-0.2.10.dist-info → deltacat-1.0.0.dist-info}/top_level.txt +0 -0
@@ -32,6 +32,7 @@ from deltacat.tests.compute.compact_partition_test_cases import (
32
32
  REBASE_THEN_INCREMENTAL_TEST_CASES,
33
33
  )
34
34
  from typing import Any, Callable, Dict, List, Optional, Set
35
+ from deltacat.types.media import StorageType
35
36
 
36
37
  DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
37
38
  "db_file_path",
@@ -272,7 +273,9 @@ def test_compact_partition_rebase_then_incremental(
272
273
  compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
273
274
  setup_s3_resource, rcf_file_s3_uri
274
275
  )
275
- tables = ds.download_delta(compacted_delta_locator, **ds_mock_kwargs)
276
+ tables = ds.download_delta(
277
+ compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
278
+ )
276
279
  actual_rebase_compacted_table = pa.concat_tables(tables)
277
280
  # if no primary key is specified then sort by sort_key for consistent assertion
278
281
  sorting_cols: List[Any] = (
@@ -341,7 +344,11 @@ def test_compact_partition_rebase_then_incremental(
341
344
  **compaction_audit_obj
342
345
  )
343
346
 
344
- tables = ds.download_delta(compacted_delta_locator_incremental, **ds_mock_kwargs)
347
+ tables = ds.download_delta(
348
+ compacted_delta_locator_incremental,
349
+ storage_type=StorageType.LOCAL,
350
+ **ds_mock_kwargs,
351
+ )
345
352
  actual_compacted_table = pa.concat_tables(tables)
346
353
  expected_terminal_compact_partition_result = (
347
354
  expected_terminal_compact_partition_result.combine_chunks().sort_by(
@@ -1,10 +1,12 @@
1
1
  from typing import Any, Callable, Dict, List, Optional, Set, Union, Tuple
2
2
 
3
3
  import pyarrow as pa
4
+ import daft
4
5
  import json
5
6
  import sqlite3
6
7
  from sqlite3 import Cursor, Connection
7
8
  import uuid
9
+ import ray
8
10
  import io
9
11
 
10
12
  from deltacat.tests.test_utils.storage import create_empty_delta
@@ -38,7 +40,13 @@ from deltacat.storage import (
38
40
  ManifestEntry,
39
41
  ManifestEntryList,
40
42
  )
41
- from deltacat.types.media import ContentType, StorageType, TableType, ContentEncoding
43
+ from deltacat.types.media import (
44
+ ContentType,
45
+ StorageType,
46
+ TableType,
47
+ ContentEncoding,
48
+ DistributedDatasetType,
49
+ )
42
50
  from deltacat.utils.common import ReadKwargsProvider
43
51
 
44
52
  SQLITE_CUR_ARG = "sqlite3_cur"
@@ -337,9 +345,10 @@ def download_delta(
337
345
  columns: Optional[List[str]] = None,
338
346
  file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
339
347
  ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
348
+ distributed_dataset_type: DistributedDatasetType = DistributedDatasetType.RAY_DATASET,
340
349
  *args,
341
350
  **kwargs,
342
- ) -> Union[LocalDataset, DistributedDataset]:
351
+ ) -> Union[LocalDataset, DistributedDataset]: # type: ignore
343
352
  result = []
344
353
  manifest = get_delta_manifest(delta_like, *args, **kwargs)
345
354
 
@@ -356,6 +365,14 @@ def download_delta(
356
365
  )
357
366
  )
358
367
 
368
+ if storage_type == StorageType.DISTRIBUTED:
369
+ if distributed_dataset_type is DistributedDatasetType.DAFT:
370
+ return daft.from_arrow(result)
371
+ elif distributed_dataset_type is DistributedDatasetType.RAY_DATASET:
372
+ return ray.data.from_arrow(result)
373
+ else:
374
+ raise ValueError(f"Dataset type {distributed_dataset_type} not supported!")
375
+
359
376
  return result
360
377
 
361
378
 
@@ -1,14 +1,18 @@
1
- from typing import List
1
+ from typing import List, Optional
2
2
  import pyarrow as pa
3
- from deltacat.storage import Delta, Partition
3
+ from deltacat.storage import Delta, Partition, PartitionLocator
4
4
  import deltacat.tests.local_deltacat_storage as ds
5
5
 
6
6
 
7
7
  def create_delta_from_csv_file(
8
- namespace: str, file_paths: List[str], *args, **kwargs
8
+ namespace: str,
9
+ file_paths: List[str],
10
+ table_name: Optional[str] = None,
11
+ *args,
12
+ **kwargs
9
13
  ) -> Delta:
10
14
  staged_partition = stage_partition_from_file_paths(
11
- namespace, file_paths, *args, **kwargs
15
+ namespace, file_paths, *args, table_name=table_name, **kwargs
12
16
  )
13
17
 
14
18
  committed_delta = commit_delta_to_staged_partition(
@@ -19,10 +23,15 @@ def create_delta_from_csv_file(
19
23
 
20
24
 
21
25
  def stage_partition_from_file_paths(
22
- namespace: str, file_paths: List[str], *args, **kwargs
26
+ namespace: str,
27
+ file_paths: List[str],
28
+ table_name: Optional[str] = None,
29
+ *args,
30
+ **kwargs
23
31
  ) -> Partition:
24
32
  ds.create_namespace(namespace, {}, **kwargs)
25
- table_name = "-".join(file_paths).replace("/", "_")
33
+ if table_name is None:
34
+ table_name = "-".join(file_paths).replace("/", "_")
26
35
  ds.create_table_version(namespace, table_name, "1", **kwargs)
27
36
  stream = ds.get_stream(namespace, table_name, "1", **kwargs)
28
37
  staged_partition = ds.stage_partition(stream, [], **kwargs)
@@ -31,19 +40,29 @@ def stage_partition_from_file_paths(
31
40
 
32
41
  def commit_delta_to_staged_partition(
33
42
  staged_partition, file_paths: List[str], *args, **kwargs
43
+ ) -> Delta:
44
+ committed_delta = commit_delta_to_partition(
45
+ staged_partition, file_paths=file_paths, *args, **kwargs
46
+ )
47
+ ds.commit_partition(staged_partition, **kwargs)
48
+ return committed_delta
49
+
50
+
51
+ def commit_delta_to_partition(
52
+ partition: Partition, file_paths: List[str], *args, **kwargs
34
53
  ) -> Delta:
35
54
  tables = []
36
55
 
56
+ if isinstance(partition, PartitionLocator):
57
+ partition = ds.get_partition(
58
+ partition.stream_locator, partition.partition_values, *args, **kwargs
59
+ )
60
+
37
61
  for file_path in file_paths:
38
62
  table = pa.csv.read_csv(file_path)
39
63
  tables.append(table)
40
- deltas = []
41
64
 
42
- for table in tables:
43
- delta = ds.stage_delta(table, staged_partition, **kwargs)
44
- deltas.append(delta)
65
+ table = pa.concat_tables(tables)
66
+ staged_delta = ds.stage_delta(table, partition, **kwargs)
45
67
 
46
- merged_delta = Delta.merge_deltas(deltas=deltas)
47
- committed_delta = ds.commit_delta(merged_delta, **kwargs)
48
- ds.commit_partition(staged_partition, **kwargs)
49
- return committed_delta
68
+ return ds.commit_delta(staged_delta, **kwargs)
@@ -1,6 +1,6 @@
1
1
  import unittest
2
2
  from deltacat.types.media import ContentEncoding, ContentType
3
- from deltacat.utils.daft import daft_s3_file_to_table
3
+ from deltacat.utils.daft import daft_s3_file_to_table, s3_files_to_dataframe
4
4
 
5
5
  from deltacat.utils.pyarrow import ReadKwargsProviderPyArrowSchemaOverride
6
6
  from deltacat.types.partial_download import PartialParquetParameters
@@ -9,7 +9,7 @@ import pyarrow as pa
9
9
  from pyarrow import parquet as pq
10
10
 
11
11
 
12
- class TestDaftParquetReader(unittest.TestCase):
12
+ class TestDaftS3FileToTable(unittest.TestCase):
13
13
  MVP_PATH = "deltacat/tests/utils/data/mvp.parquet"
14
14
 
15
15
  def test_read_from_s3_all_columns(self):
@@ -121,5 +121,45 @@ class TestDaftParquetReader(unittest.TestCase):
121
121
  self.assertEqual(table.num_rows, 10)
122
122
 
123
123
 
124
+ class TestDaftS3FilesToDataFrame(unittest.TestCase):
125
+ MVP_PATH = "deltacat/tests/utils/data/mvp.parquet"
126
+
127
+ def test_read_from_s3_all_columns(self):
128
+ df = s3_files_to_dataframe(
129
+ uris=[self.MVP_PATH],
130
+ content_encoding=ContentEncoding.IDENTITY.value,
131
+ content_type=ContentType.PARQUET.value,
132
+ ray_init_options={"local_mode": True},
133
+ )
134
+
135
+ table = df.to_arrow()
136
+ self.assertEqual(table.schema.names, ["a", "b"])
137
+ self.assertEqual(table.num_rows, 100)
138
+
139
+ def test_does_not_read_from_s3_if_not_materialized(self):
140
+ df = s3_files_to_dataframe(
141
+ uris=[self.MVP_PATH],
142
+ content_encoding=ContentEncoding.IDENTITY.value,
143
+ content_type=ContentType.PARQUET.value,
144
+ ray_init_options={"local_mode": True},
145
+ )
146
+
147
+ self.assertRaises(RuntimeError, lambda: len(df))
148
+ df.collect()
149
+ self.assertEqual(len(df), 100)
150
+
151
+ def test_raises_error_if_not_supported_content_type(self):
152
+
153
+ self.assertRaises(
154
+ AssertionError,
155
+ lambda: s3_files_to_dataframe(
156
+ uris=[self.MVP_PATH],
157
+ content_encoding=ContentEncoding.IDENTITY.value,
158
+ content_type=ContentType.UNESCAPED_TSV.value,
159
+ ray_init_options={"local_mode": True},
160
+ ),
161
+ )
162
+
163
+
124
164
  if __name__ == "__main__":
125
165
  unittest.main()
deltacat/types/media.py CHANGED
@@ -44,6 +44,11 @@ class TableType(str, Enum):
44
44
  PYARROW_PARQUET = "pyarrow_parquet"
45
45
 
46
46
 
47
+ class DistributedDatasetType(str, Enum):
48
+ DAFT = "daft"
49
+ RAY_DATASET = "ray_dataset"
50
+
51
+
47
52
  class SchemaType(str, Enum):
48
53
  ARROW = "arrow"
49
54
 
deltacat/types/tables.py CHANGED
@@ -15,10 +15,11 @@ from ray.data.read_api import (
15
15
  )
16
16
 
17
17
  import deltacat.storage as dcs
18
- from deltacat.types.media import TableType
18
+ from deltacat.types.media import TableType, DistributedDatasetType
19
19
  from deltacat.utils import numpy as np_utils
20
20
  from deltacat.utils import pandas as pd_utils
21
21
  from deltacat.utils import pyarrow as pa_utils
22
+ from deltacat.utils import daft as daft_utils
22
23
  from deltacat.utils.ray_utils import dataset as ds_utils
23
24
 
24
25
  TABLE_TYPE_TO_READER_FUNC: Dict[int, Callable] = {
@@ -78,6 +79,11 @@ TABLE_TYPE_TO_DATASET_CREATE_FUNC_REFS: Dict[str, Callable] = {
78
79
  }
79
80
 
80
81
 
82
+ DISTRIBUTED_DATASET_TYPE_TO_READER_FUNC: Dict[int, Callable] = {
83
+ DistributedDatasetType.DAFT.value: daft_utils.s3_files_to_dataframe
84
+ }
85
+
86
+
81
87
  class TableWriteMode(str, Enum):
82
88
  """
83
89
  Enum controlling how a given dataset will be written to a table.
deltacat/utils/daft.py CHANGED
@@ -1,8 +1,9 @@
1
1
  import logging
2
- from typing import Optional, List
3
-
2
+ from typing import Optional, List, Any, Dict, Callable
3
+ import daft
4
+ import ray
4
5
  from daft.table import read_parquet_into_pyarrow
5
- from daft import TimeUnit
6
+ from daft import TimeUnit, DataFrame
6
7
  from daft.io import IOConfig, S3Config
7
8
  import pyarrow as pa
8
9
 
@@ -22,6 +23,66 @@ from deltacat.types.partial_download import (
22
23
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
23
24
 
24
25
 
26
+ def s3_files_to_dataframe(
27
+ uris: List[str],
28
+ content_type: str,
29
+ content_encoding: str,
30
+ column_names: Optional[List[str]] = None,
31
+ include_columns: Optional[List[str]] = None,
32
+ read_func_kwargs_provider: Optional[ReadKwargsProvider] = None,
33
+ ray_options_provider: Optional[Callable[[int, Any], Dict[str, Any]]] = None,
34
+ s3_client_kwargs: Optional[Any] = None,
35
+ ray_init_options: Optional[Dict[str, Any]] = None,
36
+ ) -> DataFrame:
37
+
38
+ if ray_init_options is None:
39
+ ray_init_options = {}
40
+
41
+ assert (
42
+ content_type == ContentType.PARQUET.value
43
+ ), f"daft native reader currently only supports parquet, got {content_type}"
44
+
45
+ assert (
46
+ content_encoding == ContentEncoding.IDENTITY.value
47
+ ), f"daft native reader currently only supports identity encoding, got {content_encoding}"
48
+
49
+ if not ray.is_initialized():
50
+ ray.init(address="auto", ignore_reinit_error=True, **ray_init_options)
51
+
52
+ daft.context.set_runner_ray(noop_if_initialized=True)
53
+
54
+ if s3_client_kwargs is None:
55
+ s3_client_kwargs = {}
56
+
57
+ kwargs = {}
58
+ if read_func_kwargs_provider is not None:
59
+ kwargs = read_func_kwargs_provider(content_type, kwargs)
60
+
61
+ # TODO(raghumdani): pass in coerce_int96_timestamp arg
62
+ # https://github.com/Eventual-Inc/Daft/issues/1894
63
+
64
+ io_config = _get_s3_io_config(s3_client_kwargs=s3_client_kwargs)
65
+
66
+ logger.debug(
67
+ f"Preparing to read S3 object from {len(uris)} files into daft dataframe"
68
+ )
69
+
70
+ df, latency = timed_invocation(
71
+ daft.read_parquet, path=uris, io_config=io_config, use_native_downloader=True
72
+ )
73
+
74
+ logger.debug(f"Time to create daft dataframe from {len(uris)} files is {latency}s")
75
+
76
+ columns_to_read = include_columns or column_names
77
+
78
+ logger.debug(f"Taking columns {columns_to_read} from the daft df.")
79
+
80
+ if columns_to_read:
81
+ return df.select(*columns_to_read)
82
+ else:
83
+ return df
84
+
85
+
25
86
  def daft_s3_file_to_table(
26
87
  s3_url: str,
27
88
  content_type: str,
@@ -55,16 +116,7 @@ def daft_s3_file_to_table(
55
116
  ):
56
117
  row_groups = partial_file_download_params.row_groups_to_download
57
118
 
58
- io_config = IOConfig(
59
- s3=S3Config(
60
- key_id=s3_client_kwargs.get("aws_access_key_id"),
61
- access_key=s3_client_kwargs.get("aws_secret_access_key"),
62
- session_token=s3_client_kwargs.get("aws_session_token"),
63
- retry_mode="adaptive",
64
- num_tries=BOTO_MAX_RETRIES,
65
- max_connections=DAFT_MAX_S3_CONNECTIONS_PER_FILE,
66
- )
67
- )
119
+ io_config = _get_s3_io_config(s3_client_kwargs=s3_client_kwargs)
68
120
 
69
121
  logger.debug(f"Preparing to read S3 object from {s3_url} into daft table")
70
122
 
@@ -95,3 +147,16 @@ def daft_s3_file_to_table(
95
147
  return coerce_pyarrow_table_to_schema(pa_table, input_schema)
96
148
  else:
97
149
  return pa_table
150
+
151
+
152
+ def _get_s3_io_config(s3_client_kwargs) -> IOConfig:
153
+ return IOConfig(
154
+ s3=S3Config(
155
+ key_id=s3_client_kwargs.get("aws_access_key_id"),
156
+ access_key=s3_client_kwargs.get("aws_secret_access_key"),
157
+ session_token=s3_client_kwargs.get("aws_session_token"),
158
+ retry_mode="adaptive",
159
+ num_tries=BOTO_MAX_RETRIES,
160
+ max_connections=DAFT_MAX_S3_CONNECTIONS_PER_FILE,
161
+ )
162
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 0.2.10
3
+ Version: 1.0.0
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -27,7 +27,7 @@ Requires-Dist: tenacity ==8.1.0
27
27
  Requires-Dist: typing-extensions ==4.4.0
28
28
  Requires-Dist: pymemcache ==4.0.0
29
29
  Requires-Dist: redis ==4.6.0
30
- Requires-Dist: getdaft ==0.2.13
30
+ Requires-Dist: getdaft ==0.2.16
31
31
  Requires-Dist: schedule ==1.2.0
32
32
 
33
33
  # DeltaCAT
@@ -1,11 +1,11 @@
1
- deltacat/__init__.py,sha256=HQxIkU4btF_x_4uymlZWtg9axgY7s2D7JzQzFScu1RU,1778
1
+ deltacat/__init__.py,sha256=hul5a8v-ltL75fkLOfQBj_i2OoY5tKMy-yO3oTZzikA,1777
2
2
  deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
3
3
  deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
4
4
  deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
5
5
  deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  deltacat/aws/clients.py,sha256=wWiqXyZPWXezdEbhQ7DLwEVnYV6KiitqzBc5B4UAwc0,6184
7
7
  deltacat/aws/constants.py,sha256=luXWMO_8eatq8f9NlFjNM7q362j77JwzTM2BEVS_8-8,353
8
- deltacat/aws/s3u.py,sha256=s2On5X3IQiCsCMKw4lpfV1GfKQVWOXNsdAmIJK5PEM0,18610
8
+ deltacat/aws/s3u.py,sha256=aK1_pyfipd9Jq1ZiaOC-gszyIIfc1TSbch6YmuZmjt0,23878
9
9
  deltacat/aws/redshift/__init__.py,sha256=7SvjG-dqox8zZUhFicTsUvpG5vXYDl_QQ3ohlHOgTKc,342
10
10
  deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  deltacat/aws/redshift/model/manifest.py,sha256=ThgpdwzaWz493Zz9e8HSWwuxEheA1nDuypM3pe4vozk,12987
@@ -15,6 +15,7 @@ deltacat/benchmarking/conftest.py,sha256=6M9NJ71vnOpeMxG-Ly9UWRsgZmky5-1GTuoRD-O
15
15
  deltacat/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  deltacat/catalog/delegate.py,sha256=lVPPG4bBxoNt0IFufKIXc7H2Nhvb6QNox-WfXxaNxuc,8745
17
17
  deltacat/catalog/interface.py,sha256=On5hsbznnIKsvCcm5C1N3OS3ar3v6q7pAUkLWMb6PtY,6556
18
+ deltacat/catalog/default_catalog_impl/__init__.py,sha256=ted1_sA2Y3ljJjb9hRuWUCiUlCen-HAW8TxoV4g4IW0,12677
18
19
  deltacat/catalog/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
20
  deltacat/catalog/model/catalog.py,sha256=-Ho7a3rV1hiOS9cSRCAor9AtXV9nJn9t_MDVql9pIxo,2212
20
21
  deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJnhjTZ6KjybYlhE,727
@@ -48,22 +49,31 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
48
49
  deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
49
50
  deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
50
51
  deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- deltacat/compute/compactor_v2/compaction_session.py,sha256=YnKG2LlrgYYsVKt_6txcXzCgolaQWF4SuQz0eZmChZM,20422
52
+ deltacat/compute/compactor_v2/compaction_session.py,sha256=QVWuI5edQORqhBkccM5waEXMOzg_6756-eT4YqFxkD4,21932
52
53
  deltacat/compute/compactor_v2/constants.py,sha256=yZgzFD59wiXbXiTVgYPWRodZGpngiSBNFB2jmoZ4fps,1471
53
54
  deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
- deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=pgE2o8Z9-Dvs75C15LAkmfuJFFi5pRIuuxA9GGyDlLM,2631
55
+ deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=HS1BLgNCjQoqe7EuM9baQWE6U4BFqU2PVEWFEF725VE,2815
55
56
  deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
56
- deltacat/compute/compactor_v2/model/merge_input.py,sha256=A-_Oq54sx1vrT-Ewv2_yKARdIh928yJvEuheCkw5tvQ,5049
57
- deltacat/compute/compactor_v2/model/merge_result.py,sha256=L53i9iL_XpzqBr7HETixD5v5qfLvitkGcjoML_hHfcs,368
57
+ deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViVO1SVljCj6f0B3MfB3hqtGm2S0s,7410
58
+ deltacat/compute/compactor_v2/model/merge_input.py,sha256=xiUsA2jY1tQRGc84Q2SygijF5N47x3WqFjcy6sJwEIs,4511
59
+ deltacat/compute/compactor_v2/model/merge_result.py,sha256=R9PV-0NGJ9GaSSeVqbIEhiqBolshPP4m7_eCb0yJR8g,401
58
60
  deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
- deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=wFu4vAS8PR0_SxxLIfGPmtLjUV9hCfPeHG56CFpoLIM,8100
60
- deltacat/compute/compactor_v2/steps/merge.py,sha256=QI8ovaO6yPw_VgDYqTzQOxw2oov4ipuW2gR-w01FWGY,18087
61
+ deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=2RfkVh-0mccgONa4atQFmNfPk668oIZr8TthwTUyh-k,6295
62
+ deltacat/compute/compactor_v2/steps/merge.py,sha256=UR6HbPUNxEVtvsGiibRYmzq0_nT9Lq6AbEa6e71KaNQ,14496
61
63
  deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
64
  deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=rNKZisxGrLQOkwX8eHUQiFoTR1V-E66pMqWigtrs618,2156
63
65
  deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQl8C5lBEr8gbNfbsw,1932
66
+ deltacat/compute/compactor_v2/utils/delta.py,sha256=73wET1zqQyYR8eaO59XyKYVn-lqshPrScSiiFnHRUj4,3373
64
67
  deltacat/compute/compactor_v2/utils/io.py,sha256=jgIfwrfH2mTFUx1M0TgwZGGfrS4IXjP1PmqwaQmNAJM,5092
68
+ deltacat/compute/compactor_v2/utils/merge.py,sha256=tcHlaHHeEN1F_rrAadS_TPWX719uGKjE8yTeVwEwsTk,4843
65
69
  deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=MAscmL35WfwN7Is72aFlD_cGhxtZgjRwwR5kS9Yn2uU,11393
66
- deltacat/compute/compactor_v2/utils/task_options.py,sha256=LA1QbiDv3f9LJQwjKz3-YH3TpK3exL1c5acaGAOF57E,10210
70
+ deltacat/compute/compactor_v2/utils/task_options.py,sha256=Ndhff9F_zff6zX3mw4AztHkvSBgR4O8SgMUgwq3rvyM,10601
71
+ deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
72
+ deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
73
+ deltacat/compute/merge_on_read/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
+ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtLnsY987Ulx9n20oAydfPq3Zd3Y9ocbTI,2035
75
+ deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
+ deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
67
77
  deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
78
  deltacat/compute/metastats/meta_stats.py,sha256=78hN3aN5wLHUFJsZXuv2JLeqA35HZ8mLUWJDMslMj5Q,18731
69
79
  deltacat/compute/metastats/stats.py,sha256=8iUiSXOAjqiEeNP5RIb5gvhykBgpNHD5IKkB8zsPR0E,7363
@@ -102,7 +112,7 @@ deltacat/io/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
102
112
  deltacat/io/aws/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
103
113
  deltacat/io/aws/redshift/redshift_datasource.py,sha256=X183O4tgBqtaZOSFmMFvp-9mv8NX5kGvRvX0eoSX8rA,22599
104
114
  deltacat/storage/__init__.py,sha256=B2cjcqySR6g0LBK83H0nOVojYNurbhp5kq3n8pfkzcA,1466
105
- deltacat/storage/interface.py,sha256=JEtJ1zzsPUEEo3_K5vUeCN4nQDbPyJ6neHllfYt4tCk,21347
115
+ deltacat/storage/interface.py,sha256=BsB8YnsB-RdUMJSmHj6ef-qBOj5yqWoU9b0HKzQT8OM,21569
106
116
  deltacat/storage/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
117
  deltacat/storage/model/delta.py,sha256=2qhCiEWmrybV0mfBTKHi9VKlNTqJ6wkE5ssq4ozMMQQ,13757
108
118
  deltacat/storage/model/list_result.py,sha256=FgD6oYeKo0EPe8z7jC8T4pAFjBOuBwd4axxGrnYyBG4,2466
@@ -113,15 +123,17 @@ deltacat/storage/model/sort_key.py,sha256=SPIxJfI_o7fbp1s3ZKMyX9x7_jK8UZapaVnKSA
113
123
  deltacat/storage/model/stream.py,sha256=XZ-c4EQR89NWydEOEG5GCaT8zST10OmjLZBKHZPdrzA,7738
114
124
  deltacat/storage/model/table.py,sha256=IOu1ZOrdRkVDB-FOxYMRvnNf5TukIDfbdHWTqHYN_OY,4225
115
125
  deltacat/storage/model/table_version.py,sha256=cOM9dN-YB_Hhi4h1CzFbldC5qRkm4C1rQ3rpKIZzCNs,7413
116
- deltacat/storage/model/types.py,sha256=8SuReHxWtGLomac8eTibzJcNYfLansZHu-CxWI6Wwno,2114
126
+ deltacat/storage/model/types.py,sha256=hj7MmjjVmKT-R9sMUulOWG-FByGZKKaYXNnOWW32mP0,1608
117
127
  deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
128
  deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
119
129
  deltacat/tests/aws/test_clients.py,sha256=23GMWfz27WWBDXSqphG9mfputsyS7j3I5P_HRk4YoKE,3790
130
+ deltacat/tests/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
131
+ deltacat/tests/catalog/test_default_catalog_impl.py,sha256=9srCU5yQ159oZ9_PoJ_mWMzVUW5bKV0mnmPJc5zKCQQ,3125
120
132
  deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
121
- deltacat/tests/compute/compact_partition_test_cases.py,sha256=EyZwh-7qKiMmzJT8E_V74cvle5uONYZyt89jmdAu1TI,47952
122
- deltacat/tests/compute/test_compact_partition_incremental.py,sha256=-nIQev0FYWbp76LwM0H4KpMEgP2GMqVRFFJHMsLUN2E,10011
133
+ deltacat/tests/compute/compact_partition_test_cases.py,sha256=nvFIEQWvRjpi5yaYL6KIGKgBUjKqYE_elCcc3GHHUlU,53188
134
+ deltacat/tests/compute/test_compact_partition_incremental.py,sha256=F02ia0zvapQmSJ48KGdwhtjaUrIFQmH77CKNLINUsWQ,10102
123
135
  deltacat/tests/compute/test_compact_partition_params.py,sha256=MIzIcBscwFA1W-cfTTxVx0zcgbrs8D4bI9Hy4TF5eRo,8322
124
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=-yFmEGqWMTIq9iShFU9rn4cX7ky1Zmm3pv4F9NwsQUo,13218
136
+ deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=s17wb7ub0v_1qLhIzLaHZ68P2xdGilQlV8ibqj57PCc,13372
125
137
  deltacat/tests/compute/test_util_common.py,sha256=Skz0ZfHzidArZhIzRDHOYt-5uGBwx6MRfKZpeBnzh9w,6055
126
138
  deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
127
139
  deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=5yP285lY539CP1UuyYe8Kz14CnBUpE1kZJZjxBAaXew,6530
@@ -142,30 +154,30 @@ deltacat/tests/io/test_memcached_object_store.py,sha256=gUVYycPkNpq9XxotdJwFZ2HO
142
154
  deltacat/tests/io/test_ray_plasma_object_store.py,sha256=-wJZP6lRtEOogR25wjEiIBGz_lpvWVihwlZ5GqandZU,1911
143
155
  deltacat/tests/io/test_redis_object_store.py,sha256=sZrXrYjkw8u_XrvFilhBbLc8PPnZiuMKa1_Bt9ka5qs,3838
144
156
  deltacat/tests/io/test_s3_object_store.py,sha256=4b7PYEfQJnYGUz6fcLFWVVyRHTlH_yd8CIaCv9l33Gg,1900
145
- deltacat/tests/local_deltacat_storage/__init__.py,sha256=Jv0ZVjEvFptoNTZoIFvFYisjkqY4O5HSnviUPbiTQUY,34776
157
+ deltacat/tests/local_deltacat_storage/__init__.py,sha256=UGf8W9y2gDgOTR2Uj9qWCqe7CXu0GTl1hygoSFEKe80,35341
146
158
  deltacat/tests/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
147
159
  deltacat/tests/stats/test_intervals.py,sha256=S92DgkALQ1WmbLWcxtvS7RlVGvL-XoPJKUUbkdn9_CQ,1955
148
160
  deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
161
  deltacat/tests/test_utils/constants.py,sha256=zgqFmfIE5ZCtDw4NF-Y4ZEEnaPUP5nDY5768WPod0Fc,208
150
- deltacat/tests/test_utils/pyarrow.py,sha256=QUqiEbHC3_v0BqDgQyHWxyCrsmqXlz6CI5lJPs-3x7I,1510
162
+ deltacat/tests/test_utils/pyarrow.py,sha256=nIvS01vJbUMeJ_CoNEmylL5Ca-r9X8M3NlbV0OBnDOs,2000
151
163
  deltacat/tests/test_utils/storage.py,sha256=93GEn4A5WbMHWk0Ec4Bd7RxeHoSEnBfSarfWhKOSNtM,972
152
164
  deltacat/tests/test_utils/utils.py,sha256=a32qEwcSSd1lvRi0aJJ4ZLnc1ZyXmoQF_K95zaQRk2M,455
153
165
  deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
154
166
  deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iyzjiJ7-Rcl0o,1506
155
- deltacat/tests/utils/test_daft.py,sha256=iN6rAwGXw5F4xT2UZ72bN276hkKVD7XD4WNp5DKgm2Q,5098
167
+ deltacat/tests/utils/test_daft.py,sha256=Xal84zR42rXsWQI3lImdDYWOzewomKmhmiUQ59m67V0,6488
156
168
  deltacat/tests/utils/test_pyarrow.py,sha256=eZAuYp9MUf8lmpIilH57JkURuNsTGZ3IAGC4Gm5hdrM,17307
157
169
  deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
158
170
  deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
159
171
  deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
160
172
  deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
- deltacat/types/media.py,sha256=RALwafQ0SwMyPUIcENhURk7Sor_2CIfEMztvFUnvZFQ,2227
173
+ deltacat/types/media.py,sha256=7_QRU6NbjmJk0GLAn_Km6ja8RE5G3V8jvLfUXqnjnqU,2320
162
174
  deltacat/types/partial_download.py,sha256=9BJ5b0DHyWWeV7wMZjOfYoeH_iil_bjZ9b_WMpUzvHs,2516
163
- deltacat/types/tables.py,sha256=xedkualOnncyH_AjiflrkQY4YCZ_IW-zdOZRsHm8_5I,4198
175
+ deltacat/types/tables.py,sha256=HLm-xI5xCuKTeGbaErspni3DeSVC_aRe2TYFLd6b_mA,4409
164
176
  deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
177
  deltacat/utils/arguments.py,sha256=5y1Xz4HSAD8M8Jt83i6gOEKoYjy_fMQe1V43IhIE4hY,1191
166
178
  deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
167
179
  deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
168
- deltacat/utils/daft.py,sha256=eZG1AjK21lM7bzEc3_BniDqpqMGDrlp_qj9Du4dxaV0,3334
180
+ deltacat/utils/daft.py,sha256=UlbJpWsxZQ1pC0RVks5lsVJk3Bq5pa0Rw2lKMc9Gz4Q,5441
169
181
  deltacat/utils/metrics.py,sha256=Ob-RXGoNnfTMRXaNbSHoqW8y-n8KfRA9nLuo9AvsReI,6201
170
182
  deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
171
183
  deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
@@ -181,8 +193,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
181
193
  deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
182
194
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
183
195
  deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
184
- deltacat-0.2.10.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
185
- deltacat-0.2.10.dist-info/METADATA,sha256=ZI6_ysr3AKmzAUGDPkypTm1QhtvGzVK3Yxnuu6KgM4Q,1781
186
- deltacat-0.2.10.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
187
- deltacat-0.2.10.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
188
- deltacat-0.2.10.dist-info/RECORD,,
196
+ deltacat-1.0.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
197
+ deltacat-1.0.0.dist-info/METADATA,sha256=Jtp8Vk3_u99IPtRxgJ59xSXCWk3GPF2Uk4tX9x-tUcM,1780
198
+ deltacat-1.0.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
199
+ deltacat-1.0.0.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
200
+ deltacat-1.0.0.dist-info/RECORD,,