deltacat 1.1.35__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +2 -3
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -1
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -5
- deltacat/compute/compactor_v2/steps/merge.py +11 -80
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.35.dist-info/METADATA +0 -64
- deltacat-1.1.35.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
deltacat/storage/rivulet/metastore/sst_interval_tree.py
@@ -0,0 +1,260 @@
+from __future__ import annotations
+
+from bisect import bisect_left, bisect_right
+from collections import defaultdict
+from dataclasses import dataclass
+from itertools import tee
+from typing import Any, Dict, Set, List, FrozenSet, Iterable, TypeVar, NamedTuple
+
+from intervaltree import Interval, IntervalTree
+
+from deltacat.storage.rivulet.metastore.delta import DeltaContext
+from deltacat.storage.rivulet.metastore.sst import SSTable, SSTableRow
+from deltacat.storage.rivulet import Schema
+
+T = TypeVar("T")
+
+
+# Can be replaced with itertools.pairwise once we're on python 3.10+
+def pairwise(iterable):
+    a, b = tee(iterable)
+    next(b, None)
+    return zip(a, b)
+
+
+class Block(NamedTuple):
+    row: SSTableRow
+    context: DeltaContext
+    """Context from the manifest around the placement of this row in the LSM-Tree"""
+
+
+@dataclass(frozen=True)
+class BlockGroup:
+    """
+    TODO discuss "Block" terminology.
+
+    This data structure represents a set of data blocks which have to be traversed
+    together, because rows in the blocks are in an overlapping key range
+
+    The data and key ranges within a BlockGroup can be processed in parallel during a scan
+
+    Each block may show up in many BlockGroups. This is because as soon as the combination of blocks in the group
+    changes, it becomes a different group.
+
+    Take the following example: A dataset has Field Group 1 (FG1) and Field Group 2 (FG2). The SST rows look like below:
+
+    Field group 1     Field group 2
+    ------------------|---------------
+    [0,100] -> Block1 | [0-10] -> Block4
+    [3-90] -> Block2  | [0-100] -> Block5
+    [10-95] -> Block3 |
+
+    If we want to scan this data, we can index off of the SST rows into an interval tree and observe the following
+    "boundaries" in the interval tree:
+
+    BlockGroup1 - Covers rows [0-3), includes Blocks 1,4,5
+    BlockGroup2 - Covers rows [3-10), includes Blocks 1, 2, 4, 5
+    BlockGroup3 - Covers rows [10-90), includes Blocks 1, 2, 3, 4*, 5
+    BlockGroup4 - Covers rows [90-95), includes Blocks 1, 2*, 3, 5
+    BlockGroup5 - Covers rows [95-100], includes Blocks 1, 3*, 5
+    *special case - interval end==block group start
+
+    Creating a sorted list of BlockGroups like this allows us to know exactly which blocks contain records
+    for any given point query, range query, or scan. For instance, if the user queries for key=3, we know
+    to read BlockGroup1, or key[0-10] to read BlockGroup1+BlockGroup2.
+    """
+
+    key_min: T
+    """
+    Key min is inclusive (the block group covers data where primary key>=key_min)
+    """
+    key_max: T
+    field_group_to_blocks: Dict[Schema, FrozenSet[Block]]
+    key_max_inclusive: bool = False
+    """
+    By default, key_min is inclusive and key_max is non-inclusive.
+    For the highest key in an SSTable, we need to set key_max_inclusive to True
+    """
+
+    def key_in_range(self, key: T) -> bool:
+        if self.key_max_inclusive:
+            return self.key_min <= key <= self.key_max
+        else:
+            return self.key_min <= key < self.key_max
+
+    def key_below_range(self, key: T) -> bool:
+        return key < self.key_min
+
+
+@dataclass(frozen=True)
+class OrderedBlockGroups:
+    """
+    Ordered block groups representing a sequential interval of a primary key range
+
+    Block groups have an inclusive key_min and key_max
+
+    The block groups have a "boundary table" which represents primary key lower/upper ranges in the table
+
+    For example, a boundary table of [1,3,5,10] has blocks: [1,3), [3,5), [5,10]
+    """
+
+    key_min: T
+    """
+    Key min is inclusive
+    """
+    key_max: T
+    """
+    Key max is inclusive
+    """
+    block_groups: List[BlockGroup]
+    boundary_table: List[T]
+
+
+class BlockIntervalTree:
+    """
+    This interval tree combines all L0 SSTables in order to effectively traverse key ranges
+    so that data can be zippered across field groups
+
+    This interval tree is SHARED across N different field groups. This is because
+    maintaining a different interval tree for N different field groups would require
+    more complex traversal to achieve the same result
+    """
+
+    def __init__(self):
+        self.tree: IntervalTree = IntervalTree()
+        self.max_key_map: Dict[Any, List[Interval]] = {}
+
+    def add_sst_table(self, sst: SSTable, context: DeltaContext):
+        """
+        Add intervals to SSTree which use primary key min and max as intervals
+        The data for each interval is a tuple of (schema, SSTableRow)
+        """
+        self.add_sst_rows(sst.rows, context)
+
+    def add_sst_rows(self, sst_rows: Iterable[SSTableRow], context: DeltaContext):
+        """
+        Add individual SSTable rows to tree
+        """
+        for row in sst_rows:
+            interval: Interval = Interval(row.key_min, row.key_max, Block(row, context))
+            self.tree.add(interval)
+            if row.key_max not in self.max_key_map:
+                self.max_key_map[row.key_max] = [interval]
+            else:
+                self.max_key_map[row.key_max].append(interval)
+
+    def get_sorted_block_groups(
+        self, min_key: Any | None = None, max_key: Any | None = None
+    ) -> OrderedBlockGroups:
+        """
+        Returns an ordered list of block groups by primary key range
+        The IntervalTree boundary table contains each boundary where the set of intervals changes
+        This function traverses the boundary table and builds a list
+
+        Edge case - incompatibility with Interval Tree Library
+
+        Note that the IntervalTree Library treats ALL ranges as min=inclusive, max=non-inclusive
+        This is different from our SSTables, where min_key and max_key are both inclusive.
+
+        As a suboptimal workaround, we can fix this by adding an SSTRow to an interval when the
+        SST row's max key is equal to the lower bound of the interval. Take the following example:
+
+        Field group 1     Field group 2
+        ------------------|---------------
+        [0,100] -> Block1 | [0-10] -> Block4
+        [3-90] -> Block2  | [0-100] -> Block5
+        [10-95] -> Block3 |
+
+        Our workaround adds for example Block4 to the interval [10,90), because Block4 is inclusive
+        of key 10 but the IntervalTree library thinks it is non-inclusive.
+
+        BlockGroup1 - Covers rows [0-3), includes Blocks 1,4,5
+        BlockGroup2 - Covers rows [3-10), includes Blocks 1, 2, 4, 5
+        BlockGroup3 - Covers rows [10-90), includes Blocks 1, 2, 3, 4*, 5
+        BlockGroup4 - Covers rows [90-95), includes Blocks 1, 2*, 3, 5
+        BlockGroup5 - Covers rows [95-100], includes Blocks 1, 3*, 5
+        *special case - interval end==block group start
+
+        An ideal solution would produce block groups like below. To do this against the IntervalTree
+        library, we would need to know how to convert an inclusive range like ["bar", "foo"] into a
+        range like ["bar", "fooa") where the end range is non-inclusive. It is fine for our block groups
+        to be non-optimal, we just need code when zipper merging to detect if a block group's max key is
+        less than the current iterator and therefore not consider it.
+
+        Optimal block groups:
+        BlockGroup1 - Covers rows [0-3), includes Blocks 1,5,4
+        BlockGroup2 - Covers rows [3-10), includes Blocks 1,5,2,4
+        BlockGroup3 - Covers rows [10-11), includes Blocks 1, 2, 3, 4, 5
+        BlockGroup3 - Covers rows [11-91), includes Blocks 1,2,3,5
+        BlockGroup4 - Covers rows [91-96), includes Blocks 1,3,5
+        BlockGroup5 - Covers rows [96-100], includes Blocks 1,5
+
+        :param: min_key optionally restricts results so that they must overlap or be greater than min_key, INCLUSIVE
+        A range [0,200) will be included for min_key <=100 because it overlaps with the min key
+        A range [100, 200) will be included because it overlaps with the min key (100 key inclusive)
+        :param: max_key optionally restricts results to be less than or overlap with max_key, INCLUSIVE
+        A range like [100,200) will NOT be included for max key=200 because range maxes are non-inclusive
+        """
+        key_boundaries = self.tree.boundary_table.keys()
+        block_groups: List[BlockGroup] = []
+        block_groups_min = None
+        block_groups_max = None
+
+        # Note that we need to expand min_key_idx and max_key_idx by 1 to cover cases where
+        # the pairwise traversal (x,y) has x>=min_key>=y and x<=max_key<=y
+        if min_key is not None and max_key is not None and min_key > max_key:
+            raise ValueError(
+                f"min_key {min_key} cannot be greater than max_key {max_key}"
+            )
+
+        min_key_idx = (
+            max(0, bisect_left(key_boundaries, min_key) - 1)
+            if min_key is not None
+            else None
+        )
+        max_key_idx = (
+            bisect_right(key_boundaries, max_key) + 1 if max_key is not None else None
+        )
+        boundary_table = key_boundaries[min_key_idx:max_key_idx]
+
+        for lower_bound, upper_bound in pairwise(boundary_table):
+            # Note that IntervalTree library treats lower bound of slice as inclusive and upper as exclusive
+            # We follow the same structure in our BlockGroup
+            intervals: Set[Interval] = self.tree.overlap(lower_bound, upper_bound)
+
+            # Special case for if max key is equal to lower_bound. See method pydoc for more details
+            for i in self.max_key_map.get(lower_bound, []):
+                intervals.add(i)
+
+            field_group_to_blocks = defaultdict(set)
+            for interval in intervals:
+                data: Block = interval.data
+                schema = data.context.schema
+                field_group_to_blocks[schema].add(data)
+
+            # freeze dict to make it hashable
+            field_group_to_blocks = {
+                k: frozenset(v) for k, v in field_group_to_blocks.items()
+            }
+
+            # Special case - if this is the very last iteration, set key_max_inclusive to True
+            max_key_inclusive = upper_bound == boundary_table[-1]
+
+            block_group = BlockGroup(
+                lower_bound, upper_bound, field_group_to_blocks, max_key_inclusive
+            )
+            block_groups_min = (
+                lower_bound
+                if block_groups_min is None
+                else min(block_groups_min, lower_bound)
+            )
+            block_groups_max = (
+                upper_bound
+                if block_groups_max is None
+                else max(block_groups_max, upper_bound)
+            )
+            block_groups.append(block_group)
+
+        return OrderedBlockGroups(
+            block_groups_min, block_groups_max, block_groups, boundary_table
+        )
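The block-group construction described in the docstrings above can be reproduced with the intervaltree library alone. The sketch below is not part of the diff; it only replays the Field Group 1/2 example to show how the boundary table plus overlap() calls yield the documented block groups, including the end==start special case that get_sorted_block_groups patches up via max_key_map.

```python
# Standalone sketch (not from the package): the docstring's example, driven
# directly through the intervaltree library that BlockIntervalTree wraps.
from itertools import tee

from intervaltree import Interval, IntervalTree


def pairwise(iterable):
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)


tree = IntervalTree()
tree.add(Interval(0, 100, "Block1"))  # Field group 1
tree.add(Interval(3, 90, "Block2"))
tree.add(Interval(10, 95, "Block3"))
tree.add(Interval(0, 10, "Block4"))   # Field group 2
tree.add(Interval(0, 100, "Block5"))

# boundary_table has an entry wherever the set of overlapping intervals
# changes: 0, 3, 10, 90, 95, 100 for this example.
boundaries = list(tree.boundary_table.keys())

for lo, hi in pairwise(boundaries):
    blocks = sorted(iv.data for iv in tree.overlap(lo, hi))
    # Block4 ([0-10]) drops out of [10, 90) because intervaltree treats the
    # interval end as exclusive; BlockIntervalTree.max_key_map re-adds such
    # blocks when an SST row's inclusive max key equals the lower bound.
    print(f"[{lo}, {hi}) -> {blocks}")
```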
deltacat/storage/rivulet/mvp/Table.py
@@ -0,0 +1,101 @@
+from dataclasses import dataclass
+from typing import List, Dict, Any
+from typing import Iterable
+
+from collections.abc import Mapping
+
+from pyarrow import RecordBatch
+
+
+@dataclass
+class MvpRow(Mapping):
+    data: Dict[str, Any]
+
+    def __getitem__(self, key):
+        return self.data[key]
+
+    def __iter__(self):
+        return iter(self.data)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __contains__(self, key):
+        return key in self.data
+
+    def keys(self):
+        return self.data.keys()
+
+    def values(self):
+        return self.data.values()
+
+    def items(self):
+        return self.data.items()
+
+    def get(self, key, default=None):
+        return self.data.get(key, default)
+
+    @staticmethod
+    def generate_from_arrow(batch: RecordBatch):
+        for row_idx in range(batch.num_rows):
+            out = {}
+            for column_idx, column in enumerate(batch.column_names):
+                col = batch.column(column_idx)
+                out.update({column: col[row_idx].as_py()})
+            yield MvpRow(out)
+
+
+@dataclass
+class MvpTable(Iterable[Dict[str, Any]]):
+    data: Dict[str, List[Any]]
+
+    def __iter__(self):
+        # Get the lengths of all columns (they should be the same)
+        row_count = len(next(iter(self.data.values())))
+
+        # Iterate over the rows
+        for i in range(row_count):
+            row_data = {
+                field_name: field_arr[i] for field_name, field_arr in self.data.items()
+            }
+            yield row_data
+
+    def to_rows_by_key(self, mk: str) -> Dict[str, "MvpRow"]:
+        # Find the provided key field in the schema
+        # build row data
+        pk_col = self.data[mk]
+        row_data: Dict[str, MvpRow] = {}
+        for i, value in enumerate(pk_col):
+            row_data[value] = MvpRow(
+                {
+                    field_name: field_arr[i]
+                    for field_name, field_arr in self.data.items()
+                }
+            )
+        return row_data
+
+    def to_rows_list(self) -> List[Dict[str, Any]]:
+        return [r for r in self]
+
+    @classmethod
+    def merge(cls, dataset1: "MvpTable", dataset2: "MvpTable", pk: str) -> "MvpTable":
+
+        merged_data: Dict[str, List[Any]] = {}
+        # Initialize merged_data with keys from both datasets
+        for k in set(dataset1.data.keys()) | set(dataset2.data.keys()):
+            merged_data[k] = []
+
+        # Create dictionaries for quick lookup
+        row_data_ds1: dict[str, MvpRow] = dataset1.to_rows_by_key(pk)
+        row_data_ds2: dict[str, MvpRow] = dataset2.to_rows_by_key(pk)
+
+        # Merge the datasets
+        all_keys = set(row_data_ds1.keys()) | set(row_data_ds2.keys())
+        for k in all_keys:
+            row1: MvpRow = row_data_ds1.get(k, MvpRow({}))
+            row2: MvpRow = row_data_ds2.get(k, MvpRow({}))
+            merged_row = {**row1, **row2}
+            for column, values in merged_data.items():
+                values.append(merged_row.get(column))
+
+        return cls(merged_data)
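MvpTable and MvpRow are self-contained, so a small usage sketch follows directly from the hunk above. This is not example code shipped with the package; the import path is assumed from the file layout (deltacat/storage/rivulet/mvp/Table.py), and the mvp/__init__.py listed in the summary may re-export these names instead.

```python
# Hedged usage sketch; import path assumed from the file layout above.
from deltacat.storage.rivulet.mvp.Table import MvpRow, MvpTable

left = MvpTable({"id": [1, 2], "name": ["a", "b"]})
right = MvpTable({"id": [2, 3], "score": [0.5, 0.9]})

# Column-wise merge keyed on "id"; columns absent for a key come back as None.
merged = MvpTable.merge(left, right, "id")
for row in merged:  # __iter__ yields one plain dict per row
    print(row)

# Keyed access returns MvpRow objects, which behave like read-only mappings.
by_id = merged.to_rows_by_key("id")
assert by_id[2].get("score") == 0.5
assert by_id[3].get("name") is None
```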
deltacat/storage/rivulet/parquet/__init__.py
@@ -0,0 +1,5 @@
+# TODO later on this will be moved to a dedicated package
+from deltacat.storage.rivulet.parquet.file_reader import ParquetFileReader
+from deltacat.storage.rivulet.reader.reader_type_registrar import FileReaderRegistrar
+
+FileReaderRegistrar.register_reader("parquet", ParquetFileReader)
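This __init__ module is the only place in the excerpt that exercises FileReaderRegistrar, so register_reader(extension, reader_cls) is the one piece of its API this diff confirms. A hedged sketch of reusing it, for example to register the same reader under an additional extension alias (the "pq" alias is an assumption for illustration, not something the package does):

```python
# Hedged sketch: only register_reader(extension, reader_cls) is visible in this
# diff; the "pq" alias is hypothetical and not registered by deltacat itself.
from deltacat.storage.rivulet.parquet.file_reader import ParquetFileReader
from deltacat.storage.rivulet.reader.reader_type_registrar import FileReaderRegistrar

FileReaderRegistrar.register_reader("pq", ParquetFileReader)
```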
deltacat/storage/rivulet/parquet/data_reader.py
File without changes
deltacat/storage/rivulet/parquet/file_reader.py
@@ -0,0 +1,127 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from pyarrow import RecordBatch
+
+from deltacat.storage.rivulet.fs.file_provider import FileProvider
+from deltacat.storage.rivulet.metastore.sst import SSTableRow
+from deltacat.storage.rivulet.reader.data_reader import (
+    RowAndKey,
+    FileReader,
+    FILE_FORMAT,
+)
+from deltacat.storage.rivulet.reader.pyarrow_data_reader import RecordBatchRowIndex
+from deltacat.storage.rivulet.schema.schema import Schema
+import pyarrow.parquet as pq
+import pyarrow as pa
+
+
+class ParquetFileReader(FileReader[RecordBatchRowIndex]):
+    """
+    Parquet file reader
+
+    This class is not thread safe
+    """
+
+    def __init__(
+        self,
+        sst_row: SSTableRow,
+        file_provider: FileProvider,
+        key: str,
+        schema: Schema,
+        iter_batch_size=1000,
+    ):
+        self.sst_row = sst_row
+        self.input = file_provider.provide_input_file(sst_row.uri)
+
+        self.key = key
+        self.parquet_file: pa.parquet.ParquetFile | None = None
+        self.iter_batch_size = iter_batch_size
+
+        # Iterator from pyarrow iter_batches API call. Pyarrow manages state of traversal within parquet row groups
+        self._record_batch_iter = None
+
+        self.schema = schema
+
+        """
+        These variables keep state about where the iterator currently is. They are initialized in __enter__()
+        """
+        self._curr_batch: RecordBatch | None = None
+        self._curr_row_offset = 0
+        self._pk_col = None
+
+    def peek(self) -> Optional[RowAndKey[FILE_FORMAT]]:
+        """
+        Peek next record
+
+        Note that there is an edge case where peek() is called on the boundary between record batches
+        This only happens when curr_row_offset == curr_batch.num_rows, meaning next() or peek() would need to advance
+        to the next record batch. When this happens, peek() increments _curr_batch and sets _curr_row_offset to 0
+
+        :return: Optional of RowAndKey
+        """
+        if not self.__is_initialized():
+            raise RuntimeError(
+                "ParquetFileReader must be initialized with __enter__ before reading"
+            )
+
+        if self.__need_to_advance_record_batch():
+            try:
+                self.__advance_record_batch()
+            except StopIteration:
+                return None
+
+        pk = self._pk_col[self._curr_row_offset].as_py()
+        return RowAndKey(
+            RecordBatchRowIndex(self._curr_batch, self._curr_row_offset), pk
+        )
+
+    def __next__(self) -> RowAndKey[FILE_FORMAT]:
+        if not self.__is_initialized():
+            raise RuntimeError(
+                "ParquetFileReader must be initialized with __enter__ before reading"
+            )
+
+        if self.__need_to_advance_record_batch():
+            self.__advance_record_batch()
+            pk = self._pk_col[0].as_py()
+            return RowAndKey(RecordBatchRowIndex(self._curr_batch, 0), pk)
+        else:
+            pk = self._pk_col[self._curr_row_offset].as_py()
+            offset = self._curr_row_offset
+            self._curr_row_offset += 1
+            return RowAndKey(RecordBatchRowIndex(self._curr_batch, offset), pk)
+
+    def __enter__(self):
+        with self.input.open() as f:
+            self.parquet_file = pq.ParquetFile(f)
+            # Initialize _curr_batch
+            row_groups = list(range(self.sst_row.offset_start, self.sst_row.offset_end))
+            self._record_batch_iter = self.parquet_file.iter_batches(
+                self.iter_batch_size, row_groups, columns=self.schema.keys()
+            )
+            self.__advance_record_batch()
+
+    def __exit__(self, __exc_type, __exc_value, __traceback):
+        self.close()
+        # return False to propagate up error messages
+        return False
+
+    def close(self):
+        self.parquet_file.close()
+
+    def __is_initialized(self):
+        return self.parquet_file and self._curr_batch and self._pk_col
+
+    def __need_to_advance_record_batch(self):
+        return not self._curr_row_offset < self._curr_batch.num_rows
+
+    def __advance_record_batch(self):
+        """
+        Advance to next record batch
+        :raise StopIteration: If there are no more record batches
+        """
+        self._curr_batch = next(self._record_batch_iter)
+        self._curr_row_offset = 0
+        self._pk_col = self._curr_batch[self.key]
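A hedged sketch of how ParquetFileReader appears intended to be driven, based only on the hunk above. The sst_row, file_provider, and schema arguments come from machinery not shown in this excerpt (the dataset metastore and FileProvider), and "id" is a hypothetical key column; note also that __enter__ does not return self, so the reader is bound before the with block.

```python
# Hedged usage sketch; inputs are assumed to come from the rivulet metastore.
from deltacat.storage.rivulet.parquet.file_reader import ParquetFileReader


def read_all(sst_row, file_provider, schema, key="id"):
    reader = ParquetFileReader(sst_row, file_provider, key, schema)
    rows = []
    # __enter__ opens the file and loads the first record batch; it does not
    # return self, so bind the reader before entering the with block.
    with reader:
        while reader.peek() is not None:  # peek() returns None once exhausted
            rows.append(next(reader))     # yields RowAndKey values
    return rows
```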
deltacat/storage/rivulet/parquet/serializer.py
@@ -0,0 +1,37 @@
+from typing import List, Any
+
+import pyarrow as pa
+from pyarrow.parquet import FileMetaData
+
+from deltacat.storage.rivulet.metastore.sst import SSTableRow
+from deltacat.storage.rivulet import Schema
+from deltacat.storage.rivulet.arrow.serializer import ArrowSerializer
+
+from deltacat.storage.rivulet.fs.file_provider import FileProvider
+
+
+class ParquetDataSerializer(ArrowSerializer):
+    """
+    Parquet data writer. Responsible for flushing rows to parquet and returning SSTable rows for any file(s) written
+    """
+
+    def __init__(self, file_provider: FileProvider, schema: Schema):
+        super().__init__(file_provider, schema)
+
+    def serialize(self, table: pa.Table) -> List[SSTableRow]:
+        file = self.file_provider.provide_data_file("parquet")
+        with file.create() as outfile:
+            metadata_collector: list[Any] = []
+            pa.parquet.write_table(
+                table=table, where=outfile, metadata_collector=metadata_collector
+            )
+            # look for file metadata
+            file_metadata: FileMetaData = next(
+                item for item in metadata_collector if isinstance(item, FileMetaData)
+            )
+            row_group_count = file_metadata.num_row_groups
+
+        # Because ParquetWriter only writes one row group, it only creates one SSTableRow
+        # we may have more granular SST indexes for other formats
+        key_min, key_max = self._get_min_max_key(table)
+        return [SSTableRow(key_min, key_max, file.location, 0, 0 + row_group_count)]
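And a matching sketch for the write side. How a FileProvider and rivulet Schema are constructed is not shown in this excerpt, so they are passed in by the caller; the key_min/key_max attribute names on SSTableRow are inferred from their use in sst_interval_tree.py above.

```python
# Hedged usage sketch for ParquetDataSerializer; file_provider and schema are
# assumed to come from the surrounding dataset/writer machinery.
import pyarrow as pa

from deltacat.storage.rivulet.parquet.serializer import ParquetDataSerializer


def flush(file_provider, schema, table: pa.Table):
    serializer = ParquetDataSerializer(file_provider, schema)
    # One SSTableRow per written file, spanning all of its parquet row groups.
    sst_rows = serializer.serialize(table)
    for row in sst_rows:
        print(row.key_min, row.key_max)
    return sst_rows
```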
deltacat/storage/rivulet/reader/__init__.py
File without changes