deltacat 1.1.36__py3-none-any.whl → 2.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +5 -9
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -7
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
  40. deltacat/compute/compactor_v2/steps/merge.py +17 -126
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  45. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  46. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  47. deltacat/compute/converter/constants.py +4 -0
  48. deltacat/compute/converter/converter_session.py +143 -0
  49. deltacat/compute/converter/model/convert_input.py +69 -0
  50. deltacat/compute/converter/model/convert_input_files.py +61 -0
  51. deltacat/compute/converter/model/converter_session_params.py +99 -0
  52. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  53. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  54. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  55. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  56. deltacat/compute/converter/steps/__init__.py +0 -0
  57. deltacat/compute/converter/steps/convert.py +211 -0
  58. deltacat/compute/converter/steps/dedupe.py +60 -0
  59. deltacat/compute/converter/utils/__init__.py +0 -0
  60. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  61. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  62. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  63. deltacat/compute/converter/utils/io.py +43 -0
  64. deltacat/compute/converter/utils/s3u.py +133 -0
  65. deltacat/compute/resource_estimation/delta.py +1 -19
  66. deltacat/constants.py +47 -1
  67. deltacat/env.py +51 -0
  68. deltacat/examples/__init__.py +0 -0
  69. deltacat/examples/basic_logging.py +101 -0
  70. deltacat/examples/common/__init__.py +0 -0
  71. deltacat/examples/common/fixtures.py +15 -0
  72. deltacat/examples/hello_world.py +27 -0
  73. deltacat/examples/iceberg/__init__.py +0 -0
  74. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  75. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  76. deltacat/exceptions.py +51 -9
  77. deltacat/logs.py +4 -1
  78. deltacat/storage/__init__.py +118 -28
  79. deltacat/storage/iceberg/__init__.py +0 -0
  80. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  81. deltacat/storage/iceberg/impl.py +737 -0
  82. deltacat/storage/iceberg/model.py +709 -0
  83. deltacat/storage/interface.py +217 -134
  84. deltacat/storage/main/__init__.py +0 -0
  85. deltacat/storage/main/impl.py +2077 -0
  86. deltacat/storage/model/delta.py +118 -71
  87. deltacat/storage/model/interop.py +24 -0
  88. deltacat/storage/model/list_result.py +8 -0
  89. deltacat/storage/model/locator.py +93 -3
  90. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  91. deltacat/storage/model/metafile.py +1316 -0
  92. deltacat/storage/model/namespace.py +34 -18
  93. deltacat/storage/model/partition.py +362 -37
  94. deltacat/storage/model/scan/__init__.py +0 -0
  95. deltacat/storage/model/scan/push_down.py +19 -0
  96. deltacat/storage/model/scan/scan_plan.py +10 -0
  97. deltacat/storage/model/scan/scan_task.py +34 -0
  98. deltacat/storage/model/schema.py +892 -0
  99. deltacat/storage/model/shard.py +47 -0
  100. deltacat/storage/model/sort_key.py +170 -13
  101. deltacat/storage/model/stream.py +208 -80
  102. deltacat/storage/model/table.py +123 -29
  103. deltacat/storage/model/table_version.py +322 -46
  104. deltacat/storage/model/transaction.py +757 -0
  105. deltacat/storage/model/transform.py +198 -61
  106. deltacat/storage/model/types.py +111 -13
  107. deltacat/storage/rivulet/__init__.py +11 -0
  108. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  109. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  110. deltacat/storage/rivulet/dataset.py +744 -0
  111. deltacat/storage/rivulet/dataset_executor.py +87 -0
  112. deltacat/storage/rivulet/feather/__init__.py +5 -0
  113. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  114. deltacat/storage/rivulet/feather/serializer.py +35 -0
  115. deltacat/storage/rivulet/fs/__init__.py +0 -0
  116. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  117. deltacat/storage/rivulet/fs/file_store.py +130 -0
  118. deltacat/storage/rivulet/fs/input_file.py +76 -0
  119. deltacat/storage/rivulet/fs/output_file.py +86 -0
  120. deltacat/storage/rivulet/logical_plan.py +105 -0
  121. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  122. deltacat/storage/rivulet/metastore/delta.py +190 -0
  123. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  124. deltacat/storage/rivulet/metastore/sst.py +82 -0
  125. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  126. deltacat/storage/rivulet/mvp/Table.py +101 -0
  127. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  129. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  130. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  131. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  132. deltacat/storage/rivulet/reader/__init__.py +0 -0
  133. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  134. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  135. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  136. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  137. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  138. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  139. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  140. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  141. deltacat/storage/rivulet/schema/__init__.py +0 -0
  142. deltacat/storage/rivulet/schema/datatype.py +128 -0
  143. deltacat/storage/rivulet/schema/schema.py +251 -0
  144. deltacat/storage/rivulet/serializer.py +40 -0
  145. deltacat/storage/rivulet/serializer_factory.py +42 -0
  146. deltacat/storage/rivulet/writer/__init__.py +0 -0
  147. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  148. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  149. deltacat/storage/util/__init__.py +0 -0
  150. deltacat/storage/util/scan_planner.py +26 -0
  151. deltacat/tests/_io/__init__.py +1 -0
  152. deltacat/tests/catalog/test_catalogs.py +324 -0
  153. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  154. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  155. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  156. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  157. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  158. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  159. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  160. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  161. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  162. deltacat/tests/compute/conftest.py +75 -0
  163. deltacat/tests/compute/converter/__init__.py +0 -0
  164. deltacat/tests/compute/converter/conftest.py +80 -0
  165. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  166. deltacat/tests/compute/converter/utils.py +123 -0
  167. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  168. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  169. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  170. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  171. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  172. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  173. deltacat/tests/compute/test_util_common.py +19 -12
  174. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  175. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  176. deltacat/tests/storage/__init__.py +0 -0
  177. deltacat/tests/storage/conftest.py +25 -0
  178. deltacat/tests/storage/main/__init__.py +0 -0
  179. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  180. deltacat/tests/storage/model/__init__.py +0 -0
  181. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  182. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  183. deltacat/tests/storage/model/test_schema.py +308 -0
  184. deltacat/tests/storage/model/test_shard.py +22 -0
  185. deltacat/tests/storage/model/test_table_version.py +110 -0
  186. deltacat/tests/storage/model/test_transaction.py +308 -0
  187. deltacat/tests/storage/rivulet/__init__.py +0 -0
  188. deltacat/tests/storage/rivulet/conftest.py +149 -0
  189. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  190. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  191. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  192. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  193. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  194. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  195. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  196. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  197. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  198. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  199. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  200. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  201. deltacat/tests/test_deltacat_api.py +39 -0
  202. deltacat/tests/test_utils/filesystem.py +14 -0
  203. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  204. deltacat/tests/test_utils/pyarrow.py +8 -15
  205. deltacat/tests/test_utils/storage.py +266 -3
  206. deltacat/tests/utils/test_daft.py +3 -3
  207. deltacat/tests/utils/test_pyarrow.py +0 -432
  208. deltacat/types/partial_download.py +1 -1
  209. deltacat/types/tables.py +1 -1
  210. deltacat/utils/export.py +59 -0
  211. deltacat/utils/filesystem.py +320 -0
  212. deltacat/utils/metafile_locator.py +73 -0
  213. deltacat/utils/pyarrow.py +36 -183
  214. deltacat-2.0.0b2.dist-info/METADATA +65 -0
  215. deltacat-2.0.0b2.dist-info/RECORD +349 -0
  216. deltacat/aws/redshift/__init__.py +0 -19
  217. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  218. deltacat/io/dataset.py +0 -73
  219. deltacat/io/read_api.py +0 -143
  220. deltacat/storage/model/delete_parameters.py +0 -40
  221. deltacat/storage/model/partition_spec.py +0 -71
  222. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  223. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  224. deltacat-1.1.36.dist-info/METADATA +0 -64
  225. deltacat-1.1.36.dist-info/RECORD +0 -219
  226. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  227. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  228. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  229. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  230. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  231. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  233. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  234. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  235. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  236. {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/LICENSE +0 -0
  237. {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/WHEEL +0 -0
  238. {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/top_level.txt +0 -0
deltacat/storage/rivulet/reader/block_scanner.py
@@ -0,0 +1,378 @@
+ import heapq
+ import logging
+
+ from collections import defaultdict
+ from typing import (
+     Generator,
+     Dict,
+     Set,
+     Type,
+     TypeVar,
+     NamedTuple,
+     Any,
+     List,
+     Generic,
+     AbstractSet,
+ )
+
+ from deltacat.storage.rivulet.metastore.delta import DeltaContext
+ from deltacat.storage.rivulet.metastore.sst import SSTableRow
+ from deltacat.storage.rivulet.metastore.sst_interval_tree import (
+     OrderedBlockGroups,
+     BlockGroup,
+     Block,
+ )
+ from deltacat.storage.rivulet.reader.data_reader import RowAndKey, FileReader
+ from deltacat.storage.rivulet.reader.dataset_metastore import DatasetMetastore
+ from deltacat.storage.rivulet.reader.pyarrow_data_reader import ArrowDataReader
+ from deltacat.storage.rivulet.reader.query_expression import QueryExpression
+ from deltacat.storage.rivulet.reader.reader_type_registrar import FileReaderRegistrar
+ from deltacat.storage.rivulet import Schema
+ from deltacat import logs
+
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
+
+ FILE_FORMAT = TypeVar("FILE_FORMAT")
+ MEMORY_FORMAT = TypeVar("MEMORY_FORMAT")
+
+
+ class FileReaderWithContext(NamedTuple):
+     reader: FileReader[FILE_FORMAT]
+     context: DeltaContext
+
+
+ class ZipperMergeHeapRecord(NamedTuple):
+     """
+     Named tuple for the records we push onto the heap during a zipper merge.
+
+     Note we override the comparison operators to delegate to the key
+     so that heap ordering is determined by key alone.
+     """
+
+     key: Any
+     data: FILE_FORMAT
+     reader: FileReaderWithContext
+
+     def __lt__(self, other):
+         return self.key < other.key
+
+     def __le__(self, other):
+         return self.key <= other.key
+
+     def __gt__(self, other):
+         return self.key > other.key
+
+     def __ge__(self, other):
+         return self.key >= other.key
+
+
+ class ZipperBlockScanExecutor(Generic[MEMORY_FORMAT]):
+     """
+     Class for managing a zipper scan across multiple field groups. This class is only ever
+     used by the higher-level BlockScanner class.
+
+     It is factored into a dedicated class because of the complexity and state management of
+     zipper merging.
+     """
+
+     def __init__(
+         self,
+         result_schema: Schema,
+         deserialize_to: Type[MEMORY_FORMAT],
+         ordered_block_groups: OrderedBlockGroups,
+         query: QueryExpression[Any],
+         metastore: DatasetMetastore,
+         file_readers: Dict[str, FileReader],
+     ):
+         self.result_schema = result_schema
+         self.deserialize_to = deserialize_to
+         self.ordered_block_groups = ordered_block_groups
+         self.query = query
+         self.metastore = metastore
+         self.file_readers = file_readers
+         """
+         Keeps track of block file readers that are open, across block group boundaries. E.g., if BlockGroup1 has
+         blocks [1,2,3] and BlockGroup2 has blocks [2,3], we will start reading blocks [2,3] and need to re-use the
+         open iterators while reading BlockGroup2.
+         """
+         self._open_file_readers: Dict[SSTableRow, FileReaderWithContext] = {}
+
+     def scan(self) -> Generator[MEMORY_FORMAT, None, None]:
+         """
+         Perform an N-wise zipper across N field groups.
+         Within each field group, there is a set of blocks which belong in this BlockGroup's key range.
+
+         As a simplified example, we may have:
+             FieldGroup1: [BlockA, BlockB]
+             FieldGroup2: [BlockC]
+             BlockA: keys 1,3,9,10
+             BlockB: keys 2,4,5,6,7,8
+             BlockC: keys 1-10
+
+         The algorithm to merge these looks like:
+         1. Load each block in a DataReader to get an iterator over sorted keys
+         2. Build a heap of records across blocks across field groups
+         3. Pop record(s) from the heap as long as they have equal keys. For up to N records, merge column-wise
+         4. Continue until all blocks are read OR the key range in the query is exceeded
+         """
+         for block_group in self.ordered_block_groups.block_groups:
+             logger.debug(f"Starting scan of block group {block_group}")
+
+             # Set of all blocks that need to be read within this block group
+             blocks: Set[Block] = {
+                 block
+                 for block_set in block_group.field_group_to_blocks.values()
+                 for block in block_set
+             }
+             # Open all file readers, such that self._open_file_readers has pointers to open readers
+             self.__open_file_readers(blocks)
+             record_heap: List[ZipperMergeHeapRecord] = []
+
+             # Seed the record heap with a record from each open file reader
+             file_reader_context: FileReaderWithContext
+             for block, file_reader_context in self._open_file_readers.items():
+                 self.__push_next_row_back_to_heap(
+                     block_group, file_reader_context, record_heap
+                 )
+
+             # For each zipper merged entry from heap traversal, delegate to the deserializer
+             for zipper_merged in self.__zipper_merge_sorted_records(
+                 record_heap, block_group
+             ):
+                 records = [z.data for z in self._dedupe_records(zipper_merged)]
+                 # TODO (multi format support) we need to handle joining across data readers in the future
+                 # For now, assume all data readers MUST read to Arrow intermediate format
+                 for result in ArrowDataReader().join_deserialize_records(
+                     records, self.deserialize_to, self.result_schema.get_merge_key()
+                 ):
+                     yield result
+
+     def _dedupe_records(
+         self, records: List[ZipperMergeHeapRecord]
+     ) -> List[ZipperMergeHeapRecord]:
+         """Deduplicate records with the same key (as a sorted list of records).
+
+         Deduplication chooses records based on the following rules of precedence:
+
+         1. Levels with lower numbers take precedence over levels with higher numbers (L0 is preferred over L1)
+         2. Newer stream positions take precedence over older stream positions
+
+         Undefined Behavior:
+
+         - Duplicate records within files from the same manifest (either within the same data file or across data files)
+
+         TODO: allow for the definition of a 'dedupe' column to break ties.
+         """
+         sort_criteria = lambda x: (
+             -x.reader.context.level,
+             x.reader.context.stream_position,
+         )
+
+         grouped_by_sort_group: defaultdict[
+             Schema, List[ZipperMergeHeapRecord]
+         ] = defaultdict(list)
+         for record in records:
+             grouped_by_sort_group[record.reader.context.schema].append(record)
+         deduped = [
+             max(group, key=sort_criteria) for group in grouped_by_sort_group.values()
+         ]
+         # Sort one last time across schemas (in case there are overlapping fields)
+         deduped.sort(key=sort_criteria)
+         return deduped
+
+     def __zipper_merge_sorted_records(
+         self, record_heap: List[ZipperMergeHeapRecord], block_group: BlockGroup
+     ) -> Generator[List[ZipperMergeHeapRecord], None, None]:
+         """
+         Continually pop from the heap until it is empty OR the block range is exceeded, generating a
+         "zipper merge" of records.
+
+         The algorithm is:
+         (1) Pop the lowest element from the heap. It includes a pointer to the file reader it came from.
+             Push the next largest element from that reader back onto the heap.
+         (2) Buffer records of the same key, peeking/popping the heap as long as there is a key match.
+             For any record popped, push the next largest element from its reader back onto the heap.
+         (3) Yield the merged group of records (the caller invokes the DataReader to deserialize them).
+
+         This solution maintains the following invariants:
+         (1) the heap has at most N records, where N=total blocks in the BlockGroup
+         (2) the heap has the N smallest records globally
+         (3) any data that needs to be merged for a given key exists in the heap
+
+         :param record_heap: seeded heap of ZipperMergeHeapRecords.
+         :param block_group: block group being traversed
+         :return: generator of merged records. Note this yields a list, not a set, to avoid requiring hash support
+         """
+         if not record_heap:
+             return
+
+         # Keep iterating until the heap is empty or the key range is exceeded
+         while record_heap:
+             curr_heap_record = heapq.heappop(record_heap)
+             curr_pk = curr_heap_record.key
+
+             if not self.query.matches_query(curr_pk):
+                 continue
+
+             # Sanity check - assert that the key we are looking at is in the block group's range
+             if not block_group.key_in_range(curr_pk):
+                 raise RuntimeError(
+                     f"Did not expect to find key {curr_pk} on zipper merge heap "
+                     f"for block group {block_group}"
+                 )
+
+             # Find all records to be merged by continuing to pop the heap
+             merged_by_pk = [curr_heap_record]
+             # For the current record itself - push the next row back onto the heap
+             self.__push_next_row_back_to_heap(
+                 block_group, curr_heap_record.reader, record_heap
+             )
+             # For the rest of the heap elements - peek/pop as long as they have an equal key
+             # Note that record_heap[0] is equivalent to a peek operation
+             while record_heap and record_heap[0].key == curr_pk:
+                 merge_heap_record: ZipperMergeHeapRecord = heapq.heappop(record_heap)
+                 merged_by_pk.append(merge_heap_record)
+                 self.__push_next_row_back_to_heap(
+                     block_group, merge_heap_record.reader, record_heap
+                 )
+             yield merged_by_pk
+
+     def __push_next_row_back_to_heap(
+         self,
+         block_group: BlockGroup,
+         row_context: FileReaderWithContext,
+         record_heap: List[ZipperMergeHeapRecord],
+     ):
+         """
+         This is a helper function for __zipper_merge_sorted_records and for scan().
+
+         Given a file reader, it will call next() on records until it finds the next record within the block group
+         and current query. It then pushes that record onto the heap.
+
+         Sometimes we end up needing to seek into the middle of a block because the key range of a query starts
+         in the middle of the block. For example, if the block has key range [0,100]
+         and the query is for keys [50-100], we need to seek to the first key in the block that is >= 50.
+
+         TODO better support for seeking within a block (rather than O(N) iteration)
+         """
+
+         file_reader = row_context.reader
+         while file_reader.peek() is not None and (
+             block_group.key_below_range(file_reader.peek().key)
+             or self.query.below_query_range(file_reader.peek().key)
+         ):
+             try:
+                 # call next() on the file reader to throw out a key which is below the range of the block group
+                 next(file_reader)
+             except StopIteration:
+                 # If we have exhausted the iterator, this just means no keys from this block actually match the query
+                 file_reader.close()
+                 # TODO how to remove file reader from _open_file_readers?
+
+         if (
+             file_reader.peek()
+             and self.query.matches_query(file_reader.peek().key)
+             and block_group.key_in_range(file_reader.peek().key)
+         ):
+             try:
+                 r: RowAndKey = next(file_reader)
+                 heapq.heappush(
+                     record_heap,
+                     ZipperMergeHeapRecord(r.key, r.row, row_context),
+                 )
+             except StopIteration:
+                 # This means we have exhausted the open FileReader and should close it
+                 file_reader.close()
+                 # TODO how to remove file reader from _open_file_readers?
+
+     def __open_file_readers(self, blocks: AbstractSet[Block]):
+         """
+         This method should be called once per block group.
+         It opens iterators across all blocks in the block group and stores them in a map.
+         Blocks may already be open, if they were also in previous block groups.
+         """
+         for block in blocks:
+             sst_row: SSTableRow = block.row
+             if sst_row not in self._open_file_readers:
+                 file_reader = FileReaderRegistrar.construct_reader_instance(
+                     sst_row,
+                     self.metastore.file_provider,
+                     self.result_schema.get_merge_key(),
+                     self.result_schema,
+                     self.file_readers,
+                 )
+                 file_reader.__enter__()
+                 # TODO we need some way to compare the blocks. using serialized timestamp as proxy for now
+                 context = FileReaderWithContext(file_reader, block.context)
+                 self._open_file_readers[sst_row] = context
+
+
+ class BlockScanner:
+     """
+     BlockScanner is a low-level internal class which performs IO on Block Groups.
+
+     Note that we expect a block scanner to be initialized PER QUERY because it will keep state about ongoing
+     execution, e.g. open iterators across block groups.
+
+     TODO efficiency improvements like parallelizing scanning
+     TODO handle "partial schema" use case, in which the query schema is a subset of the full schema
+     TODO in the future we will probably want to cache blocks read across queries
+     """
+
+     def __init__(self, metastore: DatasetMetastore):
+         # Persist initialized file readers
+         self.metastore = metastore
+         self.file_readers: Dict[str, FileReader] = {}
+
+     def scan(
+         self,
+         schema: Schema,
+         deserialize_to: Type[MEMORY_FORMAT],
+         blocks: Set[SSTableRow],
+         query: QueryExpression[Any] = QueryExpression(),
+     ) -> Generator[MEMORY_FORMAT, None, None]:
+         """
+         Scan records given a query and deserialize to the desired in-memory output format.
+         The set of blocks can all be scanned and returned independently.
+         TODO handle "partial schema" use case, in which the query schema is a subset of the full schema
+         TODO parallelize scan with async io
+         """
+         data_reader = ArrowDataReader()
+         for block in blocks:
+             file_reader = FileReaderRegistrar.construct_reader_instance(
+                 block,
+                 self.metastore.file_provider,
+                 schema.get_merge_key(),
+                 schema,
+                 self.file_readers,
+             )
+             with file_reader:
+                 for generated_records in file_reader:
+                     # Check whether the row matches the key predicate in the query before deserializing
+                     if query.key_range:
+                         start, end = query.key_range
+                         if generated_records.key < start or generated_records.key > end:
+                             continue
+
+                     # Otherwise, the key predicate matched; yield deserialized rows
+                     for deserialized_row in data_reader.deserialize_records(
+                         generated_records, deserialize_to
+                     ):
+                         yield deserialized_row
+
+     def scan_with_zipper(
+         self,
+         schema: Schema,
+         deserialize_to: Type[MEMORY_FORMAT],
+         ordered_block_groups: OrderedBlockGroups,
+         query: QueryExpression[Any] = QueryExpression(),
+     ) -> Generator[MEMORY_FORMAT, None, None]:
+         zipper_scan_executor = ZipperBlockScanExecutor(
+             schema,
+             deserialize_to,
+             ordered_block_groups,
+             query,
+             self.metastore,
+             self.file_readers,
+         )
+         return zipper_scan_executor.scan()
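The heap-ordering trick above (NamedTuple records whose comparison operators delegate to the key) is easy to demonstrate in isolation. Below is a minimal, self-contained sketch of the same N-wise zipper merge over sorted iterators; the names Rec and zipper_merge are hypothetical and not part of deltacat:

import heapq
from typing import Any, Iterator, List, NamedTuple, Tuple

class Rec(NamedTuple):
    key: Any
    data: Any
    src: Iterator  # the iterator this record was popped from

    def __lt__(self, other):
        # heapq only needs "<"; ordering by key alone mirrors ZipperMergeHeapRecord
        return self.key < other.key

def _push_next(heap: List[Rec], it: Iterator[Tuple[Any, Any]]) -> None:
    nxt = next(it, None)
    if nxt is not None:
        heapq.heappush(heap, Rec(nxt[0], nxt[1], it))

def zipper_merge(*sorted_iters: Iterator[Tuple[Any, Any]]):
    """Yield lists of (key, data) pairs grouped by equal key across N sorted iterators."""
    heap: List[Rec] = []
    iters = [iter(it) for it in sorted_iters]
    for it in iters:  # seed the heap with one record per iterator
        _push_next(heap, it)
    while heap:
        first = heapq.heappop(heap)
        group = [first]
        _push_next(heap, first.src)  # replace the popped record from its source
        while heap and heap[0].key == first.key:  # heap[0] is a peek
            rec = heapq.heappop(heap)
            group.append(rec)
            _push_next(heap, rec.src)
        yield [(r.key, r.data) for r in group]

print(list(zipper_merge(
    iter([(1, "a1"), (3, "a3"), (9, "a9")]),
    iter([(2, "b2"), (3, "b3")]),
)))
# [[(1, 'a1')], [(2, 'b2')], [(3, 'a3'), (3, 'b3')], [(9, 'a9')]]

The invariants called out in the docstring hold here too: the heap never holds more than one record per iterator, so it always contains the N smallest unmerged records, and every record sharing the current key is guaranteed to be on the heap when the group is yielded.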
deltacat/storage/rivulet/reader/data_reader.py
@@ -0,0 +1,136 @@
+ import typing
+ from abc import abstractmethod
+ from dataclasses import dataclass
+ from typing import (
+     Protocol,
+     Generator,
+     Any,
+     TypeVar,
+     Type,
+     Generic,
+     List,
+     Iterator,
+     Optional,
+ )
+
+ from deltacat.storage.rivulet.fs.file_provider import FileProvider
+ from deltacat.storage.rivulet.metastore.sst import SSTableRow
+ from deltacat.storage.rivulet.schema.schema import Schema
+
+ FILE_FORMAT = TypeVar("FILE_FORMAT")
+ MEMORY_FORMAT = TypeVar("MEMORY_FORMAT")
+
+ T = TypeVar("T")
+
+
+ @dataclass
+ class RowAndKey(Generic[FILE_FORMAT]):
+     """
+     Dataclass for a record batch with an index into a specific row.
+     Note that record batches store data by column, so the row index should be
+     used to index into each column array.
+     """
+
+     row: FILE_FORMAT
+     key: Any
+
+
+ class FileReader(
+     Protocol[FILE_FORMAT],
+     Iterator[RowAndKey[FILE_FORMAT]],
+     typing.ContextManager,
+ ):
+     """
+     Interface for reading a specific file.
+
+     TODO (IO abstraction) we will need to think about how various IO interfaces (S3, filesystem, memory)
+     plug into this.
+     """
+
+     @abstractmethod
+     def __init__(
+         self,
+         sst_row: SSTableRow,
+         file_provider: FileProvider,
+         primary_key: str,
+         schema: Schema,
+     ) -> None:
+         """
+         Required constructor (see: FileReaderRegistrar)
+
+         :param sst_row: SSTableRow containing file metadata
+         :param file_provider: Object providing file access
+         """
+         ...
+
+     @abstractmethod
+     def peek(self) -> Optional[RowAndKey[FILE_FORMAT]]:
+         """
+         Peek at the next RowAndKey without advancing the iterator
+         :return: Optional of RowAndKey
+         """
+         ...
+
+     @abstractmethod
+     def __next__(self) -> RowAndKey[FILE_FORMAT]:
+         """
+         Fetch the next RowAndKey and advance the iterator
+         """
+         ...
+
+     @abstractmethod
+     def close(self):
+         """
+         Explicitly adds close() so that resources can be cleaned up outside the ContextManager.
+
+         We expect that callers opening the reader EITHER use a with statement or call __enter__().
+         Callers closing the reader EITHER explicitly call close() or have the with statement manage calling __exit__().
+         """
+         ...
+
+
+ class DataReader(Protocol[FILE_FORMAT]):
+     """
+     Interface for reading specific file formats.
+     A DatasetReader uses a different DataReader for each format.
+
+     TODO (IO abstraction) we will need to think about how various IO interfaces (S3, filesystem, memory)
+     plug into this.
+     """
+
+     @abstractmethod
+     def deserialize_records(
+         self, records: FILE_FORMAT, output_type: Type[MEMORY_FORMAT]
+     ) -> Generator[MEMORY_FORMAT, None, None]:
+         """
+         Deserialize records into the specified format.
+
+         Note that output_type gets set based on what a DataScan converts results to,
+         e.g. to_arrow, to_pydict
+
+         :param records: Input data (generated by the generate_records method)
+         :param output_type: Type to deserialize into
+         :returns: A generator yielding records of the specified type.
+         """
+         ...
+
+     @abstractmethod
+     def join_deserialize_records(
+         self,
+         records: List[FILE_FORMAT],
+         output_type: Type[MEMORY_FORMAT],
+         join_key: str,
+     ) -> Generator[MEMORY_FORMAT, None, None]:
+         """
+         Deserialize and join records into the specified format.
+
+         Note that output_type gets set based on what a DataScan converts results to,
+         e.g. to_arrow, to_pydict
+
+         :param records: Multiple records which should be merged into the final output record.
+             Note this is a list instead of a set to not enforce hashability
+         :param join_key: Name of the field to join across records. This field must be present on all records
+         :param output_type: Type to deserialize into
+         :returns: A generator yielding records of the specified type.
+         """
+         ...
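To make the peek()/__next__()/close() contract concrete, here is a toy in-memory reader with the same shape; the ListFileReader class is illustrative only (it assumes a pre-sorted list of (key, row) pairs and is not part of the package):

from typing import Any, List, Optional, Tuple

class ListFileReader:
    """Toy FileReader-shaped object backed by a pre-sorted list of (key, row) pairs."""

    def __init__(self, rows: List[Tuple[Any, Any]]):
        self._rows = rows
        self._pos = 0
        self._closed = False

    def peek(self) -> Optional[Tuple[Any, Any]]:
        # Return the next item without advancing, or None when exhausted/closed
        if self._closed or self._pos >= len(self._rows):
            return None
        return self._rows[self._pos]

    def __next__(self) -> Tuple[Any, Any]:
        if self._closed or self._pos >= len(self._rows):
            raise StopIteration
        row = self._rows[self._pos]
        self._pos += 1
        return row

    def __iter__(self):
        return self

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    def close(self):
        self._closed = True

# Usage: peek() never advances the iterator; next() does.
r = ListFileReader([(1, "a"), (2, "b")])
with r:
    assert r.peek() == (1, "a")
    assert next(r) == (1, "a")
    assert r.peek() == (2, "b")

Note how peek() drives the merge loops in block_scanner.py above: a reader is advanced with next() only after its head row has been checked against the block group and query ranges.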
deltacat/storage/rivulet/reader/data_scan.py
@@ -0,0 +1,63 @@
+ from typing import Generator, Dict, Optional
+
+ import pyarrow as pa
+
+ from deltacat.storage.model.shard import Shard
+ from deltacat.storage.rivulet.reader.dataset_reader import DatasetReader
+ from deltacat.storage.rivulet.reader.query_expression import QueryExpression
+ from deltacat.storage.rivulet import Schema
+
+
+ class DataScan:
+     """
+     Top-level class representing and executing a data scan, on both rivulet-internal and external data.
+     This class is lazy, and is executed when the user calls a "to_{format}" method
+     to deserialize data into the chosen in-memory format.
+
+     Dataset.py scan() is the entrypoint to create and return a data scan. The user
+     then has to chain a "to_{format}" method to read rows in their chosen in-memory format.
+
+     Rivulet cannot simply return file URIs and allow the query engine to process files,
+     because rivulet will internally manage details like indexes, custom file formats for bulk records,
+     where data is physically laid out across row groups, etc.
+
+     DataScan allows query engines to send push down predicates. Push down predicates are used to filter
+     on dimensions natively indexed by rivulet (e.g. primary key).
+
+     DataScan is coupled to internals of the rivulet format. If the rivulet format evolves, DataScan
+     execution should be able to understand which rivulet spec version is used and remain compatible with it.
+
+     FUTURE IMPROVEMENTS
+     1. Implement full spec for push down predicates
+     2. Figure out how permissions/credential providers work.
+     3. Figure out how extension libraries can plug in to_x deserialization support.
+        One potential option is to override __getattr__ and check a static class-level Registry
+        of to_x methods. Modules would have to import DataScan and call DataScan.register_deserializer(...)
+     """
+
+     def __init__(
+         self,
+         dataset_schema: Schema,
+         query: QueryExpression,
+         dataset_reader: DatasetReader,
+         shard: Optional[Shard],
+     ):
+         self.dataset_schema = dataset_schema
+         self.query = query
+         self.dataset_reader = dataset_reader
+         self.shard = shard
+
+     def to_arrow(self) -> Generator[pa.RecordBatch, None, None]:
+         """
+         Generates scan results as arrow record batches
+
+         TODO how to make the .to_x methods pluggable?
+         """
+         return self.dataset_reader.scan(
+             self.dataset_schema, pa.RecordBatch, self.query, shard=self.shard
+         )
+
+     def to_pydict(self) -> Generator[Dict, None, None]:
+         """
+         Generates scan results as a Dict for each row
+         """
+         return self.dataset_reader.scan(
+             self.dataset_schema, Dict, self.query, shard=self.shard
+         )
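Since DataScan is lazy, no IO happens until one of the to_{format} generators is consumed. A sketch of the intended call pattern follows; the Dataset construction arguments here are assumptions for illustration, not an exact API reference:

import pyarrow as pa

from deltacat.storage.rivulet.dataset import Dataset  # module path as of this diff

# Hypothetical dataset rooted at a local path.
ds = Dataset(dataset_name="example", metadata_uri="/tmp/riv-example")

# scan() returns a lazy DataScan; rows are only read when the chained
# to_{format} generator is iterated.
for batch in ds.scan().to_arrow():
    assert isinstance(batch, pa.RecordBatch)

for row in ds.scan().to_pydict():
    print(row)  # one Dict per row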