deltacat 2.0.0b11__py3-none-any.whl → 2.0.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. deltacat/__init__.py +78 -3
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/conftest.py +0 -18
  6. deltacat/catalog/__init__.py +2 -0
  7. deltacat/catalog/delegate.py +445 -63
  8. deltacat/catalog/interface.py +188 -62
  9. deltacat/catalog/main/impl.py +2417 -271
  10. deltacat/catalog/model/catalog.py +49 -10
  11. deltacat/catalog/model/properties.py +38 -0
  12. deltacat/compute/compactor/compaction_session.py +97 -75
  13. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  14. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  15. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  16. deltacat/compute/compactor/repartition_session.py +8 -21
  17. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  18. deltacat/compute/compactor/steps/materialize.py +9 -7
  19. deltacat/compute/compactor/steps/repartition.py +12 -11
  20. deltacat/compute/compactor/utils/io.py +6 -5
  21. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  22. deltacat/compute/compactor/utils/system_columns.py +3 -1
  23. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  24. deltacat/compute/compactor_v2/constants.py +30 -1
  25. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  26. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  27. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  28. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  29. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  30. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  31. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  32. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  33. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  34. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  35. deltacat/compute/compactor_v2/utils/io.py +11 -4
  36. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  37. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  38. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  39. deltacat/compute/converter/converter_session.py +145 -32
  40. deltacat/compute/converter/model/convert_input.py +26 -19
  41. deltacat/compute/converter/model/convert_input_files.py +33 -16
  42. deltacat/compute/converter/model/convert_result.py +35 -16
  43. deltacat/compute/converter/model/converter_session_params.py +24 -21
  44. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  45. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  46. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  47. deltacat/compute/converter/steps/convert.py +157 -50
  48. deltacat/compute/converter/steps/dedupe.py +24 -11
  49. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  50. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  51. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  52. deltacat/compute/converter/utils/io.py +101 -12
  53. deltacat/compute/converter/utils/s3u.py +33 -27
  54. deltacat/compute/janitor.py +205 -0
  55. deltacat/compute/jobs/client.py +19 -8
  56. deltacat/compute/resource_estimation/delta.py +38 -6
  57. deltacat/compute/resource_estimation/model.py +8 -0
  58. deltacat/constants.py +44 -0
  59. deltacat/docs/autogen/schema/__init__.py +0 -0
  60. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/examples/compactor/__init__.py +0 -0
  64. deltacat/examples/compactor/aws/__init__.py +1 -0
  65. deltacat/examples/compactor/bootstrap.py +863 -0
  66. deltacat/examples/compactor/compactor.py +373 -0
  67. deltacat/examples/compactor/explorer.py +473 -0
  68. deltacat/examples/compactor/gcp/__init__.py +1 -0
  69. deltacat/examples/compactor/job_runner.py +439 -0
  70. deltacat/examples/compactor/utils/__init__.py +1 -0
  71. deltacat/examples/compactor/utils/common.py +261 -0
  72. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  80. deltacat/exceptions.py +66 -4
  81. deltacat/experimental/catalog/iceberg/impl.py +2 -2
  82. deltacat/experimental/compatibility/__init__.py +0 -0
  83. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  84. deltacat/experimental/converter_agent/__init__.py +0 -0
  85. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  86. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  87. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  88. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
  89. deltacat/experimental/storage/iceberg/impl.py +5 -3
  90. deltacat/experimental/storage/iceberg/model.py +7 -3
  91. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  92. deltacat/experimental/storage/rivulet/dataset.py +0 -3
  93. deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
  94. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
  95. deltacat/io/datasource/deltacat_datasource.py +0 -1
  96. deltacat/storage/__init__.py +20 -2
  97. deltacat/storage/interface.py +54 -32
  98. deltacat/storage/main/impl.py +1494 -541
  99. deltacat/storage/model/delta.py +27 -3
  100. deltacat/storage/model/locator.py +6 -12
  101. deltacat/storage/model/manifest.py +182 -6
  102. deltacat/storage/model/metafile.py +151 -78
  103. deltacat/storage/model/namespace.py +8 -1
  104. deltacat/storage/model/partition.py +117 -42
  105. deltacat/storage/model/schema.py +2427 -159
  106. deltacat/storage/model/sort_key.py +40 -0
  107. deltacat/storage/model/stream.py +9 -2
  108. deltacat/storage/model/table.py +12 -1
  109. deltacat/storage/model/table_version.py +11 -0
  110. deltacat/storage/model/transaction.py +1184 -208
  111. deltacat/storage/model/transform.py +81 -2
  112. deltacat/storage/model/types.py +48 -26
  113. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  114. deltacat/tests/aws/test_s3u.py +2 -31
  115. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
  116. deltacat/tests/catalog/test_catalogs.py +54 -11
  117. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
  118. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  119. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  120. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  121. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  122. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  123. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  124. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  125. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  126. deltacat/tests/compute/conftest.py +8 -44
  127. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  128. deltacat/tests/compute/converter/utils.py +15 -6
  129. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  130. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  131. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  132. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  133. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  134. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  135. deltacat/tests/compute/test_janitor.py +236 -0
  136. deltacat/tests/compute/test_util_common.py +716 -43
  137. deltacat/tests/compute/test_util_constant.py +0 -1
  138. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  139. deltacat/tests/experimental/__init__.py +1 -0
  140. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  141. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  142. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  143. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  144. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  145. deltacat/tests/storage/model/test_schema.py +171 -0
  146. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  147. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  148. deltacat/tests/storage/model/test_transaction.py +393 -48
  149. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  150. deltacat/tests/test_deltacat_api.py +988 -4
  151. deltacat/tests/test_exceptions.py +9 -5
  152. deltacat/tests/test_utils/pyarrow.py +52 -21
  153. deltacat/tests/test_utils/storage.py +23 -34
  154. deltacat/tests/types/__init__.py +0 -0
  155. deltacat/tests/types/test_tables.py +104 -0
  156. deltacat/tests/utils/exceptions.py +22 -0
  157. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  158. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  159. deltacat/tests/utils/test_daft.py +121 -31
  160. deltacat/tests/utils/test_numpy.py +1193 -0
  161. deltacat/tests/utils/test_pandas.py +1106 -0
  162. deltacat/tests/utils/test_polars.py +1040 -0
  163. deltacat/tests/utils/test_pyarrow.py +1370 -89
  164. deltacat/types/media.py +221 -11
  165. deltacat/types/tables.py +2329 -59
  166. deltacat/utils/arguments.py +33 -1
  167. deltacat/utils/daft.py +411 -150
  168. deltacat/utils/filesystem.py +100 -0
  169. deltacat/utils/metafile_locator.py +2 -1
  170. deltacat/utils/numpy.py +118 -26
  171. deltacat/utils/pandas.py +577 -48
  172. deltacat/utils/polars.py +658 -27
  173. deltacat/utils/pyarrow.py +1258 -213
  174. deltacat/utils/ray_utils/dataset.py +101 -10
  175. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  176. deltacat/utils/url.py +56 -15
  177. deltacat-2.0.0.post1.dist-info/METADATA +1163 -0
  178. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/RECORD +183 -145
  179. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/WHEEL +1 -1
  180. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  181. deltacat/compute/merge_on_read/__init__.py +0 -4
  182. deltacat/compute/merge_on_read/daft.py +0 -40
  183. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  184. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  185. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  186. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  187. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  188. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  189. deltacat/utils/s3fs.py +0 -21
  190. deltacat-2.0.0b11.dist-info/METADATA +0 -67
  191. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  192. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  193. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info/licenses}/LICENSE +0 -0
  194. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/top_level.txt +0 -0
@@ -1,388 +0,0 @@
1
- # Allow classes to use self-referencing Type hints in Python 3.7.
2
- from __future__ import annotations
3
- from typing import Any, Dict, List, Optional, Tuple
4
- import pyarrow as pa
5
-
6
- from deltacat.tests.compute.test_util_common import (
7
- PartitionKey,
8
- )
9
-
10
- from deltacat.storage import (
11
- Delta,
12
- DeltaType,
13
- Partition,
14
- PartitionLocator,
15
- Stream,
16
- )
17
- from deltacat.tests.compute.test_util_common import (
18
- create_src_table,
19
- create_destination_table,
20
- create_rebase_table,
21
- )
22
- import logging
23
- from deltacat import logs
24
-
25
- logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
26
-
27
-
28
- def _add_deltas_to_partition(
29
- deltas_ingredients: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
30
- partition: Optional[Partition],
31
- ds_mock_kwargs: Optional[Dict[str, Any]],
32
- ) -> List[Optional[Delta], int]:
33
- import deltacat.tests.local_deltacat_storage as ds
34
-
35
- all_deltas_length = 0
36
- for (delta_data, delta_type, delete_parameters) in deltas_ingredients:
37
- staged_delta: Delta = ds.stage_delta(
38
- delta_data,
39
- partition,
40
- delta_type,
41
- entry_params=delete_parameters,
42
- **ds_mock_kwargs,
43
- )
44
- incremental_delta = ds.commit_delta(
45
- staged_delta,
46
- **ds_mock_kwargs,
47
- )
48
- all_deltas_length += len(delta_data) if delta_data else 0
49
- return incremental_delta, all_deltas_length
50
-
51
-
52
- def add_late_deltas_to_partition(
53
- late_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
54
- source_partition: Optional[Partition],
55
- ds_mock_kwargs: Optional[Dict[str, Any]],
56
- ) -> List[Optional[Delta], int]:
57
- return _add_deltas_to_partition(late_deltas, source_partition, ds_mock_kwargs)
58
-
59
-
60
- def create_incremental_deltas_on_source_table(
61
- source_namespace: str,
62
- source_table_name: str,
63
- source_table_version: str,
64
- source_table_stream: Stream,
65
- partition_values_param,
66
- incremental_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
67
- ds_mock_kwargs: Optional[Dict[str, Any]] = None,
68
- ) -> Tuple[PartitionLocator, Delta, int, bool]:
69
- import deltacat.tests.local_deltacat_storage as ds
70
-
71
- incremental_delta_length = 0
72
- is_delete = False
73
- src_partition: Partition = ds.get_partition(
74
- source_table_stream.locator,
75
- partition_values_param,
76
- **ds_mock_kwargs,
77
- )
78
- for (
79
- incremental_data,
80
- incremental_delta_type,
81
- incremental_delete_parameters,
82
- ) in incremental_deltas:
83
- if incremental_delta_type is DeltaType.DELETE:
84
- is_delete = True
85
- incremental_delta: Delta = ds.commit_delta(
86
- ds.stage_delta(
87
- incremental_data,
88
- src_partition,
89
- incremental_delta_type,
90
- entry_params=incremental_delete_parameters,
91
- **ds_mock_kwargs,
92
- ),
93
- **ds_mock_kwargs,
94
- )
95
- incremental_delta_length += len(incremental_data) if incremental_data else 0
96
- src_table_stream_after_committed_delta: Stream = ds.get_stream(
97
- source_namespace,
98
- source_table_name,
99
- source_table_version,
100
- **ds_mock_kwargs,
101
- )
102
- src_partition_after_committed_delta: Partition = ds.get_partition(
103
- src_table_stream_after_committed_delta.locator,
104
- partition_values_param,
105
- **ds_mock_kwargs,
106
- )
107
- return (
108
- src_partition_after_committed_delta.locator,
109
- incremental_delta,
110
- incremental_delta_length,
111
- is_delete,
112
- )
113
-
114
-
115
- def create_src_w_deltas_destination_plus_destination(
116
- sort_keys: Optional[List[Any]],
117
- partition_keys: Optional[List[PartitionKey]],
118
- input_deltas: pa.Table,
119
- input_delta_type: DeltaType,
120
- partition_values: Optional[List[Any]],
121
- ds_mock_kwargs: Optional[Dict[str, Any]],
122
- simulate_is_inplace: bool = False,
123
- ) -> Tuple[Stream, Stream, Optional[Stream], str, str, str]:
124
- import deltacat.tests.local_deltacat_storage as ds
125
-
126
- source_namespace, source_table_name, source_table_version = create_src_table(
127
- sort_keys, partition_keys, ds_mock_kwargs
128
- )
129
-
130
- source_table_stream: Stream = ds.get_stream(
131
- namespace=source_namespace,
132
- table_name=source_table_name,
133
- table_version=source_table_version,
134
- **ds_mock_kwargs,
135
- )
136
- staged_partition: Partition = ds.stage_partition(
137
- source_table_stream, partition_values, **ds_mock_kwargs
138
- )
139
- ds.commit_delta(
140
- ds.stage_delta(
141
- input_deltas, staged_partition, input_delta_type, **ds_mock_kwargs
142
- ),
143
- **ds_mock_kwargs,
144
- )
145
- ds.commit_partition(staged_partition, **ds_mock_kwargs)
146
- source_table_stream_after_committed: Stream = ds.get_stream(
147
- namespace=source_namespace,
148
- table_name=source_table_name,
149
- table_version=source_table_version,
150
- **ds_mock_kwargs,
151
- )
152
- destination_table_namespace: Optional[str] = None
153
- destination_table_name: Optional[str] = None
154
- destination_table_version: Optional[str] = None
155
- if not simulate_is_inplace:
156
- (
157
- destination_table_namespace,
158
- destination_table_name,
159
- destination_table_version,
160
- ) = create_destination_table(sort_keys, partition_keys, ds_mock_kwargs)
161
- else:
162
- # not creating a table as in-place
163
- destination_table_namespace = source_namespace
164
- destination_table_name = source_table_name
165
- destination_table_version = source_table_version
166
-
167
- destination_table_stream: Stream = ds.get_stream(
168
- namespace=destination_table_namespace,
169
- table_name=destination_table_name,
170
- table_version=destination_table_version,
171
- **ds_mock_kwargs,
172
- )
173
- return (
174
- source_table_stream_after_committed,
175
- destination_table_stream,
176
- None,
177
- source_namespace,
178
- source_table_name,
179
- source_table_version,
180
- )
181
-
182
-
183
- def create_src_w_deltas_destination_rebase_w_deltas_strategy(
184
- sort_keys: Optional[List[Any]],
185
- partition_keys: Optional[List[PartitionKey]],
186
- input_deltas: pa.Table,
187
- input_delta_type: DeltaType,
188
- partition_values: Optional[List[Any]],
189
- ds_mock_kwargs: Optional[Dict[str, Any]],
190
- ) -> Tuple[Stream, Stream, Optional[Stream]]:
191
- import deltacat.tests.local_deltacat_storage as ds
192
- from deltacat.storage import Delta
193
- from deltacat.utils.common import current_time_ms
194
-
195
- last_stream_position = current_time_ms()
196
- source_namespace, source_table_name, source_table_version = create_src_table(
197
- sort_keys, partition_keys, ds_mock_kwargs
198
- )
199
-
200
- source_table_stream: Stream = ds.get_stream(
201
- namespace=source_namespace,
202
- table_name=source_table_name,
203
- table_version=source_table_version,
204
- **ds_mock_kwargs,
205
- )
206
- staged_partition: Partition = ds.stage_partition(
207
- source_table_stream, partition_values, **ds_mock_kwargs
208
- )
209
- staged_delta: Delta = ds.stage_delta(
210
- input_deltas, staged_partition, input_delta_type, **ds_mock_kwargs
211
- )
212
- staged_delta.locator.stream_position = last_stream_position
213
- ds.commit_delta(
214
- staged_delta,
215
- **ds_mock_kwargs,
216
- )
217
- ds.commit_partition(staged_partition, **ds_mock_kwargs)
218
- source_table_stream_after_committed: Stream = ds.get_stream(
219
- namespace=source_namespace,
220
- table_name=source_table_name,
221
- table_version=source_table_version,
222
- **ds_mock_kwargs,
223
- )
224
- # create the destination table
225
- (
226
- destination_table_namespace,
227
- destination_table_name,
228
- destination_table_version,
229
- ) = create_destination_table(sort_keys, partition_keys, ds_mock_kwargs)
230
- # create the rebase table
231
- (
232
- rebase_table_namespace,
233
- rebase_table_name,
234
- rebase_table_version,
235
- ) = create_rebase_table(sort_keys, partition_keys, ds_mock_kwargs)
236
- rebasing_table_stream: Stream = ds.get_stream(
237
- namespace=rebase_table_namespace,
238
- table_name=rebase_table_name,
239
- table_version=rebase_table_version,
240
- **ds_mock_kwargs,
241
- )
242
- staged_partition: Partition = ds.stage_partition(
243
- rebasing_table_stream, partition_values, **ds_mock_kwargs
244
- )
245
- staged_delta: Delta = ds.stage_delta(
246
- input_deltas, staged_partition, **ds_mock_kwargs
247
- )
248
- staged_delta.locator.stream_position = last_stream_position
249
- ds.commit_delta(
250
- staged_delta,
251
- **ds_mock_kwargs,
252
- )
253
- ds.commit_partition(staged_partition, **ds_mock_kwargs)
254
-
255
- # get streams
256
- # TODO: Add deltas to destination stream
257
- destination_table_stream: Stream = ds.get_stream(
258
- namespace=destination_table_namespace,
259
- table_name=destination_table_name,
260
- table_version=destination_table_version,
261
- **ds_mock_kwargs,
262
- )
263
- rebased_stream_after_committed: Stream = ds.get_stream(
264
- namespace=rebase_table_namespace,
265
- table_name=rebase_table_name,
266
- table_version=rebase_table_version,
267
- **ds_mock_kwargs,
268
- )
269
- return (
270
- source_table_stream_after_committed,
271
- destination_table_stream,
272
- rebased_stream_after_committed,
273
- )
274
-
275
-
276
- def multiple_rounds_create_src_w_deltas_destination_rebase_w_deltas_strategy(
277
- sort_keys: Optional[List[Any]],
278
- partition_keys: Optional[List[PartitionKey]],
279
- input_deltas: List[pa.Table],
280
- partition_values: Optional[List[Any]],
281
- ds_mock_kwargs: Optional[Dict[str, Any]],
282
- ) -> Tuple[Stream, Stream, Optional[Stream], bool]:
283
- import deltacat.tests.local_deltacat_storage as ds
284
- from deltacat.storage import Partition, Stream
285
-
286
- source_namespace, source_table_name, source_table_version = create_src_table(
287
- sort_keys, partition_keys, ds_mock_kwargs
288
- )
289
-
290
- source_table_stream: Stream = ds.get_stream(
291
- namespace=source_namespace,
292
- table_name=source_table_name,
293
- table_version=source_table_version,
294
- **ds_mock_kwargs,
295
- )
296
- staged_partition: Partition = ds.stage_partition(
297
- source_table_stream, partition_values, **ds_mock_kwargs
298
- )
299
- is_delete = False
300
- input_delta_length = 0
301
- for (
302
- input_delta,
303
- input_delta_type,
304
- input_delta_parameters,
305
- ) in input_deltas:
306
- if input_delta_type is DeltaType.DELETE:
307
- is_delete = True
308
- staged_delta = ds.stage_delta(
309
- input_delta,
310
- staged_partition,
311
- input_delta_type,
312
- entry_params=input_delta_parameters,
313
- **ds_mock_kwargs,
314
- )
315
- ds.commit_delta(
316
- staged_delta,
317
- **ds_mock_kwargs,
318
- )
319
- input_delta_length += len(input_delta) if input_delta else 0
320
- ds.commit_partition(staged_partition, **ds_mock_kwargs)
321
- source_table_stream_after_committed: Stream = ds.get_stream(
322
- namespace=source_namespace,
323
- table_name=source_table_name,
324
- table_version=source_table_version,
325
- **ds_mock_kwargs,
326
- )
327
- # create the destination table
328
- (
329
- destination_table_namespace,
330
- destination_table_name,
331
- destination_table_version,
332
- ) = create_destination_table(sort_keys, partition_keys, ds_mock_kwargs)
333
- # create the rebase table
334
- (
335
- rebase_table_namespace,
336
- rebase_table_name,
337
- rebase_table_version,
338
- ) = create_rebase_table(sort_keys, partition_keys, ds_mock_kwargs)
339
- rebasing_table_stream: Stream = ds.get_stream(
340
- namespace=rebase_table_namespace,
341
- table_name=rebase_table_name,
342
- table_version=rebase_table_version,
343
- **ds_mock_kwargs,
344
- )
345
- staged_partition: Partition = ds.stage_partition(
346
- rebasing_table_stream, partition_values, **ds_mock_kwargs
347
- )
348
- input_delta_length = 0
349
- for (
350
- input_delta,
351
- input_delta_type,
352
- input_delta_parameters,
353
- ) in input_deltas:
354
- if input_delta_type is DeltaType.DELETE:
355
- is_delete = True
356
- staged_delta = ds.stage_delta(
357
- input_delta,
358
- staged_partition,
359
- input_delta_type,
360
- entry_params=input_delta_parameters,
361
- **ds_mock_kwargs,
362
- )
363
- ds.commit_delta(
364
- staged_delta,
365
- **ds_mock_kwargs,
366
- )
367
- input_delta_length += len(input_delta) if input_delta else 0
368
- ds.commit_partition(staged_partition, **ds_mock_kwargs)
369
-
370
- # get streams
371
- destination_table_stream: Stream = ds.get_stream(
372
- namespace=destination_table_namespace,
373
- table_name=destination_table_name,
374
- table_version=destination_table_version,
375
- **ds_mock_kwargs,
376
- )
377
- rebased_stream_after_committed: Stream = ds.get_stream(
378
- namespace=rebase_table_namespace,
379
- table_name=rebase_table_name,
380
- table_version=rebase_table_version,
381
- **ds_mock_kwargs,
382
- )
383
- return (
384
- source_table_stream_after_committed,
385
- destination_table_stream,
386
- rebased_stream_after_committed,
387
- is_delete,
388
- )