deltacat 0.1.18b15__py3-none-any.whl → 0.1.18b16__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor/model/compact_partition_params.py +11 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +13 -0
- deltacat/compute/compactor/model/delta_annotated.py +10 -6
- deltacat/compute/compactor/repartition_session.py +2 -0
- deltacat/compute/compactor/steps/repartition.py +6 -0
- deltacat/compute/compactor_v2/compaction_session.py +72 -69
- deltacat/compute/compactor_v2/constants.py +3 -0
- deltacat/compute/compactor_v2/model/merge_input.py +17 -1
- deltacat/compute/compactor_v2/steps/merge.py +430 -2
- deltacat/compute/compactor_v2/utils/content_type_params.py +43 -14
- deltacat/compute/compactor_v2/utils/dedupe.py +58 -0
- deltacat/compute/compactor_v2/utils/io.py +11 -8
- deltacat/compute/compactor_v2/utils/primary_key_index.py +58 -25
- deltacat/compute/compactor_v2/utils/task_options.py +8 -15
- deltacat/tests/compute/common.py +1 -1
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -0
- deltacat/tests/compute/test_compaction_session_incremental.py +16 -1
- deltacat/tests/compute/testcases.py +7 -2
- deltacat/tests/test_utils/pyarrow.py +23 -6
- deltacat/types/partial_download.py +1 -0
- deltacat/types/tables.py +5 -0
- deltacat/utils/arguments.py +1 -2
- deltacat/utils/pyarrow.py +5 -0
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/METADATA +1 -1
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/RECORD +29 -30
- deltacat/tests/compute/compactor_v2/steps/__init__.py +0 -0
- deltacat/tests/compute/compactor_v2/steps/test_hash_bucket.py +0 -199
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/LICENSE +0 -0
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/WHEEL +0 -0
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
deltacat/compute/compactor/model/compact_partition_params.py
CHANGED
@@ -19,6 +19,7 @@ from deltacat.compute.compactor_v2.constants import (
     MIN_FILES_IN_BATCH,
     AVERAGE_RECORD_SIZE_BYTES,
     TASK_MAX_PARALLELISM,
+    DROP_DUPLICATES,
 )
 from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
 from deltacat.compute.compactor.utils.sort_key import validate_sort_keys
@@ -88,6 +89,7 @@ class CompactPartitionParams(dict):
         result.hash_group_count = params.get(
             "hash_group_count", result.hash_bucket_count
         )
+        result.drop_duplicates = params.get("drop_duplicates", DROP_DUPLICATES)

         if not importlib.util.find_spec("memray"):
             result.enable_profiler = False
@@ -196,7 +198,7 @@ class CompactPartitionParams(dict):

     @property
     def min_delta_bytes_in_batch(self) -> float:
-        return self["
+        return self["min_delta_bytes_in_batch"]

     @min_delta_bytes_in_batch.setter
     def min_delta_bytes_in_batch(self, min_delta_bytes_in_batch: float) -> None:
@@ -258,6 +260,14 @@ class CompactPartitionParams(dict):
     def records_per_compacted_file(self, count: int) -> None:
         self["records_per_compacted_file"] = count

+    @property
+    def drop_duplicates(self) -> bool:
+        return self["drop_duplicates"]
+
+    @drop_duplicates.setter
+    def drop_duplicates(self, value: bool):
+        self["drop_duplicates"] = value
+
     @property
     def bit_width_of_sort_keys(self) -> int:
         return self["bit_width_of_sort_keys"]
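The new `drop_duplicates` flag follows the same dict-backed property pattern as the rest of `CompactPartitionParams`: `of()` seeds the key from the input dict (falling back to the `DROP_DUPLICATES` constant), and the property/setter pair reads and writes `self["drop_duplicates"]`. A minimal standalone sketch of that pattern (illustrative only, not the deltacat class itself):

```python
# Standalone sketch of the dict-backed property pattern shown in the hunks above.
# The class name is hypothetical; the default mirrors the new constant in
# deltacat/compute/compactor_v2/constants.py.
DROP_DUPLICATES = True


class ParamsSketch(dict):
    @classmethod
    def of(cls, params: dict) -> "ParamsSketch":
        result = cls(params)
        result.drop_duplicates = params.get("drop_duplicates", DROP_DUPLICATES)
        return result

    @property
    def drop_duplicates(self) -> bool:
        return self["drop_duplicates"]

    @drop_duplicates.setter
    def drop_duplicates(self, value: bool) -> None:
        self["drop_duplicates"] = value


print(ParamsSketch.of({}).drop_duplicates)                           # True (default)
print(ParamsSketch.of({"drop_duplicates": False}).drop_duplicates)   # False
```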
deltacat/compute/compactor/model/compaction_session_audit_info.py
CHANGED
@@ -1,5 +1,6 @@
 # Allow classes to use self-referencing Type hints in Python 3.7.
 from __future__ import annotations
+import pyarrow as pa
 import logging
 from deltacat import logs
 from typing import List, Union
@@ -419,6 +420,13 @@ class CompactionSessionAuditInfo(dict):
         """
         return self.get("usedCPUSeconds")

+    @property
+    def pyarrow_version(self) -> str:
+        """
+        The version of PyArrow used.
+        """
+        return self.get("pyarrowVersion")
+
     # Setters follow

     def set_audit_url(self, audit_url: str) -> CompactionSessionAuditInfo:
@@ -735,6 +743,10 @@ class CompactionSessionAuditInfo(dict):
         self["usedCPUSeconds"] = value
         return self

+    def set_pyarrow_version(self, value: str) -> CompactionSessionAuditInfo:
+        self["pyarrowVersion"] = value
+        return self
+
     # High level methods to save stats

     def save_step_stats(
         self,
@@ -863,4 +875,5 @@ class CompactionSessionAuditInfo(dict):
             )
         )

+        self.set_pyarrow_version(pa.__version__)
         self.set_telemetry_time_in_seconds(total_telemetry_time)
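With the getter and setter above, the audit now records which PyArrow version produced the compaction run; the final hunk shows it being stamped from `pa.__version__` alongside the telemetry time. A small hedged sketch of the resulting audit entry (the real audit object and its other fields are not constructed here):

```python
# Hedged sketch: set_pyarrow_version stores the interpreter's PyArrow version under
# the camelCase key "pyarrowVersion", matching the naming of the other audit fields.
import pyarrow as pa

audit_entries: dict = {}
audit_entries["pyarrowVersion"] = pa.__version__  # what set_pyarrow_version(value) does
print(audit_entries)  # e.g. {'pyarrowVersion': '12.0.1'}
```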
deltacat/compute/compactor/model/delta_annotated.py
CHANGED
@@ -89,6 +89,11 @@ class DeltaAnnotated(Delta):
         for delta_annotated in annotated_deltas:
             split_annotated_deltas.extend(DeltaAnnotated._split_single(delta_annotated))

+        logger.info(
+            f"Split the {len(annotated_deltas)} annotated deltas "
+            f"into {len(split_annotated_deltas)} groups."
+        )
+
         for src_da in split_annotated_deltas:
             src_da_annotations = src_da.annotations
             src_da_entries = src_da.manifest.entries
@@ -280,12 +285,11 @@ class DeltaAnnotated(Delta):
                 )

                 result.append(new_da)
+            else:
+                return [delta_annotated]

-
-
-
-        logger.info(
-            f"Split was not performed on the delta with locator: {delta_annotated.locator}"
-        )
+        logger.info(
+            f"Split was not performed on the delta with locator: {delta_annotated.locator}"
+        )

         return [delta_annotated]
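The new log line reports how many work groups came out of splitting. Because `_split_single` may split one annotated delta into several finer-grained ones, the group count can be greater than or equal to the input count. A runnable illustration of the message format with made-up values:

```python
# Illustration only: the f-string matches the log call added above; the delta names
# and counts are placeholders.
annotated_deltas = ["delta-1", "delta-2"]
split_annotated_deltas = ["delta-1-part-0", "delta-1-part-1", "delta-2"]

print(
    f"Split the {len(annotated_deltas)} annotated deltas "
    f"into {len(split_annotated_deltas)} groups."
)
# -> Split the 2 annotated deltas into 3 groups.
```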
deltacat/compute/compactor/repartition_session.py
CHANGED
@@ -54,6 +54,7 @@ def repartition(
     pg_config: Optional[PlacementGroupConfig] = None,
     list_deltas_kwargs: Optional[Dict[str, Any]] = None,
     read_kwargs_provider: Optional[ReadKwargsProvider] = None,
+    s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
     s3_client_kwargs: Optional[Dict[str, Any]] = None,
     deltacat_storage=unimplemented_deltacat_storage,
     **kwargs,
@@ -131,6 +132,7 @@ def repartition(
         enable_profiler=enable_profiler,
         metrics_config=metrics_config,
         read_kwargs_provider=read_kwargs_provider,
+        s3_table_writer_kwargs=s3_table_writer_kwargs,
         repartitioned_file_content_type=repartitioned_file_content_type,
         deltacat_storage=deltacat_storage,
     )
deltacat/compute/compactor/steps/repartition.py
CHANGED
@@ -56,6 +56,7 @@ def repartition_range(
     destination_partition: Partition,
     repartition_args: dict,
     max_records_per_output_file: int,
+    s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
     repartitioned_file_content_type: ContentType = ContentType.PARQUET,
     deltacat_storage=unimplemented_deltacat_storage,
     deltacat_storage_kwargs: Optional[Dict[str, Any]] = None,
@@ -145,6 +146,7 @@ def repartition_range(
             destination_partition,
             max_records_per_entry=max_records_per_output_file,
             content_type=repartitioned_file_content_type,
+            s3_table_writer_kwargs=s3_table_writer_kwargs,
             **deltacat_storage_kwargs,
         )
         partition_deltas.append(partition_delta)
@@ -166,6 +168,7 @@ def _timed_repartition(
     max_records_per_output_file: int,
     enable_profiler: bool,
     read_kwargs_provider: Optional[ReadKwargsProvider],
+    s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
     repartitioned_file_content_type: ContentType = ContentType.PARQUET,
     deltacat_storage=unimplemented_deltacat_storage,
     deltacat_storage_kwargs: Optional[Dict[str, Any]] = None,
@@ -189,6 +192,7 @@ def _timed_repartition(
         destination_partition=destination_partition,
         repartition_args=repartition_args,
         max_records_per_output_file=max_records_per_output_file,
+        s3_table_writer_kwargs=s3_table_writer_kwargs,
         repartitioned_file_content_type=repartitioned_file_content_type,
         deltacat_storage=deltacat_storage,
         deltacat_storage_kwargs=deltacat_storage_kwargs,
@@ -209,6 +213,7 @@ def repartition(
     enable_profiler: bool,
     metrics_config: Optional[MetricsConfig],
     read_kwargs_provider: Optional[ReadKwargsProvider],
+    s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
     repartitioned_file_content_type: ContentType = ContentType.PARQUET,
     deltacat_storage=unimplemented_deltacat_storage,
     deltacat_storage_kwargs: Optional[Dict[str, Any]] = None,
@@ -226,6 +231,7 @@ def repartition(
         max_records_per_output_file=max_records_per_output_file,
         enable_profiler=enable_profiler,
         read_kwargs_provider=read_kwargs_provider,
+        s3_table_writer_kwargs=s3_table_writer_kwargs,
         repartitioned_file_content_type=repartitioned_file_content_type,
         deltacat_storage=deltacat_storage,
         deltacat_storage_kwargs=deltacat_storage_kwargs,
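All of the repartition hunks above thread one new optional argument, `s3_table_writer_kwargs`, from the session entry point through `repartition_range` / `_timed_repartition` down to the storage call that writes the repartitioned files. A minimal standalone sketch of that pass-through (the function names are stand-ins, and the Parquet writer option is an assumption about what a caller might supply):

```python
# Hypothetical sketch of the kwargs plumbing added above; not deltacat code.
from typing import Any, Dict, Optional


def storage_stage_delta(table: str, s3_table_writer_kwargs: Optional[Dict[str, Any]] = None) -> None:
    # Stand-in for the storage write call: it receives the dict unchanged.
    print("writer kwargs:", s3_table_writer_kwargs)


def repartition_sketch(table: str, s3_table_writer_kwargs: Optional[Dict[str, Any]] = None) -> None:
    # Stand-in for repartition() -> repartition_range(): the kwargs are only forwarded.
    storage_stage_delta(table, s3_table_writer_kwargs=s3_table_writer_kwargs)


repartition_sketch("my_table", s3_table_writer_kwargs={"compression": "ZSTD"})
```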
deltacat/compute/compactor_v2/compaction_session.py
CHANGED
@@ -133,7 +133,7 @@ def _execute_compaction(
     # read the results from any previously completed compaction round
     round_completion_info = None
     high_watermark = None
-
+    previous_compacted_delta_manifest = None

     if not params.rebase_source_partition_locator:
         round_completion_info = rcf.read_round_completion_file(
@@ -147,13 +147,11 @@ def _execute_compaction(
         )
     else:
         compacted_delta_locator = round_completion_info.compacted_delta_locator
-
-
-
-
-
-            include_manifest=True,
-            **params.deltacat_storage_kwargs,
+
+        previous_compacted_delta_manifest = (
+            params.deltacat_storage.get_delta_manifest(
+                compacted_delta_locator, **params.deltacat_storage_kwargs
+            )
         )

         high_watermark = round_completion_info.high_watermark
@@ -182,7 +180,22 @@ def _execute_compaction(
         params.list_deltas_kwargs,
     )

+    uniform_deltas = io.create_uniform_input_deltas(
+        input_deltas=input_deltas,
+        hash_bucket_count=params.hash_bucket_count,
+        compaction_audit=compaction_audit,
+        deltacat_storage=params.deltacat_storage,
+        previous_inflation=params.previous_inflation,
+        min_delta_bytes=params.min_delta_bytes_in_batch,
+        min_file_counts=params.min_files_in_batch,
+        # disable input split during rebase as the rebase files are already uniform
+        enable_input_split=params.rebase_source_partition_locator is None,
+        deltacat_storage_kwargs=params.deltacat_storage_kwargs,
+    )
+
     delta_discovery_end = time.monotonic()
+
+    compaction_audit.set_uniform_deltas_created(len(uniform_deltas))
     compaction_audit.set_delta_discovery_time_in_seconds(
         delta_discovery_end - delta_discovery_start
     )
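One side effect of moving `create_uniform_input_deltas` up: it now runs before `delta_discovery_end` is captured, so its cost is included in the reported delta-discovery time instead of falling outside the timed window. A minimal standalone timing sketch (the sleeps stand in for the actual work):

```python
# Illustration of the timing window after the change; durations are arbitrary.
import time

delta_discovery_start = time.monotonic()
time.sleep(0.01)  # stand-in for listing the input deltas
time.sleep(0.01)  # stand-in for create_uniform_input_deltas, now inside the window
delta_discovery_end = time.monotonic()

print(f"delta discovery took {delta_discovery_end - delta_discovery_start:.3f}s")
```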
@@ -197,19 +210,6 @@ def _execute_compaction(
         logger.info("No input deltas found to compact.")
         return None, None, None

-    uniform_deltas = io.create_uniform_input_deltas(
-        input_deltas=input_deltas,
-        hash_bucket_count=params.hash_bucket_count,
-        compaction_audit=compaction_audit,
-        deltacat_storage=params.deltacat_storage,
-        previous_inflation=params.previous_inflation,
-        min_delta_bytes=params.min_delta_bytes_in_batch,
-        min_file_counts=params.min_files_in_batch,
-        deltacat_storage_kwargs=params.deltacat_storage_kwargs,
-    )
-
-    compaction_audit.set_uniform_deltas_created(len(uniform_deltas))
-
     hb_options_provider = functools.partial(
         task_resource_options_provider,
         pg_config=params.pg_config,
@@ -221,20 +221,21 @@ def _execute_compaction(

     hb_start = time.monotonic()

-    hash_bucket_input_provider
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def hash_bucket_input_provider(index, item):
+        return {
+            "input": HashBucketInput.of(
+                item,
+                primary_keys=params.primary_keys,
+                num_hash_buckets=params.hash_bucket_count,
+                num_hash_groups=params.hash_group_count,
+                enable_profiler=params.enable_profiler,
+                metrics_config=params.metrics_config,
+                read_kwargs_provider=params.read_kwargs_provider,
+                object_store=params.object_store,
+                deltacat_storage=params.deltacat_storage,
+                deltacat_storage_kwargs=params.deltacat_storage_kwargs,
+            )
+        }

     hb_tasks_pending = invoke_parallel(
         items=uniform_deltas,
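The hunk above replaces a removed block (only the name `hash_bucket_input_provider` survives in the captured old lines) with a named function that builds the keyword arguments for each parallel hash-bucket task. A standalone sketch of that provider contract, assuming `invoke_parallel` calls the provider once per item with the item's index:

```python
# Standalone sketch of the kwargs-provider pattern; the item type and the calling
# convention described in the comments are assumptions based on how the provider
# is used above (items=uniform_deltas, "input": HashBucketInput.of(item, ...)).
def hash_bucket_input_provider_sketch(index: int, item: str) -> dict:
    # In the real code `item` is one uniform input delta and the value wraps a
    # HashBucketInput; here we only show the shape of the returned kwargs.
    return {"input": {"task_index": index, "annotated_delta": item}}


uniform_deltas = ["delta-a", "delta-b", "delta-c"]
task_kwargs = [
    hash_bucket_input_provider_sketch(i, delta) for i, delta in enumerate(uniform_deltas)
]
print(task_kwargs[0])  # {'input': {'task_index': 0, 'annotated_delta': 'delta-a'}}
```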
@@ -332,33 +333,36 @@ def _execute_compaction(
         hash_group_size_bytes=all_hash_group_idx_to_size_bytes,
         hash_group_num_rows=all_hash_group_idx_to_num_rows,
         round_completion_info=round_completion_info,
-
+        compacted_delta_manifest=previous_compacted_delta_manifest,
         primary_keys=params.primary_keys,
         deltacat_storage=params.deltacat_storage,
         deltacat_storage_kwargs=params.deltacat_storage_kwargs,
     )

-    merge_input_provider
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def merge_input_provider(index, item):
+        return {
+            "input": MergeInput.of(
+                dfe_groups_refs=item[1],
+                write_to_partition=compacted_partition,
+                compacted_file_content_type=params.compacted_file_content_type,
+                primary_keys=params.primary_keys,
+                sort_keys=params.sort_keys,
+                merge_task_index=index,
+                hash_bucket_count=params.hash_bucket_count,
+                drop_duplicates=params.drop_duplicates,
+                hash_group_index=item[0],
+                num_hash_groups=params.hash_group_count,
+                max_records_per_output_file=params.records_per_compacted_file,
+                enable_profiler=params.enable_profiler,
+                metrics_config=params.metrics_config,
+                s3_table_writer_kwargs=params.s3_table_writer_kwargs,
+                read_kwargs_provider=params.read_kwargs_provider,
+                round_completion_info=round_completion_info,
+                object_store=params.object_store,
+                deltacat_storage=params.deltacat_storage,
+                deltacat_storage_kwargs=params.deltacat_storage_kwargs,
+            )
+        }

     merge_start = time.monotonic()

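`merge_input_provider` indexes into its work item with `item[0]` (the hash group index) and `item[1]` (the delta file envelope group references), so each merge work item is evidently a two-element pair, and `index` becomes the merge task index. A small standalone sketch of that unpacking (the reference strings are placeholders):

```python
# Illustration of the (hash_group_index, dfe_groups_refs) pairing inferred from
# item[0] / item[1] in the provider above; the values are placeholders.
merge_work_items = [
    (0, ["object-ref-hg0-a", "object-ref-hg0-b"]),
    (1, ["object-ref-hg1-a"]),
]

for merge_task_index, item in enumerate(merge_work_items):
    hash_group_index, dfe_groups_refs = item
    print(merge_task_index, hash_group_index, len(dfe_groups_refs))
```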
@@ -399,25 +403,25 @@ def _execute_compaction(
         mat_results, key=lambda m: m.task_index
     )

-    deltas = [m.delta for m in mat_results]
-
     hb_id_to_entry_indices_range = {}
     file_index = 0
     previous_task_index = -1

-    for
-    assert
-
-
-
+    for mat_result in mat_results:
+        assert (
+            mat_result.pyarrow_write_result.files >= 1
+        ), "Atleast one file must be materialized"
+        assert (
+            mat_result.task_index != previous_task_index
+        ), f"Multiple materialize results found for a hash bucket: {mat_result.task_index}"

-        hb_id_to_entry_indices_range[str(
+        hb_id_to_entry_indices_range[str(mat_result.task_index)] = (
             file_index,
-            file_index +
+            file_index + mat_result.pyarrow_write_result.files,
         )

-        file_index +=
-        previous_task_index =
+        file_index += mat_result.pyarrow_write_result.files
+        previous_task_index = mat_result.task_index

     s3_utils.upload(
         compaction_audit.audit_url,
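The loop above builds `hb_id_to_entry_indices_range`: for each materialize result it records the half-open range of manifest file indices that task contributed, keyed by the task index, while asserting that every task wrote at least one file and that no task index repeats. A standalone re-implementation of the same bookkeeping over plain integers (the per-task file counts are made up, and the real loop keys on `mat_result.task_index` rather than a sequential counter):

```python
# Standalone illustration of the bookkeeping above: given the number of files each
# materialize task wrote (in task order), compute the [start, end) file-index range
# per task.
files_per_task = [3, 1, 2]  # e.g. mat_result.pyarrow_write_result.files per task

hb_id_to_entry_indices_range = {}
file_index = 0
for task_index, files in enumerate(files_per_task):
    assert files >= 1, "At least one file must be materialized"
    hb_id_to_entry_indices_range[str(task_index)] = (file_index, file_index + files)
    file_index += files

print(hb_id_to_entry_indices_range)
# {'0': (0, 3), '1': (3, 4), '2': (4, 6)}
```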
@@ -425,7 +429,6 @@ def _execute_compaction(
         **params.s3_client_kwargs,
     )

-    mat_results = sorted(mat_results, key=lambda m: m.task_index)
     deltas = [m.delta for m in mat_results]

     # Note: An appropriate last stream position must be set
deltacat/compute/compactor_v2/constants.py
CHANGED
@@ -32,3 +32,6 @@ TOTAL_MEMORY_BUFFER_PERCENTAGE = 20
 # Since, sorting is nlogn, we ensure that is not performed
 # on a very large dataset for best performance.
 MAX_SIZE_OF_RECORD_BATCH_IN_GIB = 2 * 1024 * 1024 * 1024
+
+# Whether to drop duplicates during merge.
+DROP_DUPLICATES = True
deltacat/compute/compactor_v2/model/merge_input.py
CHANGED
@@ -10,6 +10,10 @@ from deltacat.storage import (
     SortKey,
     interface as unimplemented_deltacat_storage,
 )
+from deltacat.compute.compactor_v2.constants import (
+    DROP_DUPLICATES,
+    MAX_RECORDS_PER_COMPACTED_FILE,
+)
 from deltacat.types.media import ContentType
 from deltacat.compute.compactor.model.round_completion_info import RoundCompletionInfo
 from deltacat.compute.compactor.model.delta_file_envelope import DeltaFileEnvelopeGroups
@@ -24,9 +28,11 @@ class MergeInput(Dict):
         primary_keys: List[str],
         hash_group_index: int,
         num_hash_groups: int,
+        hash_bucket_count: int,
+        drop_duplicates: Optional[bool] = DROP_DUPLICATES,
         sort_keys: Optional[List[SortKey]] = None,
         merge_task_index: Optional[int] = 0,
-        max_records_per_output_file: Optional[int] =
+        max_records_per_output_file: Optional[int] = MAX_RECORDS_PER_COMPACTED_FILE,
         enable_profiler: Optional[bool] = False,
         metrics_config: Optional[MetricsConfig] = None,
         s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
@@ -44,6 +50,8 @@ class MergeInput(Dict):
         result["primary_keys"] = primary_keys
         result["hash_group_index"] = hash_group_index
         result["num_hash_groups"] = num_hash_groups
+        result["hash_bucket_count"] = hash_bucket_count
+        result["drop_duplicates"] = drop_duplicates
         result["sort_keys"] = sort_keys
         result["merge_task_index"] = merge_task_index
         result["max_records_per_output_file"] = max_records_per_output_file
@@ -82,6 +90,14 @@ class MergeInput(Dict):
     def num_hash_groups(self) -> int:
         return self["num_hash_groups"]

+    @property
+    def hash_bucket_count(self) -> int:
+        return self["hash_bucket_count"]
+
+    @property
+    def drop_duplicates(self) -> int:
+        return self["drop_duplicates"]
+
     @property
     def sort_keys(self) -> Optional[List[SortKey]]:
         return self.get("sort_keys")