deltacat 0.1.18b3__py3-none-any.whl → 0.1.18b7__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor/compaction_session.py +184 -29
- deltacat/compute/compactor/model/compact_partition_params.py +153 -0
- deltacat/compute/compactor/model/compaction_session_audit_info.py +725 -0
- deltacat/compute/compactor/model/dedupe_result.py +3 -0
- deltacat/compute/compactor/model/delta_file_envelope.py +8 -0
- deltacat/compute/compactor/model/delta_file_locator.py +11 -6
- deltacat/compute/compactor/model/hash_bucket_result.py +3 -0
- deltacat/compute/compactor/model/materialize_result.py +27 -6
- deltacat/compute/compactor/model/round_completion_info.py +9 -0
- deltacat/compute/compactor/steps/dedupe.py +35 -19
- deltacat/compute/compactor/steps/hash_bucket.py +41 -16
- deltacat/compute/compactor/steps/materialize.py +73 -70
- deltacat/compute/compactor/utils/io.py +15 -0
- deltacat/compute/compactor/utils/primary_key_index.py +9 -15
- deltacat/compute/compactor/utils/round_completion_file.py +13 -4
- deltacat/compute/compactor/utils/system_columns.py +32 -0
- deltacat/io/__init__.py +0 -7
- deltacat/io/file_object_store.py +48 -0
- deltacat/io/memcached_object_store.py +121 -0
- deltacat/io/object_store.py +51 -0
- deltacat/io/ray_plasma_object_store.py +23 -0
- deltacat/io/redis_object_store.py +114 -0
- deltacat/io/s3_object_store.py +44 -0
- deltacat/storage/model/delta.py +2 -1
- deltacat/tests/compactor/test_compact_partition_params.py +237 -0
- deltacat/tests/compactor/utils/test_io.py +27 -5
- deltacat/tests/io/__init__.py +0 -0
- deltacat/tests/io/test_file_object_store.py +86 -0
- deltacat/tests/io/test_memcached_object_store.py +158 -0
- deltacat/tests/io/test_ray_plasma_object_store.py +54 -0
- deltacat/tests/io/test_redis_object_store.py +103 -0
- deltacat/tests/io/test_s3_object_store.py +59 -0
- deltacat/tests/utils/test_record_batch_tables.py +1 -1
- deltacat/tests/utils/test_resources.py +9 -0
- deltacat/utils/ray_utils/concurrency.py +0 -2
- deltacat/utils/resources.py +30 -18
- {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/METADATA +3 -1
- {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/RECORD +42 -27
- {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/LICENSE +0 -0
- {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/WHEEL +0 -0
- {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/top_level.txt +0 -0
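The headline addition in this release is a pluggable object store layer under `deltacat/io`: an `IObjectStore` interface (`object_store.py`) with file, memcached, Ray Plasma, Redis, and S3 backends, each with matching tests. A minimal sketch of the put/get round trip the compactor steps below rely on, assuming the class is named `RayPlasmaObjectStore` after its module and that its constructor takes no required arguments:

```python
# Sketch only: put()/get_many() usage is taken from the dedupe/hash_bucket
# diffs below; the class name and no-arg constructor are assumptions.
import ray
from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore

ray.init()
object_store = RayPlasmaObjectStore()
ref = object_store.put({"hb_index": 0})  # store one object, get back a ref
payloads = object_store.get_many([ref])  # batch-resolve refs to objects
assert payloads == [{"hb_index": 0}]
```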
deltacat/compute/compactor/model/delta_file_envelope.py

```diff
@@ -5,6 +5,8 @@ import numpy as np
 
 from deltacat.storage import DeltaType, LocalTable
 
+from typing import Optional
+
 DeltaFileEnvelopeGroups = np.ndarray
 
 
@@ -16,6 +18,7 @@ class DeltaFileEnvelope(dict):
         delta_type: DeltaType,
         table: LocalTable,
         is_src_delta: np.bool_ = True,
+        file_record_count: Optional[int] = None,
     ) -> DeltaFileEnvelope:
         """Static factory builder for a Delta File Envelope
         `
@@ -46,6 +49,7 @@ class DeltaFileEnvelope(dict):
         delta_file_envelope["deltaType"] = delta_type.value
         delta_file_envelope["table"] = table
         delta_file_envelope["is_src_delta"] = is_src_delta
+        delta_file_envelope["file_record_count"] = file_record_count
         return delta_file_envelope
 
     @property
@@ -67,3 +71,7 @@ class DeltaFileEnvelope(dict):
     @property
     def is_src_delta(self) -> np.bool_:
         return self["is_src_delta"]
+
+    @property
+    def file_record_count(self) -> int:
+        return self["file_record_count"]
```
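`DeltaFileEnvelope.of` now records how many records the source file holds, so downstream steps can account for records without re-reading the file. A usage sketch with placeholder values; the keyword-call style mirrors the updated call sites in `hash_bucket.py` below:

```python
import pyarrow as pa
from deltacat.storage import DeltaType
from deltacat.compute.compactor.model.delta_file_envelope import DeltaFileEnvelope

table = pa.table({"pk": [1, 2, 3]})
dfe = DeltaFileEnvelope.of(
    stream_position=100,  # placeholder values throughout
    file_index=0,
    delta_type=DeltaType.UPSERT,
    table=table,
    file_record_count=len(table),  # new optional field in 0.1.18b7
)
assert dfe.file_record_count == 3
```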
deltacat/compute/compactor/model/delta_file_locator.py

```diff
@@ -5,11 +5,16 @@ import numpy as np
 
 from deltacat.storage import Locator
 
+from typing import Optional
+
 
 class DeltaFileLocator(Locator, tuple):
     @staticmethod
     def of(
-        is_src_delta: np.bool_,
+        is_src_delta: np.bool_,
+        stream_position: np.int64,
+        file_index: np.int32,
+        file_record_count: Optional[np.int64] = None,
     ) -> DeltaFileLocator:
         """
         Create a Delta File Locator tuple that can be used to uniquely identify
@@ -31,11 +36,7 @@ class DeltaFileLocator(Locator, tuple):
         (is_source_delta, stream_position, file_index).
         """
         return DeltaFileLocator(
-            (
-                is_src_delta,
-                stream_position,
-                file_index,
-            )
+            (is_src_delta, stream_position, file_index, file_record_count)
         )
 
     @property
@@ -50,6 +51,10 @@ class DeltaFileLocator(Locator, tuple):
     def file_index(self) -> np.int32:
         return self[2]
 
+    @property
+    def file_record_count(self) -> np.int64:
+        return self[3]
+
     def canonical_string(self) -> str:
         """
         Returns a unique string for the given locator that can be used
```
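Because `DeltaFileLocator` subclasses `tuple`, appending `file_record_count` as a fourth element keeps the existing positional accessors (`self[0]` through `self[2]`) intact. A sketch with placeholder values:

```python
import numpy as np
from deltacat.compute.compactor.model.delta_file_locator import DeltaFileLocator

dfl = DeltaFileLocator.of(
    is_src_delta=np.bool_(True),
    stream_position=np.int64(100),  # placeholder values
    file_index=np.int32(0),
    file_record_count=np.int64(42),  # optional; defaults to None
)
assert dfl.file_record_count == 42  # stored at tuple index 3
```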
deltacat/compute/compactor/model/materialize_result.py

```diff
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 from typing import Any, Dict, Optional
+import numpy as np
 
 from deltacat.compute.compactor.model.pyarrow_write_result import PyArrowWriteResult
 from deltacat.storage import Delta
@@ -13,15 +14,19 @@ class MaterializeResult(dict):
         delta: Delta,
         task_index: int,
         pyarrow_write_result: PyArrowWriteResult,
-
-
+        referenced_pyarrow_write_result: Optional[PyArrowWriteResult] = None,
+        peak_memory_usage_bytes: Optional[np.double] = None,
+        telemetry_time_in_seconds: Optional[np.double] = None,
+        task_completed_at: Optional[np.double] = None,
     ) -> MaterializeResult:
         materialize_result = MaterializeResult()
         materialize_result["delta"] = delta
         materialize_result["taskIndex"] = task_index
         materialize_result["paWriteResult"] = pyarrow_write_result
-        materialize_result["
-        materialize_result["
+        materialize_result["referencedPaWriteResult"] = referenced_pyarrow_write_result
+        materialize_result["peakMemoryUsageBytes"] = peak_memory_usage_bytes
+        materialize_result["telemetryTimeInSeconds"] = telemetry_time_in_seconds
+        materialize_result["taskCompletedAt"] = task_completed_at
         return materialize_result
 
     @property
@@ -35,6 +40,14 @@ class MaterializeResult(dict):
     def task_index(self) -> int:
         return self["taskIndex"]
 
+    @property
+    def peak_memory_usage_bytes(self) -> Optional[np.double]:
+        return self["peakMemoryUsageBytes"]
+
+    @property
+    def telemetry_time_in_seconds(self) -> Optional[np.double]:
+        return self["telemetryTimeInSeconds"]
+
     @property
     def pyarrow_write_result(self) -> PyArrowWriteResult:
         val: Dict[str, Any] = self.get("paWriteResult")
@@ -47,5 +60,13 @@ class MaterializeResult(dict):
         return self["countOfSrcFileNotTouched"]
 
     @property
-    def
-
+    def referenced_pyarrow_write_result(self) -> PyArrowWriteResult:
+        val: Dict[str, Any] = self.get("referencedPaWriteResult")
+        if val is not None and not isinstance(val, PyArrowWriteResult):
+            self["referencedPaWriteResult"] = val = PyArrowWriteResult(val)
+
+        return val
+
+    @property
+    def task_completed_at(self) -> Optional[np.double]:
+        return self["taskCompletedAt"]
```
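`MaterializeResult` now distinguishes records physically rewritten (`pyarrow_write_result`) from records carried over by manifest-entry reference (`referenced_pyarrow_write_result`), and carries per-task telemetry. A consumption sketch, assuming `PyArrowWriteResult` exposes `files` and `records` counters:

```python
# Sketch: aggregate rewritten vs. referenced output for one materialize task.
def summarize(mat_result) -> dict:
    written = mat_result.pyarrow_write_result
    referenced = mat_result.referenced_pyarrow_write_result  # may be None
    return {
        "files_written": written.files,
        "records_written": written.records,
        "records_referenced": referenced.records if referenced else 0,
        "peak_memory_bytes": mat_result.peak_memory_usage_bytes,
        "completed_at": mat_result.task_completed_at,
    }
```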
deltacat/compute/compactor/model/round_completion_info.py

```diff
@@ -3,6 +3,9 @@ from __future__ import annotations
 
 from deltacat.storage import DeltaLocator, PartitionLocator
 from deltacat.compute.compactor.model.pyarrow_write_result import PyArrowWriteResult
+from deltacat.compute.compactor.model.compaction_session_audit_info import (
+    CompactionSessionAuditInfo,
+)
 from typing import Any, Dict, Optional
 
 
@@ -39,6 +42,7 @@ class RoundCompletionInfo(dict):
         sort_keys_bit_width: int,
         rebase_source_partition_locator: Optional[PartitionLocator],
         manifest_entry_copied_by_reference_ratio: Optional[float] = None,
+        compaction_audit_url: Optional[str] = None,
     ) -> RoundCompletionInfo:
 
         rci = RoundCompletionInfo()
@@ -50,6 +54,7 @@ class RoundCompletionInfo(dict):
         rci[
             "manifestEntryCopiedByReferenceRatio"
         ] = manifest_entry_copied_by_reference_ratio
+        rci["compactionAuditUrl"] = compaction_audit_url
         return rci
 
     @property
@@ -81,6 +86,10 @@ class RoundCompletionInfo(dict):
     def sort_keys_bit_width(self) -> int:
         return self["sortKeysBitWidth"]
 
+    @property
+    def compaction_audit(self) -> Optional[CompactionSessionAuditInfo]:
+        return self.get("compactionAudit")
+
     @property
     def rebase_source_partition_locator(self) -> Optional[PartitionLocator]:
         return self.get("rebaseSourcePartitionLocator")
```
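Note that the factory stores the URL under the `"compactionAuditUrl"` key while the new `compaction_audit` property reads `"compactionAudit"`, so in this release the audit URL is most reliably read back by key. A sketch:

```python
from typing import Optional
from deltacat.compute.compactor import RoundCompletionInfo  # export path assumed

def audit_url_of(rci: RoundCompletionInfo) -> Optional[str]:
    # RoundCompletionInfo subclasses dict, so read the key written by of().
    return rci.get("compactionAuditUrl")
```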
deltacat/compute/compactor/steps/dedupe.py

```diff
@@ -1,14 +1,14 @@
 import importlib
 import logging
+from typing import Optional
+import time
 from collections import defaultdict
 from contextlib import nullcontext
 from typing import Any, Dict, List, Tuple
-
 import numpy as np
 import pyarrow as pa
 import pyarrow.compute as pc
 import ray
-from ray import cloudpickle
 
 from deltacat import logs
 from deltacat.compute.compactor import (
@@ -25,6 +25,8 @@ from deltacat.utils.ray_utils.runtime import (
 )
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
+from deltacat.io.object_store import IObjectStore
+from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -105,6 +107,7 @@ def _timed_dedupe(
     num_materialize_buckets: int,
     dedupe_task_index: int,
     enable_profiler: bool,
+    object_store: Optional[IObjectStore],
 ):
     task_id = get_current_ray_task_id()
     worker_id = get_current_ray_worker_id()
@@ -113,15 +116,12 @@ def _timed_dedupe(
     ) if enable_profiler else nullcontext():
         # TODO (pdames): mitigate risk of running out of memory here in cases of
         # severe skew of primary key updates in deltas
-        src_file_records_obj_refs = [
-            cloudpickle.loads(obj_id_pkl) for obj_id_pkl in object_ids
-        ]
         logger.info(
             f"[Dedupe task {dedupe_task_index}] Getting delta file envelope "
-            f"groups for {len(
+            f"groups for {len(object_ids)} object refs..."
         )
 
-        delta_file_envelope_groups_list =
+        delta_file_envelope_groups_list = object_store.get_many(object_ids)
         hb_index_to_delta_file_envelopes_list = defaultdict(list)
         for delta_file_envelope_groups in delta_file_envelope_groups_list:
             for hb_idx, dfes in enumerate(delta_file_envelope_groups):
@@ -188,17 +188,18 @@ def _timed_dedupe(
             file_idx_col = sc.file_index_column_np(table)
             row_idx_col = sc.record_index_column_np(table)
             is_source_col = sc.is_source_column_np(table)
+            file_record_count_col = sc.file_record_count_column_np(table)
             for row_idx in range(len(table)):
                 src_dfl = DeltaFileLocator.of(
                     is_source_col[row_idx],
                     stream_position_col[row_idx],
                     file_idx_col[row_idx],
+                    file_record_count_col[row_idx],
                 )
                 # TODO(pdames): merge contiguous record number ranges
                 src_file_id_to_row_indices[src_dfl].append(row_idx_col[row_idx])
 
         logger.info(f"Finished all dedupe rounds...")
-        mat_bucket_to_src_file_record_count = defaultdict(dict)
         mat_bucket_to_src_file_records: Dict[
             MaterializeBucketIndex, DeltaFileLocatorToRecords
         ] = defaultdict(dict)
@@ -210,29 +211,30 @@ def _timed_dedupe(
             mat_bucket_to_src_file_records[mat_bucket][src_dfl] = np.array(
                 src_row_indices,
             )
-            mat_bucket_to_src_file_record_count[mat_bucket][src_dfl] = len(
-                src_row_indices
-            )
 
         mat_bucket_to_dd_idx_obj_id: Dict[
             MaterializeBucketIndex, DedupeTaskIndexWithObjectId
         ] = {}
         for mat_bucket, src_file_records in mat_bucket_to_src_file_records.items():
-            object_ref =
-            pickled_object_ref = cloudpickle.dumps(object_ref)
+            object_ref = object_store.put(src_file_records)
             mat_bucket_to_dd_idx_obj_id[mat_bucket] = (
                 dedupe_task_index,
-
+                object_ref,
             )
             del object_ref
-            del pickled_object_ref
         logger.info(
             f"Count of materialize buckets with object refs: "
             f"{len(mat_bucket_to_dd_idx_obj_id)}"
         )
 
+        peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+
         return DedupeResult(
-            mat_bucket_to_dd_idx_obj_id,
+            mat_bucket_to_dd_idx_obj_id,
+            np.int64(total_deduped_records),
+            np.double(peak_memory_usage_bytes),
+            np.double(0.0),
+            np.double(time.time()),
         )
 
 
@@ -244,6 +246,7 @@ def dedupe(
     dedupe_task_index: int,
     enable_profiler: bool,
     metrics_config: MetricsConfig,
+    object_store: Optional[IObjectStore],
 ) -> DedupeResult:
     logger.info(f"[Dedupe task {dedupe_task_index}] Starting dedupe task...")
     dedupe_result, duration = timed_invocation(
@@ -253,11 +256,24 @@ def dedupe(
         num_materialize_buckets=num_materialize_buckets,
         dedupe_task_index=dedupe_task_index,
         enable_profiler=enable_profiler,
+        object_store=object_store,
     )
+
+    emit_metrics_time = 0.0
     if metrics_config:
-
-
+        emit_result, latency = timed_invocation(
+            func=emit_timer_metrics,
+            metrics_name="dedupe",
+            value=duration,
+            metrics_config=metrics_config,
        )
+        emit_metrics_time = latency
 
    logger.info(f"[Dedupe task index {dedupe_task_index}] Finished dedupe task...")
-    return
+    return DedupeResult(
+        dedupe_result[0],
+        dedupe_result[1],
+        dedupe_result[2],
+        np.double(emit_metrics_time),
+        dedupe_result[4],
+    )
```
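With these changes, `DedupeResult` is a five-element tuple: the materialize-bucket mapping, the deduped record count, peak node memory in bytes, metrics-emission latency, and a completion timestamp (the positional rebuild in `dedupe()` above shows the layout). A sketch of aggregating results across tasks by position, since this diff only shows the tuple positionally:

```python
def summarize_dedupe(results) -> dict:
    # Positions follow the DedupeResult construction shown above:
    # [0] mat bucket -> (dedupe task index, object ref)
    # [1] deduped record count, [2] peak memory bytes,
    # [3] telemetry seconds,    [4] task completion time
    return {
        "deduped_records": int(sum(r[1] for r in results)),
        "peak_memory_bytes": float(max(r[2] for r in results)),
        "telemetry_seconds": float(sum(r[3] for r in results)),
        "last_task_completed_at": float(max(r[4] for r in results)),
    }
```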
deltacat/compute/compactor/steps/hash_bucket.py

```diff
@@ -1,5 +1,6 @@
 import importlib
 import logging
+import time
 from contextlib import nullcontext
 from itertools import chain
 from typing import Generator, List, Optional, Tuple
@@ -30,6 +31,8 @@ from deltacat.utils.ray_utils.runtime import (
 from deltacat.utils.common import ReadKwargsProvider
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
+from deltacat.io.object_store import IObjectStore
+from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -114,11 +117,12 @@ def _group_file_records_by_pk_hash_bucket(
             hb_to_delta_file_envelopes[hb] = []
         hb_to_delta_file_envelopes[hb].append(
             DeltaFileEnvelope.of(
-                dfe.stream_position,
-                dfe.file_index,
-                dfe.delta_type,
-                table,
-                is_src_delta,
+                stream_position=dfe.stream_position,
+                file_index=dfe.file_index,
+                delta_type=dfe.delta_type,
+                table=table,
+                is_src_delta=is_src_delta,
+                file_record_count=dfe.file_record_count,
             )
         )
     return hb_to_delta_file_envelopes, total_record_count
@@ -157,10 +161,11 @@ def _read_delta_file_envelopes(
     for i, table in enumerate(tables):
         total_record_count += len(table)
         delta_file = DeltaFileEnvelope.of(
-            annotations[i].annotation_stream_position,
-            annotations[i].annotation_file_index,
-            annotations[i].annotation_delta_type,
-            table,
+            stream_position=annotations[i].annotation_stream_position,
+            file_index=annotations[i].annotation_file_index,
+            delta_type=annotations[i].annotation_delta_type,
+            table=table,
+            file_record_count=len(table),
         )
         delta_file_envelopes.append(delta_file)
     return delta_file_envelopes, total_record_count
@@ -175,6 +180,7 @@ def _timed_hash_bucket(
     num_groups: int,
     enable_profiler: bool,
     read_kwargs_provider: Optional[ReadKwargsProvider] = None,
+    object_store: Optional[IObjectStore] = None,
     deltacat_storage=unimplemented_deltacat_storage,
 ):
     task_id = get_current_ray_task_id()
@@ -203,12 +209,16 @@ def _timed_hash_bucket(
             deltacat_storage,
         )
         hash_bucket_group_to_obj_id, _ = group_hash_bucket_indices(
-            delta_file_envelope_groups,
-            num_buckets,
-            num_groups,
+            delta_file_envelope_groups, num_buckets, num_groups, object_store
         )
+
+        peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
         return HashBucketResult(
-            hash_bucket_group_to_obj_id,
+            hash_bucket_group_to_obj_id,
+            np.int64(total_record_count),
+            np.double(peak_memory_usage_bytes),
+            np.double(0.0),
+            np.double(time.time()),
         )
 
 
@@ -223,6 +233,7 @@ def hash_bucket(
     enable_profiler: bool,
     metrics_config: MetricsConfig,
     read_kwargs_provider: Optional[ReadKwargsProvider],
+    object_store: Optional[IObjectStore],
     deltacat_storage=unimplemented_deltacat_storage,
 ) -> HashBucketResult:
 
@@ -237,11 +248,25 @@ def hash_bucket(
         num_groups=num_groups,
         enable_profiler=enable_profiler,
         read_kwargs_provider=read_kwargs_provider,
+        object_store=object_store,
         deltacat_storage=deltacat_storage,
     )
+
+    emit_metrics_time = 0.0
     if metrics_config:
-
-
+        emit_result, latency = timed_invocation(
+            func=emit_timer_metrics,
+            metrics_name="hash_bucket",
+            value=duration,
+            metrics_config=metrics_config,
        )
+        emit_metrics_time = latency
+
    logger.info(f"Finished hash bucket task...")
-    return
+    return HashBucketResult(
+        hash_bucket_result[0],
+        hash_bucket_result[1],
+        hash_bucket_result[2],
+        np.double(emit_metrics_time),
+        hash_bucket_result[4],
+    )
```
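`hash_bucket` follows the same pattern as `dedupe`: the inner task records peak node memory and a completion timestamp, and the outer wrapper measures how long metric emission itself took before splicing that latency into position 3 of the result tuple. A self-contained sketch of that wrapper pattern, with a stub body standing in for `_timed_hash_bucket`:

```python
import numpy as np
from deltacat.utils.performance import timed_invocation
from deltacat.utils.metrics import emit_timer_metrics

def task_body() -> str:
    return "result"  # stand-in for _timed_hash_bucket(...)

metrics_config = None  # pass a real MetricsConfig to enable emission
result, duration = timed_invocation(func=task_body)
emit_metrics_time = 0.0
if metrics_config:
    _, latency = timed_invocation(
        func=emit_timer_metrics,
        metrics_name="hash_bucket",
        value=duration,
        metrics_config=metrics_config,
    )
    emit_metrics_time = latency
telemetry_seconds = np.double(emit_metrics_time)  # folded into the result
```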