deltacat 1.1.29__py3-none-any.whl → 1.1.31__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
 deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-__version__ = "1.1.29"
+__version__ = "1.1.31"
 
 
 __all__ = [
deltacat/compute/compactor_v2/utils/task_options.py CHANGED
@@ -1,11 +1,16 @@
 import logging
 from typing import Dict, Optional, List, Tuple, Any
 from deltacat import logs
+from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
+from deltacat.compute.compactor_v2.constants import (
+    AVERAGE_RECORD_SIZE_BYTES as DEFAULT_AVERAGE_RECORD_SIZE_BYTES,
+)
 from deltacat.compute.compactor_v2.model.merge_file_group import (
     LocalMergeFileGroupsProvider,
 )
 from deltacat.storage import (
     Manifest,
+    ManifestEntry,
     interface as unimplemented_deltacat_storage,
 )
 from deltacat.compute.compactor.model.delta_annotated import DeltaAnnotated
@@ -81,16 +86,27 @@ def _get_merge_task_options(
         and compacted_delta_manifest
         and round_completion_info.hb_index_to_entry_range
     ):
-
-        previous_inflation = (
-            round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
-            / round_completion_info.compacted_pyarrow_write_result.file_bytes
+        logger.debug_conditional(
+            f"[Merge task {index}]: Using previous compaction rounds to calculate merge memory: {round_completion_info.compacted_pyarrow_write_result}",
+            memory_logs_enabled,
+        )
+        previous_inflation: float = (
+            (
+                round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
+                / round_completion_info.compacted_pyarrow_write_result.file_bytes
+            )
+            if round_completion_info.compacted_pyarrow_write_result.file_bytes
+            else PYARROW_INFLATION_MULTIPLIER
         )
         debug_memory_params["previous_inflation"] = previous_inflation
 
-        average_record_size = (
-            round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
-            / round_completion_info.compacted_pyarrow_write_result.records
+        average_record_size: float = (
+            (
+                round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
+                / round_completion_info.compacted_pyarrow_write_result.records
+            )
+            if round_completion_info.compacted_pyarrow_write_result.records
+            else DEFAULT_AVERAGE_RECORD_SIZE_BYTES
         )
         debug_memory_params["average_record_size"] = average_record_size
 
@@ -106,31 +122,36 @@ def _get_merge_task_options(
                 str(hb_idx)
             ]
             for entry_index in range(entry_start, entry_end):
-                entry = compacted_delta_manifest.entries[entry_index]
-
-                current_entry_size = estimate_manifest_entry_size_bytes(
-                    entry=entry,
-                    operation_type=OperationType.PYARROW_DOWNLOAD,
-                    estimate_resources_params=estimate_resources_params,
+                entry: ManifestEntry = compacted_delta_manifest.entries[entry_index]
+                current_entry_size: float = (
+                    estimate_manifest_entry_size_bytes(
+                        entry=entry,
+                        operation_type=OperationType.PYARROW_DOWNLOAD,
+                        estimate_resources_params=estimate_resources_params,
+                    )
+                    or 0.0
                 )
-                current_entry_rows = estimate_manifest_entry_num_rows(
-                    entry=entry,
-                    operation_type=OperationType.PYARROW_DOWNLOAD,
-                    estimate_resources_params=estimate_resources_params,
+                current_entry_rows: int = (
+                    estimate_manifest_entry_num_rows(
+                        entry=entry,
+                        operation_type=OperationType.PYARROW_DOWNLOAD,
+                        estimate_resources_params=estimate_resources_params,
+                    )
+                    or 0
                 )
-
+                # NOTE: We can treat the current_entry_size and current_entry_rows as 0 as a None estimated entry size implies a 0 value
                 data_size += current_entry_size
                 num_rows += current_entry_rows
-
                 if primary_keys:
-                    pk_size = estimate_manifest_entry_column_size_bytes(
+                    pk_size: Optional[
+                        float
+                    ] = estimate_manifest_entry_column_size_bytes(
                         entry=entry,
                         columns=primary_keys,
                         operation_type=OperationType.PYARROW_DOWNLOAD,
                         estimate_resources_params=estimate_resources_params,
                     )
-
-                    if pk_size is None:
+                    if not pk_size:
                         pk_size_bytes += current_entry_size
                     else:
                         pk_size_bytes += pk_size
@@ -159,7 +180,6 @@ def _get_merge_task_options(
         f"[Merge task {index}]: Params used for calculating merge memory: {debug_memory_params}",
         memory_logs_enabled,
     )
-
     return _get_task_options(0.01, total_memory, ray_custom_resources)
 
 
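The guarded expressions above keep the merge-memory estimate from dividing by zero when the previous compaction round reported zero `file_bytes` or `records`; in that case the estimate falls back to `PYARROW_INFLATION_MULTIPLIER` and the compactor_v2 `AVERAGE_RECORD_SIZE_BYTES` default. A minimal sketch of the same guard pattern; `safe_ratio` is an illustrative helper, not a deltacat API:

```python
from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
from deltacat.compute.compactor_v2.constants import (
    AVERAGE_RECORD_SIZE_BYTES as DEFAULT_AVERAGE_RECORD_SIZE_BYTES,
)


def safe_ratio(numerator: float, denominator: float, default: float) -> float:
    # Illustrative helper: mirrors the "x / y if y else default" guard above.
    return numerator / denominator if denominator else default


# A previous round that wrote zero bytes/records now yields the defaults
# instead of raising ZeroDivisionError:
previous_inflation = safe_ratio(0, 0, PYARROW_INFLATION_MULTIPLIER)
average_record_size = safe_ratio(0, 0, DEFAULT_AVERAGE_RECORD_SIZE_BYTES)
```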
deltacat/tests/compute/compactor_v2/utils/test_task_options.py CHANGED
@@ -1,6 +1,37 @@
 import unittest
 import ray
-from deltacat.compute.compactor_v2.utils.task_options import _get_task_options
+from deltacat.compute.compactor_v2.utils.task_options import (
+    _get_task_options,
+    _get_merge_task_options,
+    logger,
+)
+from deltacat.compute.resource_estimation.model import (
+    EstimateResourcesParams,
+    ResourceEstimationMethod,
+)
+from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
+from deltacat.compute.compactor import (
+    PyArrowWriteResult,
+    RoundCompletionInfo,
+)
+from deltacat.types.media import (
+    ContentType,
+    ContentEncoding,
+)
+from deltacat.storage import (
+    DeltaLocator,
+    Manifest,
+    ManifestMeta,
+    ManifestEntry,
+    ManifestEntryList,
+    PartitionValues,
+)
+from unittest.mock import MagicMock
+from typing import Optional
+
+from deltacat.compute.compactor_v2.constants import (
+    AVERAGE_RECORD_SIZE_BYTES as DEFAULT_AVERAGE_RECORD_SIZE_BYTES,
+)
 
 
 @ray.remote
@@ -14,11 +45,95 @@ def throwing_func():
 
 
 class TestTaskOptions(unittest.TestCase):
+    TEST_INDEX = 0
+    TEST_HB_GROUP_IDX = 0
+    TEST_STREAM_POSITION = 1_000_000
+    TEST_NUM_HASH_GROUPS = 1
+
     @classmethod
     def setUpClass(cls):
         ray.init(local_mode=True, ignore_reinit_error=True)
         super().setUpClass()
 
+    @classmethod
+    def tearDownClass(cls) -> None:
+        ray.shutdown()
+
+    def _make_estimate_resource_params(
+        cls,
+        resource_estimation_method: Optional[
+            ResourceEstimationMethod
+        ] = ResourceEstimationMethod.DEFAULT,
+        previous_inflation: Optional[int] = 7,
+        average_record_size_bytes: Optional[int] = 1000,
+    ):
+        return EstimateResourcesParams.of(
+            resource_estimation_method=resource_estimation_method,
+            previous_inflation=previous_inflation,
+            average_record_size_bytes=average_record_size_bytes,
+        )
+
+    def _make_manifest(
+        self,
+        source_content_length: Optional[int] = 1000,
+        content_type: Optional[ContentType] = ContentType.PARQUET,
+        content_encoding: Optional[ContentEncoding] = ContentEncoding.IDENTITY,
+        partition_values: Optional[PartitionValues] = None,
+        uri: Optional[str] = "test",
+        url: Optional[str] = "test",
+        author: Optional[str] = "foo",
+        entry_uuid: Optional[str] = "foo",
+        manifest_uuid: Optional[str] = "bar",
+    ) -> Manifest:
+        meta = ManifestMeta.of(
+            10,
+            10,
+            content_type=content_type,
+            content_encoding=content_encoding,
+            source_content_length=source_content_length,
+            partition_values=partition_values,
+        )
+
+        return Manifest.of(
+            entries=ManifestEntryList.of(
+                [
+                    ManifestEntry.of(
+                        uri=uri, url=url, meta=meta, mandatory=True, uuid=entry_uuid
+                    )
+                ]
+            ),
+            author=author,
+            uuid=manifest_uuid,
+        )
+
+    def make_round_completion_info(
+        self,
+        high_watermark: Optional[int] = 1_000_000,
+        compacted_delta_locator: Optional[DeltaLocator] = None,
+        records_written: Optional[int] = 10,
+        bytes_written: Optional[int] = 10,
+        files_written: Optional[int] = 10,
+        rows_dropped: Optional[int] = 10,
+        sort_keys_bit_width: Optional[int] = 0,
+        hash_bucket_count: Optional[int] = 1,
+        hb_index_to_entry_range: Optional[dict] = None,
+    ) -> RoundCompletionInfo:
+        if compacted_delta_locator is None:
+            compacted_delta_locator = MagicMock(spec=DeltaLocator)
+
+        hb_index_to_entry_range = hb_index_to_entry_range or {"0": (0, 1)}
+
+        return RoundCompletionInfo.of(
+            compacted_delta_locator=compacted_delta_locator,
+            high_watermark=high_watermark,
+            compacted_pyarrow_write_result=PyArrowWriteResult.of(
+                records_written, bytes_written, files_written, rows_dropped
+            ),
+            sort_keys_bit_width=sort_keys_bit_width,
+            hb_index_to_entry_range=hb_index_to_entry_range,
+            hash_bucket_count=hash_bucket_count,
+        )
+
     def test_get_task_options_sanity(self):
         opts = _get_task_options(0.01, 0.01)
         result_ref = valid_func.options(**opts).remote()
@@ -31,3 +146,160 @@ class TestTaskOptions(unittest.TestCase):
         result_ref = throwing_func.options(**opts).remote()
 
         self.assertRaises(ConnectionAbortedError, lambda: ray.get(result_ref))
+
+    def test_get_merge_task_options_memory_logs_enabled_sanity(self):
+        test_index = 0
+        test_hb_group_idx = 0
+        test_debug_memory_params = {"merge_task_index": test_index}
+        test_estimate_memory_params = self._make_estimate_resource_params()
+        test_ray_custom_resources = {}
+        test_rcf = self.make_round_completion_info()
+        test_manifest = self._make_manifest()
+        expected_task_opts = {
+            "max_retries": 3,
+            "memory": 1680.64,
+            "num_cpus": 0.01,
+            "scheduling_strategy": "SPREAD",
+        }
+        expected_previous_inflation = 1.0
+        expected_average_record_size = 1.0
+        with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+            # At least one log of level DEBUG must be emitted
+            actual_merge_tasks_opts = _get_merge_task_options(
+                index=test_index,
+                hb_group_idx=test_hb_group_idx,
+                data_size=1,
+                pk_size_bytes=1,
+                num_rows=1,
+                num_hash_groups=1,
+                total_memory_buffer_percentage=1,
+                incremental_index_array_size=1,
+                debug_memory_params=test_debug_memory_params,
+                ray_custom_resources=test_ray_custom_resources,
+                estimate_resources_params=test_estimate_memory_params,
+                round_completion_info=test_rcf,
+                compacted_delta_manifest=test_manifest,
+                memory_logs_enabled=True,
+            )
+            assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+            log_message_round_completion_info = cm.records[0].getMessage()
+            log_message_debug_memory_params = cm.records[1].getMessage()
+            self.assertIn(
+                f"[Merge task {test_index}]: Using previous compaction rounds to calculate merge memory",
+                log_message_round_completion_info,
+            )
+            self.assertIn(
+                f"[Merge task {test_index}]: Params used for calculating merge memory",
+                log_message_debug_memory_params,
+            )
+            self.assertIn(
+                f"'previous_inflation': {expected_previous_inflation}",
+                log_message_debug_memory_params,
+            )
+            self.assertIn(
+                f"'average_record_size': {expected_average_record_size}",
+                log_message_debug_memory_params,
+            )
+
+    def test_get_merge_task_options_memory_logs_enabled_fallback_previous_inflation_fallback_average_record_size(
+        self,
+    ):
+        test_index = 0
+        test_hb_group_idx = 0
+        test_debug_memory_params = {"merge_task_index": test_index}
+        test_estimate_memory_params = self._make_estimate_resource_params()
+        test_ray_custom_resources = {}
+        test_rcf = self.make_round_completion_info(
+            bytes_written=0, records_written=0, files_written=0, rows_dropped=0
+        )
+        test_manifest = self._make_manifest()
+        expected_task_opts = {
+            "max_retries": 3,
+            "memory": 1680.64,
+            "num_cpus": 0.01,
+            "scheduling_strategy": "SPREAD",
+        }
+        expected_previous_inflation = PYARROW_INFLATION_MULTIPLIER
+        expected_average_record_size = DEFAULT_AVERAGE_RECORD_SIZE_BYTES
+        with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+            # At least one log of level DEBUG must be emitted
+            actual_merge_tasks_opts = _get_merge_task_options(
+                index=test_index,
+                hb_group_idx=test_hb_group_idx,
+                data_size=1,
+                pk_size_bytes=1,
+                num_rows=1,
+                num_hash_groups=1,
+                total_memory_buffer_percentage=1,
+                incremental_index_array_size=1,
+                debug_memory_params=test_debug_memory_params,
+                ray_custom_resources=test_ray_custom_resources,
+                estimate_resources_params=test_estimate_memory_params,
+                round_completion_info=test_rcf,
+                compacted_delta_manifest=test_manifest,
+                memory_logs_enabled=True,
+            )
+            assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+            log_message_round_completion_info = cm.records[0].getMessage()
+            log_message_debug_memory_params = cm.records[1].getMessage()
+            self.assertIn(
+                f"[Merge task {test_index}]: Using previous compaction rounds to calculate merge memory",
+                log_message_round_completion_info,
+            )
+            self.assertIn(
+                f"[Merge task {test_index}]: Params used for calculating merge memory",
+                log_message_debug_memory_params,
+            )
+            self.assertIn(
+                f"'previous_inflation': {expected_previous_inflation}",
+                log_message_debug_memory_params,
+            )
+            self.assertIn(
+                f"'average_record_size': {expected_average_record_size}",
+                log_message_debug_memory_params,
+            )
+
+    def test_get_merge_task_options_memory_logs_enabled_not_using_previous_round_completion_info(
+        self,
+    ):
+        test_index = 0
+        test_hb_group_idx = 0
+        test_debug_memory_params = {"merge_task_index": test_index}
+        test_estimate_memory_params = self._make_estimate_resource_params()
+        test_ray_custom_resources = {}
+        test_rcf = None
+        test_manifest = self._make_manifest()
+        expected_task_opts = {
+            "max_retries": 3,
+            "memory": 1680.64,
+            "num_cpus": 0.01,
+            "scheduling_strategy": "SPREAD",
+        }
+        with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+            # At least one log of level DEBUG must be emitted
+            actual_merge_tasks_opts = _get_merge_task_options(
+                index=test_index,
+                hb_group_idx=test_hb_group_idx,
+                data_size=1,
+                pk_size_bytes=1,
+                num_rows=1,
+                num_hash_groups=1,
+                total_memory_buffer_percentage=1,
+                incremental_index_array_size=1,
+                debug_memory_params=test_debug_memory_params,
+                ray_custom_resources=test_ray_custom_resources,
+                estimate_resources_params=test_estimate_memory_params,
+                round_completion_info=test_rcf,
+                compacted_delta_manifest=test_manifest,
+                memory_logs_enabled=True,
+            )
+            assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+            log_message_debug_memory_params = cm.records[0].getMessage()
+            self.assertIn(
+                f"[Merge task {test_index}]: Params used for calculating merge memory",
+                log_message_debug_memory_params,
+            )
+            self.assertNotIn(
+                "'average_record_size'",
+                log_message_debug_memory_params,
+            )
deltacat/tests/utils/test_pyarrow.py CHANGED
@@ -8,6 +8,7 @@ from deltacat.utils.pyarrow import (
     ReadKwargsProviderPyArrowSchemaOverride,
     RAISE_ON_EMPTY_CSV_KWARG,
     RAISE_ON_DECIMAL_OVERFLOW,
+    OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG,
 )
 import decimal
 from deltacat.types.media import ContentEncoding, ContentType
@@ -812,3 +813,54 @@ class TestS3FileToTable(TestCase):
         schema = result.schema
         schema_index = schema.get_field_index("n_legs")
         self.assertEqual(schema.field(schema_index).type, "int64")
+
+    def test_s3_file_to_table_when_utsv_gzip_and_content_type_overridden(self):
+        schema = pa.schema(
+            [("is_active", pa.string()), ("ship_datetime_utc", pa.timestamp("us"))]
+        )
+
+        # OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG has no effect on uTSV files
+        pa_kwargs_provider = lambda content_type, kwargs: {
+            "reader_type": "pyarrow",
+            OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
+            **kwargs,
+        }
+
+        result = s3_file_to_table(
+            GZIP_COMPRESSED_FILE_UTSV_PATH,
+            ContentType.UNESCAPED_TSV.value,
+            ContentEncoding.GZIP.value,
+            ["is_active", "ship_datetime_utc"],
+            None,
+            pa_read_func_kwargs_provider=pa_kwargs_provider,
+        )
+
+        self.assertEqual(len(result), 3)
+        self.assertEqual(len(result.column_names), 2)
+        result_schema = result.schema
+        for index, field in enumerate(result_schema):
+            self.assertEqual(field.name, schema.field(index).name)
+
+        self.assertEqual(result.schema.field(0).type, "string")
+
+    def test_s3_file_to_table_when_parquet_gzip_and_encoding_overridden(self):
+        pa_kwargs_provider = lambda content_type, kwargs: {
+            "reader_type": "pyarrow",
+            OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
+            **kwargs,
+        }
+
+        result = s3_file_to_table(
+            PARQUET_FILE_PATH,
+            ContentType.PARQUET.value,
+            ContentEncoding.GZIP.value,
+            ["n_legs", "animal"],
+            ["n_legs"],
+            pa_read_func_kwargs_provider=pa_kwargs_provider,
+        )
+
+        self.assertEqual(len(result), 6)
+        self.assertEqual(len(result.column_names), 1)
+        schema = result.schema
+        schema_index = schema.get_field_index("n_legs")
+        self.assertEqual(schema.field(schema_index).type, "int64")
deltacat/utils/pyarrow.py CHANGED
@@ -47,6 +47,7 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
 RAISE_ON_EMPTY_CSV_KWARG = "raise_on_empty_csv"
 READER_TYPE_KWARG = "reader_type"
+OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG = "override_content_encoding_for_parquet"
 
 """
 By default, round decimal values using half_to_even round mode when
@@ -543,6 +544,15 @@ def s3_file_to_table(
     if pa_read_func_kwargs_provider is not None:
         kwargs = pa_read_func_kwargs_provider(content_type, kwargs)
 
+    if OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG in kwargs:
+        new_content_encoding = kwargs.pop(OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG)
+        if content_type == ContentType.PARQUET.value:
+            logger.debug(
+                f"Overriding {s3_url} content encoding from {content_encoding} "
+                f"to {new_content_encoding}"
+            )
+            content_encoding = new_content_encoding
+
     if (
         content_type == ContentType.PARQUET.value
         and content_encoding == ContentEncoding.IDENTITY.value
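The new kwarg is consumed from the merged reader kwargs and only rewrites the content encoding when the content type is Parquet, which is what the tests above exercise. A minimal usage sketch, assuming the optional column arguments of s3_file_to_table can be omitted; the S3 URL is a placeholder:

```python
from deltacat.types.media import ContentEncoding, ContentType
from deltacat.utils.pyarrow import (
    s3_file_to_table,
    OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG,
)

# Ask the reader to treat a Parquet file labeled as GZIP-encoded
# as if it were IDENTITY-encoded (i.e., skip decompression).
kwargs_provider = lambda content_type, kwargs: {
    OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
    **kwargs,
}

table = s3_file_to_table(
    "s3://my-bucket/part-0.parquet",  # placeholder path
    ContentType.PARQUET.value,
    ContentEncoding.GZIP.value,
    pa_read_func_kwargs_provider=kwargs_provider,
)
```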
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 1.1.29
+Version: 1.1.31
 Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
@@ -1,4 +1,4 @@
-deltacat/__init__.py,sha256=DoUiDxmgMh8HUGOEAG7CUY0Q9Ip-S7gePDsL8XQO5kk,1778
+deltacat/__init__.py,sha256=gdOpCNy03T2HEQIQqSqopv0b0UL5pwXWa4McRHxMlAw,1778
 deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
 deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
 deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -77,7 +77,7 @@ deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2
 deltacat/compute/compactor_v2/utils/io.py,sha256=3m4dorxj-WD6Yu9_3gRE6gz3C-eNJA7nn02sHKwo-J8,6018
 deltacat/compute/compactor_v2/utils/merge.py,sha256=EV_iKhNc3WflgfLW1Q46dXUvyClx8VebWHGtninEfsI,5311
 deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=SbQ97M1Cxld-zZik2QMSzlj20g6JlENaQx_0PhlCIP8,12034
-deltacat/compute/compactor_v2/utils/task_options.py,sha256=W0jyWIIZ0tcSAGp8mhpnu1G8p3rmX4d3juCPpAJxnDM,12649
+deltacat/compute/compactor_v2/utils/task_options.py,sha256=0GoB_DLkCN1q8CVKTlWlDYt55qnpTDIa9fPyXJwB-cU,13801
 deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
 deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
 deltacat/compute/merge_on_read/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -156,7 +156,7 @@ deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=y8nNHq9ADH
 deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6iphCsVXxRp0zP1NTnKhfdmkg,328
 deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py,sha256=aFb9rzT_EK9k8qAMHPtpqd5btyEmll1So1loDmZkotQ,1769
-deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=37DkR1u_XwhedV9cGed6FFuJTC0XmuiowHJIa_Op6uA,865
+deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=YDQKUKv3Vv8S1fe0YQmjHTrwnWSliqKHIWGu0fEdKnI,11478
 deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/compute/resource_estimation/test_delta.py,sha256=HCL2oUnCqm0E26T3HLJjMhoAsHTJIWPYGwIKRgM_H7E,25712
 deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
@@ -180,7 +180,7 @@ deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iy
 deltacat/tests/utils/test_daft.py,sha256=kY8lkXoQvyWunok8UvOsh1An297rb3jcnstTuIAyAlc,8232
 deltacat/tests/utils/test_metrics.py,sha256=Ym9nOz1EtB180pLmvugihj1sDTNDMb5opIjjr5Nmcls,16339
 deltacat/tests/utils/test_placement.py,sha256=g61wVOMkHe4YJeR9Oxg_BOVQ6bhHHbC3IBYv8YhUu94,597
-deltacat/tests/utils/test_pyarrow.py,sha256=fDjDkGPjdRZA3kgjgiQRym9shdeDYgkdDPYU2a7IEUk,30790
+deltacat/tests/utils/test_pyarrow.py,sha256=JmhcuphXD8B2SLnOgrPgrqCcdHg_BL6IjFAiNRmuA1I,32790
 deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
 deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
 deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -201,7 +201,7 @@ deltacat/utils/numpy.py,sha256=SpHKKvC-K8NINTWGVfTZ5-gBFTGYqaXjjgKFhsdUjwg,2049
 deltacat/utils/pandas.py,sha256=q99mlRB7tymICMcNbfGLfLqFu_C-feyPZKZm2CWJJVc,9574
 deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
 deltacat/utils/placement.py,sha256=Lj20fb-eq8rgMdm_M2MBMfDLwhDM1sS1nJj2DvIK56s,12060
-deltacat/utils/pyarrow.py,sha256=R3KkJPenE48rS3VrfFKSkJerX94f4e7X2dUPBQg44DY,34339
+deltacat/utils/pyarrow.py,sha256=9Dggs8waJrbgP62NG4ssZsl-9fl3cJ4fjYLsJ1HjhHQ,34847
 deltacat/utils/resources.py,sha256=Ax1OgLLbZI4oYpp4Ki27OLaST-7I-AJgZwU87FVfY8g,8253
 deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
 deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
@@ -211,8 +211,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
-deltacat-1.1.29.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deltacat-1.1.29.dist-info/METADATA,sha256=ZverlgFUJV4wGJao8tusRCv_sRNX4KJ4RTNAGvBCJes,1733
-deltacat-1.1.29.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-deltacat-1.1.29.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
-deltacat-1.1.29.dist-info/RECORD,,
+deltacat-1.1.31.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-1.1.31.dist-info/METADATA,sha256=JrWYw0uKVprpH34i-_cOUYjWI3egRQx0rhCn--OnE_0,1733
+deltacat-1.1.31.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+deltacat-1.1.31.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-1.1.31.dist-info/RECORD,,