deltacat 1.1.29__py3-none-any.whl → 1.1.31__py3-none-any.whl

deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
- __version__ = "1.1.29"
+ __version__ = "1.1.31"
 
 
  __all__ = [
deltacat/compute/compactor_v2/utils/task_options.py CHANGED
@@ -1,11 +1,16 @@
  import logging
  from typing import Dict, Optional, List, Tuple, Any
  from deltacat import logs
+ from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
+ from deltacat.compute.compactor_v2.constants import (
+     AVERAGE_RECORD_SIZE_BYTES as DEFAULT_AVERAGE_RECORD_SIZE_BYTES,
+ )
  from deltacat.compute.compactor_v2.model.merge_file_group import (
      LocalMergeFileGroupsProvider,
  )
  from deltacat.storage import (
      Manifest,
+     ManifestEntry,
      interface as unimplemented_deltacat_storage,
  )
  from deltacat.compute.compactor.model.delta_annotated import DeltaAnnotated
@@ -81,16 +86,27 @@ def _get_merge_task_options(
          and compacted_delta_manifest
          and round_completion_info.hb_index_to_entry_range
      ):
-
-         previous_inflation = (
-             round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
-             / round_completion_info.compacted_pyarrow_write_result.file_bytes
+         logger.debug_conditional(
+             f"[Merge task {index}]: Using previous compaction rounds to calculate merge memory: {round_completion_info.compacted_pyarrow_write_result}",
+             memory_logs_enabled,
+         )
+         previous_inflation: float = (
+             (
+                 round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
+                 / round_completion_info.compacted_pyarrow_write_result.file_bytes
+             )
+             if round_completion_info.compacted_pyarrow_write_result.file_bytes
+             else PYARROW_INFLATION_MULTIPLIER
          )
          debug_memory_params["previous_inflation"] = previous_inflation
 
-         average_record_size = (
-             round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
-             / round_completion_info.compacted_pyarrow_write_result.records
+         average_record_size: float = (
+             (
+                 round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
+                 / round_completion_info.compacted_pyarrow_write_result.records
+             )
+             if round_completion_info.compacted_pyarrow_write_result.records
+             else DEFAULT_AVERAGE_RECORD_SIZE_BYTES
          )
          debug_memory_params["average_record_size"] = average_record_size
 
@@ -106,31 +122,36 @@ def _get_merge_task_options(
              str(hb_idx)
          ]
          for entry_index in range(entry_start, entry_end):
-             entry = compacted_delta_manifest.entries[entry_index]
-
-             current_entry_size = estimate_manifest_entry_size_bytes(
-                 entry=entry,
-                 operation_type=OperationType.PYARROW_DOWNLOAD,
-                 estimate_resources_params=estimate_resources_params,
+             entry: ManifestEntry = compacted_delta_manifest.entries[entry_index]
+             current_entry_size: float = (
+                 estimate_manifest_entry_size_bytes(
+                     entry=entry,
+                     operation_type=OperationType.PYARROW_DOWNLOAD,
+                     estimate_resources_params=estimate_resources_params,
+                 )
+                 or 0.0
              )
-             current_entry_rows = estimate_manifest_entry_num_rows(
-                 entry=entry,
-                 operation_type=OperationType.PYARROW_DOWNLOAD,
-                 estimate_resources_params=estimate_resources_params,
+             current_entry_rows: int = (
+                 estimate_manifest_entry_num_rows(
+                     entry=entry,
+                     operation_type=OperationType.PYARROW_DOWNLOAD,
+                     estimate_resources_params=estimate_resources_params,
+                 )
+                 or 0
              )
-
+             # NOTE: We can treat the current_entry_size and current_entry_rows as 0 as a None estimated entry size implies a 0 value
              data_size += current_entry_size
              num_rows += current_entry_rows
-
              if primary_keys:
-                 pk_size = estimate_manifest_entry_column_size_bytes(
+                 pk_size: Optional[
+                     float
+                 ] = estimate_manifest_entry_column_size_bytes(
                      entry=entry,
                      columns=primary_keys,
                      operation_type=OperationType.PYARROW_DOWNLOAD,
                      estimate_resources_params=estimate_resources_params,
                  )
-
-                 if pk_size is None:
+                 if not pk_size:
                      pk_size_bytes += current_entry_size
                  else:
                      pk_size_bytes += pk_size
@@ -159,7 +180,6 @@ def _get_merge_task_options(
          f"[Merge task {index}]: Params used for calculating merge memory: {debug_memory_params}",
          memory_logs_enabled,
      )
-
      return _get_task_options(0.01, total_memory, ray_custom_resources)
 
 
deltacat/tests/compute/compactor_v2/utils/test_task_options.py CHANGED
@@ -1,6 +1,37 @@
  import unittest
  import ray
- from deltacat.compute.compactor_v2.utils.task_options import _get_task_options
+ from deltacat.compute.compactor_v2.utils.task_options import (
+     _get_task_options,
+     _get_merge_task_options,
+     logger,
+ )
+ from deltacat.compute.resource_estimation.model import (
+     EstimateResourcesParams,
+     ResourceEstimationMethod,
+ )
+ from deltacat.constants import PYARROW_INFLATION_MULTIPLIER
+ from deltacat.compute.compactor import (
+     PyArrowWriteResult,
+     RoundCompletionInfo,
+ )
+ from deltacat.types.media import (
+     ContentType,
+     ContentEncoding,
+ )
+ from deltacat.storage import (
+     DeltaLocator,
+     Manifest,
+     ManifestMeta,
+     ManifestEntry,
+     ManifestEntryList,
+     PartitionValues,
+ )
+ from unittest.mock import MagicMock
+ from typing import Optional
+
+ from deltacat.compute.compactor_v2.constants import (
+     AVERAGE_RECORD_SIZE_BYTES as DEFAULT_AVERAGE_RECORD_SIZE_BYTES,
+ )
 
 
  @ray.remote
@@ -14,11 +45,95 @@ def throwing_func():
 
 
  class TestTaskOptions(unittest.TestCase):
+     TEST_INDEX = 0
+     TEST_HB_GROUP_IDX = 0
+     TEST_STREAM_POSITION = 1_000_000
+     TEST_NUM_HASH_GROUPS = 1
+
      @classmethod
      def setUpClass(cls):
          ray.init(local_mode=True, ignore_reinit_error=True)
          super().setUpClass()
 
+     @classmethod
+     def tearDownClass(cls) -> None:
+         ray.shutdown()
+
+     def _make_estimate_resource_params(
+         cls,
+         resource_estimation_method: Optional[
+             ResourceEstimationMethod
+         ] = ResourceEstimationMethod.DEFAULT,
+         previous_inflation: Optional[int] = 7,
+         average_record_size_bytes: Optional[int] = 1000,
+     ):
+         return EstimateResourcesParams.of(
+             resource_estimation_method=resource_estimation_method,
+             previous_inflation=previous_inflation,
+             average_record_size_bytes=average_record_size_bytes,
+         )
+
+     def _make_manifest(
+         self,
+         source_content_length: Optional[int] = 1000,
+         content_type: Optional[ContentType] = ContentType.PARQUET,
+         content_encoding: Optional[ContentEncoding] = ContentEncoding.IDENTITY,
+         partition_values: Optional[PartitionValues] = None,
+         uri: Optional[str] = "test",
+         url: Optional[str] = "test",
+         author: Optional[str] = "foo",
+         entry_uuid: Optional[str] = "foo",
+         manifest_uuid: Optional[str] = "bar",
+     ) -> Manifest:
+         meta = ManifestMeta.of(
+             10,
+             10,
+             content_type=content_type,
+             content_encoding=content_encoding,
+             source_content_length=source_content_length,
+             partition_values=partition_values,
+         )
+
+         return Manifest.of(
+             entries=ManifestEntryList.of(
+                 [
+                     ManifestEntry.of(
+                         uri=uri, url=url, meta=meta, mandatory=True, uuid=entry_uuid
+                     )
+                 ]
+             ),
+             author=author,
+             uuid=manifest_uuid,
+         )
+
+     def make_round_completion_info(
+         self,
+         high_watermark: Optional[int] = 1_000_000,
+         compacted_delta_locator: Optional[DeltaLocator] = None,
+         records_written: Optional[int] = 10,
+         bytes_written: Optional[int] = 10,
+         files_written: Optional[int] = 10,
+         rows_dropped: Optional[int] = 10,
+         sort_keys_bit_width: Optional[int] = 0,
+         hash_bucket_count: Optional[int] = 1,
+         hb_index_to_entry_range: Optional[dict] = None,
+     ) -> RoundCompletionInfo:
+         if compacted_delta_locator is None:
+             compacted_delta_locator = MagicMock(spec=DeltaLocator)
+
+         hb_index_to_entry_range = hb_index_to_entry_range or {"0": (0, 1)}
+
+         return RoundCompletionInfo.of(
+             compacted_delta_locator=compacted_delta_locator,
+             high_watermark=high_watermark,
+             compacted_pyarrow_write_result=PyArrowWriteResult.of(
+                 records_written, bytes_written, files_written, rows_dropped
+             ),
+             sort_keys_bit_width=sort_keys_bit_width,
+             hb_index_to_entry_range=hb_index_to_entry_range,
+             hash_bucket_count=hash_bucket_count,
+         )
+
      def test_get_task_options_sanity(self):
          opts = _get_task_options(0.01, 0.01)
          result_ref = valid_func.options(**opts).remote()
@@ -31,3 +146,160 @@ class TestTaskOptions(unittest.TestCase):
          result_ref = throwing_func.options(**opts).remote()
 
          self.assertRaises(ConnectionAbortedError, lambda: ray.get(result_ref))
+
+     def test_get_merge_task_options_memory_logs_enabled_sanity(self):
+         test_index = 0
+         test_hb_group_idx = 0
+         test_debug_memory_params = {"merge_task_index": test_index}
+         test_estimate_memory_params = self._make_estimate_resource_params()
+         test_ray_custom_resources = {}
+         test_rcf = self.make_round_completion_info()
+         test_manifest = self._make_manifest()
+         expected_task_opts = {
+             "max_retries": 3,
+             "memory": 1680.64,
+             "num_cpus": 0.01,
+             "scheduling_strategy": "SPREAD",
+         }
+         expected_previous_inflation = 1.0
+         expected_average_record_size = 1.0
+         with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+             # At least one log of level DEBUG must be emitted
+             actual_merge_tasks_opts = _get_merge_task_options(
+                 index=test_index,
+                 hb_group_idx=test_hb_group_idx,
+                 data_size=1,
+                 pk_size_bytes=1,
+                 num_rows=1,
+                 num_hash_groups=1,
+                 total_memory_buffer_percentage=1,
+                 incremental_index_array_size=1,
+                 debug_memory_params=test_debug_memory_params,
+                 ray_custom_resources=test_ray_custom_resources,
+                 estimate_resources_params=test_estimate_memory_params,
+                 round_completion_info=test_rcf,
+                 compacted_delta_manifest=test_manifest,
+                 memory_logs_enabled=True,
+             )
+         assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+         log_message_round_completion_info = cm.records[0].getMessage()
+         log_message_debug_memory_params = cm.records[1].getMessage()
+         self.assertIn(
+             f"[Merge task {test_index}]: Using previous compaction rounds to calculate merge memory",
+             log_message_round_completion_info,
+         )
+         self.assertIn(
+             f"[Merge task {test_index}]: Params used for calculating merge memory",
+             log_message_debug_memory_params,
+         )
+         self.assertIn(
+             f"'previous_inflation': {expected_previous_inflation}",
+             log_message_debug_memory_params,
+         )
+         self.assertIn(
+             f"'average_record_size': {expected_average_record_size}",
+             log_message_debug_memory_params,
+         )
+
+     def test_get_merge_task_options_memory_logs_enabled_fallback_previous_inflation_fallback_average_record_size(
+         self,
+     ):
+         test_index = 0
+         test_hb_group_idx = 0
+         test_debug_memory_params = {"merge_task_index": test_index}
+         test_estimate_memory_params = self._make_estimate_resource_params()
+         test_ray_custom_resources = {}
+         test_rcf = self.make_round_completion_info(
+             bytes_written=0, records_written=0, files_written=0, rows_dropped=0
+         )
+         test_manifest = self._make_manifest()
+         expected_task_opts = {
+             "max_retries": 3,
+             "memory": 1680.64,
+             "num_cpus": 0.01,
+             "scheduling_strategy": "SPREAD",
+         }
+         expected_previous_inflation = PYARROW_INFLATION_MULTIPLIER
+         expected_average_record_size = DEFAULT_AVERAGE_RECORD_SIZE_BYTES
+         with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+             # At least one log of level DEBUG must be emitted
+             actual_merge_tasks_opts = _get_merge_task_options(
+                 index=test_index,
+                 hb_group_idx=test_hb_group_idx,
+                 data_size=1,
+                 pk_size_bytes=1,
+                 num_rows=1,
+                 num_hash_groups=1,
+                 total_memory_buffer_percentage=1,
+                 incremental_index_array_size=1,
+                 debug_memory_params=test_debug_memory_params,
+                 ray_custom_resources=test_ray_custom_resources,
+                 estimate_resources_params=test_estimate_memory_params,
+                 round_completion_info=test_rcf,
+                 compacted_delta_manifest=test_manifest,
+                 memory_logs_enabled=True,
+             )
+         assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+         log_message_round_completion_info = cm.records[0].getMessage()
+         log_message_debug_memory_params = cm.records[1].getMessage()
+         self.assertIn(
+             f"[Merge task {test_index}]: Using previous compaction rounds to calculate merge memory",
+             log_message_round_completion_info,
+         )
+         self.assertIn(
+             f"[Merge task {test_index}]: Params used for calculating merge memory",
+             log_message_debug_memory_params,
+         )
+         self.assertIn(
+             f"'previous_inflation': {expected_previous_inflation}",
+             log_message_debug_memory_params,
+         )
+         self.assertIn(
+             f"'average_record_size': {expected_average_record_size}",
+             log_message_debug_memory_params,
+         )
+
+     def test_get_merge_task_options_memory_logs_enabled_not_using_previous_round_completion_info(
+         self,
+     ):
+         test_index = 0
+         test_hb_group_idx = 0
+         test_debug_memory_params = {"merge_task_index": test_index}
+         test_estimate_memory_params = self._make_estimate_resource_params()
+         test_ray_custom_resources = {}
+         test_rcf = None
+         test_manifest = self._make_manifest()
+         expected_task_opts = {
+             "max_retries": 3,
+             "memory": 1680.64,
+             "num_cpus": 0.01,
+             "scheduling_strategy": "SPREAD",
+         }
+         with self.assertLogs(logger=logger.name, level="DEBUG") as cm:
+             # At least one log of level DEBUG must be emitted
+             actual_merge_tasks_opts = _get_merge_task_options(
+                 index=test_index,
+                 hb_group_idx=test_hb_group_idx,
+                 data_size=1,
+                 pk_size_bytes=1,
+                 num_rows=1,
+                 num_hash_groups=1,
+                 total_memory_buffer_percentage=1,
+                 incremental_index_array_size=1,
+                 debug_memory_params=test_debug_memory_params,
+                 ray_custom_resources=test_ray_custom_resources,
+                 estimate_resources_params=test_estimate_memory_params,
+                 round_completion_info=test_rcf,
+                 compacted_delta_manifest=test_manifest,
+                 memory_logs_enabled=True,
+             )
+         assert {k: actual_merge_tasks_opts[k] for k in expected_task_opts}
+         log_message_debug_memory_params = cm.records[0].getMessage()
+         self.assertIn(
+             f"[Merge task {test_index}]: Params used for calculating merge memory",
+             log_message_debug_memory_params,
+         )
+         self.assertNotIn(
+             "'average_record_size'",
+             log_message_debug_memory_params,
+         )
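The three new tests above capture the conditional debug logs with unittest's assertLogs context manager. The standalone sketch below illustrates only that capture pattern; the logger name "example" is a placeholder and is not deltacat's configured logger.

import logging
import unittest


class AssertLogsPatternExample(unittest.TestCase):
    # Illustration of the assertLogs pattern used above; "example" is a
    # placeholder logger name rather than deltacat's logger.
    def test_records_preserve_emission_order(self):
        example_logger = logging.getLogger("example")
        with self.assertLogs(logger=example_logger.name, level="DEBUG") as cm:
            example_logger.debug("first: %s", {"previous_inflation": 1.0})
            example_logger.debug("second")
        # cm.records holds the captured LogRecords in emission order.
        self.assertIn("previous_inflation", cm.records[0].getMessage())
        self.assertEqual(len(cm.records), 2)


if __name__ == "__main__":
    unittest.main()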
deltacat/tests/utils/test_pyarrow.py CHANGED
@@ -8,6 +8,7 @@ from deltacat.utils.pyarrow import (
      ReadKwargsProviderPyArrowSchemaOverride,
      RAISE_ON_EMPTY_CSV_KWARG,
      RAISE_ON_DECIMAL_OVERFLOW,
+     OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG,
  )
  import decimal
  from deltacat.types.media import ContentEncoding, ContentType
@@ -812,3 +813,54 @@ class TestS3FileToTable(TestCase):
          schema = result.schema
          schema_index = schema.get_field_index("n_legs")
          self.assertEqual(schema.field(schema_index).type, "int64")
+
+     def test_s3_file_to_table_when_utsv_gzip_and_content_type_overridden(self):
+         schema = pa.schema(
+             [("is_active", pa.string()), ("ship_datetime_utc", pa.timestamp("us"))]
+         )
+
+         # OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG has no effect on uTSV files
+         pa_kwargs_provider = lambda content_type, kwargs: {
+             "reader_type": "pyarrow",
+             OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
+             **kwargs,
+         }
+
+         result = s3_file_to_table(
+             GZIP_COMPRESSED_FILE_UTSV_PATH,
+             ContentType.UNESCAPED_TSV.value,
+             ContentEncoding.GZIP.value,
+             ["is_active", "ship_datetime_utc"],
+             None,
+             pa_read_func_kwargs_provider=pa_kwargs_provider,
+         )
+
+         self.assertEqual(len(result), 3)
+         self.assertEqual(len(result.column_names), 2)
+         result_schema = result.schema
+         for index, field in enumerate(result_schema):
+             self.assertEqual(field.name, schema.field(index).name)
+
+         self.assertEqual(result.schema.field(0).type, "string")
+
+     def test_s3_file_to_table_when_parquet_gzip_and_encoding_overridden(self):
+         pa_kwargs_provider = lambda content_type, kwargs: {
+             "reader_type": "pyarrow",
+             OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
+             **kwargs,
+         }
+
+         result = s3_file_to_table(
+             PARQUET_FILE_PATH,
+             ContentType.PARQUET.value,
+             ContentEncoding.GZIP.value,
+             ["n_legs", "animal"],
+             ["n_legs"],
+             pa_read_func_kwargs_provider=pa_kwargs_provider,
+         )
+
+         self.assertEqual(len(result), 6)
+         self.assertEqual(len(result.column_names), 1)
+         schema = result.schema
+         schema_index = schema.get_field_index("n_legs")
+         self.assertEqual(schema.field(schema_index).type, "int64")
deltacat/utils/pyarrow.py CHANGED
@@ -47,6 +47,7 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
  RAISE_ON_EMPTY_CSV_KWARG = "raise_on_empty_csv"
  READER_TYPE_KWARG = "reader_type"
+ OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG = "override_content_encoding_for_parquet"
 
  """
  By default, round decimal values using half_to_even round mode when
@@ -543,6 +544,15 @@ def s3_file_to_table(
      if pa_read_func_kwargs_provider is not None:
          kwargs = pa_read_func_kwargs_provider(content_type, kwargs)
 
+     if OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG in kwargs:
+         new_content_encoding = kwargs.pop(OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG)
+         if content_type == ContentType.PARQUET.value:
+             logger.debug(
+                 f"Overriding {s3_url} content encoding from {content_encoding} "
+                 f"to {new_content_encoding}"
+             )
+             content_encoding = new_content_encoding
+
      if (
          content_type == ContentType.PARQUET.value
          and content_encoding == ContentEncoding.IDENTITY.value
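As exercised by the new tests in test_pyarrow.py above, the override is requested through a pa_read_func_kwargs_provider and only takes effect for Parquet content; other content types ignore it. A minimal usage sketch, with a hypothetical S3 path, is:

from deltacat.types.media import ContentEncoding, ContentType
from deltacat.utils.pyarrow import (
    OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG,
    s3_file_to_table,
)


def kwargs_provider(content_type, kwargs):
    # Ask s3_file_to_table to treat the Parquet payload as identity-encoded,
    # even though the supplied content encoding says GZIP.
    return {
        "reader_type": "pyarrow",
        OVERRIDE_CONTENT_ENCODING_FOR_PARQUET_KWARG: ContentEncoding.IDENTITY.value,
        **kwargs,
    }


# The S3 path below is a placeholder for illustration only.
table = s3_file_to_table(
    "s3://example-bucket/example.parquet",
    ContentType.PARQUET.value,
    ContentEncoding.GZIP.value,
    ["n_legs", "animal"],
    None,
    pa_read_func_kwargs_provider=kwargs_provider,
)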
deltacat-1.1.29.dist-info/METADATA → deltacat-1.1.31.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: deltacat
- Version: 1.1.29
+ Version: 1.1.31
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
  Home-page: https://github.com/ray-project/deltacat
  Author: Ray Team
deltacat-1.1.29.dist-info/RECORD → deltacat-1.1.31.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
- deltacat/__init__.py,sha256=DoUiDxmgMh8HUGOEAG7CUY0Q9Ip-S7gePDsL8XQO5kk,1778
+ deltacat/__init__.py,sha256=gdOpCNy03T2HEQIQqSqopv0b0UL5pwXWa4McRHxMlAw,1778
  deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
  deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
  deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -77,7 +77,7 @@ deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2
  deltacat/compute/compactor_v2/utils/io.py,sha256=3m4dorxj-WD6Yu9_3gRE6gz3C-eNJA7nn02sHKwo-J8,6018
  deltacat/compute/compactor_v2/utils/merge.py,sha256=EV_iKhNc3WflgfLW1Q46dXUvyClx8VebWHGtninEfsI,5311
  deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=SbQ97M1Cxld-zZik2QMSzlj20g6JlENaQx_0PhlCIP8,12034
- deltacat/compute/compactor_v2/utils/task_options.py,sha256=W0jyWIIZ0tcSAGp8mhpnu1G8p3rmX4d3juCPpAJxnDM,12649
+ deltacat/compute/compactor_v2/utils/task_options.py,sha256=0GoB_DLkCN1q8CVKTlWlDYt55qnpTDIa9fPyXJwB-cU,13801
  deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
  deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
  deltacat/compute/merge_on_read/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -156,7 +156,7 @@ deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=y8nNHq9ADH
  deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6iphCsVXxRp0zP1NTnKhfdmkg,328
  deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py,sha256=aFb9rzT_EK9k8qAMHPtpqd5btyEmll1So1loDmZkotQ,1769
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=37DkR1u_XwhedV9cGed6FFuJTC0XmuiowHJIa_Op6uA,865
+ deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=YDQKUKv3Vv8S1fe0YQmjHTrwnWSliqKHIWGu0fEdKnI,11478
  deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deltacat/tests/compute/resource_estimation/test_delta.py,sha256=HCL2oUnCqm0E26T3HLJjMhoAsHTJIWPYGwIKRgM_H7E,25712
  deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
@@ -180,7 +180,7 @@ deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iy
  deltacat/tests/utils/test_daft.py,sha256=kY8lkXoQvyWunok8UvOsh1An297rb3jcnstTuIAyAlc,8232
  deltacat/tests/utils/test_metrics.py,sha256=Ym9nOz1EtB180pLmvugihj1sDTNDMb5opIjjr5Nmcls,16339
  deltacat/tests/utils/test_placement.py,sha256=g61wVOMkHe4YJeR9Oxg_BOVQ6bhHHbC3IBYv8YhUu94,597
- deltacat/tests/utils/test_pyarrow.py,sha256=fDjDkGPjdRZA3kgjgiQRym9shdeDYgkdDPYU2a7IEUk,30790
+ deltacat/tests/utils/test_pyarrow.py,sha256=JmhcuphXD8B2SLnOgrPgrqCcdHg_BL6IjFAiNRmuA1I,32790
  deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
  deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
  deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -201,7 +201,7 @@ deltacat/utils/numpy.py,sha256=SpHKKvC-K8NINTWGVfTZ5-gBFTGYqaXjjgKFhsdUjwg,2049
  deltacat/utils/pandas.py,sha256=q99mlRB7tymICMcNbfGLfLqFu_C-feyPZKZm2CWJJVc,9574
  deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
  deltacat/utils/placement.py,sha256=Lj20fb-eq8rgMdm_M2MBMfDLwhDM1sS1nJj2DvIK56s,12060
- deltacat/utils/pyarrow.py,sha256=R3KkJPenE48rS3VrfFKSkJerX94f4e7X2dUPBQg44DY,34339
+ deltacat/utils/pyarrow.py,sha256=9Dggs8waJrbgP62NG4ssZsl-9fl3cJ4fjYLsJ1HjhHQ,34847
  deltacat/utils/resources.py,sha256=Ax1OgLLbZI4oYpp4Ki27OLaST-7I-AJgZwU87FVfY8g,8253
  deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
  deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
@@ -211,8 +211,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
  deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
  deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
- deltacat-1.1.29.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- deltacat-1.1.29.dist-info/METADATA,sha256=ZverlgFUJV4wGJao8tusRCv_sRNX4KJ4RTNAGvBCJes,1733
- deltacat-1.1.29.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
- deltacat-1.1.29.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
- deltacat-1.1.29.dist-info/RECORD,,
+ deltacat-1.1.31.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ deltacat-1.1.31.dist-info/METADATA,sha256=JrWYw0uKVprpH34i-_cOUYjWI3egRQx0rhCn--OnE_0,1733
+ deltacat-1.1.31.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ deltacat-1.1.31.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+ deltacat-1.1.31.dist-info/RECORD,,