deltacat 1.1.16__py3-none-any.whl → 1.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.16"
47
+ __version__ = "1.1.18"
48
48
 
49
49
 
50
50
  __all__ = [
deltacat/aws/constants.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import botocore
2
2
  from typing import Set
3
3
  from daft.exceptions import DaftTransientError
4
-
5
4
  from deltacat.utils.common import env_integer, env_string
6
5
 
7
6
 
@@ -27,8 +27,11 @@ def _append_sha1_hash_to_table(table: pa.Table, hash_column: pa.Array) -> pa.Tab
27
27
 
28
28
  result = []
29
29
  for hash_value in hash_column_np:
30
- assert hash_value is not None, f"Expected non-null primary key"
31
- result.append(hashlib.sha1(hash_value.encode("utf-8")).hexdigest())
30
+ if hash_value is None:
31
+ result.append(None)
32
+ logger.info("A primary key hash is null")
33
+ else:
34
+ result.append(hashlib.sha1(hash_value.encode("utf-8")).hexdigest())
32
35
 
33
36
  return sc.append_pk_hash_string_column(table, result)
34
37
 
@@ -191,7 +194,7 @@ def generate_pk_hash_column(
191
194
  pk_columns.append(sliced_string_cast(table[pk_name]))
192
195
 
193
196
  pk_columns.append(PK_DELIMITER)
194
- hash_column = pc.binary_join_element_wise(*pk_columns)
197
+ hash_column = pc.binary_join_element_wise(*pk_columns, null_handling="replace")
195
198
  return hash_column
196
199
 
197
200
  def _generate_uuid(table: pa.Table) -> pa.Array:
@@ -345,8 +348,10 @@ def hash_group_index_to_hash_bucket_indices(
345
348
  return range(hb_group, num_buckets, num_groups)
346
349
 
347
350
 
348
- def pk_digest_to_hash_bucket_index(digest: str, num_buckets: int) -> int:
351
+ def pk_digest_to_hash_bucket_index(digest: Optional[str], num_buckets: int) -> int:
349
352
  """
350
353
  Generates the hash bucket index from the given digest.
351
354
  """
355
+ if digest is None:
356
+ return 0
352
357
  return int(digest, 16) % num_buckets
deltacat/exceptions.py CHANGED
@@ -299,7 +299,7 @@ def _categorize_tenacity_error(e: tenacity.RetryError):
299
299
  def _categorize_dependency_pyarrow_error(e: ArrowException):
300
300
  if isinstance(e, ArrowInvalid):
301
301
  raise DependencyPyarrowInvalidError(
302
- f"Pyarrow Invalid error occurred. Reason: {e}"
302
+ f"Pyarrow Invalid error occurred. {e}"
303
303
  ) from e
304
304
  elif isinstance(e, ArrowCapacityError):
305
305
  raise DependencyPyarrowCapacityError("Pyarrow Capacity error occurred.") from e
@@ -308,9 +308,7 @@ def _categorize_dependency_pyarrow_error(e: ArrowException):
308
308
 
309
309
 
310
310
  def _categorize_assertion_error(e: BaseException):
311
- raise ValidationError(
312
- f"One of the assertions in DeltaCAT has failed. Reason: {e}"
313
- ) from e
311
+ raise ValidationError(f"One of the assertions in DeltaCAT has failed. {e}") from e
314
312
 
315
313
 
316
314
  def _categorize_daft_error(e: DaftCoreException):
deltacat/logs.py CHANGED
@@ -49,11 +49,13 @@ class JsonFormatter(logging.Formatter):
49
49
  fmt_dict: dict = None,
50
50
  time_format: str = "%Y-%m-%dT%H:%M:%S",
51
51
  msec_format: str = "%s.%03dZ",
52
+ context_kwargs: Optional[Dict[str, Any]] = None,
52
53
  ):
53
54
  self.fmt_dict = fmt_dict if fmt_dict is not None else {"message": "message"}
54
55
  self.default_time_format = time_format
55
56
  self.default_msec_format = msec_format
56
57
  self.datefmt = None
58
+ self.additional_context = context_kwargs or {}
57
59
  if ray.is_initialized():
58
60
  self.ray_runtime_ctx: RuntimeContext = ray.get_runtime_context()
59
61
  self.context = {}
@@ -117,6 +119,9 @@ class JsonFormatter(logging.Formatter):
117
119
 
118
120
  message_dict["ray_runtime_context"] = self.context
119
121
 
122
+ if self.additional_context:
123
+ message_dict["additional_context"] = self.additional_context
124
+
120
125
  return json.dumps(message_dict, default=str)
121
126
 
122
127
 
@@ -159,6 +164,7 @@ def _create_rotating_file_handler(
159
164
  max_bytes_per_log_file: int = DEFAULT_MAX_BYTES_PER_LOG,
160
165
  backup_count: int = DEFAULT_BACKUP_COUNT,
161
166
  logging_format: Union[str, dict] = DEFAULT_LOG_FORMAT,
167
+ context_kwargs: Dict[str, Any] = None,
162
168
  ) -> FileHandler:
163
169
 
164
170
  if type(logging_level) is str:
@@ -176,7 +182,9 @@ def _create_rotating_file_handler(
176
182
  if type(logging_format) is str:
177
183
  handler.setFormatter(logging.Formatter(logging_format))
178
184
  else:
179
- handler.setFormatter(JsonFormatter(logging_format))
185
+ handler.setFormatter(
186
+ JsonFormatter(logging_format, context_kwargs=context_kwargs)
187
+ )
180
188
 
181
189
  handler.setLevel(logging_level)
182
190
  return handler
@@ -205,6 +213,7 @@ def _configure_logger(
205
213
  log_dir: str,
206
214
  log_base_file_name: str,
207
215
  debug_log_base_file_name: str,
216
+ context_kwargs: Dict[str, Any] = None,
208
217
  ) -> Union[Logger, LoggerAdapter]:
209
218
  # This maintains log level of rotating file handlers
210
219
  primary_log_level = log_level
@@ -212,13 +221,19 @@ def _configure_logger(
212
221
  if log_level <= logging.getLevelName("DEBUG"):
213
222
  if not _file_handler_exists(logger, log_dir, debug_log_base_file_name):
214
223
  handler = _create_rotating_file_handler(
215
- log_dir, debug_log_base_file_name, "DEBUG"
224
+ log_dir,
225
+ debug_log_base_file_name,
226
+ "DEBUG",
227
+ context_kwargs=context_kwargs,
216
228
  )
217
229
  _add_logger_handler(logger, handler)
218
230
  primary_log_level = logging.getLevelName("INFO")
219
231
  if not _file_handler_exists(logger, log_dir, log_base_file_name):
220
232
  handler = _create_rotating_file_handler(
221
- log_dir, log_base_file_name, primary_log_level
233
+ log_dir,
234
+ log_base_file_name,
235
+ primary_log_level,
236
+ context_kwargs=context_kwargs,
222
237
  )
223
238
  _add_logger_handler(logger, handler)
224
239
 
@@ -226,7 +241,9 @@ def _configure_logger(
226
241
 
227
242
 
228
243
  def configure_deltacat_logger(
229
- logger: Logger, level: int = None
244
+ logger: Logger,
245
+ level: int = None,
246
+ context_kwargs: Dict[str, Any] = None,
230
247
  ) -> Union[Logger, LoggerAdapter]:
231
248
  if level is None:
232
249
  level = logging.getLevelName(DELTACAT_SYS_LOG_LEVEL)
@@ -237,11 +254,14 @@ def configure_deltacat_logger(
237
254
  DELTACAT_SYS_LOG_DIR,
238
255
  DELTACAT_SYS_INFO_LOG_BASE_FILE_NAME,
239
256
  DELTACAT_SYS_DEBUG_LOG_BASE_FILE_NAME,
257
+ context_kwargs,
240
258
  )
241
259
 
242
260
 
243
261
  def configure_application_logger(
244
- logger: Logger, level: int = None
262
+ logger: Logger,
263
+ level: int = None,
264
+ context_kwargs: Dict[str, Any] = None,
245
265
  ) -> Union[Logger, LoggerAdapter]:
246
266
  if level is None:
247
267
  level = logging.getLevelName(DELTACAT_APP_LOG_LEVEL)
@@ -252,4 +272,5 @@ def configure_application_logger(
252
272
  DELTACAT_APP_LOG_DIR,
253
273
  DELTACAT_APP_INFO_LOG_BASE_FILE_NAME,
254
274
  DELTACAT_APP_DEBUG_LOG_BASE_FILE_NAME,
275
+ context_kwargs,
255
276
  )
@@ -848,6 +848,83 @@ MULTIPLE_ROUNDS_TEST_CASES = {
848
848
  assert_compaction_audit=None,
849
849
  num_rounds=3,
850
850
  ),
851
+ # 4 input deltas (3 upsert, 1 delete delta), 2 rounds requested
852
+ # Expect to see a table that aggregates 10 records total
853
+ # (12 upserts - 2 deletes (null PK) = 10 records)
854
+ # (dropDuplicates = False)
855
+ "9-multiple-rounds-delete-deltas-with-null-pk": MultipleRoundsTestCaseParams(
856
+ primary_keys={"pk_col_1"},
857
+ sort_keys=ZERO_VALUED_SORT_KEY,
858
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
859
+ partition_values=["1"],
860
+ input_deltas=[
861
+ (
862
+ pa.Table.from_arrays(
863
+ [
864
+ pa.array([None, 11, 12, 13]),
865
+ pa.array(["a", "b", "c", "d"]),
866
+ ],
867
+ names=["pk_col_1", "col_1"],
868
+ ),
869
+ DeltaType.UPSERT,
870
+ None,
871
+ ),
872
+ (
873
+ pa.Table.from_arrays(
874
+ [
875
+ pa.array([14, 15, 16, 17]),
876
+ pa.array(["e", "f", "g", "h"]),
877
+ ],
878
+ names=["pk_col_1", "col_1"],
879
+ ),
880
+ DeltaType.UPSERT,
881
+ None,
882
+ ),
883
+ (
884
+ pa.Table.from_arrays(
885
+ [
886
+ pa.array([18, 19, 20, 21]),
887
+ pa.array(["i", "j", "k", "l"]),
888
+ ],
889
+ names=["pk_col_1", "col_1"],
890
+ ),
891
+ DeltaType.UPSERT,
892
+ None,
893
+ ),
894
+ (
895
+ pa.Table.from_arrays(
896
+ [pa.array([None, 11]), pa.array(["a", "b"])],
897
+ names=["pk_col_1", "col_1"],
898
+ ),
899
+ DeltaType.DELETE,
900
+ DeleteParameters.of(["pk_col_1", "col_1"]),
901
+ ),
902
+ ],
903
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
904
+ [
905
+ pa.array([i for i in range(12, 22)]),
906
+ pa.array(["c", "d", "e", "f", "g", "h", "i", "j", "k", "l"]),
907
+ ],
908
+ names=["pk_col_1", "col_1"],
909
+ ),
910
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
911
+ [
912
+ pa.array([i for i in range(12, 22)]),
913
+ pa.array(["c", "d", "e", "f", "g", "h", "i", "j", "k", "l"]),
914
+ ],
915
+ names=["pk_col_1", "col_1"],
916
+ ),
917
+ expected_terminal_exception=None,
918
+ expected_terminal_exception_message=None,
919
+ do_create_placement_group=False,
920
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
921
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
922
+ read_kwargs_provider=None,
923
+ drop_duplicates=False,
924
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
925
+ assert_compaction_audit=None,
926
+ num_rounds=2,
927
+ ),
851
928
  }
852
929
 
853
930
  MULTIPLE_ROUNDS_TEST_CASES = with_compactor_version_func_test_param(
@@ -84,6 +84,314 @@ REBASE_TEST_CASES = {
84
84
  skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
85
85
  assert_compaction_audit=None,
86
86
  ),
87
+ "2-rebase-with-null-pk": RebaseCompactionTestCaseParams(
88
+ primary_keys={"pk_col_1"},
89
+ sort_keys=[
90
+ SortKey.of(key_name="sk_col_1"),
91
+ SortKey.of(key_name="sk_col_2"),
92
+ ],
93
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
94
+ partition_values=["1"],
95
+ input_deltas=pa.Table.from_arrays(
96
+ [
97
+ pa.array([1, 2, None, 2, None, 1]),
98
+ pa.array([1, 2, 3, 4, 5, 6]),
99
+ pa.array(["foo"] * 6),
100
+ pa.array([5, 6, 7, 8, 9, 10]),
101
+ ],
102
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
103
+ ),
104
+ input_deltas_delta_type=DeltaType.UPSERT,
105
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
106
+ [
107
+ pa.array([None, 1, 2]),
108
+ pa.array([5, 6, 4]),
109
+ pa.array(["foo"] * 3),
110
+ pa.array([9, 10, 8]),
111
+ ],
112
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
113
+ ),
114
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
115
+ [
116
+ pa.array([None, 1, 2]),
117
+ pa.array([5, 6, 4]),
118
+ pa.array(["foo"] * 3),
119
+ pa.array([7, 10, 8]),
120
+ ],
121
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
122
+ ),
123
+ expected_terminal_exception=None,
124
+ expected_terminal_exception_message=None,
125
+ do_create_placement_group=False,
126
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
127
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
128
+ read_kwargs_provider=None,
129
+ drop_duplicates=True,
130
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
131
+ assert_compaction_audit=None,
132
+ ),
133
+ "3-rebase-with-null-two-pk": RebaseCompactionTestCaseParams(
134
+ primary_keys={"pk_col_1", "pk_col_2"},
135
+ sort_keys=[
136
+ SortKey.of(key_name="sk_col_1"),
137
+ ],
138
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
139
+ partition_values=["1"],
140
+ input_deltas=pa.Table.from_arrays(
141
+ [
142
+ pa.array([1, 2, None, 2, None, 1, 5]),
143
+ pa.array([1, None, 3, None, None, 1, 5]),
144
+ pa.array(["foo"] * 7),
145
+ pa.array([5, 6, 7, 8, 9, 10, 11]),
146
+ ],
147
+ names=["pk_col_1", "pk_col_2", "sk_col_1", "col_1"],
148
+ ),
149
+ input_deltas_delta_type=DeltaType.UPSERT,
150
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
151
+ [
152
+ pa.array([1, 2, None, 5, None]),
153
+ pa.array([1, None, 3, 5, None]),
154
+ pa.array(["foo"] * 5),
155
+ pa.array([10, 8, 7, 11, 9]),
156
+ ],
157
+ names=["pk_col_1", "pk_col_2", "sk_col_1", "col_1"],
158
+ ),
159
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
160
+ [
161
+ pa.array([1, 2, None, 5, None]),
162
+ pa.array([1, None, 3, 5, None]),
163
+ pa.array(["foo"] * 5),
164
+ pa.array([10, 8, 7, 11, 9]),
165
+ ],
166
+ names=["pk_col_1", "pk_col_2", "sk_col_1", "col_1"],
167
+ ),
168
+ expected_terminal_exception=None,
169
+ expected_terminal_exception_message=None,
170
+ do_create_placement_group=False,
171
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
172
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
173
+ read_kwargs_provider=None,
174
+ drop_duplicates=True,
175
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
176
+ assert_compaction_audit=None,
177
+ ),
178
+ "4-rebase-with-null-multiple-pk-different-types": RebaseCompactionTestCaseParams(
179
+ primary_keys={"pk_col_1", "pk_col_2", "pk_col_3"},
180
+ sort_keys=[],
181
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
182
+ partition_values=["1"],
183
+ input_deltas=pa.Table.from_arrays(
184
+ [
185
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
186
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
187
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
188
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
189
+ ],
190
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
191
+ ),
192
+ input_deltas_delta_type=DeltaType.UPSERT,
193
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
194
+ [
195
+ pa.array([1, 2, None, 5, None, None]),
196
+ pa.array([1, None, 3, 5, None, None]),
197
+ pa.array(["a", "b", "c", "g", "e", None]),
198
+ pa.array([10, 8, 7, 11, 12, 14]),
199
+ ],
200
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
201
+ ),
202
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
203
+ [
204
+ pa.array([1, 2, None, 5, None, None]),
205
+ pa.array([1, None, 3, 5, None, None]),
206
+ pa.array(["a", "b", "c", "g", "e", None]),
207
+ pa.array([10, 8, 7, 11, 12, 14]),
208
+ ],
209
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
210
+ ),
211
+ expected_terminal_exception=None,
212
+ expected_terminal_exception_message=None,
213
+ do_create_placement_group=False,
214
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
215
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
216
+ read_kwargs_provider=None,
217
+ drop_duplicates=True,
218
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
219
+ assert_compaction_audit=None,
220
+ ),
221
+ "5-rebase-with-null-multiple-pk-one-hash-bucket": RebaseCompactionTestCaseParams(
222
+ primary_keys={"pk_col_1", "pk_col_2", "pk_col_3"},
223
+ sort_keys=[],
224
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
225
+ partition_values=["1"],
226
+ input_deltas=pa.Table.from_arrays(
227
+ [
228
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
229
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
230
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
231
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
232
+ ],
233
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
234
+ ),
235
+ input_deltas_delta_type=DeltaType.UPSERT,
236
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
237
+ [
238
+ pa.array([1, 2, None, 5, None, None]),
239
+ pa.array([1, None, 3, 5, None, None]),
240
+ pa.array(["a", "b", "c", "g", "e", None]),
241
+ pa.array([10, 8, 7, 11, 12, 14]),
242
+ ],
243
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
244
+ ),
245
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
246
+ [
247
+ pa.array([1, 2, None, 5, None, None]),
248
+ pa.array([1, None, 3, 5, None, None]),
249
+ pa.array(["a", "b", "c", "g", "e", None]),
250
+ pa.array([10, 8, 7, 11, 12, 14]),
251
+ ],
252
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
253
+ ),
254
+ expected_terminal_exception=None,
255
+ expected_terminal_exception_message=None,
256
+ do_create_placement_group=False,
257
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
258
+ hash_bucket_count=1,
259
+ read_kwargs_provider=None,
260
+ drop_duplicates=True,
261
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
262
+ assert_compaction_audit=None,
263
+ ),
264
+ "6-rebase-with-null-multiple-pk-drop-duplicates-false": RebaseCompactionTestCaseParams(
265
+ primary_keys={"pk_col_1", "pk_col_2", "pk_col_3"},
266
+ sort_keys=[],
267
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
268
+ partition_values=["1"],
269
+ input_deltas=pa.Table.from_arrays(
270
+ [
271
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
272
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
273
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
274
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
275
+ ],
276
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
277
+ ),
278
+ input_deltas_delta_type=DeltaType.UPSERT,
279
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
280
+ [
281
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
282
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
283
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
284
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
285
+ ],
286
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
287
+ ),
288
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
289
+ [
290
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
291
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
292
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
293
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
294
+ ],
295
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
296
+ ),
297
+ expected_terminal_exception=None,
298
+ expected_terminal_exception_message=None,
299
+ do_create_placement_group=False,
300
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
301
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
302
+ read_kwargs_provider=None,
303
+ drop_duplicates=False,
304
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
305
+ assert_compaction_audit=None,
306
+ ),
307
+ "7-rebase-drop-duplicates-false": RebaseCompactionTestCaseParams(
308
+ primary_keys={"pk_col_1"},
309
+ sort_keys=[
310
+ SortKey.of(key_name="sk_col_1"),
311
+ ],
312
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
313
+ partition_values=["1"],
314
+ input_deltas=pa.Table.from_arrays(
315
+ [
316
+ pa.array([1, 2, 2, 3, 3, 1]),
317
+ pa.array([1, 2, 3, 4, 5, 6]),
318
+ pa.array(["a", "b", "c", "b", "e", "a"]),
319
+ pa.array([5, 6, 7, 8, 9, 10]),
320
+ ],
321
+ names=["pk_col_1", "sk_col_1", "col_1", "col_2"],
322
+ ),
323
+ input_deltas_delta_type=DeltaType.UPSERT,
324
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
325
+ [
326
+ pa.array([1, 2, 2, 3, 3, 1]),
327
+ pa.array([1, 2, 3, 4, 5, 6]),
328
+ pa.array(["a", "b", "c", "b", "e", "a"]),
329
+ pa.array([5, 6, 7, 8, 9, 10]),
330
+ ],
331
+ names=["pk_col_1", "sk_col_1", "col_1", "col_2"],
332
+ ),
333
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
334
+ [
335
+ pa.array([1, 2, 2, 3, 3, 1]),
336
+ pa.array([1, 2, 3, 4, 5, 6]),
337
+ pa.array(["a", "b", "c", "b", "e", "a"]),
338
+ pa.array([5, 6, 7, 8, 9, 10]),
339
+ ],
340
+ names=["pk_col_1", "sk_col_1", "col_1", "col_2"],
341
+ ),
342
+ expected_terminal_exception=None,
343
+ expected_terminal_exception_message=None,
344
+ do_create_placement_group=False,
345
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
346
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
347
+ read_kwargs_provider=None,
348
+ drop_duplicates=False,
349
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
350
+ assert_compaction_audit=None,
351
+ ),
352
+ "8-rebase-with-with-null-pk-duplicates-false-hash-bucket-1": RebaseCompactionTestCaseParams(
353
+ primary_keys={"pk_col_1", "pk_col_2", "pk_col_3"},
354
+ sort_keys=[],
355
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
356
+ partition_values=["1"],
357
+ input_deltas=pa.Table.from_arrays(
358
+ [
359
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
360
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
361
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
362
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
363
+ ],
364
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
365
+ ),
366
+ input_deltas_delta_type=DeltaType.UPSERT,
367
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
368
+ [
369
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
370
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
371
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
372
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
373
+ ],
374
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
375
+ ),
376
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
377
+ [
378
+ pa.array([1, 2, None, 2, None, 1, 5, None, None, None]),
379
+ pa.array([1, None, 3, None, None, 1, 5, None, None, None]),
380
+ pa.array(["a", "b", "c", "b", "e", "a", "g", "e", None, None]),
381
+ pa.array([5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
382
+ ],
383
+ names=["pk_col_1", "pk_col_2", "pk_col_3", "col_1"],
384
+ ),
385
+ expected_terminal_exception=None,
386
+ expected_terminal_exception_message=None,
387
+ do_create_placement_group=False,
388
+ records_per_compacted_file=1,
389
+ hash_bucket_count=1,
390
+ read_kwargs_provider=None,
391
+ drop_duplicates=False,
392
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
393
+ assert_compaction_audit=None,
394
+ ),
87
395
  }
88
396
 
89
397
  REBASE_TEST_CASES = with_compactor_version_func_test_param(REBASE_TEST_CASES)
@@ -798,6 +798,67 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
798
798
  skip_enabled_compact_partition_drivers=None,
799
799
  assert_compaction_audit=None,
800
800
  ),
801
+ "14-rebase-then-incremental-with-null-pk": RebaseThenIncrementalCompactionTestCaseParams(
802
+ primary_keys={"pk_col_1"},
803
+ sort_keys=[
804
+ SortKey.of(key_name="sk_col_1"),
805
+ SortKey.of(key_name="sk_col_2"),
806
+ ],
807
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
808
+ partition_values=["1"],
809
+ input_deltas=pa.Table.from_arrays(
810
+ [
811
+ pa.array([str(i) for i in range(9)] + [None]),
812
+ pa.array([i for i in range(0, 10)]),
813
+ pa.array(["foo"] * 10),
814
+ pa.array([i / 10 for i in range(10, 20)]),
815
+ ],
816
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
817
+ ),
818
+ input_deltas_delta_type=DeltaType.UPSERT,
819
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
820
+ [
821
+ pa.array([str(i) for i in range(9)] + [None]),
822
+ pa.array([i for i in range(0, 10)]),
823
+ pa.array(["foo"] * 10),
824
+ pa.array([i / 10 for i in range(10, 20)]),
825
+ ],
826
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
827
+ ),
828
+ incremental_deltas=[
829
+ (
830
+ pa.Table.from_arrays(
831
+ [
832
+ pa.array([str(i) for i in range(9)] + [None]),
833
+ pa.array([i for i in range(20, 30)]),
834
+ pa.array(["foo"] * 10),
835
+ pa.array([i / 10 for i in range(40, 50)]),
836
+ ],
837
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
838
+ ),
839
+ DeltaType.UPSERT,
840
+ None,
841
+ )
842
+ ],
843
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
844
+ [
845
+ pa.array([str(i) for i in range(9)] + [None]),
846
+ pa.array([i for i in range(20, 30)]),
847
+ pa.array(["foo"] * 10),
848
+ pa.array([i / 10 for i in range(40, 50)]),
849
+ ],
850
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
851
+ ),
852
+ expected_terminal_exception=None,
853
+ expected_terminal_exception_message=None,
854
+ do_create_placement_group=False,
855
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
856
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
857
+ read_kwargs_provider=None,
858
+ drop_duplicates=True,
859
+ skip_enabled_compact_partition_drivers=None,
860
+ assert_compaction_audit=assert_compaction_audit,
861
+ ),
801
862
  }
802
863
 
803
864
  REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
@@ -1983,6 +2044,104 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
1983
2044
  skip_enabled_compact_partition_drivers=None,
1984
2045
  assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
1985
2046
  ),
2047
+ "31-rebase-then-incremental-delete-delta-on-incremental-null-pk-delete-null": RebaseThenIncrementalCompactionTestCaseParams(
2048
+ primary_keys={"pk_col_1"},
2049
+ sort_keys=ZERO_VALUED_SORT_KEY,
2050
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
2051
+ partition_values=["1"],
2052
+ input_deltas=pa.Table.from_arrays(
2053
+ [
2054
+ pa.array([i for i in range(11)] + [None]),
2055
+ pa.array([str(i) for i in range(0, 12)]),
2056
+ ],
2057
+ names=["pk_col_1", "col_1"],
2058
+ ),
2059
+ input_deltas_delta_type=DeltaType.UPSERT,
2060
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
2061
+ [
2062
+ pa.array([i for i in range(11)] + [None]),
2063
+ pa.array([str(i) for i in range(0, 12)]),
2064
+ ],
2065
+ names=["pk_col_1", "col_1"],
2066
+ ),
2067
+ incremental_deltas=[
2068
+ (
2069
+ pa.Table.from_arrays(
2070
+ [
2071
+ pa.array([10, 11, None, 13]),
2072
+ pa.array(["a", "b", "c", "d"]),
2073
+ ],
2074
+ names=["pk_col_1", "col_1"],
2075
+ ),
2076
+ DeltaType.UPSERT,
2077
+ None,
2078
+ ),
2079
+ (
2080
+ pa.Table.from_arrays(
2081
+ [pa.array([10, 11]), pa.array(["a", "b"])],
2082
+ names=["pk_col_1", "col_1"],
2083
+ ),
2084
+ DeltaType.DELETE,
2085
+ DeleteParameters.of(["pk_col_1", "col_1"]),
2086
+ ),
2087
+ (
2088
+ pa.Table.from_arrays(
2089
+ [pa.array([None])], # Support deleting null PK records
2090
+ names=["pk_col_1"],
2091
+ ),
2092
+ DeltaType.DELETE,
2093
+ DeleteParameters.of(["pk_col_1"]),
2094
+ ),
2095
+ (
2096
+ pa.Table.from_arrays(
2097
+ [pa.array(["c"])],
2098
+ names=["col_1"],
2099
+ ),
2100
+ DeltaType.DELETE,
2101
+ DeleteParameters.of(["col_1"]),
2102
+ ),
2103
+ (
2104
+ pa.Table.from_arrays(
2105
+ [pa.array(["c"])],
2106
+ names=["col_1"],
2107
+ ),
2108
+ DeltaType.DELETE,
2109
+ DeleteParameters.of(["col_1"]),
2110
+ ),
2111
+ (
2112
+ pa.Table.from_arrays(
2113
+ [pa.array([10, 11]), pa.array(["a", "b"])],
2114
+ names=["pk_col_1", "col_1"],
2115
+ ),
2116
+ DeltaType.DELETE,
2117
+ DeleteParameters.of(["pk_col_1", "col_1"]),
2118
+ ),
2119
+ (
2120
+ pa.Table.from_arrays(
2121
+ [pa.array(["c"])],
2122
+ names=["col_1"],
2123
+ ),
2124
+ DeltaType.DELETE,
2125
+ DeleteParameters.of(["col_1"]),
2126
+ ),
2127
+ ],
2128
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
2129
+ [
2130
+ pa.array([i for i in range(10)] + [13]),
2131
+ pa.array([str(i) for i in range(0, 10)] + ["d"]),
2132
+ ],
2133
+ names=["pk_col_1", "col_1"],
2134
+ ),
2135
+ expected_terminal_exception=None,
2136
+ expected_terminal_exception_message=None,
2137
+ do_create_placement_group=False,
2138
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
2139
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
2140
+ read_kwargs_provider=None,
2141
+ drop_duplicates=True,
2142
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
2143
+ assert_compaction_audit=assert_compaction_audit,
2144
+ ),
1986
2145
  }
1987
2146
 
1988
2147
  REBASE_THEN_INCREMENTAL_TEST_CASES = with_compactor_version_func_test_param(
@@ -249,6 +249,7 @@ def test_compact_partition_rebase_same_source_and_destination(
249
249
  pgm = PlacementGroupManager(
250
250
  1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
251
251
  ).pgs[0]
252
+ last_stream_position_to_compact = source_partition.stream_position
252
253
  compact_partition_params = CompactPartitionParams.of(
253
254
  {
254
255
  "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
@@ -258,17 +259,19 @@ def test_compact_partition_rebase_same_source_and_destination(
258
259
  "deltacat_storage_kwargs": ds_mock_kwargs,
259
260
  "destination_partition_locator": rebased_partition.locator,
260
261
  "hash_bucket_count": hash_bucket_count_param,
261
- "last_stream_position_to_compact": source_partition.stream_position,
262
+ "last_stream_position_to_compact": last_stream_position_to_compact,
262
263
  "list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
263
264
  "object_store": RayPlasmaObjectStore(),
264
265
  "pg_config": pgm,
265
266
  "primary_keys": primary_keys,
266
267
  "read_kwargs_provider": read_kwargs_provider_param,
267
268
  "rebase_source_partition_locator": source_partition.locator,
269
+ "rebase_source_partition_high_watermark": rebased_partition.stream_position,
268
270
  "records_per_compacted_file": records_per_compacted_file_param,
269
271
  "s3_client_kwargs": {},
270
272
  "source_partition_locator": rebased_partition.locator,
271
273
  "sort_keys": sort_keys if sort_keys else None,
274
+ "drop_duplicates": drop_duplicates_param,
272
275
  }
273
276
  )
274
277
 
@@ -300,14 +303,20 @@ def test_compact_partition_rebase_same_source_and_destination(
300
303
  compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
301
304
  s3_resource, rcf_file_s3_uri
302
305
  )
306
+ assert (
307
+ compacted_delta_locator.stream_position == last_stream_position_to_compact
308
+ ), "Compacted delta locator must be equal to last stream position"
303
309
  tables = ds.download_delta(
304
310
  compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
305
311
  )
306
312
  actual_rebase_compacted_table = pa.concat_tables(tables)
307
313
  # if no primary key is specified then sort by sort_key for consistent assertion
308
- sorting_cols: List[Any] = (
309
- [(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
310
- )
314
+ sorting_cols: List[Any] = []
315
+ if primary_keys:
316
+ sorting_cols.extend([(val, "ascending") for val in primary_keys])
317
+ if sort_keys:
318
+ sorting_cols.extend(sort_keys)
319
+
311
320
  rebase_expected_compact_partition_result = (
312
321
  rebase_expected_compact_partition_result.combine_chunks().sort_by(sorting_cols)
313
322
  )
@@ -193,8 +193,10 @@ def create_src_w_deltas_destination_rebase_w_deltas_strategy(
193
193
  ds_mock_kwargs: Optional[Dict[str, Any]],
194
194
  ) -> Tuple[Stream, Stream, Optional[Stream]]:
195
195
  import deltacat.tests.local_deltacat_storage as ds
196
- from deltacat.storage import Partition, Stream
196
+ from deltacat.storage import Delta
197
+ from deltacat.utils.common import current_time_ms
197
198
 
199
+ last_stream_position = current_time_ms()
198
200
  source_namespace, source_table_name, source_table_version = create_src_table(
199
201
  primary_keys, sort_keys, partition_keys, ds_mock_kwargs
200
202
  )
@@ -208,10 +210,12 @@ def create_src_w_deltas_destination_rebase_w_deltas_strategy(
208
210
  staged_partition: Partition = ds.stage_partition(
209
211
  source_table_stream, partition_values, **ds_mock_kwargs
210
212
  )
213
+ staged_delta: Delta = ds.stage_delta(
214
+ input_deltas, staged_partition, input_delta_type, **ds_mock_kwargs
215
+ )
216
+ staged_delta.locator.stream_position = last_stream_position
211
217
  ds.commit_delta(
212
- ds.stage_delta(
213
- input_deltas, staged_partition, input_delta_type, **ds_mock_kwargs
214
- ),
218
+ staged_delta,
215
219
  **ds_mock_kwargs,
216
220
  )
217
221
  ds.commit_partition(staged_partition, **ds_mock_kwargs)
@@ -244,8 +248,12 @@ def create_src_w_deltas_destination_rebase_w_deltas_strategy(
244
248
  staged_partition: Partition = ds.stage_partition(
245
249
  rebasing_table_stream, partition_values, **ds_mock_kwargs
246
250
  )
251
+ staged_delta: Delta = ds.stage_delta(
252
+ input_deltas, staged_partition, **ds_mock_kwargs
253
+ )
254
+ staged_delta.locator.stream_position = last_stream_position
247
255
  ds.commit_delta(
248
- ds.stage_delta(input_deltas, staged_partition, **ds_mock_kwargs),
256
+ staged_delta,
249
257
  **ds_mock_kwargs,
250
258
  )
251
259
  ds.commit_partition(staged_partition, **ds_mock_kwargs)
@@ -126,3 +126,67 @@ class TestJsonFormatter(unittest.TestCase):
126
126
  "We expect task ID to be present inside a remote task",
127
127
  )
128
128
  ray.shutdown()
129
+
130
+ def test_format_when_ray_initialized_with_context_kwargs(self):
131
+ ray.init(local_mode=True, ignore_reinit_error=True)
132
+
133
+ formatter = JsonFormatter(
134
+ {"message": "msg"}, context_kwargs={"custom_key": "custom_val"}
135
+ )
136
+
137
+ record = LogRecord(
138
+ level="INFO",
139
+ name="test",
140
+ pathname="test",
141
+ lineno=0,
142
+ message="test_message",
143
+ msg="test_message",
144
+ args=None,
145
+ exc_info=None,
146
+ )
147
+
148
+ result = formatter.format(record)
149
+ result = json.loads(result)
150
+
151
+ self.assertEqual("test_message", result["message"])
152
+ self.assertTrue(ray.is_initialized())
153
+ self.assertIn("ray_runtime_context", result)
154
+ self.assertIn("job_id", result["ray_runtime_context"])
155
+ self.assertIn("node_id", result["ray_runtime_context"])
156
+ self.assertIn("worker_id", result["ray_runtime_context"])
157
+ self.assertNotIn(
158
+ "task_id",
159
+ result["ray_runtime_context"],
160
+ "We expect task ID not be present outside a remote task",
161
+ )
162
+ self.assertEqual("custom_val", result["additional_context"]["custom_key"])
163
+ ray.shutdown()
164
+
165
+ def test_format_with_context_kwargs(self):
166
+ ray.shutdown()
167
+ formatter = JsonFormatter(
168
+ {"message": "msg"}, context_kwargs={"custom_key": "custom_val"}
169
+ )
170
+
171
+ record = LogRecord(
172
+ level="INFO",
173
+ name="test",
174
+ pathname="test",
175
+ lineno=0,
176
+ message="test_message",
177
+ msg="test_message",
178
+ args=None,
179
+ exc_info=None,
180
+ )
181
+
182
+ result = formatter.format(record)
183
+
184
+ self.assertEqual(
185
+ {
186
+ "message": "test_message",
187
+ "additional_context": {"custom_key": "custom_val"},
188
+ },
189
+ json.loads(result),
190
+ )
191
+ self.assertFalse(ray.is_initialized())
192
+ self.assertNotIn("ray_runtime_context", json.loads(result))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.16
3
+ Version: 1.1.18
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -1,10 +1,10 @@
1
- deltacat/__init__.py,sha256=o3hcQ85nWUeDYkaXi3ADAbqwM_c5ajyxzlx-Z2jdKbI,1778
1
+ deltacat/__init__.py,sha256=-PrYkT-sQtWxMQFFAp4vobJJ8-dq-3EA5LjPiH5sFFQ,1778
2
2
  deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
3
- deltacat/exceptions.py,sha256=yWM4RXK7uRrQc1VgJv6Lv2UiNZWAx2wolLq7cBwjlkg,12770
4
- deltacat/logs.py,sha256=6g16VkEFidbaMjgenAjggE1r2l664drMVhreRs8B1IQ,8438
3
+ deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
4
+ deltacat/logs.py,sha256=_UAc_6GiQR3mxccys32Cp2CZOKOVZ9L-AkNUAlzepns,9091
5
5
  deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  deltacat/aws/clients.py,sha256=4eQvpkV1PzFfxog7EriuglOGGwNFHR5hbGYpjsNNPxk,6949
7
- deltacat/aws/constants.py,sha256=1HnDXrSokW-G3YA3qKEiv7fZVntDs1uSk6a7On-VG5k,1223
7
+ deltacat/aws/constants.py,sha256=hcYAUot4ahq9GXCMClQiuYCtiDs5XaOebdUoKg4V84k,1222
8
8
  deltacat/aws/s3u.py,sha256=IdT0XqDXVOkPdo5Em5u3qAkV1UXFpXaE1rTkUDKv4f4,28578
9
9
  deltacat/aws/redshift/__init__.py,sha256=7SvjG-dqox8zZUhFicTsUvpG5vXYDl_QQ3ohlHOgTKc,342
10
10
  deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -76,7 +76,7 @@ deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQ
76
76
  deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2OpHvUMqUL2ja3aw,3626
77
77
  deltacat/compute/compactor_v2/utils/io.py,sha256=5zwJEW_UHv9ttQ2exJ23ZnExwBQXn1KgN7FDx1MGYv0,5262
78
78
  deltacat/compute/compactor_v2/utils/merge.py,sha256=EV_iKhNc3WflgfLW1Q46dXUvyClx8VebWHGtninEfsI,5311
79
- deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=ghyIifjXtqXgi8lN3lfnVQ2vi8uk_ny0FE7hsQlLjRQ,11538
79
+ deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=QOMwWxGhZ7VWa3oE6InM4thR5pbjmT7ttNXvx_IiKjo,11676
80
80
  deltacat/compute/compactor_v2/utils/task_options.py,sha256=VXvoVVUq5re8NiOoyrfz34qSRiOTB0IkxHJlMqKsBmk,14066
81
81
  deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
82
82
  deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
@@ -138,25 +138,25 @@ deltacat/storage/model/transform.py,sha256=t4hg1dKua8VPeMFgyllkWdzq-L5M_DRG0HD9s
138
138
  deltacat/storage/model/types.py,sha256=hj7MmjjVmKT-R9sMUulOWG-FByGZKKaYXNnOWW32mP0,1608
139
139
  deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
140
140
  deltacat/tests/test_exceptions.py,sha256=V3jUQClHLD24tS18tnGvNIt0psn2WFT3Nf_CIvSqL08,3140
141
- deltacat/tests/test_logs.py,sha256=Bq0kfzOVW0UbJL4Hayqy1k1QDXLu5hlvQX_cOJdsgYs,3851
141
+ deltacat/tests/test_logs.py,sha256=QmnArioPo0dbOUXjaG-b7HEg1GaYI8NDhinPDmzwjfQ,5889
142
142
  deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
143
143
  deltacat/tests/aws/test_clients.py,sha256=23GMWfz27WWBDXSqphG9mfputsyS7j3I5P_HRk4YoKE,3790
144
144
  deltacat/tests/aws/test_s3u.py,sha256=FsYCH8K8DsDRPOtTp-w1Nu3ATqt4p1mqDo6aVJV-SbU,7918
145
145
  deltacat/tests/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
146
  deltacat/tests/catalog/test_default_catalog_impl.py,sha256=2l5uwmtLlUJ9yH1LDggtj81fa-pHqbE0-VBt6G4Hyc0,3180
147
147
  deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
148
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py,sha256=vyqwuKYOb4FtRwC5r1SJf7kcZNYXoiGb-BUrBgr5_Xw,34852
149
- deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=NfcB1aTq6HeYMFrtooIIoifzLp5U0xFTN6F7Lpk8cYQ,3143
150
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=Kl5A7hoJ0pVOE-rZna_4XcuIjhuzQbJudvnfNYKHgGo,75436
148
+ deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py,sha256=kWyIJQMyF9oBemvgOp3ngGhMpH9zjkznV-67ewELgHQ,37719
149
+ deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=8HVr3EIFYFqNaJoqeCuj9xIBjM4Ch2bx-mJcO4BRrLo,16839
150
+ deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=l_6-pAKOsRY3NbtfHsYmEaJEkq6IJueYuLsjyJxNgz4,81564
151
151
  deltacat/tests/compute/compact_partition_test_cases.py,sha256=R9eiKvxCLqcoHjAx3iOogdnXZEO9TvLbRf0wA7bcJN4,26170
152
152
  deltacat/tests/compute/test_compact_partition_incremental.py,sha256=Z0hyQGhMZjCaOn1Vk4qUbgDiS7HDhtdNeFQyG1PJhqA,14559
153
153
  deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=xhKCurTA29Y78_1eksUVJ0W35zNNZYm40rMpMM9ynvM,11853
154
154
  deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
155
- deltacat/tests/compute/test_compact_partition_rebase.py,sha256=p97zJmEoC2t6R12luSkCKjjBl50l4UGzh-IHdiQdpCs,11445
155
+ deltacat/tests/compute/test_compact_partition_rebase.py,sha256=O_IwZ1Xeaff98V1XYOyVD8PoS_EpVXSQcHWz4In8bK4,11889
156
156
  deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=CHHfNFEJW8S1We7NE1Gg6EaoKEWnaOMRxWrLyirrahc,14643
157
157
  deltacat/tests/compute/test_util_common.py,sha256=oTkTuo6wscVN8hmoQASIKP_DJN-M0um_ySCOcXv9AJA,11699
158
158
  deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
159
- deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=k9lq_3r_kNMzruTSn4JE7yjdBBUT3Lh-l8khSYdYpYs,12945
159
+ deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=Q3HJj1fjoe2JwRUOW8KEjbTqPIIoP2o_T3ZGH6SJnCM,13244
160
160
  deltacat/tests/compute/compactor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
161
  deltacat/tests/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
162
  deltacat/tests/compute/compactor/steps/test_repartition.py,sha256=0uRguPEKeLSYs746Jv8io-HZMWdyXNcOMBu8GO2mA0M,9305
@@ -220,8 +220,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
220
220
  deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
221
221
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
222
222
  deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
223
- deltacat-1.1.16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
224
- deltacat-1.1.16.dist-info/METADATA,sha256=EvOmjI60akKZ6BKEWQ2_KZJxr-Bp6wMZOU6-zV1EDos,1734
225
- deltacat-1.1.16.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
226
- deltacat-1.1.16.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
227
- deltacat-1.1.16.dist-info/RECORD,,
223
+ deltacat-1.1.18.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
224
+ deltacat-1.1.18.dist-info/METADATA,sha256=aMHqD2bxsU5-IHUUjn47PFrRP01jh1wlrDtOHeHGGaA,1734
225
+ deltacat-1.1.18.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
226
+ deltacat-1.1.18.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
227
+ deltacat-1.1.18.dist-info/RECORD,,