deltacat 0.1.18b18__py3-none-any.whl → 0.1.18b19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "0.1.18b18"
47
+ __version__ = "0.1.18b19"
48
48
 
49
49
 
50
50
  __all__ = [
deltacat/aws/clients.py CHANGED
@@ -109,7 +109,7 @@ def block_until_instance_metadata_service_returns_success(
109
109
  url=INSTANCE_METADATA_SERVICE_IPV4_URI,
110
110
  retry_strategy=RetryIfRetryableHTTPStatusCode,
111
111
  wait_strategy=wait_fixed(2), # wait 2 seconds before retrying,
112
- stop_strategy=stop_after_delay(60 * 10), # stop trying after 10 minutes
112
+ stop_strategy=stop_after_delay(60 * 30), # stop trying after 30 minutes
113
113
  ) -> Optional[Response]:
114
114
  """Blocks until the instance metadata service returns a successful response.
115
115
 
deltacat/aws/constants.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from typing import List
2
2
 
3
- from deltacat.utils.common import env_integer
3
+ from deltacat.utils.common import env_integer, env_string
4
4
 
5
5
  BOTO_MAX_RETRIES = env_integer("BOTO_MAX_RETRIES", 15)
6
6
  TIMEOUT_ERROR_CODES: List[str] = ["ReadTimeoutError", "ConnectTimeoutError"]
7
+ AWS_REGION = env_string("AWS_REGION", "us-east-1")
@@ -90,6 +90,7 @@ class CompactPartitionParams(dict):
90
90
  "hash_group_count", result.hash_bucket_count
91
91
  )
92
92
  result.drop_duplicates = params.get("drop_duplicates", DROP_DUPLICATES)
93
+ result.ray_custom_resources = params.get("ray_custom_resources")
93
94
 
94
95
  if not importlib.util.find_spec("memray"):
95
96
  result.enable_profiler = False
@@ -288,6 +289,14 @@ class CompactPartitionParams(dict):
288
289
  def hash_group_count(self) -> int:
289
290
  return self["hash_group_count"]
290
291
 
292
+ @property
293
+ def ray_custom_resources(self) -> Dict:
294
+ return self["ray_custom_resources"]
295
+
296
+ @ray_custom_resources.setter
297
+ def ray_custom_resources(self, res) -> None:
298
+ self["ray_custom_resources"] = res
299
+
291
300
  @hash_group_count.setter
292
301
  def hash_group_count(self, count: int) -> None:
293
302
  self["hash_group_count"] = count
@@ -420,6 +420,21 @@ class CompactionSessionAuditInfo(dict):
420
420
  """
421
421
  return self.get("usedCPUSeconds")
422
422
 
423
+ @property
424
+ def used_memory_gb_seconds(self) -> float:
425
+ """
426
+ The used memory in the cluster weighted over time. This
427
+ determines opportunities for better memory estimation.
428
+ """
429
+ return self.get("usedMemoryGBSeconds")
430
+
431
+ @property
432
+ def total_memory_gb_seconds(self) -> float:
433
+ """
434
+ Total memory in the cluster weighted over time in GB.
435
+ """
436
+ return self.get("totalMemoryGBSeconds")
437
+
423
438
  @property
424
439
  def pyarrow_version(self) -> str:
425
440
  """
@@ -743,6 +758,14 @@ class CompactionSessionAuditInfo(dict):
743
758
  self["usedCPUSeconds"] = value
744
759
  return self
745
760
 
761
+ def set_used_memory_gb_seconds(self, value: float) -> CompactionSessionAuditInfo:
762
+ self["usedMemoryGBSeconds"] = value
763
+ return self
764
+
765
+ def set_total_memory_gb_seconds(self, value: float) -> CompactionSessionAuditInfo:
766
+ self["totalMemoryGBSeconds"] = value
767
+ return self
768
+
746
769
  def set_pyarrow_version(self, value: str) -> CompactionSessionAuditInfo:
747
770
  self["pyarrowVersion"] = value
748
771
  return self
@@ -286,8 +286,15 @@ class DeltaAnnotated(Delta):
286
286
 
287
287
  result.append(new_da)
288
288
  else:
289
+ logger.info(
290
+ f"Split was not performed on delta with locator: {delta_annotated.locator} "
291
+ "as partial parquet params was not found."
292
+ )
289
293
  return [delta_annotated]
290
294
 
295
+ if result:
296
+ return result
297
+
291
298
  logger.info(
292
299
  f"Split was not performed on the delta with locator: {delta_annotated.locator}"
293
300
  )
@@ -217,6 +217,7 @@ def _execute_compaction(
217
217
  previous_inflation=params.previous_inflation,
218
218
  average_record_size_bytes=params.average_record_size_bytes,
219
219
  primary_keys=params.primary_keys,
220
+ ray_custom_resources=params.ray_custom_resources,
220
221
  )
221
222
 
222
223
  hb_start = time.monotonic()
@@ -337,6 +338,7 @@ def _execute_compaction(
337
338
  primary_keys=params.primary_keys,
338
339
  deltacat_storage=params.deltacat_storage,
339
340
  deltacat_storage_kwargs=params.deltacat_storage_kwargs,
341
+ ray_custom_resources=params.ray_custom_resources,
340
342
  )
341
343
 
342
344
  def merge_input_provider(index, item):
@@ -479,6 +481,10 @@ def _execute_compaction(
479
481
  if cluster_util:
480
482
  compaction_audit.set_total_cpu_seconds(cluster_util.total_vcpu_seconds)
481
483
  compaction_audit.set_used_cpu_seconds(cluster_util.used_vcpu_seconds)
484
+ compaction_audit.set_used_memory_gb_seconds(cluster_util.used_memory_gb_seconds)
485
+ compaction_audit.set_total_memory_gb_seconds(
486
+ cluster_util.total_memory_gb_seconds
487
+ )
482
488
 
483
489
  s3_utils.upload(
484
490
  compaction_audit.audit_url,
@@ -23,10 +23,10 @@ AVERAGE_RECORD_SIZE_BYTES = 1000
23
23
  # r5.8xlarge EC2 instances.
24
24
  TASK_MAX_PARALLELISM = 5367
25
25
 
26
- # The percentage of memory that needs to be estimated
26
+ # The percentage of memory that needs to be allocated
27
27
  # as buffer. This value will ensure the job doesn't run out
28
28
  # of memory by considering buffer for uncertainities.
29
- TOTAL_MEMORY_BUFFER_PERCENTAGE = 20
29
+ TOTAL_MEMORY_BUFFER_PERCENTAGE = 30
30
30
 
31
31
  # The total size of records that will be hash bucketed at once
32
32
  # Since, sorting is nlogn, we ensure that is not performed
@@ -35,3 +35,8 @@ MAX_SIZE_OF_RECORD_BATCH_IN_GIB = 2 * 1024 * 1024 * 1024
35
35
 
36
36
  # Whether to drop duplicates during merge.
37
37
  DROP_DUPLICATES = True
38
+
39
+ # PARQUET to PYARROW inflation multiplier
40
+ # This is the observed upper bound inflation for parquet
41
+ # size in metadata to pyarrow table size.
42
+ PARQUET_TO_PYARROW_INFLATION = 4
@@ -109,7 +109,10 @@ def _group_file_records_by_pk_hash_bucket(
109
109
  if delta_file_envelopes is None:
110
110
  return None, 0, 0
111
111
 
112
- logger.info(f"Read all delta file envelopes: {len(delta_file_envelopes)}")
112
+ logger.info(
113
+ f"Read all delta file envelopes: {len(delta_file_envelopes)} "
114
+ f"and total_size_bytes={total_size_bytes} and records={total_record_count}"
115
+ )
113
116
 
114
117
  # group the data by primary key hash value
115
118
  hb_to_delta_file_envelopes = np.empty([num_hash_buckets], dtype="object")
@@ -46,7 +46,7 @@ def append_content_type_params(
46
46
  pq_files = deltacat_storage.download_delta(
47
47
  delta,
48
48
  table_type=TableType.PYARROW_PARQUET,
49
- storage_type=StorageType.DISTRIBUTED,
49
+ storage_type=StorageType.LOCAL,
50
50
  **deltacat_storage_kwargs,
51
51
  )
52
52
 
@@ -11,7 +11,10 @@ from deltacat.compute.compactor.model.round_completion_info import RoundCompleti
11
11
  from deltacat.compute.compactor_v2.utils.primary_key_index import (
12
12
  hash_group_index_to_hash_bucket_indices,
13
13
  )
14
- from deltacat.compute.compactor_v2.constants import TOTAL_MEMORY_BUFFER_PERCENTAGE
14
+ from deltacat.compute.compactor_v2.constants import (
15
+ TOTAL_MEMORY_BUFFER_PERCENTAGE,
16
+ PARQUET_TO_PYARROW_INFLATION,
17
+ )
15
18
 
16
19
 
17
20
  def _get_parquet_type_params_if_exist(
@@ -45,7 +48,19 @@ def _calculate_parquet_column_size(
45
48
  "Columns not found in the parquet data as "
46
49
  f"{columns_found} != {len(columns)}"
47
50
  )
48
- return column_size
51
+ return column_size * PARQUET_TO_PYARROW_INFLATION
52
+
53
+
54
+ def _get_task_options(
55
+ cpu: float, memory: float, ray_custom_resources: Optional[Dict] = None
56
+ ) -> Dict:
57
+
58
+ task_opts = {"num_cpus": cpu, "memory": memory}
59
+
60
+ if ray_custom_resources:
61
+ task_opts["resources"] = ray_custom_resources
62
+
63
+ return task_opts
49
64
 
50
65
 
51
66
  def estimate_manifest_entry_size_bytes(
@@ -57,7 +72,7 @@ def estimate_manifest_entry_size_bytes(
57
72
  type_params = _get_parquet_type_params_if_exist(entry=entry)
58
73
 
59
74
  if type_params:
60
- return type_params.in_memory_size_bytes
75
+ return type_params.in_memory_size_bytes * PARQUET_TO_PYARROW_INFLATION
61
76
 
62
77
  return entry.meta.content_length * previous_inflation
63
78
 
@@ -103,6 +118,7 @@ def hash_bucket_resource_options_provider(
103
118
  previous_inflation: float,
104
119
  average_record_size_bytes: float,
105
120
  primary_keys: List[str] = None,
121
+ ray_custom_resources: Optional[Dict] = None,
106
122
  **kwargs,
107
123
  ) -> Dict:
108
124
  size_bytes = 0.0
@@ -141,7 +157,7 @@ def hash_bucket_resource_options_provider(
141
157
  # Consider buffer
142
158
  total_memory = total_memory * (1 + TOTAL_MEMORY_BUFFER_PERCENTAGE / 100.0)
143
159
 
144
- return {"num_cpus": 0.01, "memory": total_memory}
160
+ return _get_task_options(0.01, total_memory, ray_custom_resources)
145
161
 
146
162
 
147
163
  def merge_resource_options_provider(
@@ -152,6 +168,7 @@ def merge_resource_options_provider(
152
168
  hash_group_num_rows: Dict[int, int],
153
169
  round_completion_info: Optional[RoundCompletionInfo] = None,
154
170
  compacted_delta_manifest: Optional[Manifest] = None,
171
+ ray_custom_resources: Optional[Dict] = None,
155
172
  primary_keys: Optional[List[str]] = None,
156
173
  deltacat_storage=unimplemented_deltacat_storage,
157
174
  deltacat_storage_kwargs: Optional[Dict] = {},
@@ -218,4 +235,4 @@ def merge_resource_options_provider(
218
235
 
219
236
  total_memory = total_memory * (1 + TOTAL_MEMORY_BUFFER_PERCENTAGE / 100.0)
220
237
 
221
- return {"num_cpus": 0.01, "memory": total_memory}
238
+ return _get_task_options(0.01, total_memory, ray_custom_resources)
@@ -8,6 +8,7 @@ from ray.types import ObjectRef
8
8
  from deltacat import logs
9
9
  from deltacat.aws import s3u as s3_utils
10
10
  from deltacat.aws.clients import client_cache
11
+ from deltacat.aws.constants import AWS_REGION
11
12
  from deltacat.compute.compactor import DeltaAnnotated
12
13
  from deltacat.compute.metastats.utils.io import (
13
14
  cache_inflation_rate_data_for_delta_stats_ready,
@@ -94,7 +95,7 @@ def start_stats_collection(
94
95
 
95
96
 
96
97
  def _get_account_id() -> str:
97
- client = client_cache("sts", None)
98
+ client = client_cache("sts", region_name=AWS_REGION)
98
99
  account_id = client.get_caller_identity()["Account"]
99
100
  return account_id
100
101
 
@@ -29,6 +29,7 @@ class MemcachedObjectStore(IObjectStore):
29
29
  self.port = port
30
30
  self.storage_node_ips = storage_node_ips
31
31
  self.hasher = None
32
+ logger.info(f"The storage node IPs: {self.storage_node_ips}")
32
33
  super().__init__()
33
34
 
34
35
  def initialize_hasher(self):
@@ -129,9 +130,13 @@ class MemcachedObjectStore(IObjectStore):
129
130
  base_client = Client((ip_address, self.port))
130
131
  client = RetryingClient(
131
132
  base_client,
132
- attempts=3,
133
- retry_delay=0.01,
134
- retry_for=[MemcacheUnexpectedCloseError, ConnectionResetError],
133
+ attempts=15,
134
+ retry_delay=1,
135
+ retry_for=[
136
+ MemcacheUnexpectedCloseError,
137
+ ConnectionResetError,
138
+ BrokenPipeError,
139
+ ],
135
140
  )
136
141
 
137
142
  self.client_cache[ip_address] = client
@@ -46,3 +46,5 @@ class TestClusterUtilizationOverTimeRange(unittest.TestCase):
46
46
  self.assertTrue(
47
47
  cu.total_vcpu_seconds >= cu.used_vcpu_seconds
48
48
  ) # total is greater than used
49
+ self.assertIsNotNone(cu.total_memory_gb_seconds)
50
+ self.assertIsNotNone(cu.used_memory_gb_seconds)
@@ -15,6 +15,7 @@ from resource import getrusage, RUSAGE_SELF
15
15
  import platform
16
16
  import psutil
17
17
  import schedule
18
+ from deltacat.constants import BYTES_PER_GIBIBYTE
18
19
 
19
20
 
20
21
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
@@ -73,9 +74,11 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
73
74
  def __init__(self) -> None:
74
75
  self.total_vcpu_seconds = 0.0
75
76
  self.used_vcpu_seconds = 0.0
77
+ self.total_memory_gb_seconds = 0.0
78
+ self.used_memory_gb_seconds = 0.0
76
79
 
77
80
  def __enter__(self) -> Any:
78
- schedule.every().second.do(self._update_vcpus)
81
+ schedule.every().second.do(self._update_resources)
79
82
  self.stop_run_schedules = self._run_schedule()
80
83
  return super().__enter__()
81
84
 
@@ -94,7 +97,7 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
94
97
 
95
98
  # It is not truely parallel(due to GIL Ref: https://wiki.python.org/moin/GlobalInterpreterLock)
96
99
  # even if we are using threading library. However, it averages out and gives a very good approximation.
97
- def _update_vcpus(self):
100
+ def _update_resources(self):
98
101
  cluster_resources = ray.cluster_resources()
99
102
  available_resources = ray.available_resources()
100
103
  if "CPU" not in cluster_resources:
@@ -104,14 +107,28 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
104
107
  self.used_vcpu_seconds = self.used_vcpu_seconds + float(
105
108
  str(cluster_resources["CPU"] - available_resources["CPU"])
106
109
  )
107
- self.total_vcpu_seconds = self.total_vcpu_seconds + float(
108
- str(cluster_resources["CPU"])
109
- )
110
- else:
111
- self.total_vcpu_seconds = self.total_vcpu_seconds + float(
112
- str(cluster_resources["CPU"])
110
+
111
+ self.total_vcpu_seconds = self.total_vcpu_seconds + float(
112
+ str(cluster_resources["CPU"])
113
+ )
114
+
115
+ if "memory" not in cluster_resources:
116
+ return
117
+
118
+ if "memory" in available_resources:
119
+ self.used_memory_gb_seconds = (
120
+ self.used_memory_gb_seconds
121
+ + float(
122
+ str(cluster_resources["memory"] - available_resources["memory"])
123
+ )
124
+ / BYTES_PER_GIBIBYTE
113
125
  )
114
126
 
127
+ self.total_memory_gb_seconds = (
128
+ self.total_memory_gb_seconds
129
+ + float(str(cluster_resources["memory"])) / BYTES_PER_GIBIBYTE
130
+ )
131
+
115
132
  def _run_schedule(self, interval: Optional[float] = 1.0):
116
133
  cease_continuous_run = threading.Event()
117
134
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 0.1.18b18
3
+ Version: 0.1.18b19
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -26,7 +26,7 @@ Requires-Dist: tenacity ==8.1.0
26
26
  Requires-Dist: typing-extensions ==4.4.0
27
27
  Requires-Dist: pymemcache ==4.0.0
28
28
  Requires-Dist: redis ==4.6.0
29
- Requires-Dist: getdaft ==0.1.15
29
+ Requires-Dist: getdaft ==0.1.16
30
30
  Requires-Dist: schedule ==1.2.0
31
31
 
32
32
  # DeltaCAT
@@ -1,10 +1,10 @@
1
- deltacat/__init__.py,sha256=vI-ObX4U4pRO-mTmsIu2kCyB_jc95ZwmHbqfwIV16i4,1781
1
+ deltacat/__init__.py,sha256=0iDCTG5vkP_WA3KAWsoKCN1p_8ZONakFWjeoGeVspDU,1781
2
2
  deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
3
3
  deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
4
4
  deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
5
5
  deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- deltacat/aws/clients.py,sha256=6a95FylQjv2Yg5QVXtCfCmqsvWLH9aueWGOHtQtlpzA,6376
7
- deltacat/aws/constants.py,sha256=4OnwC1H12FPs2bpVN7tXkxn-DAEJS4MYRrFh5HKFv7s,204
6
+ deltacat/aws/clients.py,sha256=1_h6SLlTWGuy9ZaUEtv96F34zfS4e8Rs94MFhLjSd7c,6376
7
+ deltacat/aws/constants.py,sha256=01jrMFz3ZduGA0c7vz1wCHR7gBoNCMXXLi4JFYzZA9M,267
8
8
  deltacat/aws/s3u.py,sha256=mdJrX9z5O8kh00jUL0w8CYBxKAemVYs26sRDzwSonfg,18390
9
9
  deltacat/aws/redshift/__init__.py,sha256=fjuv3jWdPE8IgF4uSrL0YEqV3XUfqDULX3xV27ICceo,266
10
10
  deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,10 +23,10 @@ deltacat/compute/compactor/__init__.py,sha256=ivpOPve1yKi3Vz3tVgp-eeFMNEeUSf-dlR
23
23
  deltacat/compute/compactor/compaction_session.py,sha256=aHCkhjcJ3kgRcDDJ6snSgmPts7nLvtm_oGTqoxA3-68,27408
24
24
  deltacat/compute/compactor/repartition_session.py,sha256=f5BTTGNv365qSuTioL7QUuVm-px_l8-zz-OC_p7gXt4,7240
25
25
  deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- deltacat/compute/compactor/model/compact_partition_params.py,sha256=qVm8ZCgsL7Taqq2xK6pADMYVI9NjW48qB7edPGM4sx4,13981
27
- deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=24yElKW5856O90wxgMha4WGiXtHPi5Y9k-dM2BINe8Q,30273
26
+ deltacat/compute/compactor/model/compact_partition_params.py,sha256=ASPbmI9GMNG7Ho6XEJHyGDXbrHe-ytRvhjq_Bys2Oec,14283
27
+ deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=GxAbBxEPJvCJoWqCJlojldYRQO635P915wy9XT9jgKE,31034
28
28
  deltacat/compute/compactor/model/dedupe_result.py,sha256=1OCV944qJdLQ_-8scisVKl45ej1eRv9OV539QYZtQ-U,292
29
- deltacat/compute/compactor/model/delta_annotated.py,sha256=3by7pj_LNk4yn87Q1CQ0Ck5YI9NOgREB5ZlFXFCsJwY,11989
29
+ deltacat/compute/compactor/model/delta_annotated.py,sha256=NERB9rOtYg-xzBwvqGJ7_hBOzBC7g6X5M9-Cq5pbdH8,12258
30
30
  deltacat/compute/compactor/model/delta_file_envelope.py,sha256=et1KXJLwheEpzvy8vNjlYcgGavvwaNElZZYaCu7kyVA,2821
31
31
  deltacat/compute/compactor/model/delta_file_locator.py,sha256=AmhPGPDsmahVhp91rohJMx4ByumcIY5feqRLZTrNu4s,1905
32
32
  deltacat/compute/compactor/model/hash_bucket_result.py,sha256=71qGmaT1Mks-r3-aatjNbn2x3yWIgT8RmV0bRWe6pdA,275
@@ -47,25 +47,25 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
47
47
  deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
48
48
  deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
49
49
  deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- deltacat/compute/compactor_v2/compaction_session.py,sha256=YqkorTbVyWsJt1wfJP9vWn__0DU7FxGdXCFJqzqeEr4,18916
51
- deltacat/compute/compactor_v2/constants.py,sha256=aHpQrGL_Lm4apUpTtp2WqUWLEfQJvZefpAvu2m-TNbU,1296
50
+ deltacat/compute/compactor_v2/compaction_session.py,sha256=rxIyLaOuoGEpZk-UdtGAPurJA7oB9X-Vkvr7hgWTlV0,19234
51
+ deltacat/compute/compactor_v2/constants.py,sha256=skNkIOkvyfGm4z086ekln7niMwHmfMErhV9H0k8b1cc,1471
52
52
  deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=pgE2o8Z9-Dvs75C15LAkmfuJFFi5pRIuuxA9GGyDlLM,2631
54
54
  deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
55
55
  deltacat/compute/compactor_v2/model/merge_input.py,sha256=A-_Oq54sx1vrT-Ewv2_yKARdIh928yJvEuheCkw5tvQ,5049
56
56
  deltacat/compute/compactor_v2/model/merge_result.py,sha256=L53i9iL_XpzqBr7HETixD5v5qfLvitkGcjoML_hHfcs,368
57
57
  deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=_3bTqf67T8PvdmDmPLQrdcDA0UbW3W8PTR_Brzv0a2o,7276
58
+ deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=NR-IDva6iB2YeNgxim_WsuZfEk5ooV8jAwzDJjdrsDE,7375
59
59
  deltacat/compute/compactor_v2/steps/merge.py,sha256=hgQiY2ui49HN-7ByIQlXVUCRbyrG7Jr61kohyGh6abY,17258
60
60
  deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=PY6xW1gCJJDHleau_eeGQbH9LXujxKKaeAJD5KLOcgA,2162
61
+ deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=rNKZisxGrLQOkwX8eHUQiFoTR1V-E66pMqWigtrs618,2156
62
62
  deltacat/compute/compactor_v2/utils/dedupe.py,sha256=A1xs5CU419h0nKv0B7R4tDkdgYAUIFQB_DWryRhpL98,1710
63
63
  deltacat/compute/compactor_v2/utils/io.py,sha256=4KV13VKwEtIzkwPJLJmEnp1dMOKHSxkEOQNQVbYrcwY,5177
64
64
  deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=NNF-h4zKRegVluAtXSDW4YRdOd4xJ6z_6uDaxfJyBiw,11122
65
- deltacat/compute/compactor_v2/utils/task_options.py,sha256=5QCxA84HdTshb0V2ycBsGxZjYyQ24Ibm7ycyajknHBU,7480
65
+ deltacat/compute/compactor_v2/utils/task_options.py,sha256=jjN-NNkMQetbcFgJMhHqVBqNdWfXFrRglMaFYjTfeBo,7987
66
66
  deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
67
  deltacat/compute/metastats/meta_stats.py,sha256=78hN3aN5wLHUFJsZXuv2JLeqA35HZ8mLUWJDMslMj5Q,18731
68
- deltacat/compute/metastats/stats.py,sha256=ftZs8ogLWKZgmrj5Dbrj27lp5wuQ6du4CNTAiQughZI,7299
68
+ deltacat/compute/metastats/stats.py,sha256=8iUiSXOAjqiEeNP5RIb5gvhykBgpNHD5IKkB8zsPR0E,7363
69
69
  deltacat/compute/metastats/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
70
  deltacat/compute/metastats/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  deltacat/compute/metastats/model/partition_stats_dict.py,sha256=FbfoOxmTZfjRT7iHwc_96gHmB_r6iUvVM9BoTldD5mY,1123
@@ -91,7 +91,7 @@ deltacat/compute/stats/utils/manifest_stats_file.py,sha256=PtqW5Zc5e09HcfiAgvoZH
91
91
  deltacat/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
92
  deltacat/io/dataset.py,sha256=8w9sPVDpGnjjGVDWB39YSKWxq4zRv9VEfDtj7PYwjqM,3755
93
93
  deltacat/io/file_object_store.py,sha256=HCFeXu9cWXPXVk54MHel_nw3-wIuzhMt2RI6jKzjRYM,1346
94
- deltacat/io/memcached_object_store.py,sha256=Vi_Yd4F3tbgRSwMPGjQ0BW5YZSgQ3Fdnh6-ra8pl4RU,4956
94
+ deltacat/io/memcached_object_store.py,sha256=eA0Ggk6BedFcyT_lbgScjRj2TSB2p45YwAs0B-qJEt0,5104
95
95
  deltacat/io/object_store.py,sha256=GX4pK-LY92s3uXRGcj8YsG2FFoiKfcJr2USIVz1ruGg,1380
96
96
  deltacat/io/ray_plasma_object_store.py,sha256=pupw7ulZY_EV5dERJDCCW_y_hzVx3Hl_uAvpQTNIh-E,705
97
97
  deltacat/io/read_api.py,sha256=BhkjL3xjY-fsa62AA9Yv20_88uTskn4_Bv2W6VmMXVA,7023
@@ -145,7 +145,7 @@ deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
145
145
  deltacat/tests/utils/test_daft.py,sha256=dfg4PYs6W4buBwj0FakTF2i7uFF6G4nj_48Dc8R11HQ,2852
146
146
  deltacat/tests/utils/test_pyarrow.py,sha256=QspOrQRc6tLM52-taHYgw5v_49qKgvdgW7E4eNSv5Mk,15708
147
147
  deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
148
- deltacat/tests/utils/test_resources.py,sha256=kMTIUmpx5-Y3ZkNPknM9Vj7Kx2OeG39Hjth8TCzoj5Y,1735
148
+ deltacat/tests/utils/test_resources.py,sha256=NMiJl9wlKNnK-edBaftN2CWxDhFmR2AGsseNsg8n-zg,1856
149
149
  deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
150
150
  deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
151
151
  deltacat/types/media.py,sha256=RALwafQ0SwMyPUIcENhURk7Sor_2CIfEMztvFUnvZFQ,2227
@@ -161,7 +161,7 @@ deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
161
161
  deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
162
162
  deltacat/utils/placement.py,sha256=S80CwD1eEK47lQNr0xTmF9kq092-z6lTTmOOBv8cW_o,11723
163
163
  deltacat/utils/pyarrow.py,sha256=Bm_B-gtI6enhlgDuzpSiaNUc20p1rHS-3Y6OdHySl0E,26352
164
- deltacat/utils/resources.py,sha256=OExdoecfKmQQCKeYX5slaOGgdQCodBpxo5pUr7_rBYQ,5511
164
+ deltacat/utils/resources.py,sha256=2nrYzDm6BPedrpsWM_4NxCZZ4cubVYitO1Myev0B5W0,6044
165
165
  deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
166
166
  deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
167
  deltacat/utils/ray_utils/collections.py,sha256=hj20s4D2RF2jZETU_44r6mFbsczA0JI_I_4kWKTmqes,1951
@@ -169,8 +169,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=MlpOHlKgJKSXzLsSR8mg4V_dWSVP_udrl
169
169
  deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
170
170
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
171
171
  deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
172
- deltacat-0.1.18b18.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
173
- deltacat-0.1.18b18.dist-info/METADATA,sha256=WaOJwxEXb5HDAY7Nnf1LaL5FyEhKARCl82nyXfB59HY,1740
174
- deltacat-0.1.18b18.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
175
- deltacat-0.1.18b18.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
176
- deltacat-0.1.18b18.dist-info/RECORD,,
172
+ deltacat-0.1.18b19.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
173
+ deltacat-0.1.18b19.dist-info/METADATA,sha256=117_kv8NZ-J8NXxHG2F_Msgm_XvyjUJ_f2oEXJZ2PbU,1740
174
+ deltacat-0.1.18b19.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
175
+ deltacat-0.1.18b19.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
176
+ deltacat-0.1.18b19.dist-info/RECORD,,