deltacat 1.1.17__py3-none-any.whl → 1.1.19__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (61)
  1. deltacat/__init__.py +1 -1
  2. deltacat/aws/constants.py +0 -1
  3. deltacat/compute/compactor/model/compact_partition_params.py +76 -0
  4. deltacat/compute/compactor/model/compaction_session_audit_info.py +26 -0
  5. deltacat/compute/compactor/model/delta_annotated.py +16 -9
  6. deltacat/compute/compactor_v2/constants.py +3 -0
  7. deltacat/compute/compactor_v2/private/compaction_utils.py +9 -5
  8. deltacat/compute/compactor_v2/utils/content_type_params.py +185 -34
  9. deltacat/compute/compactor_v2/utils/io.py +28 -14
  10. deltacat/compute/compactor_v2/utils/primary_key_index.py +9 -4
  11. deltacat/compute/compactor_v2/utils/task_options.py +128 -183
  12. deltacat/compute/resource_estimation/__init__.py +27 -0
  13. deltacat/compute/resource_estimation/delta.py +271 -0
  14. deltacat/compute/resource_estimation/manifest.py +394 -0
  15. deltacat/compute/resource_estimation/model.py +165 -0
  16. deltacat/compute/resource_estimation/parquet.py +108 -0
  17. deltacat/constants.py +5 -0
  18. deltacat/exceptions.py +2 -4
  19. deltacat/logs.py +8 -0
  20. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +77 -0
  21. deltacat/tests/compute/compact_partition_rebase_test_cases.py +308 -0
  22. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +159 -0
  23. deltacat/tests/compute/compactor_v2/test_compaction_session.py +157 -0
  24. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +3 -3
  25. deltacat/tests/compute/resource_estimation/test_delta.py +605 -0
  26. deltacat/tests/compute/resource_estimation/test_manifest.py +921 -0
  27. deltacat/tests/compute/test_compact_partition_rebase.py +13 -4
  28. deltacat/tests/compute/test_util_common.py +2 -0
  29. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -5
  30. deltacat/tests/test_logs.py +34 -0
  31. deltacat/tests/test_utils/pyarrow.py +15 -5
  32. {deltacat-1.1.17.dist-info → deltacat-1.1.19.dist-info}/METADATA +2 -2
  33. {deltacat-1.1.17.dist-info → deltacat-1.1.19.dist-info}/RECORD +38 -54
  34. deltacat/compute/metastats/meta_stats.py +0 -479
  35. deltacat/compute/metastats/model/__init__.py +0 -0
  36. deltacat/compute/metastats/model/partition_stats_dict.py +0 -34
  37. deltacat/compute/metastats/model/stats_cluster_size_estimator.py +0 -68
  38. deltacat/compute/metastats/stats.py +0 -182
  39. deltacat/compute/metastats/utils/__init__.py +0 -0
  40. deltacat/compute/metastats/utils/constants.py +0 -16
  41. deltacat/compute/metastats/utils/io.py +0 -223
  42. deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +0 -18
  43. deltacat/compute/metastats/utils/ray_utils.py +0 -129
  44. deltacat/compute/stats/basic.py +0 -226
  45. deltacat/compute/stats/models/__init__.py +0 -0
  46. deltacat/compute/stats/models/delta_column_stats.py +0 -98
  47. deltacat/compute/stats/models/delta_stats.py +0 -233
  48. deltacat/compute/stats/models/delta_stats_cache_result.py +0 -49
  49. deltacat/compute/stats/models/manifest_entry_stats.py +0 -72
  50. deltacat/compute/stats/models/stats_result.py +0 -104
  51. deltacat/compute/stats/utils/__init__.py +0 -0
  52. deltacat/compute/stats/utils/intervals.py +0 -94
  53. deltacat/compute/stats/utils/io.py +0 -230
  54. deltacat/compute/stats/utils/manifest_stats_file.py +0 -100
  55. deltacat/tests/stats/__init__.py +0 -0
  56. deltacat/tests/stats/test_intervals.py +0 -49
  57. /deltacat/{compute/metastats → tests/compute/resource_estimation}/__init__.py +0 -0
  58. /deltacat/{compute/metastats/config → tests/compute/resource_estimation/data}/__init__.py +0 -0
  59. {deltacat-1.1.17.dist-info → deltacat-1.1.19.dist-info}/LICENSE +0 -0
  60. {deltacat-1.1.17.dist-info → deltacat-1.1.19.dist-info}/WHEEL +0 -0
  61. {deltacat-1.1.17.dist-info → deltacat-1.1.19.dist-info}/top_level.txt +0 -0
deltacat/compute/compactor_v2/utils/task_options.py
@@ -4,11 +4,8 @@ from deltacat import logs
 from deltacat.compute.compactor_v2.model.merge_file_group import (
     LocalMergeFileGroupsProvider,
 )
-from deltacat.types.media import ContentEncoding, ContentType
-from deltacat.types.partial_download import PartialParquetParameters
 from deltacat.storage import (
     Manifest,
-    ManifestEntry,
     interface as unimplemented_deltacat_storage,
 )
 from deltacat.compute.compactor.model.delta_annotated import DeltaAnnotated
@@ -16,50 +13,25 @@ from deltacat.compute.compactor.model.round_completion_info import RoundCompleti
 from deltacat.compute.compactor_v2.utils.primary_key_index import (
     hash_group_index_to_hash_bucket_indices,
 )
-from deltacat.compute.compactor_v2.constants import (
-    PARQUET_TO_PYARROW_INFLATION,
+from deltacat.compute.resource_estimation.manifest import (
+    estimate_manifest_entry_num_rows,
+    estimate_manifest_entry_size_bytes,
+    estimate_manifest_entry_column_size_bytes,
+)
+from deltacat.compute.resource_estimation.model import (
+    EstimateResourcesParams,
+    OperationType,
 )
 from deltacat.exceptions import RetryableError
 
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
 
-def _get_parquet_type_params_if_exist(
-    entry: ManifestEntry,
-) -> Optional[PartialParquetParameters]:
-    if (
-        entry.meta
-        and entry.meta.content_type == ContentType.PARQUET
-        and entry.meta.content_encoding == ContentEncoding.IDENTITY
-        and entry.meta.content_type_parameters
-    ):
-        for type_params in entry.meta.content_type_parameters:
-            if isinstance(type_params, PartialParquetParameters):
-                return type_params
-    return None
-
-
-def _calculate_parquet_column_size(
-    type_params: PartialParquetParameters, columns: List[str]
-):
-    column_size = 0.0
-    for rg in type_params.row_groups_to_download:
-        columns_found = 0
-        row_group_meta = type_params.pq_metadata.row_group(rg)
-        for col in range(row_group_meta.num_columns):
-            column_meta = row_group_meta.column(col)
-            if column_meta.path_in_schema in columns:
-                columns_found += 1
-                column_size += column_meta.total_uncompressed_size
-        assert columns_found == len(columns), (
-            "Columns not found in the parquet data as "
-            f"{columns_found} != {len(columns)}"
-        )
-    return column_size * PARQUET_TO_PYARROW_INFLATION
-
-
-def get_task_options(
-    cpu: float, memory: float, ray_custom_resources: Optional[Dict] = None
+def _get_task_options(
+    cpu: float,
+    memory: float,
+    ray_custom_resources: Optional[Dict] = None,
+    scheduling_strategy: str = "SPREAD",
 ) -> Dict:
 
     # NOTE: With DEFAULT scheduling strategy in Ray 2.20.0, autoscaler does
@@ -67,7 +39,11 @@ def get_task_options(
     # 20 tasks get scheduled out of 100 tasks in queue. Hence, we use SPREAD
     # which is also ideal for merge and hash bucket tasks.
     # https://docs.ray.io/en/latest/ray-core/scheduling/index.html
-    task_opts = {"num_cpus": cpu, "memory": memory, "scheduling_strategy": "SPREAD"}
+    task_opts = {
+        "num_cpus": cpu,
+        "memory": memory,
+        "scheduling_strategy": scheduling_strategy,
+    }
 
     if ray_custom_resources:
         task_opts["resources"] = ray_custom_resources
@@ -81,53 +57,110 @@ def get_task_options(
     return task_opts
 
 
-def estimate_manifest_entry_size_bytes(
-    entry: ManifestEntry, previous_inflation: float, **kwargs
-) -> float:
-    if entry.meta.source_content_length:
-        return entry.meta.source_content_length
-
-    type_params = _get_parquet_type_params_if_exist(entry=entry)
-
-    if type_params:
-        return type_params.in_memory_size_bytes * PARQUET_TO_PYARROW_INFLATION
+def _get_merge_task_options(
+    index: int,
+    hb_group_idx: int,
+    data_size: float,
+    pk_size_bytes: float,
+    num_rows: int,
+    num_hash_groups: int,
+    total_memory_buffer_percentage: int,
+    incremental_index_array_size: int,
+    debug_memory_params: Dict[str, Any],
+    ray_custom_resources: Optional[Dict],
+    estimate_resources_params: EstimateResourcesParams,
+    round_completion_info: Optional[RoundCompletionInfo] = None,
+    compacted_delta_manifest: Optional[Manifest] = None,
+    primary_keys: Optional[List[str]] = None,
+    deltacat_storage=unimplemented_deltacat_storage,
+    deltacat_storage_kwargs: Optional[Dict] = {},
+    memory_logs_enabled: Optional[bool] = None,
+) -> Dict[str, Any]:
+    if (
+        round_completion_info
+        and compacted_delta_manifest
+        and round_completion_info.hb_index_to_entry_range
+    ):
 
-    return entry.meta.content_length * previous_inflation
+        previous_inflation = (
+            round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
+            / round_completion_info.compacted_pyarrow_write_result.file_bytes
+        )
+        debug_memory_params["previous_inflation"] = previous_inflation
 
+        average_record_size = (
+            round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
+            / round_completion_info.compacted_pyarrow_write_result.records
+        )
+        debug_memory_params["average_record_size"] = average_record_size
 
-def estimate_manifest_entry_num_rows(
-    entry: ManifestEntry,
-    average_record_size_bytes: float,
-    previous_inflation: float,
-    **kwargs,
-) -> int:
-    if entry.meta.record_count:
-        return entry.meta.record_count
+        iterable = hash_group_index_to_hash_bucket_indices(
+            hb_group_idx, round_completion_info.hash_bucket_count, num_hash_groups
+        )
 
-    type_params = _get_parquet_type_params_if_exist(entry=entry)
+        for hb_idx in iterable:
+            if round_completion_info.hb_index_to_entry_range.get(str(hb_idx)) is None:
+                continue
 
-    if type_params:
-        return type_params.num_rows
+            entry_start, entry_end = round_completion_info.hb_index_to_entry_range[
+                str(hb_idx)
+            ]
+            for entry_index in range(entry_start, entry_end):
+                entry = compacted_delta_manifest.entries[entry_index]
 
-    total_size_bytes = estimate_manifest_entry_size_bytes(
-        entry=entry, previous_inflation=previous_inflation, **kwargs
-    )
+                current_entry_size = estimate_manifest_entry_size_bytes(
+                    entry=entry,
+                    operation_type=OperationType.PYARROW_DOWNLOAD,
+                    estimate_resources_params=estimate_resources_params,
+                )
+                current_entry_rows = estimate_manifest_entry_num_rows(
+                    entry=entry,
+                    operation_type=OperationType.PYARROW_DOWNLOAD,
+                    estimate_resources_params=estimate_resources_params,
+                )
 
-    return int(total_size_bytes / average_record_size_bytes)
+                data_size += current_entry_size
+                num_rows += current_entry_rows
 
+                if primary_keys:
+                    pk_size = estimate_manifest_entry_column_size_bytes(
+                        entry=entry,
+                        columns=primary_keys,
+                        operation_type=OperationType.PYARROW_DOWNLOAD,
+                        estimate_resources_params=estimate_resources_params,
+                    )
 
-def estimate_manifest_entry_column_size_bytes(
-    entry: ManifestEntry, columns: Optional[List[str]] = None
-) -> Optional[float]:
-    if not columns:
-        return 0
+                    if pk_size is None:
+                        pk_size_bytes += current_entry_size
+                    else:
+                        pk_size_bytes += pk_size
 
-    type_params = _get_parquet_type_params_if_exist(entry=entry)
+    # total data downloaded + primary key hash column + pyarrow-to-numpy conversion
+    # + primary key column + hashlib inefficiency + dict size for merge + incremental index array size
+    total_memory = (
+        data_size
+        + pk_size_bytes
+        + pk_size_bytes
+        + num_rows * 20
+        + num_rows * 20
+        + num_rows * 20
+        + incremental_index_array_size
+    )
+    debug_memory_params["data_size"] = data_size
+    debug_memory_params["num_rows"] = num_rows
+    debug_memory_params["pk_size_bytes"] = pk_size_bytes
+    debug_memory_params["incremental_index_array_size"] = incremental_index_array_size
+    debug_memory_params["total_memory"] = total_memory
+    debug_memory_params["estimate_resources_params"] = estimate_resources_params
 
-    if type_params and type_params.pq_metadata:
-        return _calculate_parquet_column_size(type_params=type_params, columns=columns)
+    total_memory = total_memory * (1 + total_memory_buffer_percentage / 100.0)
+    debug_memory_params["total_memory_with_buffer"] = total_memory
+    logger.debug_conditional(
+        f"[Merge task {index}]: Params used for calculating merge memory: {debug_memory_params}",
+        memory_logs_enabled,
+    )
 
-    return None
+    return _get_task_options(0.01, total_memory, ray_custom_resources)
 
 
 def hash_bucket_resource_options_provider(
@@ -136,6 +169,7 @@ def hash_bucket_resource_options_provider(
     previous_inflation: float,
     average_record_size_bytes: float,
     total_memory_buffer_percentage: int,
+    estimate_resources_params: EstimateResourcesParams,
     primary_keys: List[str] = None,
     ray_custom_resources: Optional[Dict] = None,
     memory_logs_enabled: Optional[bool] = None,
@@ -153,19 +187,25 @@ def hash_bucket_resource_options_provider(
 
     for entry in item.manifest.entries:
         entry_size = estimate_manifest_entry_size_bytes(
-            entry=entry, previous_inflation=previous_inflation
+            entry=entry,
+            operation_type=OperationType.PYARROW_DOWNLOAD,
+            estimate_resources_params=estimate_resources_params,
+            **kwargs,
        )
         num_rows += estimate_manifest_entry_num_rows(
             entry=entry,
-            previous_inflation=previous_inflation,
-            average_record_size_bytes=average_record_size_bytes,
+            operation_type=OperationType.PYARROW_DOWNLOAD,
+            estimate_resources_params=estimate_resources_params,
+            **kwargs,
         )
         size_bytes += entry_size
 
         if primary_keys:
             pk_size = estimate_manifest_entry_column_size_bytes(
                 entry=entry,
+                operation_type=OperationType.PYARROW_DOWNLOAD,
                 columns=primary_keys,
+                estimate_resources_params=estimate_resources_params,
             )
 
             if pk_size is None:
@@ -187,6 +227,7 @@ def hash_bucket_resource_options_provider(
     debug_memory_params["num_rows"] = num_rows
     debug_memory_params["total_pk_size"] = total_pk_size
     debug_memory_params["total_memory"] = total_memory
+    debug_memory_params["estimate_resources_params"] = estimate_resources_params
 
     debug_memory_params["previous_inflation"] = previous_inflation
     debug_memory_params["average_record_size_bytes"] = average_record_size_bytes
@@ -199,7 +240,7 @@ def hash_bucket_resource_options_provider(
         memory_logs_enabled,
     )
 
-    return get_task_options(0.01, total_memory, ray_custom_resources)
+    return _get_task_options(0.01, total_memory, ray_custom_resources)
 
 
 def merge_resource_options_provider(
@@ -209,6 +250,7 @@ def merge_resource_options_provider(
     hash_group_size_bytes: Dict[int, int],
     hash_group_num_rows: Dict[int, int],
     total_memory_buffer_percentage: int,
+    estimate_resources_params: EstimateResourcesParams,
    round_completion_info: Optional[RoundCompletionInfo] = None,
     compacted_delta_manifest: Optional[Manifest] = None,
     ray_custom_resources: Optional[Dict] = None,
@@ -230,7 +272,7 @@ def merge_resource_options_provider(
     pk_size_bytes = data_size
     incremental_index_array_size = num_rows * 4
 
-    return get_merge_task_options(
+    return _get_merge_task_options(
         index,
         hb_group_idx,
         data_size,
@@ -247,6 +289,7 @@ def merge_resource_options_provider(
         deltacat_storage=deltacat_storage,
         deltacat_storage_kwargs=deltacat_storage_kwargs,
         memory_logs_enabled=memory_logs_enabled,
+        estimate_resources_params=estimate_resources_params,
     )
 
 
@@ -254,6 +297,7 @@ def local_merge_resource_options_provider(
     estimated_da_size: float,
     estimated_num_rows: int,
     total_memory_buffer_percentage: int,
+    estimate_resources_params: EstimateResourcesParams,
     round_completion_info: Optional[RoundCompletionInfo] = None,
     compacted_delta_manifest: Optional[Manifest] = None,
     ray_custom_resources: Optional[Dict] = None,
@@ -270,7 +314,7 @@ def local_merge_resource_options_provider(
     pk_size_bytes = estimated_da_size
     incremental_index_array_size = estimated_num_rows * 4
 
-    return get_merge_task_options(
+    return _get_merge_task_options(
         index=index,
         hb_group_idx=hb_group_idx,
         data_size=estimated_da_size,
@@ -287,104 +331,5 @@ def local_merge_resource_options_provider(
         deltacat_storage=deltacat_storage,
         deltacat_storage_kwargs=deltacat_storage_kwargs,
         memory_logs_enabled=memory_logs_enabled,
+        estimate_resources_params=estimate_resources_params,
     )
-
-
-def get_merge_task_options(
-    index: int,
-    hb_group_idx: int,
-    data_size: float,
-    pk_size_bytes: float,
-    num_rows: int,
-    num_hash_groups: int,
-    total_memory_buffer_percentage: int,
-    incremental_index_array_size: int,
-    debug_memory_params: Dict[str, Any],
-    ray_custom_resources: Optional[Dict],
-    round_completion_info: Optional[RoundCompletionInfo] = None,
-    compacted_delta_manifest: Optional[Manifest] = None,
-    primary_keys: Optional[List[str]] = None,
-    deltacat_storage=unimplemented_deltacat_storage,
-    deltacat_storage_kwargs: Optional[Dict] = {},
-    memory_logs_enabled: Optional[bool] = None,
-) -> Dict[str, Any]:
-    if (
-        round_completion_info
-        and compacted_delta_manifest
-        and round_completion_info.hb_index_to_entry_range
-    ):
-
-        previous_inflation = (
-            round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
-            / round_completion_info.compacted_pyarrow_write_result.file_bytes
-        )
-        debug_memory_params["previous_inflation"] = previous_inflation
-
-        average_record_size = (
-            round_completion_info.compacted_pyarrow_write_result.pyarrow_bytes
-            / round_completion_info.compacted_pyarrow_write_result.records
-        )
-        debug_memory_params["average_record_size"] = average_record_size
-
-        iterable = hash_group_index_to_hash_bucket_indices(
-            hb_group_idx, round_completion_info.hash_bucket_count, num_hash_groups
-        )
-
-        for hb_idx in iterable:
-            if round_completion_info.hb_index_to_entry_range.get(str(hb_idx)) is None:
-                continue
-
-            entry_start, entry_end = round_completion_info.hb_index_to_entry_range[
-                str(hb_idx)
-            ]
-            for entry_index in range(entry_start, entry_end):
-                entry = compacted_delta_manifest.entries[entry_index]
-
-                current_entry_size = estimate_manifest_entry_size_bytes(
-                    entry=entry, previous_inflation=previous_inflation
-                )
-                current_entry_rows = estimate_manifest_entry_num_rows(
-                    entry=entry,
-                    average_record_size_bytes=average_record_size,
-                    previous_inflation=previous_inflation,
-                )
-
-                data_size += current_entry_size
-                num_rows += current_entry_rows
-
-                if primary_keys:
-                    pk_size = estimate_manifest_entry_column_size_bytes(
-                        entry=entry,
-                        columns=primary_keys,
-                    )
-
-                    if pk_size is None:
-                        pk_size_bytes += current_entry_size
-                    else:
-                        pk_size_bytes += pk_size
-
-    # total data downloaded + primary key hash column + pyarrow-to-numpy conversion
-    # + primary key column + hashlib inefficiency + dict size for merge + incremental index array size
-    total_memory = (
-        data_size
-        + pk_size_bytes
-        + pk_size_bytes
-        + num_rows * 20
-        + num_rows * 20
-        + num_rows * 20
-        + incremental_index_array_size
-    )
-    debug_memory_params["data_size"] = data_size
-    debug_memory_params["num_rows"] = num_rows
-    debug_memory_params["pk_size_bytes"] = pk_size_bytes
-    debug_memory_params["incremental_index_array_size"] = incremental_index_array_size
-    debug_memory_params["total_memory"] = total_memory
-
-    total_memory = total_memory * (1 + total_memory_buffer_percentage / 100.0)
-    debug_memory_params["total_memory_with_buffer"] = total_memory
-    logger.debug_conditional(
-        f"[Merge task {index}]: Params used for calculating merge memory: {debug_memory_params}",
-        memory_logs_enabled,
-    )
-
-    return get_task_options(0.01, total_memory, ray_custom_resources)
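
The merge memory formula above budgets one copy of the downloaded data, two copies of the primary key bytes (the hash column plus its pyarrow-to-numpy conversion), three 20-byte-per-row overheads (primary key column, hashlib inefficiency, and the merge dict), and the incremental index array. A minimal worked sketch of that arithmetic, where every input value is an illustrative assumption, not a number from this release:

# Illustrative sketch of the merge-task memory estimate; all inputs are made up.
data_size = 2_000_000_000                    # bytes of manifest data downloaded
pk_size_bytes = 200_000_000                  # estimated primary key column bytes
num_rows = 10_000_000
incremental_index_array_size = num_rows * 4  # as computed by the options providers

total_memory = (
    data_size
    + pk_size_bytes                          # primary key hash column
    + pk_size_bytes                          # pyarrow-to-numpy conversion
    + num_rows * 20                          # primary key column
    + num_rows * 20                          # hashlib inefficiency
    + num_rows * 20                          # dict size for merge
    + incremental_index_array_size
)

total_memory_buffer_percentage = 30          # hypothetical buffer
total_memory *= 1 + total_memory_buffer_percentage / 100.0
print(f"{total_memory / 1024 ** 3:.2f} GiB")  # ~3.68 GiB

The buffered total is what `_get_task_options(0.01, total_memory, ray_custom_resources)` then passes to Ray as the task's `memory` requirement, alongside a nominal 0.01 CPU.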
deltacat/compute/resource_estimation/__init__.py
@@ -0,0 +1,27 @@
+from deltacat.compute.resource_estimation.model import (
+    ResourceEstimationMethod,
+    EstimatedResources,
+    Statistics,
+    EstimateResourcesParams,
+    OperationType,
+)
+from deltacat.compute.resource_estimation.manifest import (
+    estimate_manifest_entry_column_size_bytes,
+    estimate_manifest_entry_num_rows,
+    estimate_manifest_entry_size_bytes,
+)
+from deltacat.compute.resource_estimation.delta import (
+    estimate_resources_required_to_process_delta,
+)
+
+__all__ = [
+    "ResourceEstimationMethod",
+    "EstimatedResources",
+    "EstimateResourcesParams",
+    "Statistics",
+    "estimate_resources_required_to_process_delta",
+    "estimate_manifest_entry_size_bytes",
+    "estimate_manifest_entry_num_rows",
+    "estimate_manifest_entry_column_size_bytes",
+    "OperationType",
+]
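
The new `deltacat.compute.resource_estimation` package consolidates the estimation helpers that `task_options.py` previously implemented inline, keyed by an `OperationType` and an `EstimateResourcesParams` bundle instead of loose `previous_inflation`/`average_record_size_bytes` arguments. A minimal usage sketch, assuming a `deltacat.storage.Delta` named `delta` is already in hand; the `EstimateResourcesParams.of(...)` field names and the `ResourceEstimationMethod.DEFAULT` value are assumptions inferred from this diff rather than a documented contract, and storage arguments are omitted for brevity:

from deltacat.compute.resource_estimation import (
    EstimateResourcesParams,
    OperationType,
    ResourceEstimationMethod,
    estimate_resources_required_to_process_delta,
)

# Assumed parameter bundle; field names are assumptions, not confirmed API.
params = EstimateResourcesParams.of(
    resource_estimation_method=ResourceEstimationMethod.DEFAULT,
    previous_inflation=7,            # fallback on-disk -> in-memory size ratio
    average_record_size_bytes=1000,  # fallback for row-count estimation
)

# `delta` is a hypothetical deltacat.storage.Delta to be compacted.
estimated = estimate_resources_required_to_process_delta(
    delta=delta,
    operation_type=OperationType.PYARROW_DOWNLOAD,
    estimate_resources_params=params,
)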