deltacat 1.0.2__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. deltacat/__init__.py +1 -1
  2. deltacat/compute/compactor/model/compact_partition_params.py +25 -0
  3. deltacat/compute/compactor/model/compaction_session_audit_info.py +11 -0
  4. deltacat/compute/compactor/model/delta_file_envelope.py +21 -3
  5. deltacat/compute/compactor/model/table_object_store.py +51 -0
  6. deltacat/compute/compactor/utils/io.py +1 -1
  7. deltacat/compute/compactor_v2/compaction_session.py +80 -14
  8. deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  9. deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +83 -0
  10. deltacat/compute/compactor_v2/deletes/delete_strategy.py +82 -0
  11. deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +161 -0
  12. deltacat/compute/compactor_v2/deletes/model.py +23 -0
  13. deltacat/compute/compactor_v2/deletes/utils.py +164 -0
  14. deltacat/compute/compactor_v2/model/hash_bucket_input.py +6 -0
  15. deltacat/compute/compactor_v2/model/merge_input.py +24 -1
  16. deltacat/compute/compactor_v2/model/merge_result.py +1 -0
  17. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -6
  18. deltacat/compute/compactor_v2/steps/merge.py +221 -50
  19. deltacat/compute/compactor_v2/utils/delta.py +11 -1
  20. deltacat/compute/compactor_v2/utils/merge.py +10 -0
  21. deltacat/compute/compactor_v2/utils/task_options.py +94 -8
  22. deltacat/io/memcached_object_store.py +20 -0
  23. deltacat/io/ray_plasma_object_store.py +6 -0
  24. deltacat/logs.py +29 -2
  25. deltacat/storage/__init__.py +3 -0
  26. deltacat/storage/interface.py +2 -0
  27. deltacat/storage/model/delete_parameters.py +40 -0
  28. deltacat/storage/model/delta.py +25 -1
  29. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +1930 -0
  30. deltacat/tests/compute/compact_partition_test_cases.py +16 -822
  31. deltacat/tests/compute/compactor/utils/test_io.py +4 -4
  32. deltacat/tests/compute/test_compact_partition_incremental.py +4 -0
  33. deltacat/tests/compute/test_compact_partition_params.py +5 -0
  34. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +32 -20
  35. deltacat/tests/compute/test_util_create_table_deltas_repo.py +28 -10
  36. deltacat/tests/io/test_memcached_object_store.py +19 -0
  37. deltacat/tests/local_deltacat_storage/__init__.py +3 -0
  38. deltacat/tests/test_utils/constants.py +1 -2
  39. deltacat/tests/test_utils/pyarrow.py +27 -10
  40. deltacat/utils/pandas.py +1 -1
  41. deltacat/utils/ray_utils/runtime.py +3 -3
  42. deltacat/utils/resources.py +7 -5
  43. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/METADATA +1 -1
  44. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/RECORD +47 -38
  45. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/LICENSE +0 -0
  46. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/WHEEL +0 -0
  47. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  import unittest
2
2
  from unittest import mock
3
- from deltacat.tests.test_utils.constants import TEST_DELTA
3
+ from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
4
4
  from typing import Any, Dict
5
5
 
6
6
  DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
@@ -41,7 +41,7 @@ class TestFitInputDeltas(unittest.TestCase):
41
41
  high_watermark,
42
42
  require_multiple_rounds,
43
43
  ) = io.fit_input_deltas(
44
- [TEST_DELTA],
44
+ [TEST_UPSERT_DELTA],
45
45
  {"CPU": 1, "memory": 20000000},
46
46
  self.COMPACTION_AUDIT,
47
47
  None,
@@ -68,7 +68,7 @@ class TestFitInputDeltas(unittest.TestCase):
68
68
  high_watermark,
69
69
  require_multiple_rounds,
70
70
  ) = io.fit_input_deltas(
71
- [TEST_DELTA],
71
+ [TEST_UPSERT_DELTA],
72
72
  {"CPU": 1, "memory": 20000000},
73
73
  self.COMPACTION_AUDIT,
74
74
  20,
@@ -91,7 +91,7 @@ class TestFitInputDeltas(unittest.TestCase):
91
91
  high_watermark,
92
92
  require_multiple_rounds,
93
93
  ) = io.fit_input_deltas(
94
- [TEST_DELTA],
94
+ [TEST_UPSERT_DELTA],
95
95
  {"CPU": 2, "memory": 10},
96
96
  self.COMPACTION_AUDIT,
97
97
  20,
@@ -100,6 +100,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
100
100
  "input_deltas",
101
101
  "input_deltas_delta_type",
102
102
  "expected_terminal_compact_partition_result",
103
+ "expected_terminal_exception",
103
104
  "create_placement_group_param",
104
105
  "records_per_compacted_file_param",
105
106
  "hash_bucket_count_param",
@@ -118,6 +119,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
118
119
  input_deltas_param,
119
120
  input_deltas_delta_type,
120
121
  expected_terminal_compact_partition_result,
122
+ expected_terminal_exception,
121
123
  create_placement_group_param,
122
124
  records_per_compacted_file_param,
123
125
  hash_bucket_count_param,
@@ -134,6 +136,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
134
136
  input_deltas_param,
135
137
  input_deltas_delta_type,
136
138
  expected_terminal_compact_partition_result,
139
+ expected_terminal_exception,
137
140
  create_placement_group_param,
138
141
  records_per_compacted_file_param,
139
142
  hash_bucket_count_param,
@@ -157,6 +160,7 @@ def test_compact_partition_incremental(
157
160
  input_deltas: pa.Table,
158
161
  input_deltas_delta_type: str,
159
162
  expected_terminal_compact_partition_result: pa.Table,
163
+ expected_terminal_exception: BaseException,
160
164
  create_placement_group_param: bool,
161
165
  records_per_compacted_file_param: int,
162
166
  hash_bucket_count_param: int,
@@ -72,6 +72,7 @@ class TestCompactPartitionParams(unittest.TestCase):
72
72
  "partitionValues": [],
73
73
  "partitionId": "79612ea39ac5493eae925abe60767d42",
74
74
  },
75
+ "memory_logs_enabled": True,
75
76
  "metrics_config": MetricsConfig("us-east-1", MetricsTarget.CLOUDWATCH_EMF),
76
77
  }
77
78
 
@@ -135,6 +136,10 @@ class TestCompactPartitionParams(unittest.TestCase):
135
136
  json.loads(serialized_params)["destination_partition_locator"]
136
137
  == params.destination_partition_locator
137
138
  )
139
+ assert (
140
+ json.loads(serialized_params)["memory_logs_enabled"]
141
+ == params.memory_logs_enabled
142
+ )
138
143
  assert (
139
144
  json.loads(serialized_params)["metrics_config"]["metrics_target"]
140
145
  == params.metrics_config.metrics_target
@@ -5,6 +5,7 @@ import pytest
5
5
  import boto3
6
6
  from boto3.resources.base import ServiceResource
7
7
  import pyarrow as pa
8
+ from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
8
9
  from pytest_benchmark.fixture import BenchmarkFixture
9
10
 
10
11
  from deltacat.tests.compute.test_util_constant import (
@@ -15,6 +16,7 @@ from deltacat.tests.compute.test_util_constant import (
15
16
  DEFAULT_NUM_WORKERS,
16
17
  DEFAULT_WORKER_INSTANCE_CPUS,
17
18
  )
19
+ from deltacat.compute.compactor.model.compactor_version import CompactorVersion
18
20
  from deltacat.tests.compute.test_util_common import (
19
21
  get_rcf,
20
22
  )
@@ -28,11 +30,14 @@ from deltacat.tests.compute.test_util_create_table_deltas_repo import (
28
30
  from deltacat.tests.compute.test_util_create_table_deltas_repo import (
29
31
  create_src_w_deltas_destination_rebase_w_deltas_strategy,
30
32
  )
31
- from deltacat.tests.compute.compact_partition_test_cases import (
33
+ from deltacat.tests.compute.compact_partition_rebase_then_incremental_test_cases import (
32
34
  REBASE_THEN_INCREMENTAL_TEST_CASES,
33
35
  )
34
- from typing import Any, Callable, Dict, List, Optional, Set
36
+ from typing import Any, Callable, Dict, List, Optional, Set, Tuple
35
37
  from deltacat.types.media import StorageType
38
+ from deltacat.storage import (
39
+ DeltaType,
40
+ )
36
41
 
37
42
  DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
38
43
  "db_file_path",
@@ -89,7 +94,7 @@ FUNCTION scoped fixtures
89
94
 
90
95
 
91
96
  @pytest.fixture(scope="function")
92
- def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
97
+ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
93
98
  # see deltacat/tests/local_deltacat_storage/README.md for documentation
94
99
  kwargs_for_local_deltacat_storage: Dict[str, Any] = {
95
100
  DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
@@ -109,6 +114,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
109
114
  "input_deltas_param",
110
115
  "input_deltas_delta_type",
111
116
  "expected_terminal_compact_partition_result",
117
+ "expected_terminal_exception",
112
118
  "create_placement_group_param",
113
119
  "records_per_compacted_file_param",
114
120
  "hash_bucket_count_param",
@@ -116,7 +122,6 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
116
122
  "drop_duplicates_param",
117
123
  "skip_enabled_compact_partition_drivers",
118
124
  "incremental_deltas",
119
- "incremental_deltas_delta_type",
120
125
  "rebase_expected_compact_partition_result",
121
126
  "compact_partition_func",
122
127
  ],
@@ -130,6 +135,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
130
135
  input_deltas,
131
136
  input_deltas_delta_type,
132
137
  expected_terminal_compact_partition_result,
138
+ expected_terminal_exception,
133
139
  create_placement_group_param,
134
140
  records_per_compacted_file_param,
135
141
  hash_bucket_count_param,
@@ -137,7 +143,6 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
137
143
  read_kwargs_provider,
138
144
  skip_enabled_compact_partition_drivers,
139
145
  incremental_deltas,
140
- incremental_deltas_delta_type,
141
146
  rebase_expected_compact_partition_result,
142
147
  compact_partition_func,
143
148
  )
@@ -149,6 +154,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
149
154
  input_deltas,
150
155
  input_deltas_delta_type,
151
156
  expected_terminal_compact_partition_result,
157
+ expected_terminal_exception,
152
158
  create_placement_group_param,
153
159
  records_per_compacted_file_param,
154
160
  hash_bucket_count_param,
@@ -156,17 +162,15 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
156
162
  read_kwargs_provider,
157
163
  skip_enabled_compact_partition_drivers,
158
164
  incremental_deltas,
159
- incremental_deltas_delta_type,
160
165
  rebase_expected_compact_partition_result,
161
166
  compact_partition_func,
162
167
  ) in REBASE_THEN_INCREMENTAL_TEST_CASES.items()
163
168
  ],
164
169
  ids=[test_name for test_name in REBASE_THEN_INCREMENTAL_TEST_CASES],
165
- indirect=[],
166
170
  )
167
171
  def test_compact_partition_rebase_then_incremental(
168
172
  setup_s3_resource: ServiceResource,
169
- offer_local_deltacat_storage_kwargs: Dict[str, Any],
173
+ local_deltacat_storage_kwargs: Dict[str, Any],
170
174
  test_name: str,
171
175
  primary_keys: Set[str],
172
176
  sort_keys: List[Optional[Any]],
@@ -175,15 +179,15 @@ def test_compact_partition_rebase_then_incremental(
175
179
  input_deltas_param: List[pa.Array],
176
180
  input_deltas_delta_type: str,
177
181
  expected_terminal_compact_partition_result: pa.Table,
182
+ expected_terminal_exception: BaseException,
178
183
  create_placement_group_param: bool,
179
184
  records_per_compacted_file_param: int,
180
185
  hash_bucket_count_param: int,
181
186
  drop_duplicates_param: bool,
182
187
  read_kwargs_provider_param: Any,
183
- incremental_deltas: pa.Table,
184
- incremental_deltas_delta_type: str,
188
+ incremental_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
185
189
  rebase_expected_compact_partition_result: pa.Table,
186
- skip_enabled_compact_partition_drivers,
190
+ skip_enabled_compact_partition_drivers: List[CompactorVersion],
187
191
  compact_partition_func: Callable,
188
192
  benchmark: BenchmarkFixture,
189
193
  ):
@@ -204,7 +208,7 @@ def test_compact_partition_rebase_then_incremental(
204
208
  CompactionSessionAuditInfo,
205
209
  )
206
210
 
207
- ds_mock_kwargs = offer_local_deltacat_storage_kwargs
211
+ ds_mock_kwargs = local_deltacat_storage_kwargs
208
212
  ray.shutdown()
209
213
  ray.init(local_mode=True, ignore_reinit_error=True)
210
214
  """
@@ -258,6 +262,7 @@ def test_compact_partition_rebase_then_incremental(
258
262
  "hash_bucket_count": hash_bucket_count_param,
259
263
  "last_stream_position_to_compact": source_partition.stream_position,
260
264
  "list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
265
+ "object_store": RayPlasmaObjectStore(),
261
266
  "pg_config": pgm,
262
267
  "primary_keys": primary_keys,
263
268
  "read_kwargs_provider": read_kwargs_provider_param,
@@ -296,6 +301,8 @@ def test_compact_partition_rebase_then_incremental(
296
301
  (
297
302
  source_partition_locator_w_deltas,
298
303
  new_delta,
304
+ incremental_delta_length,
305
+ has_delete_deltas,
299
306
  ) = create_incremental_deltas_on_source_table(
300
307
  BASE_TEST_SOURCE_NAMESPACE,
301
308
  BASE_TEST_SOURCE_TABLE_NAME,
@@ -303,7 +310,6 @@ def test_compact_partition_rebase_then_incremental(
303
310
  source_table_stream,
304
311
  partition_values_param,
305
312
  incremental_deltas,
306
- incremental_deltas_delta_type,
307
313
  ds_mock_kwargs,
308
314
  )
309
315
  compact_partition_params = CompactPartitionParams.of(
@@ -318,6 +324,7 @@ def test_compact_partition_rebase_then_incremental(
318
324
  "hash_bucket_count": hash_bucket_count_param,
319
325
  "last_stream_position_to_compact": new_delta.stream_position,
320
326
  "list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
327
+ "object_store": RayPlasmaObjectStore(),
321
328
  "pg_config": pgm,
322
329
  "primary_keys": primary_keys,
323
330
  "read_kwargs_provider": read_kwargs_provider_param,
@@ -329,6 +336,10 @@ def test_compact_partition_rebase_then_incremental(
329
336
  "sort_keys": sort_keys if sort_keys else None,
330
337
  }
331
338
  )
339
+ if expected_terminal_exception:
340
+ with pytest.raises(expected_terminal_exception):
341
+ compact_partition_func(compact_partition_params)
342
+ return
332
343
  rcf_file_s3_uri = compact_partition_func(compact_partition_params)
333
344
  round_completion_info = get_rcf(setup_s3_resource, rcf_file_s3_uri)
334
345
  compacted_delta_locator_incremental: DeltaLocator = (
@@ -358,13 +369,14 @@ def test_compact_partition_rebase_then_incremental(
358
369
  actual_compacted_table = actual_compacted_table.combine_chunks().sort_by(
359
370
  sorting_cols
360
371
  )
361
-
362
- assert compaction_audit.input_records == (
363
- len(incremental_deltas) if incremental_deltas else 0
364
- ) + len(actual_rebase_compacted_table), (
365
- "Total input records must be equal to incremental deltas"
366
- "+ previous compacted table size"
367
- )
372
+ # NOTE: if delete type-deltas are present this relationship no longer holds true
373
+ if not has_delete_deltas:
374
+ assert compaction_audit.input_records == (
375
+ incremental_delta_length if incremental_deltas else 0
376
+ ) + len(actual_rebase_compacted_table), (
377
+ " Total input records must be equal to incremental deltas"
378
+ " + previous compacted table size"
379
+ )
368
380
 
369
381
  assert actual_compacted_table.equals(
370
382
  expected_terminal_compact_partition_result
@@ -27,23 +27,36 @@ def create_incremental_deltas_on_source_table(
27
27
  source_table_version: str,
28
28
  source_table_stream: Stream,
29
29
  partition_values_param,
30
- incremental_deltas: pa.Table,
31
- incremental_delta_type: DeltaType,
30
+ incremental_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
32
31
  ds_mock_kwargs: Optional[Dict[str, Any]] = None,
33
- ) -> Tuple[PartitionLocator, Delta]:
32
+ ) -> Tuple[PartitionLocator, Delta, int, bool]:
34
33
  import deltacat.tests.local_deltacat_storage as ds
35
34
 
35
+ incremental_delta_length = 0
36
+ is_delete = False
36
37
  src_partition: Partition = ds.get_partition(
37
38
  source_table_stream.locator,
38
39
  partition_values_param,
39
40
  **ds_mock_kwargs,
40
41
  )
41
- new_delta: Delta = ds.commit_delta(
42
- ds.stage_delta(
43
- incremental_deltas, src_partition, incremental_delta_type, **ds_mock_kwargs
44
- ),
45
- **ds_mock_kwargs,
46
- )
42
+ for (
43
+ incremental_data,
44
+ incremental_delta_type,
45
+ incremental_delete_parameters,
46
+ ) in incremental_deltas:
47
+ if incremental_delta_type is DeltaType.DELETE:
48
+ is_delete = True
49
+ incremental_delta: Delta = ds.commit_delta(
50
+ ds.stage_delta(
51
+ incremental_data,
52
+ src_partition,
53
+ incremental_delta_type,
54
+ delete_parameters=incremental_delete_parameters,
55
+ **ds_mock_kwargs,
56
+ ),
57
+ **ds_mock_kwargs,
58
+ )
59
+ incremental_delta_length += len(incremental_data) if incremental_data else 0
47
60
  src_table_stream_after_committed_delta: Stream = ds.get_stream(
48
61
  source_namespace,
49
62
  source_table_name,
@@ -55,7 +68,12 @@ def create_incremental_deltas_on_source_table(
55
68
  partition_values_param,
56
69
  **ds_mock_kwargs,
57
70
  )
58
- return src_partition_after_committed_delta.locator, new_delta
71
+ return (
72
+ src_partition_after_committed_delta.locator,
73
+ incremental_delta,
74
+ incremental_delta_length,
75
+ is_delete,
76
+ )
59
77
 
60
78
 
61
79
  def create_src_w_deltas_destination_plus_destination(
@@ -25,6 +25,10 @@ class MockPyMemcacheClient:
25
25
  def get(self, key, *args, **kwargs):
26
26
  return self.store.get(key)
27
27
 
28
+ def flush_all(self, *args, **kwargs):
29
+ for key, value in self.store.items():
30
+ self.store[key] = None
31
+
28
32
 
29
33
  class TestMemcachedObjectStore(unittest.TestCase):
30
34
 
@@ -192,3 +196,18 @@ class TestMemcachedObjectStore(unittest.TestCase):
192
196
  # assert
193
197
  result = self.object_store.get(ref)
194
198
  self.assertEqual(result, self.TEST_VALUE_LARGE)
199
+
200
+ @mock.patch("deltacat.io.memcached_object_store.Client")
201
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
202
+ def test_clear_sanity(self, mock_retrying_client, mock_client):
203
+ # setup
204
+ mock_client.return_value = MockPyMemcacheClient()
205
+ mock_retrying_client.return_value = mock_client.return_value
206
+
207
+ # action
208
+ ref = self.object_store.put(self.TEST_VALUE_LARGE)
209
+ self.object_store.clear()
210
+
211
+ # assert
212
+ with self.assertRaises(ValueError):
213
+ self.object_store.get(ref)
@@ -39,6 +39,7 @@ from deltacat.storage import (
39
39
  ManifestMeta,
40
40
  ManifestEntry,
41
41
  ManifestEntryList,
42
+ DeleteParameters,
42
43
  )
43
44
  from deltacat.types.media import (
44
45
  ContentType,
@@ -892,6 +893,7 @@ def stage_delta(
892
893
  properties: Optional[Dict[str, str]] = None,
893
894
  s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
894
895
  content_type: ContentType = ContentType.PARQUET,
896
+ delete_parameters: Optional[DeleteParameters] = None,
895
897
  *args,
896
898
  **kwargs,
897
899
  ) -> Delta:
@@ -958,6 +960,7 @@ def stage_delta(
958
960
  properties=properties,
959
961
  manifest=manifest,
960
962
  previous_stream_position=partition.stream_position,
963
+ delete_parameters=delete_parameters,
961
964
  )
962
965
 
963
966
  params = (uri, serialized_data)
@@ -3,5 +3,4 @@ from deltacat.storage import Delta
3
3
 
4
4
  test_delta_file = open("deltacat/tests/test_utils/resources/test_delta.json")
5
5
  test_delta_dict = json.load(test_delta_file)
6
-
7
- TEST_DELTA = Delta(test_delta_dict)
6
+ TEST_UPSERT_DELTA = Delta(test_delta_dict)
@@ -1,24 +1,29 @@
1
- from typing import List, Optional
1
+ from typing import List, Optional, Union
2
2
  import pyarrow as pa
3
- from deltacat.storage import Delta, Partition, PartitionLocator
3
+ from deltacat.storage import Delta, Partition, PartitionLocator, DeltaLocator
4
4
  import deltacat.tests.local_deltacat_storage as ds
5
+ from deltacat.types.media import StorageType
5
6
 
6
7
 
7
8
  def create_delta_from_csv_file(
8
9
  namespace: str,
9
10
  file_paths: List[str],
10
11
  table_name: Optional[str] = None,
12
+ table_version: int = 1,
11
13
  *args,
12
- **kwargs
14
+ **kwargs,
13
15
  ) -> Delta:
14
16
  staged_partition = stage_partition_from_file_paths(
15
- namespace, file_paths, *args, table_name=table_name, **kwargs
17
+ namespace,
18
+ file_paths,
19
+ *args,
20
+ table_name=table_name,
21
+ table_version=table_version,
22
+ **kwargs,
16
23
  )
17
-
18
24
  committed_delta = commit_delta_to_staged_partition(
19
25
  staged_partition, file_paths, *args, **kwargs
20
26
  )
21
-
22
27
  return committed_delta
23
28
 
24
29
 
@@ -26,14 +31,15 @@ def stage_partition_from_file_paths(
26
31
  namespace: str,
27
32
  file_paths: List[str],
28
33
  table_name: Optional[str] = None,
34
+ table_version: int = 1,
29
35
  *args,
30
- **kwargs
36
+ **kwargs,
31
37
  ) -> Partition:
32
38
  ds.create_namespace(namespace, {}, **kwargs)
33
39
  if table_name is None:
34
40
  table_name = "-".join(file_paths).replace("/", "_")
35
- ds.create_table_version(namespace, table_name, "1", **kwargs)
36
- stream = ds.get_stream(namespace, table_name, "1", **kwargs)
41
+ ds.create_table_version(namespace, table_name, str(table_version), **kwargs)
42
+ stream = ds.get_stream(namespace, table_name, str(table_version), **kwargs)
37
43
  staged_partition = ds.stage_partition(stream, [], **kwargs)
38
44
  return staged_partition
39
45
 
@@ -42,12 +48,23 @@ def commit_delta_to_staged_partition(
42
48
  staged_partition, file_paths: List[str], *args, **kwargs
43
49
  ) -> Delta:
44
50
  committed_delta = commit_delta_to_partition(
45
- staged_partition, file_paths=file_paths, *args, **kwargs
51
+ staged_partition, *args, file_paths=file_paths, **kwargs
46
52
  )
47
53
  ds.commit_partition(staged_partition, **kwargs)
48
54
  return committed_delta
49
55
 
50
56
 
57
+ def download_delta(delta_like: Union[Delta, DeltaLocator], *args, **kwargs) -> Delta:
58
+ return pa.concat_tables(
59
+ ds.download_delta(
60
+ delta_like,
61
+ storage_type=StorageType.LOCAL,
62
+ *args,
63
+ **kwargs,
64
+ )
65
+ )
66
+
67
+
51
68
  def commit_delta_to_partition(
52
69
  partition: Partition, file_paths: List[str], *args, **kwargs
53
70
  ) -> Delta:
deltacat/utils/pandas.py CHANGED
@@ -126,7 +126,7 @@ def _add_column_kwargs(
126
126
  kwargs["usecols"] = include_columns
127
127
  else:
128
128
  if content_type in TABULAR_CONTENT_TYPES:
129
- kwargs["columns"]: include_columns
129
+ kwargs["columns"] = include_columns
130
130
  else:
131
131
  if include_columns:
132
132
  logger.warning(
@@ -37,7 +37,7 @@ def current_node_resource_key() -> str:
37
37
  actors on that node via:
38
38
  `foo.options(resources={get_current_node_resource_key(): 0.01}).remote()`
39
39
  """
40
- current_node_id = ray.get_runtime_context().node_id.hex()
40
+ current_node_id = ray.get_runtime_context().get_node_id().hex()
41
41
  keys = node_resource_keys(lambda n: n["NodeID"] == current_node_id)
42
42
  assert (
43
43
  len(keys) <= 1
@@ -83,7 +83,7 @@ def other_live_node_resource_keys() -> List[str]:
83
83
 
84
84
  For example, invoking this function from your Ray application driver on the
85
85
  head node returns the resource keys of all live worker nodes."""
86
- current_node_id = ray.get_runtime_context().node_id.hex()
86
+ current_node_id = ray.get_runtime_context().get_node_id().hex()
87
87
  return node_resource_keys(
88
88
  lambda n: n["NodeID"] != current_node_id and is_node_alive(n)
89
89
  )
@@ -97,7 +97,7 @@ def other_node_resource_keys() -> List[str]:
97
97
 
98
98
  For example, invoking this function from your Ray application driver on the
99
99
  head node returns the resource keys of all worker nodes."""
100
- current_node_id = ray.get_runtime_context().node_id.hex()
100
+ current_node_id = ray.get_runtime_context().get_node_id().hex()
101
101
  return node_resource_keys(lambda n: n["NodeID"] != current_node_id)
102
102
 
103
103
 
@@ -36,13 +36,15 @@ class ClusterUtilization:
36
36
  used_resources[key] = cluster_resources[key] - available_resources[key]
37
37
 
38
38
  self.total_memory_bytes = cluster_resources.get("memory")
39
- self.used_memory_bytes = used_resources.get("memory")
39
+ self.used_memory_bytes = used_resources.get("memory", 0.0)
40
40
  self.total_cpu = cluster_resources.get("CPU")
41
- self.used_cpu = used_resources.get("CPU")
41
+ self.used_cpu = used_resources.get("CPU", 0)
42
42
  self.total_object_store_memory_bytes = cluster_resources.get(
43
43
  "object_store_memory"
44
44
  )
45
- self.used_object_store_memory_bytes = used_resources.get("object_store_memory")
45
+ self.used_object_store_memory_bytes = used_resources.get(
46
+ "object_store_memory", 0.0
47
+ )
46
48
  self.used_memory_percent = (
47
49
  self.used_memory_bytes / self.total_memory_bytes
48
50
  ) * 100
@@ -92,7 +94,7 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
92
94
  ) -> bool | None:
93
95
  if __exc_value:
94
96
  logger.error(
95
- f"Error ocurred while calculating cluster resources: {__exc_value}"
97
+ f"Error occurred while calculating cluster resources: {__exc_value}"
96
98
  )
97
99
  self.stop_run_schedules.set()
98
100
  return super().__exit__(__exc_type, __exc_value, __traceback)
@@ -202,7 +204,7 @@ class ProcessUtilizationOverTimeRange(AbstractContextManager):
202
204
  ) -> bool | None:
203
205
  if __exc_value:
204
206
  logger.error(
205
- f"Error ocurred while calculating process resources: {__exc_value}"
207
+ f"Error occurred while calculating process resources: {__exc_value}"
206
208
  )
207
209
  self.stop_run_schedules.set()
208
210
  return super().__exit__(__exc_type, __exc_value, __traceback)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.0.2
3
+ Version: 1.1.1
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team