deltacat 1.1.36__py3-none-any.whl → 2.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.0b2.dist-info/METADATA +65 -0
- deltacat-2.0.0b2.dist-info/RECORD +349 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Any, Callable, Dict, List, Optional,
|
1
|
+
from typing import Any, Callable, Dict, List, Optional, Union, Tuple
|
2
2
|
|
3
3
|
import pyarrow as pa
|
4
4
|
import daft
|
@@ -16,38 +16,39 @@ from deltacat.utils.common import current_time_ms
|
|
16
16
|
from deltacat.storage import (
|
17
17
|
Delta,
|
18
18
|
DeltaLocator,
|
19
|
+
DeltaProperties,
|
19
20
|
DeltaType,
|
20
21
|
DistributedDataset,
|
21
22
|
LifecycleState,
|
22
23
|
ListResult,
|
23
24
|
LocalDataset,
|
24
25
|
LocalTable,
|
25
|
-
Manifest,
|
26
26
|
ManifestAuthor,
|
27
27
|
Namespace,
|
28
28
|
NamespaceLocator,
|
29
|
+
NamespaceProperties,
|
29
30
|
Partition,
|
30
|
-
|
31
|
+
PartitionScheme,
|
32
|
+
Schema,
|
31
33
|
Stream,
|
32
34
|
StreamLocator,
|
33
35
|
Table,
|
34
36
|
TableVersion,
|
35
37
|
TableVersionLocator,
|
38
|
+
TableVersionProperties,
|
36
39
|
TableLocator,
|
40
|
+
TableProperties,
|
37
41
|
CommitState,
|
38
|
-
|
42
|
+
SortScheme,
|
39
43
|
PartitionLocator,
|
40
|
-
ManifestMeta,
|
41
44
|
ManifestEntry,
|
42
45
|
ManifestEntryList,
|
43
|
-
|
44
|
-
PartitionFilter,
|
46
|
+
EntryParams,
|
45
47
|
PartitionValues,
|
46
|
-
DeltaPartitionSpec,
|
47
|
-
StreamPartitionSpec,
|
48
48
|
TransformName,
|
49
|
-
|
49
|
+
StreamFormat,
|
50
50
|
)
|
51
|
+
from deltacat.storage.model.manifest import Manifest, ManifestMeta, EntryType
|
51
52
|
from deltacat.types.media import (
|
52
53
|
ContentType,
|
53
54
|
StorageType,
|
@@ -65,7 +66,7 @@ SQLITE_CUR_ARG = "sqlite3_cur"
|
|
65
66
|
SQLITE_CON_ARG = "sqlite3_con"
|
66
67
|
DB_FILE_PATH_ARG = "db_file_path"
|
67
68
|
|
68
|
-
|
69
|
+
STREAM_FORMAT = StreamFormat.SQLITE3
|
69
70
|
STREAM_ID_PROPERTY = "stream_id"
|
70
71
|
CREATE_NAMESPACES_TABLE = (
|
71
72
|
"CREATE TABLE IF NOT EXISTS namespaces(locator, value, PRIMARY KEY (locator))"
|
@@ -206,7 +207,7 @@ def list_deltas(
|
|
206
207
|
last_stream_position: Optional[int] = None,
|
207
208
|
ascending_order: Optional[bool] = None,
|
208
209
|
include_manifest: bool = False,
|
209
|
-
|
210
|
+
partition_scheme_id: Optional[str] = None,
|
210
211
|
*args,
|
211
212
|
**kwargs,
|
212
213
|
) -> ListResult[Delta]:
|
@@ -214,13 +215,6 @@ def list_deltas(
|
|
214
215
|
if stream is None:
|
215
216
|
return ListResult.of([], None, None)
|
216
217
|
|
217
|
-
if partition_values is not None and partition_filter is not None:
|
218
|
-
raise ValueError(
|
219
|
-
"Only one of partition_values or partition_filter must be provided"
|
220
|
-
)
|
221
|
-
if partition_filter is not None:
|
222
|
-
partition_values = partition_filter.partition_values
|
223
|
-
|
224
218
|
partition = get_partition(stream.locator, partition_values, *args, **kwargs)
|
225
219
|
|
226
220
|
all_deltas = list_partition_deltas(
|
@@ -314,7 +308,7 @@ def get_delta(
|
|
314
308
|
partition_values: Optional[PartitionValues] = None,
|
315
309
|
table_version: Optional[str] = None,
|
316
310
|
include_manifest: bool = False,
|
317
|
-
|
311
|
+
partition_scheme_id: Optional[str] = None,
|
318
312
|
*args,
|
319
313
|
**kwargs,
|
320
314
|
) -> Optional[Delta]:
|
@@ -322,14 +316,6 @@ def get_delta(
|
|
322
316
|
|
323
317
|
stream = get_stream(namespace, table_name, table_version, *args, **kwargs)
|
324
318
|
|
325
|
-
if partition_values is not None and partition_filter is not None:
|
326
|
-
raise ValueError(
|
327
|
-
"Only one of partition_values or partition_filter must be provided"
|
328
|
-
)
|
329
|
-
|
330
|
-
if partition_filter is not None:
|
331
|
-
partition_values = partition_filter.partition_values
|
332
|
-
|
333
319
|
partition = get_partition(stream.locator, partition_values, *args, **kwargs)
|
334
320
|
delta_locator = DeltaLocator.of(partition.locator, stream_position)
|
335
321
|
|
@@ -355,7 +341,7 @@ def get_latest_delta(
|
|
355
341
|
partition_values: Optional[PartitionValues] = None,
|
356
342
|
table_version: Optional[str] = None,
|
357
343
|
include_manifest: bool = False,
|
358
|
-
|
344
|
+
partition_scheme_id: Optional[str] = None,
|
359
345
|
*args,
|
360
346
|
**kwargs,
|
361
347
|
) -> Optional[Delta]:
|
@@ -369,7 +355,7 @@ def get_latest_delta(
|
|
369
355
|
last_stream_position=None,
|
370
356
|
ascending_order=False,
|
371
357
|
include_manifest=include_manifest,
|
372
|
-
|
358
|
+
partition_scheme_id=partition_scheme_id,
|
373
359
|
*args,
|
374
360
|
**kwargs,
|
375
361
|
).all_items()
|
@@ -389,7 +375,6 @@ def download_delta(
|
|
389
375
|
file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
|
390
376
|
ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
|
391
377
|
distributed_dataset_type: DistributedDatasetType = DistributedDatasetType.RAY_DATASET,
|
392
|
-
partition_filter: Optional[PartitionFilter] = None,
|
393
378
|
*args,
|
394
379
|
**kwargs,
|
395
380
|
) -> Union[LocalDataset, DistributedDataset]: # type: ignore
|
@@ -398,16 +383,7 @@ def download_delta(
|
|
398
383
|
manifest = Delta(delta_like).manifest
|
399
384
|
else:
|
400
385
|
manifest = get_delta_manifest(delta_like, *args, **kwargs)
|
401
|
-
partition_values: PartitionValues = None
|
402
|
-
if partition_filter is not None:
|
403
|
-
partition_values = partition_filter.partition_values
|
404
386
|
for entry_index in range(len(manifest.entries)):
|
405
|
-
if (
|
406
|
-
partition_values is not None
|
407
|
-
and partition_values != manifest.entries[entry_index].meta.partition_values
|
408
|
-
):
|
409
|
-
continue
|
410
|
-
|
411
387
|
result.append(
|
412
388
|
download_delta_manifest_entry(
|
413
389
|
delta_like=delta_like,
|
@@ -515,11 +491,11 @@ def get_delta_manifest(
|
|
515
491
|
|
516
492
|
|
517
493
|
def create_namespace(
|
518
|
-
namespace: str,
|
494
|
+
namespace: str, properties: NamespaceProperties, *args, **kwargs
|
519
495
|
) -> Namespace:
|
520
496
|
cur, con = _get_sqlite3_cursor_con(kwargs)
|
521
497
|
locator = NamespaceLocator.of(namespace)
|
522
|
-
result = Namespace.of(locator,
|
498
|
+
result = Namespace.of(locator, properties)
|
523
499
|
params = (locator.canonical_string(), json.dumps(result))
|
524
500
|
cur.execute(CREATE_NAMESPACES_TABLE)
|
525
501
|
cur.execute(CREATE_TABLES_TABLE)
|
@@ -535,7 +511,7 @@ def create_namespace(
|
|
535
511
|
|
536
512
|
def update_namespace(
|
537
513
|
namespace: str,
|
538
|
-
|
514
|
+
properties: NamespaceProperties = None,
|
539
515
|
new_namespace: Optional[str] = None,
|
540
516
|
*args,
|
541
517
|
**kwargs,
|
@@ -543,7 +519,7 @@ def update_namespace(
|
|
543
519
|
assert new_namespace is None, "namespace name cannot be changed"
|
544
520
|
cur, con = _get_sqlite3_cursor_con(kwargs)
|
545
521
|
locator = NamespaceLocator.of(namespace)
|
546
|
-
result = Namespace.of(locator,
|
522
|
+
result = Namespace.of(locator, properties)
|
547
523
|
params = (json.dumps(result), locator.canonical_string())
|
548
524
|
cur.execute("UPDATE namespaces SET value = ? WHERE locator = ?", params)
|
549
525
|
con.commit()
|
@@ -553,39 +529,41 @@ def create_table_version(
|
|
553
529
|
namespace: str,
|
554
530
|
table_name: str,
|
555
531
|
table_version: Optional[str] = None,
|
556
|
-
schema: Optional[Union[pa.Schema,
|
557
|
-
|
558
|
-
|
559
|
-
primary_key_column_names: Optional[Set[str]] = None,
|
560
|
-
sort_keys: Optional[List[SortKey]] = None,
|
532
|
+
schema: Optional[Union[pa.Schema, Any]] = None,
|
533
|
+
partition_scheme: Optional[PartitionScheme] = None,
|
534
|
+
sort_keys: Optional[SortScheme] = None,
|
561
535
|
table_version_description: Optional[str] = None,
|
562
|
-
table_version_properties: Optional[
|
563
|
-
table_permissions: Optional[Dict[str, Any]] = None,
|
536
|
+
table_version_properties: Optional[TableVersionProperties] = None,
|
564
537
|
table_description: Optional[str] = None,
|
565
|
-
table_properties: Optional[
|
538
|
+
table_properties: Optional[TableProperties] = None,
|
566
539
|
supported_content_types: Optional[List[ContentType]] = None,
|
567
|
-
partition_spec: Optional[StreamPartitionSpec] = None,
|
568
540
|
*args,
|
569
541
|
**kwargs,
|
570
542
|
) -> Stream:
|
571
543
|
cur, con = _get_sqlite3_cursor_con(kwargs)
|
572
544
|
|
573
|
-
if
|
574
|
-
raise ValueError(
|
575
|
-
"Only one of partition_keys or partition_spec must be provided"
|
576
|
-
)
|
577
|
-
if partition_spec is not None:
|
545
|
+
if partition_scheme is not None:
|
578
546
|
assert (
|
579
|
-
|
580
|
-
), "
|
581
|
-
|
582
|
-
|
583
|
-
|
547
|
+
partition_scheme.keys is not None
|
548
|
+
), "Partition keys must be specified with partition scheme"
|
549
|
+
for key in partition_scheme.keys:
|
550
|
+
assert (
|
551
|
+
key.transform is None or key.transform.name == TransformName.IDENTITY
|
552
|
+
), (
|
584
553
|
"Local DeltaCAT storage does not support creating table versions "
|
585
554
|
"with non identity transform partition spec"
|
586
555
|
)
|
587
|
-
|
588
|
-
|
556
|
+
if sort_keys is not None:
|
557
|
+
assert (
|
558
|
+
sort_keys.keys is not None
|
559
|
+
), "Sort keys must be specified with sort scheme"
|
560
|
+
for key in sort_keys.keys:
|
561
|
+
assert (
|
562
|
+
key.transform is None or key.transform.name == TransformName.IDENTITY
|
563
|
+
), (
|
564
|
+
"Local DeltaCAT storage does not support creating table versions "
|
565
|
+
"with non identity transform sort spec"
|
566
|
+
)
|
589
567
|
|
590
568
|
latest_version = get_latest_table_version(namespace, table_name, *args, **kwargs)
|
591
569
|
if (
|
@@ -602,9 +580,7 @@ def create_table_version(
|
|
602
580
|
)
|
603
581
|
|
604
582
|
table_locator = TableLocator.of(NamespaceLocator.of(namespace), table_name)
|
605
|
-
table_obj = Table.of(
|
606
|
-
table_locator, table_permissions, table_description, table_properties
|
607
|
-
)
|
583
|
+
table_obj = Table.of(table_locator, table_description, table_properties)
|
608
584
|
table_version_locator = TableVersionLocator.of(
|
609
585
|
table_locator=table_locator, table_version=table_version
|
610
586
|
)
|
@@ -617,19 +593,18 @@ def create_table_version(
|
|
617
593
|
properties = {**table_version_properties, STREAM_ID_PROPERTY: stream_id}
|
618
594
|
table_version_obj = TableVersion.of(
|
619
595
|
table_version_locator,
|
620
|
-
schema=schema,
|
621
|
-
|
622
|
-
primary_key_columns=primary_key_column_names,
|
596
|
+
schema=Schema.of(schema) if schema else None,
|
597
|
+
partition_scheme=partition_scheme,
|
623
598
|
description=table_version_description,
|
624
599
|
properties=properties,
|
625
|
-
|
600
|
+
sort_scheme=sort_keys,
|
626
601
|
content_types=supported_content_types,
|
627
602
|
)
|
628
603
|
stream_locator = StreamLocator.of(
|
629
|
-
table_version_obj.locator, stream_id=stream_id,
|
604
|
+
table_version_obj.locator, stream_id=stream_id, stream_format=STREAM_FORMAT
|
630
605
|
)
|
631
606
|
result_stream = Stream.of(
|
632
|
-
stream_locator,
|
607
|
+
stream_locator, partition_scheme=partition_scheme, state=CommitState.COMMITTED
|
633
608
|
)
|
634
609
|
|
635
610
|
params = (
|
@@ -658,16 +633,15 @@ def create_table_version(
|
|
658
633
|
def update_table(
|
659
634
|
namespace: str,
|
660
635
|
table_name: str,
|
661
|
-
permissions: Optional[Dict[str, Any]] = None,
|
662
636
|
description: Optional[str] = None,
|
663
|
-
properties: Optional[
|
637
|
+
properties: Optional[TableProperties] = None,
|
664
638
|
new_table_name: Optional[str] = None,
|
665
639
|
*args,
|
666
640
|
**kwargs,
|
667
641
|
) -> None:
|
668
642
|
cur, con = _get_sqlite3_cursor_con(kwargs)
|
669
643
|
table_locator = TableLocator.of(NamespaceLocator.of(namespace), table_name)
|
670
|
-
table_obj = Table.of(table_locator,
|
644
|
+
table_obj = Table.of(table_locator, description, properties)
|
671
645
|
|
672
646
|
params = (table_locator.canonical_string(),)
|
673
647
|
cur.execute("DELETE FROM tables WHERE locator = ?", params)
|
@@ -685,10 +659,9 @@ def update_table_version(
|
|
685
659
|
table_name: str,
|
686
660
|
table_version: str,
|
687
661
|
lifecycle_state: Optional[LifecycleState] = None,
|
688
|
-
schema: Optional[Union[pa.Schema,
|
689
|
-
schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
|
662
|
+
schema: Optional[Union[pa.Schema, Any]] = None,
|
690
663
|
description: Optional[str] = None,
|
691
|
-
properties: Optional[
|
664
|
+
properties: Optional[TableVersionProperties] = None,
|
692
665
|
*args,
|
693
666
|
**kwargs,
|
694
667
|
) -> None:
|
@@ -720,12 +693,11 @@ def update_table_version(
|
|
720
693
|
tv_properties = {**properties, **current_props}
|
721
694
|
table_version_obj = TableVersion.of(
|
722
695
|
table_version_locator,
|
723
|
-
schema=schema,
|
724
|
-
|
725
|
-
primary_key_columns=current_table_version_obj.primary_keys,
|
696
|
+
schema=Schema.of(schema) if schema else None,
|
697
|
+
partition_scheme=current_table_version_obj.partition_scheme,
|
726
698
|
description=description,
|
727
699
|
properties=tv_properties,
|
728
|
-
|
700
|
+
sort_scheme=current_table_version_obj.sort_scheme,
|
729
701
|
content_types=current_table_version_obj.content_types,
|
730
702
|
)
|
731
703
|
|
@@ -757,11 +729,11 @@ def stage_stream(
|
|
757
729
|
|
758
730
|
stream_id = uuid.uuid4().__str__()
|
759
731
|
new_stream_locator = StreamLocator.of(
|
760
|
-
existing_table_version.locator, stream_id,
|
732
|
+
existing_table_version.locator, stream_id, STREAM_FORMAT
|
761
733
|
)
|
762
734
|
new_stream = Stream.of(
|
763
735
|
new_stream_locator,
|
764
|
-
existing_stream.
|
736
|
+
existing_stream.partition_scheme,
|
765
737
|
CommitState.STAGED,
|
766
738
|
existing_stream.locator.canonical_string(),
|
767
739
|
)
|
@@ -785,9 +757,9 @@ def commit_stream(stream: Stream, *args, **kwargs) -> Stream:
|
|
785
757
|
)
|
786
758
|
stream_to_commit = Stream.of(
|
787
759
|
stream.locator,
|
788
|
-
stream.
|
760
|
+
stream.partition_scheme,
|
789
761
|
CommitState.COMMITTED,
|
790
|
-
stream.
|
762
|
+
stream.previous_stream_id,
|
791
763
|
)
|
792
764
|
|
793
765
|
existing_table_version.properties[
|
@@ -989,12 +961,10 @@ def stage_delta(
|
|
989
961
|
delta_type: DeltaType = DeltaType.UPSERT,
|
990
962
|
max_records_per_entry: Optional[int] = None,
|
991
963
|
author: Optional[ManifestAuthor] = None,
|
992
|
-
properties: Optional[
|
964
|
+
properties: Optional[DeltaProperties] = None,
|
993
965
|
s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
|
994
966
|
content_type: ContentType = ContentType.PARQUET,
|
995
|
-
|
996
|
-
partition_spec: Optional[DeltaPartitionSpec] = None,
|
997
|
-
partition_values: Optional[PartitionValues] = None,
|
967
|
+
entry_params: Optional[EntryParams] = None,
|
998
968
|
*args,
|
999
969
|
**kwargs,
|
1000
970
|
) -> Delta:
|
@@ -1016,12 +986,6 @@ def stage_delta(
|
|
1016
986
|
con.commit()
|
1017
987
|
return delta
|
1018
988
|
|
1019
|
-
if partition_spec:
|
1020
|
-
assert partition_values is not None, (
|
1021
|
-
"partition_values must be provided as local "
|
1022
|
-
"storage does not support computing it from input data"
|
1023
|
-
)
|
1024
|
-
|
1025
989
|
serialized_data = None
|
1026
990
|
if content_type == ContentType.PARQUET:
|
1027
991
|
buffer = io.BytesIO()
|
@@ -1040,25 +1004,35 @@ def stage_delta(
|
|
1040
1004
|
stream_position = current_time_ms()
|
1041
1005
|
delta_locator = DeltaLocator.of(partition.locator, stream_position=stream_position)
|
1042
1006
|
|
1007
|
+
entry_type = (
|
1008
|
+
EntryType.EQUALITY_DELETE if delta_type is DeltaType.DELETE else EntryType.DATA
|
1009
|
+
)
|
1043
1010
|
meta = ManifestMeta.of(
|
1044
1011
|
len(data),
|
1045
1012
|
len(serialized_data),
|
1046
1013
|
content_type=content_type,
|
1047
1014
|
content_encoding=ContentEncoding.IDENTITY,
|
1048
1015
|
source_content_length=data.nbytes,
|
1049
|
-
|
1016
|
+
entry_type=entry_type,
|
1017
|
+
entry_params=entry_params,
|
1050
1018
|
)
|
1051
1019
|
|
1052
1020
|
manifest = Manifest.of(
|
1053
1021
|
entries=ManifestEntryList.of(
|
1054
1022
|
[
|
1055
1023
|
ManifestEntry.of(
|
1056
|
-
uri=uri,
|
1024
|
+
uri=uri,
|
1025
|
+
url=uri,
|
1026
|
+
meta=meta,
|
1027
|
+
mandatory=True,
|
1028
|
+
uuid=manifest_id,
|
1057
1029
|
)
|
1058
1030
|
]
|
1059
1031
|
),
|
1060
1032
|
author=author,
|
1061
1033
|
uuid=manifest_id,
|
1034
|
+
entry_type=entry_type,
|
1035
|
+
entry_params=entry_params,
|
1062
1036
|
)
|
1063
1037
|
|
1064
1038
|
delta = Delta.of(
|
@@ -1068,7 +1042,6 @@ def stage_delta(
|
|
1068
1042
|
properties=properties,
|
1069
1043
|
manifest=manifest,
|
1070
1044
|
previous_stream_position=partition.stream_position,
|
1071
|
-
delete_parameters=delete_parameters,
|
1072
1045
|
)
|
1073
1046
|
|
1074
1047
|
params = (uri, serialized_data)
|
@@ -1194,7 +1167,7 @@ def get_table_version_schema(
|
|
1194
1167
|
table_version: Optional[str] = None,
|
1195
1168
|
*args,
|
1196
1169
|
**kwargs,
|
1197
|
-
) -> Optional[Union[pa.Schema,
|
1170
|
+
) -> Optional[Union[pa.Schema, Any]]:
|
1198
1171
|
obj = get_table_version(namespace, table_name, table_version, *args, **kwargs)
|
1199
1172
|
|
1200
1173
|
return obj.schema
|
@@ -1227,7 +1200,7 @@ def get_stream(
|
|
1227
1200
|
|
1228
1201
|
cur, con = _get_sqlite3_cursor_con(kwargs)
|
1229
1202
|
stream_locator = StreamLocator.of(
|
1230
|
-
obj.locator, stream_id=stream_id,
|
1203
|
+
obj.locator, stream_id=stream_id, stream_format=STREAM_FORMAT
|
1231
1204
|
)
|
1232
1205
|
res = cur.execute(
|
1233
1206
|
"SELECT * FROM streams WHERE locator = ?", (stream_locator.canonical_string(),)
|
File without changes
|
@@ -0,0 +1,25 @@
|
|
1
|
+
import tempfile
|
2
|
+
|
3
|
+
import pytest
|
4
|
+
from deltacat.catalog import CatalogProperties
|
5
|
+
from deltacat.tests.test_utils.filesystem import temp_dir_autocleanup
|
6
|
+
|
7
|
+
|
8
|
+
@pytest.fixture
|
9
|
+
def temp_dir():
|
10
|
+
"""
|
11
|
+
Temp dir which is removed after usage
|
12
|
+
note that each method which is injected with temp_dir will get a separate new tmp directory
|
13
|
+
"""
|
14
|
+
with temp_dir_autocleanup() as tmp_dir:
|
15
|
+
yield tmp_dir
|
16
|
+
|
17
|
+
|
18
|
+
@pytest.fixture
|
19
|
+
def keep_temp_dir():
|
20
|
+
return tempfile.mkdtemp()
|
21
|
+
|
22
|
+
|
23
|
+
@pytest.fixture
|
24
|
+
def temp_catalog(temp_dir):
|
25
|
+
return CatalogProperties(root=temp_dir)
|
File without changes
|