deltacat 1.1.35__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +2 -3
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -1
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -5
- deltacat/compute/compactor_v2/steps/merge.py +11 -80
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.35.dist-info/METADATA +0 -64
- deltacat-1.1.35.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,308 @@
|
|
1
|
+
import pytest
|
2
|
+
import pyarrow as pa
|
3
|
+
|
4
|
+
from deltacat.storage.model.schema import (
|
5
|
+
Schema,
|
6
|
+
Field,
|
7
|
+
BASE_SCHEMA_NAME,
|
8
|
+
)
|
9
|
+
|
10
|
+
|
11
|
+
@pytest.fixture
|
12
|
+
def schema_a():
|
13
|
+
return Schema.of(
|
14
|
+
[
|
15
|
+
Field.of(
|
16
|
+
field=pa.field("col1", pa.int32(), nullable=False),
|
17
|
+
field_id=1,
|
18
|
+
is_merge_key=True,
|
19
|
+
)
|
20
|
+
]
|
21
|
+
)
|
22
|
+
|
23
|
+
|
24
|
+
@pytest.fixture
|
25
|
+
def schema_b():
|
26
|
+
return Schema.of(
|
27
|
+
[
|
28
|
+
Field.of(
|
29
|
+
field=pa.field("col2", pa.string(), nullable=True),
|
30
|
+
field_id=2,
|
31
|
+
is_merge_key=False,
|
32
|
+
)
|
33
|
+
]
|
34
|
+
)
|
35
|
+
|
36
|
+
|
37
|
+
@pytest.fixture
|
38
|
+
def schema_c():
|
39
|
+
return Schema.of(
|
40
|
+
[
|
41
|
+
Field.of(
|
42
|
+
field=pa.field("col3", pa.float64(), nullable=False),
|
43
|
+
field_id=3,
|
44
|
+
is_merge_key=False,
|
45
|
+
)
|
46
|
+
]
|
47
|
+
)
|
48
|
+
|
49
|
+
|
50
|
+
@pytest.fixture
|
51
|
+
def schema_d():
|
52
|
+
schema = Schema.of(
|
53
|
+
[
|
54
|
+
Field.of(
|
55
|
+
field=pa.field("col_named", pa.int64(), nullable=True),
|
56
|
+
field_id=4,
|
57
|
+
is_merge_key=False,
|
58
|
+
)
|
59
|
+
]
|
60
|
+
)
|
61
|
+
return schema
|
62
|
+
|
63
|
+
|
64
|
+
def test_of_with_dict(schema_a, schema_b):
|
65
|
+
input_dict = {"schema_a": schema_a.arrow, "schema_b": schema_b.arrow}
|
66
|
+
schema = Schema.of(input_dict)
|
67
|
+
|
68
|
+
assert isinstance(schema, Schema)
|
69
|
+
assert list(schema.subschemas.keys()) == ["schema_a", "schema_b"]
|
70
|
+
assert list(schema.subschemas_to_field_ids.keys()) == ["schema_a", "schema_b"]
|
71
|
+
assert schema.subschemas["schema_a"].equivalent_to(schema_a, True)
|
72
|
+
assert schema.subschemas["schema_b"].equivalent_to(schema_b, True)
|
73
|
+
|
74
|
+
|
75
|
+
def test_of_invalid_input():
|
76
|
+
with pytest.raises(ValueError):
|
77
|
+
Schema.of(42)
|
78
|
+
with pytest.raises(ValueError):
|
79
|
+
Schema.of(["foo"])
|
80
|
+
with pytest.raises(ValueError):
|
81
|
+
Schema.of({"foo": [42]})
|
82
|
+
|
83
|
+
|
84
|
+
def test_insert_explicit_name(schema_a, schema_b):
|
85
|
+
schema = Schema.of(schema_a.arrow)
|
86
|
+
assert not schema.subschemas
|
87
|
+
new_schema = schema.add_subschema("explicit", schema_b.arrow)
|
88
|
+
assert not schema.subschemas
|
89
|
+
assert "explicit" in new_schema.subschemas
|
90
|
+
assert "explicit" in new_schema.subschemas_to_field_ids
|
91
|
+
assert BASE_SCHEMA_NAME in new_schema.subschemas
|
92
|
+
assert BASE_SCHEMA_NAME in new_schema.subschemas_to_field_ids
|
93
|
+
assert new_schema.subschemas[BASE_SCHEMA_NAME].equivalent_to(schema_a)
|
94
|
+
assert new_schema.subschemas["explicit"].equivalent_to(schema_b)
|
95
|
+
|
96
|
+
|
97
|
+
def test_insert_reserved_name_fails(schema_a, schema_b):
|
98
|
+
schema = Schema.of(schema_a.arrow)
|
99
|
+
with pytest.raises(ValueError):
|
100
|
+
schema.add_subschema(BASE_SCHEMA_NAME, schema_b.arrow)
|
101
|
+
|
102
|
+
|
103
|
+
def test_insert_duplicate_schema_name_fails(schema_a, schema_b):
|
104
|
+
schema = Schema.of({"dupe": schema_a.arrow})
|
105
|
+
with pytest.raises(ValueError):
|
106
|
+
schema.add_subschema("dupe", schema_b.arrow)
|
107
|
+
|
108
|
+
|
109
|
+
def test_insert_duplicate_field_name_fails(schema_a):
|
110
|
+
schema = Schema.of(schema_a.arrow)
|
111
|
+
with pytest.raises(ValueError):
|
112
|
+
schema.add_subschema("dupe_field_name", schema_a.arrow)
|
113
|
+
|
114
|
+
|
115
|
+
def test_insert_autofill_field_id():
|
116
|
+
schema1 = [
|
117
|
+
Field.of(
|
118
|
+
field=pa.field("col1", pa.int32(), nullable=False),
|
119
|
+
is_merge_key=True,
|
120
|
+
)
|
121
|
+
]
|
122
|
+
schema2 = [
|
123
|
+
Field.of(
|
124
|
+
field=pa.field("col2", pa.int32(), nullable=False),
|
125
|
+
is_merge_key=True,
|
126
|
+
)
|
127
|
+
]
|
128
|
+
schema = Schema.of(schema1)
|
129
|
+
new_schema = schema.add_subschema("explicit", schema2)
|
130
|
+
assert not schema.subschemas
|
131
|
+
assert "explicit" in new_schema.subschemas
|
132
|
+
assert "explicit" in new_schema.subschemas_to_field_ids
|
133
|
+
assert BASE_SCHEMA_NAME in new_schema.subschemas
|
134
|
+
assert BASE_SCHEMA_NAME in new_schema.subschemas_to_field_ids
|
135
|
+
assert len(new_schema.subschemas[BASE_SCHEMA_NAME].fields) == 1
|
136
|
+
assert len(new_schema.subschemas["explicit"].fields) == 1
|
137
|
+
assert new_schema.subschemas[BASE_SCHEMA_NAME].fields[0].id == 0
|
138
|
+
assert new_schema.subschemas["explicit"].fields[0].id == 1
|
139
|
+
assert new_schema.subschemas[BASE_SCHEMA_NAME].equivalent_to(Schema.of(schema1))
|
140
|
+
schema2_with_expected_field_id = [
|
141
|
+
Field.of(
|
142
|
+
field=pa.field("col2", pa.int32(), nullable=False),
|
143
|
+
field_id=1,
|
144
|
+
is_merge_key=True,
|
145
|
+
)
|
146
|
+
]
|
147
|
+
assert new_schema.subschemas["explicit"].equivalent_to(
|
148
|
+
Schema.of(schema2_with_expected_field_id)
|
149
|
+
)
|
150
|
+
|
151
|
+
|
152
|
+
def test_insert_duplicate_field_name_case_insensitive_fails():
|
153
|
+
schema1 = Schema.of(
|
154
|
+
[
|
155
|
+
Field.of(
|
156
|
+
field=pa.field("col1", pa.int32(), nullable=False),
|
157
|
+
field_id=1,
|
158
|
+
is_merge_key=True,
|
159
|
+
)
|
160
|
+
]
|
161
|
+
)
|
162
|
+
schema2 = Schema.of(
|
163
|
+
[
|
164
|
+
Field.of(
|
165
|
+
field=pa.field("COL1", pa.int32(), nullable=False),
|
166
|
+
field_id=2,
|
167
|
+
is_merge_key=True,
|
168
|
+
)
|
169
|
+
]
|
170
|
+
)
|
171
|
+
schema = Schema.of(schema1.arrow)
|
172
|
+
with pytest.raises(ValueError):
|
173
|
+
schema.add_subschema("dupe_field_name_case_insensitive", schema2.arrow)
|
174
|
+
|
175
|
+
|
176
|
+
def test_insert_duplicate_field_id_fails():
|
177
|
+
schema1 = Schema.of(
|
178
|
+
[
|
179
|
+
Field.of(
|
180
|
+
field=pa.field("col1", pa.int32(), nullable=False),
|
181
|
+
field_id=1,
|
182
|
+
is_merge_key=True,
|
183
|
+
)
|
184
|
+
]
|
185
|
+
)
|
186
|
+
schema2 = Schema.of(
|
187
|
+
[
|
188
|
+
Field.of(
|
189
|
+
field=pa.field("col2", pa.int32(), nullable=False),
|
190
|
+
field_id=1,
|
191
|
+
is_merge_key=True,
|
192
|
+
)
|
193
|
+
]
|
194
|
+
)
|
195
|
+
schema = Schema.of(schema1.arrow)
|
196
|
+
with pytest.raises(ValueError):
|
197
|
+
schema.add_subschema("dupe_field_id", schema2.arrow)
|
198
|
+
|
199
|
+
|
200
|
+
def test_update_success(schema_a, schema_b):
|
201
|
+
schema = Schema.of({"key": schema_a.arrow})
|
202
|
+
new_schema = schema.replace_subschema("key", schema_b.arrow)
|
203
|
+
assert schema.subschemas["key"].equivalent_to(schema_a)
|
204
|
+
assert new_schema.subschemas["key"].equivalent_to(schema_b)
|
205
|
+
|
206
|
+
|
207
|
+
def test_update_not_exist(schema_a):
|
208
|
+
schema = Schema.of({"key": schema_a.arrow})
|
209
|
+
with pytest.raises(ValueError):
|
210
|
+
schema.replace_subschema("nonexistent", schema_a.arrow)
|
211
|
+
|
212
|
+
|
213
|
+
def test_delete_schema_success(schema_a, schema_b):
|
214
|
+
schema = Schema.of({"key1": schema_a.arrow, "key2": schema_b.arrow})
|
215
|
+
new_schema = schema.delete_subschema("key1")
|
216
|
+
assert "key1" in schema.subschemas
|
217
|
+
assert "key2" in schema.subschemas
|
218
|
+
assert "key1" not in new_schema.subschemas
|
219
|
+
assert "key2" in new_schema.subschemas
|
220
|
+
assert "key1" in schema.subschemas_to_field_ids
|
221
|
+
assert "key2" in schema.subschemas_to_field_ids
|
222
|
+
assert "key1" not in new_schema.subschemas_to_field_ids
|
223
|
+
assert "key2" in new_schema.subschemas_to_field_ids
|
224
|
+
|
225
|
+
|
226
|
+
def test_delete_only_schema_fails(schema_a):
|
227
|
+
schema = Schema.of({"key": schema_a.arrow})
|
228
|
+
with pytest.raises(ValueError):
|
229
|
+
schema.delete_subschema("key")
|
230
|
+
|
231
|
+
|
232
|
+
def test_delete_schema_not_exist(schema_a):
|
233
|
+
schema = Schema.of({"key": schema_a.arrow})
|
234
|
+
with pytest.raises(ValueError):
|
235
|
+
schema.delete_subschema("nonexistent")
|
236
|
+
|
237
|
+
|
238
|
+
def test_get_schemas_order(schema_a, schema_b, schema_c):
|
239
|
+
schema = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow, "c": schema_c.arrow})
|
240
|
+
assert list(schema.subschemas.keys()) == ["a", "b", "c"]
|
241
|
+
assert list(schema.subschemas.values()) == [schema_a, schema_b, schema_c]
|
242
|
+
schema = Schema.of({"a": schema_a.arrow})
|
243
|
+
schema = schema.add_subschema("b", schema_b.arrow)
|
244
|
+
assert list(schema.subschemas.keys()) == ["a", "b"]
|
245
|
+
assert list(schema.subschemas.values()) == [schema_a, schema_b]
|
246
|
+
schema = schema.add_subschema("c", schema_c.arrow)
|
247
|
+
assert list(schema.subschemas.keys()) == ["a", "b", "c"]
|
248
|
+
assert list(schema.subschemas.values()) == [schema_a, schema_b, schema_c]
|
249
|
+
|
250
|
+
|
251
|
+
def test_equivalent_to_same(schema_a, schema_b):
|
252
|
+
schema1 = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow})
|
253
|
+
schema2 = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow})
|
254
|
+
assert schema1.equivalent_to(schema2)
|
255
|
+
assert schema2.equivalent_to(schema1)
|
256
|
+
|
257
|
+
|
258
|
+
def test_equivalent_to_different_subschema_names(schema_a, schema_b):
|
259
|
+
schema1 = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow})
|
260
|
+
schema2 = Schema.of({"x": schema_a.arrow, "b": schema_b.arrow})
|
261
|
+
assert schema1.equivalent_to(schema2)
|
262
|
+
assert not schema1.equivalent_to(schema2, True)
|
263
|
+
|
264
|
+
|
265
|
+
def test_not_equivalent_to_different_subschema_order(schema_a, schema_b):
|
266
|
+
schema1 = Schema.of({"a": schema_a.arrow, "b": schema_b.arrow})
|
267
|
+
schema2 = Schema.of({"b": schema_b.arrow, "a": schema_a.arrow})
|
268
|
+
assert not schema1.equivalent_to(schema2)
|
269
|
+
|
270
|
+
|
271
|
+
def test_equivalent_to_non_schema(schema_a):
|
272
|
+
schema = Schema.of({"a": schema_a.arrow})
|
273
|
+
assert not schema.equivalent_to("not a schema")
|
274
|
+
|
275
|
+
|
276
|
+
def test_equivalent_schemas_different_instances():
|
277
|
+
"""
|
278
|
+
Edge case: ensure equivalent schemas with different instances
|
279
|
+
are considered equivalent.
|
280
|
+
"""
|
281
|
+
schema1 = Schema.of(
|
282
|
+
[
|
283
|
+
Field.of(
|
284
|
+
pa.field("col1", pa.int32(), nullable=False),
|
285
|
+
field_id=1,
|
286
|
+
is_merge_key=True,
|
287
|
+
)
|
288
|
+
]
|
289
|
+
)
|
290
|
+
schema2 = Schema.of(
|
291
|
+
[
|
292
|
+
Field.of(
|
293
|
+
pa.field("col1", pa.int32(), nullable=False),
|
294
|
+
field_id=1,
|
295
|
+
is_merge_key=True,
|
296
|
+
)
|
297
|
+
]
|
298
|
+
)
|
299
|
+
schema1 = Schema.of({"key": schema1.arrow})
|
300
|
+
schema2 = Schema.of({"key": schema2.arrow})
|
301
|
+
assert schema1.equivalent_to(schema2)
|
302
|
+
|
303
|
+
|
304
|
+
def test_empty_schema_fails():
|
305
|
+
with pytest.raises(ValueError):
|
306
|
+
Schema.of({})
|
307
|
+
with pytest.raises(ValueError):
|
308
|
+
Schema.of([])
|
@@ -0,0 +1,22 @@
|
|
1
|
+
import pytest
|
2
|
+
|
3
|
+
from deltacat.storage.model.shard import ShardingStrategy
|
4
|
+
|
5
|
+
|
6
|
+
def test_sharding_strategy_from_string_range():
|
7
|
+
"""
|
8
|
+
Tests that from_string('range') returns an instance of RangeShardingStrategy.
|
9
|
+
"""
|
10
|
+
from deltacat.storage.rivulet.shard.range_shard import RangeShardingStrategy
|
11
|
+
|
12
|
+
strategy = ShardingStrategy.from_string("range")
|
13
|
+
assert isinstance(strategy, RangeShardingStrategy)
|
14
|
+
|
15
|
+
|
16
|
+
def test_sharding_strategy_from_string_invalid():
|
17
|
+
"""
|
18
|
+
Tests that from_string(...) raises ValueError for an unknown strategy string.
|
19
|
+
"""
|
20
|
+
with pytest.raises(ValueError) as exc_info:
|
21
|
+
ShardingStrategy.from_string("unknown_strategy")
|
22
|
+
assert "Unsupported sharding strategy type: unknown_strategy" in str(exc_info.value)
|
@@ -0,0 +1,110 @@
|
|
1
|
+
import pytest
|
2
|
+
|
3
|
+
from deltacat.constants import BYTES_PER_KIBIBYTE
|
4
|
+
from deltacat.storage.model.table_version import TableVersion, TableVersionLocator
|
5
|
+
|
6
|
+
|
7
|
+
@pytest.mark.parametrize(
|
8
|
+
"previous_version, expected_next_version",
|
9
|
+
[
|
10
|
+
(None, "1"),
|
11
|
+
("v.1", "v.2"),
|
12
|
+
("1", "2"),
|
13
|
+
("1.0", "1.1"),
|
14
|
+
("v.999", "v.1000"),
|
15
|
+
],
|
16
|
+
)
|
17
|
+
def test_next_version(previous_version, expected_next_version):
|
18
|
+
new_version = TableVersion.next_version(previous_version)
|
19
|
+
assert isinstance(new_version, str)
|
20
|
+
assert new_version == expected_next_version
|
21
|
+
|
22
|
+
|
23
|
+
@pytest.mark.parametrize(
|
24
|
+
"table_version, expected_parsed_version",
|
25
|
+
[
|
26
|
+
("v.1", ("v.", 1)),
|
27
|
+
("1", (None, 1)),
|
28
|
+
("1.0", ("1.", 0)),
|
29
|
+
("v.999", ("v.", 999)),
|
30
|
+
],
|
31
|
+
)
|
32
|
+
def test_parse_version(table_version, expected_parsed_version):
|
33
|
+
prefix, version_number = TableVersion.parse_table_version(table_version)
|
34
|
+
if prefix is not None:
|
35
|
+
assert isinstance(prefix, str)
|
36
|
+
assert isinstance(version_number, int)
|
37
|
+
assert (prefix, version_number) == expected_parsed_version
|
38
|
+
|
39
|
+
|
40
|
+
def test_version_validation_invalid():
|
41
|
+
with pytest.raises(ValueError):
|
42
|
+
TableVersionLocator.at(
|
43
|
+
namespace="test_namespace",
|
44
|
+
table_name="test_table",
|
45
|
+
table_version="invalid_version",
|
46
|
+
)
|
47
|
+
|
48
|
+
|
49
|
+
def test_version_validation_valid_to_invalid():
|
50
|
+
valid_tv_locator = TableVersionLocator.at(
|
51
|
+
namespace="test_namespace",
|
52
|
+
table_name="test_table",
|
53
|
+
table_version="1",
|
54
|
+
)
|
55
|
+
assert valid_tv_locator.table_version == "1"
|
56
|
+
tv = TableVersion.of(
|
57
|
+
locator=valid_tv_locator,
|
58
|
+
schema=None,
|
59
|
+
)
|
60
|
+
assert tv.current_version_number() == 1
|
61
|
+
with pytest.raises(ValueError):
|
62
|
+
valid_tv_locator.table_version = "invalid_version"
|
63
|
+
|
64
|
+
|
65
|
+
def test_version_validation_numeric_name():
|
66
|
+
valid_tv_locator = TableVersionLocator.at(
|
67
|
+
namespace="test_namespace",
|
68
|
+
table_name="test_table",
|
69
|
+
table_version="1.0",
|
70
|
+
)
|
71
|
+
assert valid_tv_locator.table_version == "1.0"
|
72
|
+
tv = TableVersion.of(
|
73
|
+
locator=valid_tv_locator,
|
74
|
+
schema=None,
|
75
|
+
)
|
76
|
+
assert tv.current_version_number() == 0
|
77
|
+
|
78
|
+
|
79
|
+
def test_version_validation_truncate_leading_zeros():
|
80
|
+
valid_tv_locator = TableVersionLocator.at(
|
81
|
+
namespace="test_namespace",
|
82
|
+
table_name="test_table",
|
83
|
+
table_version="1.00002",
|
84
|
+
)
|
85
|
+
# ensure that leading 0's are truncated
|
86
|
+
assert valid_tv_locator.table_version == "1.2"
|
87
|
+
tv = TableVersion.of(
|
88
|
+
locator=valid_tv_locator,
|
89
|
+
schema=None,
|
90
|
+
)
|
91
|
+
assert tv.current_version_number() == 2
|
92
|
+
|
93
|
+
|
94
|
+
def test_version_validation_version_id_length_limits():
|
95
|
+
# ensure that long version identifiers are accepted
|
96
|
+
long_tv_id = "a" * (BYTES_PER_KIBIBYTE - 2) + ".1"
|
97
|
+
valid_tv_locator = TableVersionLocator.at(
|
98
|
+
namespace="test_namespace",
|
99
|
+
table_name="test_table",
|
100
|
+
table_version=long_tv_id,
|
101
|
+
)
|
102
|
+
assert valid_tv_locator.table_version == long_tv_id
|
103
|
+
tv = TableVersion.of(
|
104
|
+
locator=valid_tv_locator,
|
105
|
+
schema=None,
|
106
|
+
)
|
107
|
+
assert tv.current_version_number() == 1
|
108
|
+
# ensure that an excessively long version identifier is rejected
|
109
|
+
with pytest.raises(ValueError):
|
110
|
+
valid_tv_locator.table_version = "0" * (BYTES_PER_KIBIBYTE + 1)
|