deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
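The catalog subsystem sees the largest API change in this release: the 1.x deltacat/catalog/default_catalog_impl module is removed in favor of a pluggable Catalog model (deltacat/catalog/model/catalog.py), a filesystem-backed default implementation (deltacat/catalog/main/impl.py), and an Iceberg implementation (deltacat/catalog/iceberg/impl.py). A minimal sketch of the resulting 2.0 workflow, pieced together from the test diffs below; the catalog name and root path are illustrative:

    from deltacat.catalog import CatalogProperties
    from deltacat.catalog.model.catalog import Catalog, init, get_catalog

    # Register a filesystem-backed default catalog under a caller-chosen name.
    init(
        {"my_catalog": Catalog.default(CatalogProperties(root="/tmp/deltacat-root"))},
        ray_init_args={"ignore_reinit_error": True},
    )

    # Catalogs are later retrieved by name from shared Ray actor state.
    catalog = get_catalog("my_catalog")

Selected hunks follow, beginning with the new rivulet memtable writer.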
deltacat/storage/rivulet/writer/memtable_dataset_writer.py (new file)
@@ -0,0 +1,294 @@
+from __future__ import annotations
+
+import threading
+from threading import Thread
+from typing import Any, List, Set, Protocol, TypeVar, Dict, Iterable
+
+from pyarrow import RecordBatch, Table
+from deltacat.storage.model.partition import PartitionLocator
+from deltacat.storage.rivulet.metastore.delta import ManifestIO, DeltacatManifestIO
+
+from deltacat.storage.rivulet import Schema
+from deltacat.storage.rivulet.metastore.json_sst import JsonSstWriter
+from deltacat.storage.rivulet.serializer import MEMTABLE_DATA, DataSerializer
+from deltacat.storage.rivulet.serializer_factory import DataSerializerFactory
+from deltacat.storage.rivulet.writer.dataset_writer import DatasetWriter, DATA
+from deltacat.storage.rivulet.metastore.sst import SSTWriter
+from deltacat.storage.rivulet.fs.file_provider import FileProvider
+
+INPUT_ROW = TypeVar("INPUT_ROW")
+
+
+class Memtable(Protocol[INPUT_ROW]):
+    """
+    Protocol defining the interface for a memtable that can store and sort records of type T.
+    """
+
+    def add_record(self, record: INPUT_ROW) -> bool:
+        """
+        Add a record to the memtable.
+
+        Args:
+            record: The record to add of type INPUT_ROW
+
+        Returns:
+            bool: True if the memtable is full after adding the record, False otherwise
+        """
+        ...
+
+    def get_sorted_records(self, schema: Schema) -> MEMTABLE_DATA:
+        """
+        Get all records in the memtable in sorted order.
+
+        Returns:
+            List[T]: A list of sorted records
+        """
+        ...
+
+
+class DictMemTable(Memtable[Dict[str, Any]]):
+    """
+    Unit of in memory buffering of sorted records before records are written to file
+
+    TODO future improvements:
+    1. build b+ tree of record indexes on insertion
+       OR If we end up using arrow as intermediate format, we can use
+       pyarrow compute sort
+    2. Probably we will re-write in rust
+    """
+
+    def __init__(self, merge_key: str):
+        self.row_size = 0
+        self.merge_key = merge_key
+
+        self._records: List[Dict[str, Any]] = []
+        self.lock = threading.Lock()
+
+    def add_record(self, record: Dict[str, Any]):
+        with self.lock:
+            self._records.append(record)
+            self.row_size += 1
+
+        if self.row_size >= MemtableDatasetWriter.MAX_ROW_SIZE:
+            return True
+        return False
+
+    def get_sorted_records(self, schema: Schema) -> List[Dict[str, Any]]:
+        """
+        Gets sorted records
+
+        :return: iterator over sorted record
+        """
+        with self.lock:
+            self._records.sort(key=lambda x: x.__getitem__(self.merge_key))
+            return self._records
+
+
+class RecordBatchMemTable(Memtable[RecordBatch]):
+    """
+    Note that this will not respect max row size.
+    """
+
+    def __init__(self, merge_key: str):
+        self.row_size = 0
+        self.merge_key = merge_key
+
+        # list of full record batches in memtable
+        self._records_batches: List[RecordBatch] = []
+        self.lock = threading.Lock()
+
+    def add_record(self, record: RecordBatch):
+        with self.lock:
+            self._records_batches.append(record)
+            self.row_size += record.num_rows
+
+        if self.row_size >= MemtableDatasetWriter.MAX_ROW_SIZE:
+            return True
+        return False
+
+    def get_sorted_records(self, schema: Schema) -> Table:
+        """
+        Gets sorted records
+
+        :return: iterator over sorted record
+        """
+        with self.lock:
+            # Note that we are providing schema so that pyarrow does not infer it
+            table = Table.from_batches(self._records_batches, schema.to_pyarrow())
+            return table.sort_by(self.merge_key)
+
+
+class MemtableDatasetWriter(DatasetWriter):
+    # Note that this max row size is not respected when PyArrow RecordBatches are used
+    # In that case, the entire record batch is written within one memtable even if the row count overflows
+    MAX_ROW_SIZE = 1000000
+    """
+    Buffers data into rotating memtables. When a memtable reaches a certain size, it is flushed to disk and a new memtable is allocated
+
+    Uses DataWriter which will be format specific for writing data
+    Uses MetadataWriter for writing metadata
+
+    TODO Future Improvements
+    1. Maybe we should re-write this class in Rust (pending testing)
+    """
+
+    def __init__(
+        self,
+        file_provider: FileProvider,
+        schema: Schema,
+        locator: PartitionLocator,
+        file_format: str | None = None,
+        sst_writer: SSTWriter = None,
+        manifest_io: ManifestIO = None,
+    ):
+
+        if not sst_writer:
+            sst_writer = JsonSstWriter()
+        if not manifest_io:
+            manifest_io = DeltacatManifestIO(file_provider.uri, locator)
+
+        self.schema = schema
+
+        self.file_provider = file_provider
+        self.data_serializer: DataSerializer = DataSerializerFactory.get_serializer(
+            self.schema, self.file_provider, file_format
+        )
+        self.sst_writer = sst_writer
+        self.manifest_io = manifest_io
+
+        self._sst_files: Set[str] = set()
+        self.__curr_memtable = None
+        self.__open_memtables = []
+        self.__rlock = threading.RLock()
+        self.__open_threads: List[Thread] = []
+        self._locator = locator
+
+    def write_dict(self, record: Dict[str, Any]) -> None:
+
+        # Construct memtable if doesn't exist. If previous memtable wrong type, rotate
+        memtable_ctor = lambda: DictMemTable(self.schema.get_merge_key())
+        if not self.__curr_memtable:
+            self.__curr_memtable = memtable_ctor()
+        try:
+            isinstance(self.__curr_memtable, DictMemTable)
+        except TypeError:
+            self.__rotate_memtable(memtable_ctor)
+
+        # Write record(s). If memtable is full, rotate
+        if self.__curr_memtable.add_record(record):
+            self.__rotate_memtable(memtable_ctor)
+
+    def write_record_batch(self, record: RecordBatch) -> None:
+        # Construct memtable if doesn't exist. If previous memtable wrong type, rotate
+        memtable_ctor = lambda: RecordBatchMemTable(self.schema.get_merge_key())
+        if not self.__curr_memtable:
+            self.__curr_memtable = memtable_ctor()
+
+        try:
+            isinstance(self.__curr_memtable, RecordBatchMemTable)
+        except TypeError:
+            self.__rotate_memtable(memtable_ctor)
+
+        # Write record(s). If memtable is full, rotate
+        if self.__curr_memtable.add_record(record):
+            self.__rotate_memtable(memtable_ctor)
+
+    def write(self, data: DATA) -> None:
+        if isinstance(data, RecordBatch):
+            self.write_record_batch(data)
+        elif isinstance(data, Iterable):
+            for x in data:
+                if isinstance(x, dict):
+                    self.write_dict(x)
+                elif isinstance(x, RecordBatch):
+                    self.write_record_batch(x)
+                else:
+                    raise ValueError(
+                        f"Iterable contained unsupported type {type(x).__name__}."
+                        f" Supported data types to write are: {DATA}"
+                    )
+        else:
+            raise ValueError(
+                f"Unsupported data type {type(data).__name__}. Supported data types to write are: {DATA}"
+            )
+
+    def flush(self) -> str:
+        """
+        Explicitly flush any data and metadata and commit to dataset
+        """
+        self.__flush_memtable(self.__curr_memtable)
+        for thread in [t for t in self.__open_threads if t.is_alive()]:
+            thread.join()
+
+        manifest_location = self.__write_manifest_file()
+        self._sst_files.clear()
+
+        return manifest_location
+
+    def __enter__(self) -> Any:
+        """
+        Enter and exit method allows python "with" statement
+        """
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """
+        Closes all open memtables and ensures all data is flushed.
+        """
+        self.flush()
+        # return False to propogate up error messages
+        return False
+
+    def __rotate_memtable(self, memtable_constructor_closure):
+        """
+        Replace the active memtable
+        :return:
+        """
+        with self.__rlock:
+            self.__flush_memtable(self.__curr_memtable)
+            self.__curr_memtable = memtable_constructor_closure()
+            self.__open_memtables.append(self.__curr_memtable)
+
+            # Reap dead threads
+            self.__open_threads = [t for t in self.__open_threads if t.is_alive()]
+
+    def __flush_memtable(self, memtable):
+        thread = threading.Thread(target=self.__flush_memtable_async, args=(memtable,))
+        thread.start()
+        with self.__rlock:
+            self.__open_threads.append(thread)
+
+    def __flush_memtable_async(self, memtable: Memtable):
+        """
+        Flushes data and metadata for a given memtable
+        Called asynchronously in background thread
+        """
+        if not memtable:
+            return
+
+        sst_metadata_list = self.data_serializer.flush_batch(
+            memtable.get_sorted_records(self.schema)
+        )
+
+        # short circuit if no data/metadata written
+        if not sst_metadata_list:
+            with self.__rlock:
+                self.__open_memtables.remove(memtable)
+            return
+
+        # Write SST. Each memtable is going to have a dedicated L0 SST file because that is the unit at which
+        # we have contiguously sorted data
+        sst_file = self.file_provider.provide_l0_sst_file()
+
+        with self.__rlock:
+            self.sst_writer.write(sst_file, sst_metadata_list)
+            self._sst_files.add(sst_file.location)
+
+        if memtable in self.__open_memtables:
+            self.__open_memtables.remove(memtable)
+
+    def __write_manifest_file(self) -> str:
+        """
+        Write the manifest file to the filesystem at the given URI.
+        """
+        return self.manifest_io.write(list(self._sst_files), self.schema, 0)
deltacat/tests/_io/__init__.py (new file)
@@ -0,0 +1 @@
+# NOTE - this module is renamed because it is shadowing the stdlib io module when running tests in Pycharm
deltacat/tests/catalog/test_catalogs.py (new file)
@@ -0,0 +1,324 @@
+import unittest
+import pytest
+import ray
+import tempfile
+import shutil
+import uuid
+from unittest import mock
+import os
+
+from deltacat.catalog import CatalogProperties
+from pyiceberg.catalog import Catalog as IcebergCatalog
+
+from deltacat.catalog.model.catalog import (
+    Catalog,
+    init,
+    get_catalog,
+    put_catalog,
+    is_initialized,
+)
+from deltacat.catalog.iceberg.iceberg_catalog_config import IcebergCatalogConfig
+
+from pyiceberg.catalog import CatalogType
+
+
+# Test module to mock a catalog implementation
+class MockCatalogImpl:
+    @staticmethod
+    def initialize(*args, **kwargs):
+        # Return some state that the catalog would normally maintain
+        return {"initialized": True, "args": args, "kwargs": kwargs}
+
+
+@pytest.fixture(scope="function")
+def reset_catalogs_ray_actor():
+    """
+    Setup and teardown for Ray environment for tests.
+
+    This will kill the actor all_catalogs, essentially wiping global state for catalogs
+
+    NOTE: tests using this fixture must be run serially. As of April 7 2025, the unit test suite had various
+    failures if run in parallel, in part because the state of all_catalogs in ray is shared across tests.
+
+    NOTE: when using this fixture, ensure you pass ray_init_args={"ignore_reinit_error": True} into all
+    functions which may re-initialize ray. This is because the production code checks the all_catalogs actor
+    in order to determine whether it needs to initialize Ray
+    """
+    # Reset the global catalog_actor state before each test
+    import deltacat.catalog.model.catalog as catalog_module
+
+    # Initialize Ray if not already initialized
+    if not ray.is_initialized():
+        ray.init(ignore_reinit_error=True)
+    yield
+
+    # Clean up the actor if it exists
+    if catalog_module.all_catalogs is not None:
+        try:
+            ray.kill(catalog_module.all_catalogs)
+        except Exception:
+            pass
+        finally:
+            catalog_module.all_catalogs = None
+
+
+class TestCatalog(unittest.TestCase):
+    """Tests for the Catalog class itself, without Ray initialization."""
+
+    def test_catalog_constructor(self):
+        """Test that the Catalog constructor correctly initializes with the given implementation."""
+        catalog = Catalog(impl=MockCatalogImpl)
+
+        self.assertEqual(catalog.impl, MockCatalogImpl)
+
+        # Check that inner state was correctly initialized
+        # This just asserts that kwargs were plumbed through from Catalog constructor
+        self.assertTrue(catalog.inner["initialized"])
+        self.assertEqual(catalog.inner["args"], ())
+        self.assertEqual(catalog.inner["kwargs"], {})
+
+    def test_iceberg_factory_method(self):
+        """Test the iceberg factory method correctly creates an Iceberg catalog."""
+        # Create a mock for the Iceberg catalog module
+        with mock.patch(
+            "deltacat.catalog.model.catalog.IcebergCatalog"
+        ) as mock_iceberg_catalog:
+            # Configure the mock to return a known value when initialize is called
+            mock_iceberg_catalog.initialize.return_value = {"iceberg": True}
+
+            # Create an Iceberg catalog config and invoke iceberg factory method
+            config = IcebergCatalogConfig(type=CatalogType.IN_MEMORY, properties={})
+            catalog = Catalog.iceberg(config)
+
+            # Check that the implementation is set to iceberg_catalog
+            self.assertEqual(catalog.impl, mock_iceberg_catalog)
+            # Check that the inner state is set to the output of initialize
+            self.assertEqual(catalog.inner, {"iceberg": True})
+
+
+class TestCatalogsIntegration:
+    """Integration tests for Default catalog functionality."""
+
+    temp_dir = None
+
+    @classmethod
+    def setup_class(cls):
+        cls.temp_dir = tempfile.mkdtemp()
+        # Other tests are going to have initialized ray catalog. Initialize here to ensure
+        # that when this test class is run individuall it mimicks running with other tests
+        catalog = Catalog(impl=MockCatalogImpl)
+        init(
+            catalog,
+            ray_init_args={"ignore_reinit_error": True},
+            **{"force_reinitialize": True},
+        )
+
+    @classmethod
+    def teardown_class(cls):
+        if cls.temp_dir and os.path.exists(cls.temp_dir):
+            shutil.rmtree(cls.temp_dir)
+
+    def test_init_single_catalog(self, reset_catalogs_ray_actor):
+        """Test initializing a single catalog."""
+
+        catalog = Catalog(impl=MockCatalogImpl)
+
+        # Initialize with a single catalog and Ray init args including the namespace
+        init(
+            catalog,
+            ray_init_args={"ignore_reinit_error": True},
+            **{"force_reinitialize": True},
+        )
+
+        assert is_initialized()
+
+        # Get the default catalog and check it's the same one we initialized with
+        retrieved_catalog = get_catalog()
+        assert retrieved_catalog.impl == MockCatalogImpl
+        assert retrieved_catalog.inner["initialized"]
+
+    def test_init_multiple_catalogs(self, reset_catalogs_ray_actor):
+        """Test initializing multiple catalogs."""
+        # Create catalogs
+        catalog1 = Catalog(impl=MockCatalogImpl, id=1)
+        catalog2 = Catalog(impl=MockCatalogImpl, id=2)
+
+        # Initialize with multiple catalogs and Ray init args including the namespace
+        catalogs_dict = {"catalog1": catalog1, "catalog2": catalog2}
+        init(
+            catalogs_dict,
+            ray_init_args={"ignore_reinit_error": True},
+            **{"force_reinitialize": True},
+        )
+
+        assert is_initialized()
+
+        # Get catalogs by name and check they're the same ones we initialized with
+        retrieved_catalog1 = get_catalog("catalog1")
+        assert retrieved_catalog1.impl == MockCatalogImpl
+        assert retrieved_catalog1.inner["kwargs"]["id"] == 1
+
+        retrieved_catalog2 = get_catalog("catalog2")
+        assert retrieved_catalog2.impl == MockCatalogImpl
+        assert retrieved_catalog2.inner["kwargs"]["id"] == 2
+
+    def test_init_with_default_catalog_name(self, reset_catalogs_ray_actor):
+        """Test initializing with a specified default catalog name."""
+        # Create catalogs
+        catalog1 = Catalog(impl=MockCatalogImpl, id=1)
+        catalog2 = Catalog(impl=MockCatalogImpl, id=2)
+
+        # Initialize with multiple catalogs and specify a default
+        catalogs_dict = {"catalog1": catalog1, "catalog2": catalog2}
+        init(
+            catalogs_dict,
+            default="catalog2",
+            ray_init_args={"ignore_reinit_error": True},
+            **{"force_reinitialize": True},
+        )
+
+        # Get the default catalog and check it's catalog2
+        default_catalog = get_catalog()
+        assert default_catalog.impl == MockCatalogImpl
+        assert default_catalog.inner["kwargs"]["id"] == 2
+
+    def test_put_catalog(self, reset_catalogs_ray_actor):
+        """Test adding a catalog after initialization."""
+        # Initialize with a single catalog
+        catalog1 = Catalog(impl=MockCatalogImpl, id=1)
+        catalog2 = Catalog(impl=MockCatalogImpl, id=2)
+        init(
+            {"catalog1": catalog1},
+            ray_init_args={"ignore_reinit_error": True},
+            **{"force_reinitialize": True},
+        )
+
+        # Add a second catalog
+        put_catalog("catalog2", catalog2)
+
+        # Check both catalogs are available
+        retrieved_catalog1 = get_catalog("catalog1")
+        assert retrieved_catalog1.inner["kwargs"]["id"] == 1
+
+        retrieved_catalog2 = get_catalog("catalog2")
+        assert retrieved_catalog2.inner["kwargs"]["id"] == 2
+
+    def test_put_catalog_that_already_exists(self, reset_catalogs_ray_actor):
+        catalog = Catalog(impl=MockCatalogImpl, id=1)
+        catalog2 = Catalog(impl=MockCatalogImpl, id=2)
+        put_catalog(
+            "test_catalog",
+            catalog,
+            id=1,
+            ray_init_args={"ignore_reinit_error": True},
+        )
+
+        # Try to add another catalog with the same name. Should not error
+        put_catalog(
+            "test_catalog",
+            catalog2,
+            ray_init_args={"ignore_reinit_error": True},
+        )
+
+        retrieved_catalog = get_catalog("test_catalog")
+        assert retrieved_catalog.inner["kwargs"]["id"] == 2
+
+        # If fail_if_exists, put call should fail
+        with pytest.raises(ValueError):
+            put_catalog(
+                "test_catalog",
+                catalog,
+                ray_init_args={"ignore_reinit_error": True},
+                fail_if_exists=True,
+            )
+
+    def test_get_catalog_nonexistent(self, reset_catalogs_ray_actor):
+        """Test that trying to get a nonexistent catalog raises an error."""
+        # Initialize with a catalog
+        catalog = Catalog(impl=MockCatalogImpl)
+        init(
+            {"test_catalog": catalog},
+            ray_init_args={"ignore_reinit_error": True},
+            **{"force_reinitialize": True},
+        )
+
+        # Try to get a nonexistent catalog
+        with pytest.raises(ValueError):
+            get_catalog("nonexistent")
+
+    def test_get_catalog_no_default(self, reset_catalogs_ray_actor):
+        """Test that trying to get the default catalog when none is set raises an error."""
+        # Initialize with multiple catalogs but no default
+        catalog1 = Catalog(impl=MockCatalogImpl, id=1)
+        catalog2 = Catalog(impl=MockCatalogImpl, id=2)
+        init(
+            {"catalog1": catalog1, "catalog2": catalog2},
+            ray_init_args={"ignore_reinit_error": True},
+            **{"force_reinitialize": True},
+        )
+
+        # Try to get the default catalog
+        with pytest.raises(ValueError):
+            get_catalog()
+
+    def test_default_catalog_initialization(self, reset_catalogs_ray_actor):
+        """Test that a Default catalog can be initialized and accessed using the factory method."""
+        from deltacat.catalog.model.properties import CatalogProperties
+
+        catalog_name = str(uuid.uuid4())
+
+        # Create the catalog properties
+        config = CatalogProperties(root=self.temp_dir)
+
+        # Create the catalog using the factory method
+        catalog = Catalog.default(config)
+
+        # Initialize DeltaCAT with this catalog
+        init(
+            {catalog_name: catalog},
+            ray_init_args={"ignore_reinit_error": True},
+            **{"force_reinitialize": True},
+        )
+
+        # Retrieve the catalog and verify it's the same one
+        retrieved_catalog = get_catalog(catalog_name)
+        assert retrieved_catalog.impl.__name__ == "deltacat.catalog.main.impl"
+        assert isinstance(retrieved_catalog.inner, CatalogProperties)
+        assert retrieved_catalog.inner.root == self.temp_dir
+
+    def test_default_catalog_initialization_from_kwargs(self, reset_catalogs_ray_actor):
+
+        catalog_name = str(uuid.uuid4())
+        # Initialize DeltaCAT with this catalog
+        from deltacat.catalog.main import impl as DeltacatCatalog
+
+        put_catalog(
+            catalog_name,
+            Catalog(DeltacatCatalog, **{"root": "test_root"}),
+            ray_init_args={"ignore_reinit_error": True},
+        )
+
+        # Retrieve the catalog and verify it's the same one
+        retrieved_catalog = get_catalog(catalog_name)
+        assert retrieved_catalog.impl.__name__ == "deltacat.catalog.main.impl"
+        assert isinstance(retrieved_catalog.inner, CatalogProperties)
+        assert retrieved_catalog.inner.root == "test_root"
+
+    def test_iceberg_catalog_initialization(self, reset_catalogs_ray_actor):
+        """Test that an Iceberg catalog can be initialized and accessed."""
+        catalog_name = str(uuid.uuid4())
+
+        # Create the Iceberg catalog config
+        config = IcebergCatalogConfig(
+            type=CatalogType.IN_MEMORY, properties={"warehouse": self.temp_dir}
+        )
+
+        # Create the catalog using the factory method
+        catalog = Catalog.iceberg(config)
+
+        put_catalog(catalog_name, catalog, ray_init_args={"ignore_reinit_error": True})
+
+        # Retrieve the catalog and verify it's the same one
+        retrieved_catalog = get_catalog(catalog_name)
+        assert retrieved_catalog.impl.__name__ == "deltacat.catalog.iceberg.impl"
+        assert isinstance(retrieved_catalog.inner, IcebergCatalog)
deltacat/tests/catalog/test_default_catalog_impl.py
@@ -1,20 +1,23 @@
 import unittest
 import sqlite3
+import uuid
+
 import ray
 import os
 import deltacat.tests.local_deltacat_storage as ds
+from deltacat import Catalog
+from deltacat.catalog import CatalogProperties
 from deltacat.utils.common import current_time_ms
 from deltacat.tests.test_utils.pyarrow import (
     create_delta_from_csv_file,
     commit_delta_to_partition,
 )
 from deltacat.types.media import DistributedDatasetType, ContentType
-
+import deltacat as dc
 
 
 class TestReadTable(unittest.TestCase):
     READ_TABLE_NAMESPACE = "catalog_read_table_namespace"
-    LOCAL_CATALOG_NAME = "local_catalog"
     DB_FILE_PATH = f"{current_time_ms()}.db"
     SAMPLE_FILE_PATH = "deltacat/tests/catalog/data/sample_table.csv"
 
@@ -31,6 +34,13 @@ class TestReadTable(unittest.TestCase):
         }
         cls.deltacat_storage_kwargs = {ds.DB_FILE_PATH_ARG: cls.DB_FILE_PATH}
 
+        cls.catalog_name = str(uuid.uuid4())
+        catalog_config = CatalogProperties(storage=ds)
+        dc.put_catalog(
+            cls.catalog_name,
+            catalog=Catalog.default(config=catalog_config),
+            ray_init_args={"ignore_reinit_error": True},
+        )
         super().setUpClass()
 
     @classmethod
@@ -49,13 +59,12 @@ class TestReadTable(unittest.TestCase):
             **self.kwargs,
         )
 
-        dc.initialize(ds=ds)
         df = dc.read_table(
             table=READ_TABLE_TABLE_NAME,
             namespace=self.READ_TABLE_NAMESPACE,
-            catalog=self.
+            catalog=self.catalog_name,
             distributed_dataset_type=DistributedDatasetType.DAFT,
-
+            **self.kwargs,
         )
 
         # verify
@@ -81,14 +90,13 @@ class TestReadTable(unittest.TestCase):
         )
 
         # action
-        dc.initialize(ds=ds)
         df = dc.read_table(
             table=READ_TABLE_TABLE_NAME,
             namespace=self.READ_TABLE_NAMESPACE,
-            catalog=self.
+            catalog=self.catalog_name,
            distributed_dataset_type=DistributedDatasetType.DAFT,
             merge_on_read=False,
-
+            **self.kwargs,
         )
 
         # verify