deltacat 2.0.0b10__py3-none-any.whl → 2.0.0b11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. deltacat/__init__.py +19 -15
  2. deltacat/benchmarking/benchmark_engine.py +4 -2
  3. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  4. deltacat/catalog/__init__.py +62 -5
  5. deltacat/catalog/main/impl.py +18 -8
  6. deltacat/catalog/model/catalog.py +111 -73
  7. deltacat/catalog/model/properties.py +25 -22
  8. deltacat/compute/jobs/client.py +7 -5
  9. deltacat/constants.py +1 -2
  10. deltacat/env.py +10 -0
  11. deltacat/examples/basic_logging.py +1 -3
  12. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
  13. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  14. deltacat/examples/indexer/indexer.py +2 -2
  15. deltacat/examples/indexer/job_runner.py +1 -2
  16. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  17. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  18. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +27 -9
  19. deltacat/{storage → experimental/storage}/iceberg/iceberg_scan_planner.py +1 -1
  20. deltacat/{storage → experimental/storage}/iceberg/impl.py +1 -1
  21. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  22. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  23. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -9
  24. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  25. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  26. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  27. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  28. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  29. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  30. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  31. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  32. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -1
  33. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  34. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  35. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  36. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  37. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  38. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  39. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  40. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  41. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  42. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +4 -4
  43. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  44. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  45. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  46. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  47. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  48. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  49. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  50. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  51. deltacat/io/reader/deltacat_read_api.py +1 -1
  52. deltacat/storage/model/shard.py +6 -2
  53. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  54. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +436 -0
  55. deltacat/tests/catalog/model/__init__.py +0 -0
  56. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  57. deltacat/tests/catalog/test_catalogs.py +52 -98
  58. deltacat/tests/catalog/test_default_catalog_impl.py +1 -2
  59. deltacat/tests/daft/__init__.py +0 -0
  60. deltacat/tests/daft/test_model.py +97 -0
  61. deltacat/tests/experimental/__init__.py +0 -0
  62. deltacat/tests/experimental/catalog/__init__.py +0 -0
  63. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  64. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  65. deltacat/tests/experimental/daft/__init__.py +0 -0
  66. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  67. deltacat/tests/experimental/storage/__init__.py +0 -0
  68. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  69. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  70. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  71. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -3
  72. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  73. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  74. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  75. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  76. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  77. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  78. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  79. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  80. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +5 -3
  81. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  82. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  83. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  84. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  85. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  86. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  87. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  88. deltacat/tests/storage/model/test_shard.py +3 -1
  89. deltacat/types/media.py +3 -3
  90. deltacat/utils/daft.py +530 -4
  91. deltacat/utils/export.py +3 -1
  92. deltacat/utils/url.py +1 -1
  93. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/METADATA +4 -5
  94. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/RECORD +120 -100
  95. deltacat/catalog/iceberg/__init__.py +0 -4
  96. deltacat/daft/daft_scan.py +0 -115
  97. deltacat/daft/model.py +0 -258
  98. deltacat/daft/translator.py +0 -126
  99. deltacat/examples/common/fixtures.py +0 -15
  100. deltacat/storage/rivulet/__init__.py +0 -11
  101. deltacat/storage/rivulet/feather/__init__.py +0 -5
  102. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  103. /deltacat/{daft → examples/experimental}/__init__.py +0 -0
  104. /deltacat/examples/{common → experimental/iceberg}/__init__.py +0 -0
  105. /deltacat/{examples/iceberg → experimental/catalog}/__init__.py +0 -0
  106. /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
  107. /deltacat/{storage/iceberg → experimental/storage}/__init__.py +0 -0
  108. /deltacat/{storage/rivulet/arrow → experimental/storage/iceberg}/__init__.py +0 -0
  109. /deltacat/{storage → experimental/storage}/iceberg/model.py +0 -0
  110. /deltacat/{storage/rivulet/fs → experimental/storage/rivulet/arrow}/__init__.py +0 -0
  111. /deltacat/{storage/rivulet/metastore → experimental/storage/rivulet/fs}/__init__.py +0 -0
  112. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  113. /deltacat/{storage/rivulet/reader → experimental/storage/rivulet/metastore}/__init__.py +0 -0
  114. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  115. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  116. /deltacat/{storage → experimental/storage}/rivulet/parquet/data_reader.py +0 -0
  117. /deltacat/{storage/rivulet/schema → experimental/storage/rivulet/reader}/__init__.py +0 -0
  118. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  119. /deltacat/{storage/rivulet/writer → experimental/storage/rivulet/schema}/__init__.py +0 -0
  120. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  121. /deltacat/{tests/storage/rivulet → experimental/storage/rivulet/shard}/__init__.py +0 -0
  122. /deltacat/{tests/storage/rivulet/fs → experimental/storage/rivulet/writer}/__init__.py +0 -0
  123. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  124. /deltacat/tests/{storage/rivulet/schema → catalog/data}/__init__.py +0 -0
  125. /deltacat/tests/{storage/rivulet/writer → catalog/main}/__init__.py +0 -0
  126. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/LICENSE +0 -0
  127. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/WHEEL +0 -0
  128. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/top_level.txt +0 -0
@@ -13,9 +13,9 @@ from typing import (
13
13
  Optional,
14
14
  )
15
15
 
16
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
17
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
18
- from deltacat.storage.rivulet.schema.schema import Schema
16
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
17
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
18
+ from deltacat.experimental.storage.rivulet.schema.schema import Schema
19
19
 
20
20
  FILE_FORMAT = TypeVar("FILE_FORMAT")
21
21
  MEMORY_FORMAT = TypeVar("MEMORY_FORMAT")
@@ -3,9 +3,11 @@ from typing import Generator, Dict, Optional
3
3
  import pyarrow as pa
4
4
 
5
5
  from deltacat.storage.model.shard import Shard
6
- from deltacat.storage.rivulet.reader.dataset_reader import DatasetReader
7
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
8
- from deltacat.storage.rivulet import Schema
6
+ from deltacat.experimental.storage.rivulet.reader.dataset_reader import DatasetReader
7
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
8
+ QueryExpression,
9
+ )
10
+ from deltacat.experimental.storage.rivulet import Schema
9
11
 
10
12
 
11
13
  class DataScan:
@@ -7,16 +7,16 @@ import pyarrow.fs
7
7
 
8
8
  from deltacat.storage import Delta
9
9
  from deltacat.storage.model.partition import PartitionLocator
10
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
10
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
11
11
  from deltacat.utils.filesystem import resolve_path_and_filesystem
12
- from deltacat.storage.rivulet.metastore.json_sst import JsonSstReader
13
- from deltacat.storage.rivulet.metastore.delta import (
12
+ from deltacat.experimental.storage.rivulet.metastore.json_sst import JsonSstReader
13
+ from deltacat.experimental.storage.rivulet.metastore.delta import (
14
14
  ManifestIO,
15
15
  DeltaContext,
16
16
  RivuletDelta,
17
17
  DeltacatManifestIO,
18
18
  )
19
- from deltacat.storage.rivulet.metastore.sst import SSTReader, SSTable
19
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTReader, SSTable
20
20
  from deltacat.utils.metafile_locator import _find_table_path
21
21
  from deltacat import logs
22
22
 
@@ -2,18 +2,20 @@ import logging
2
2
  from typing import Generator, Optional, Set, Type, TypeVar, Any
3
3
 
4
4
  from deltacat.storage.model.shard import Shard
5
- from deltacat.storage.rivulet.metastore.sst import SSTableRow, SSTable
6
- from deltacat.storage.rivulet.metastore.sst_interval_tree import (
5
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow, SSTable
6
+ from deltacat.experimental.storage.rivulet.metastore.sst_interval_tree import (
7
7
  BlockIntervalTree,
8
8
  OrderedBlockGroups,
9
9
  )
10
- from deltacat.storage.rivulet.reader.block_scanner import BlockScanner
11
- from deltacat.storage.rivulet.reader.dataset_metastore import (
10
+ from deltacat.experimental.storage.rivulet.reader.block_scanner import BlockScanner
11
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
12
12
  DatasetMetastore,
13
13
  ManifestAccessor,
14
14
  )
15
- from deltacat.storage.rivulet.reader.query_expression import QueryExpression
16
- from deltacat.storage.rivulet import Schema
15
+ from deltacat.experimental.storage.rivulet.reader.query_expression import (
16
+ QueryExpression,
17
+ )
18
+ from deltacat.experimental.storage.rivulet import Schema
17
19
 
18
20
  # The type of data returned to reader
19
21
  T = TypeVar("T")
@@ -4,7 +4,10 @@ from typing import Generator, Dict, Type, NamedTuple, List
4
4
 
5
5
  from pyarrow import RecordBatch
6
6
 
7
- from deltacat.storage.rivulet.reader.data_reader import DataReader, MEMORY_FORMAT
7
+ from deltacat.experimental.storage.rivulet.reader.data_reader import (
8
+ DataReader,
9
+ MEMORY_FORMAT,
10
+ )
8
11
  import pyarrow as pa
9
12
 
10
13
 
@@ -1,9 +1,9 @@
1
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
2
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
3
- from deltacat.storage.rivulet.reader.data_reader import FileReader
1
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
2
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
3
+ from deltacat.experimental.storage.rivulet.reader.data_reader import FileReader
4
4
  from typing import Type, Dict
5
5
 
6
- from deltacat.storage.rivulet.schema.schema import Schema
6
+ from deltacat.experimental.storage.rivulet.schema.schema import Schema
7
7
 
8
8
 
9
9
  class FileReaderRegistrar:
@@ -5,7 +5,7 @@ from typing import MutableMapping, Dict, Iterable, Tuple, Optional
5
5
 
6
6
  import pyarrow as pa
7
7
 
8
- from deltacat.storage.rivulet.schema.datatype import Datatype
8
+ from deltacat.experimental.storage.rivulet.schema.datatype import Datatype
9
9
 
10
10
 
11
11
  @dataclass(frozen=True)
@@ -1,6 +1,6 @@
1
1
  from typing import Protocol, Iterable, List, Union, Any, Dict
2
2
 
3
- from deltacat.storage.rivulet.metastore.sst import SSTableRow
3
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTableRow
4
4
  import pyarrow as pa
5
5
 
6
6
  MEMTABLE_DATA = Union[Iterable[Dict[str, Any]], pa.Table]
@@ -1,11 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
- from deltacat.storage.rivulet.parquet.serializer import ParquetDataSerializer
4
- from deltacat.storage.rivulet import Schema
5
- from deltacat.storage.rivulet.serializer import DataSerializer
6
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
3
+ from deltacat.experimental.storage.rivulet.parquet.serializer import (
4
+ ParquetDataSerializer,
5
+ )
6
+ from deltacat.experimental.storage.rivulet import Schema
7
+ from deltacat.experimental.storage.rivulet.serializer import DataSerializer
8
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
7
9
 
8
- from deltacat.storage.rivulet.feather.serializer import FeatherDataSerializer
10
+ from deltacat.experimental.storage.rivulet.feather.serializer import (
11
+ FeatherDataSerializer,
12
+ )
9
13
 
10
14
 
11
15
  class DataSerializerFactory:
@@ -0,0 +1,129 @@
1
+ from __future__ import annotations
2
+ from typing import Generic, List, Union, Iterable
3
+ from deltacat.storage.model.shard import T, Shard, ShardingStrategy
4
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
5
+ DatasetMetastore,
6
+ )
7
+
8
+
9
+ class RangeShard(Shard, Generic[T]):
10
+ """
11
+ Represents a range-based shard with minimum and maximum keys.
12
+
13
+ param: min_key: The minimum key for the shard.
14
+ param: max_key: The maximum key for the shard.
15
+ """
16
+
17
+ def __init__(self, min_key: T, max_key: T):
18
+ self.min_key = min_key
19
+ self.max_key = max_key
20
+
21
+ def __repr__(self) -> str:
22
+ return f"Shard(type=range, min_key={self.min_key}, max_key={self.max_key})"
23
+
24
+ @staticmethod
25
+ def split(
26
+ global_min: Union[int, str], global_max: Union[int, str], num_shards: int
27
+ ) -> List[RangeShard]:
28
+ """
29
+ Splits a range into `num_shards` shards.
30
+ Currently supports splitting ranges of integers and strings.
31
+
32
+ Note: If global_min == global_max or num_shards <= 1, a single shard is returned
33
+ and num_shards is ignored.
34
+
35
+ :param global_min: The minimum key for the entire range (int or str).
36
+ :param global_max: The maximum key for the entire range (int or str).
37
+ :param num_shards: The number of shards to create.
38
+ :return: A list of RangeShard objects.
39
+ """
40
+ if global_min == global_max or num_shards <= 1:
41
+ return [RangeShard(global_min, global_max)]
42
+
43
+ # Determine which interpolation function to use based on the type of min/max
44
+ if isinstance(global_min, int) and isinstance(global_max, int):
45
+ interpolate = RangeShard._interpolate_numeric
46
+ elif isinstance(global_min, str) and isinstance(global_max, str):
47
+ interpolate = RangeShard._interpolate_str
48
+ else:
49
+ raise ValueError(
50
+ "Unsupported combination of types for global_min and global_max."
51
+ )
52
+
53
+ shards: List[RangeShard] = []
54
+ for i in range(num_shards):
55
+ start = interpolate(global_min, global_max, i, num_shards)
56
+ end = interpolate(global_min, global_max, i + 1, num_shards)
57
+
58
+ if i > 0:
59
+ if isinstance(start, int):
60
+ start = shards[-1].max_key + 1
61
+ elif isinstance(start, int):
62
+ char_list = list(start)
63
+ char_list[-1] = chr(ord(char_list[-1]) + 1)
64
+ start = "".join(char_list)
65
+
66
+ shards.append(RangeShard(start, end))
67
+
68
+ return shards
69
+
70
+ @staticmethod
71
+ def _interpolate_numeric(start: int, end: int, step: int, total_steps: int) -> int:
72
+ """
73
+ Integer interpolation using integer (floor) division.
74
+
75
+ param: start (int): The starting number.
76
+ param: end (int): The ending number.
77
+ param: step (int): The current step in the interpolation (0-based).
78
+ param: total_steps (int): The total number of interpolation steps.
79
+
80
+ returns: int: The interpolated integer.
81
+ """
82
+ return start + (end - start) * step // total_steps
83
+
84
+ @staticmethod
85
+ def _interpolate_str(start: str, end: str, step: int, total_steps: int) -> str:
86
+ """
87
+ Interpolates between two strings lexicographically.
88
+
89
+ param: start (str): The starting string.
90
+ param: end (str): The ending string.
91
+ param: step (int): The current step in the interpolation (0-based).
92
+ param: total_steps (int): The total number of interpolation steps.
93
+
94
+ returns: str: The interpolated string.
95
+ """
96
+ max_len = max(len(start), len(end))
97
+
98
+ # Pad strings to the same length with spaces (the lowest-valued printable ASCII character).
99
+ start = start.ljust(max_len, " ")
100
+ end = end.ljust(max_len, " ")
101
+
102
+ # Interpolate character by character based on ordinal values.
103
+ interpolated_chars = [
104
+ chr(round(ord(s) + (ord(e) - ord(s)) * step / total_steps))
105
+ for s, e in zip(start, end)
106
+ ]
107
+
108
+ return "".join(interpolated_chars).rstrip()
109
+
110
+
111
+ class RangeShardingStrategy(ShardingStrategy, Generic[T]):
112
+ """
113
+ Implements a sharding strategy to divide a range of keys into shards.
114
+
115
+ method: shards: Generates a list of RangeShard objects based on the global range.
116
+ """
117
+
118
+ def shards(
119
+ self, num_shards: int, metastore: DatasetMetastore
120
+ ) -> Iterable[RangeShard[T]]:
121
+ """
122
+ Divides the global range of keys into evenly sized shards.
123
+
124
+ param: num_shards: The number of shards to divide the range into.
125
+ param: metastore: The dataset metastore providing access to manifests.
126
+ returns: A list of RangeShard objects representing the divided range.
127
+ """
128
+ min, max = metastore.get_min_max_keys()
129
+ return RangeShard.split(min, max, num_shards)
@@ -6,15 +6,26 @@ from typing import Any, List, Set, Protocol, TypeVar, Dict, Iterable
6
6
 
7
7
  from pyarrow import RecordBatch, Table
8
8
  from deltacat.storage.model.partition import PartitionLocator
9
- from deltacat.storage.rivulet.metastore.delta import ManifestIO, DeltacatManifestIO
10
-
11
- from deltacat.storage.rivulet import Schema
12
- from deltacat.storage.rivulet.metastore.json_sst import JsonSstWriter
13
- from deltacat.storage.rivulet.serializer import MEMTABLE_DATA, DataSerializer
14
- from deltacat.storage.rivulet.serializer_factory import DataSerializerFactory
15
- from deltacat.storage.rivulet.writer.dataset_writer import DatasetWriter, DATA
16
- from deltacat.storage.rivulet.metastore.sst import SSTWriter
17
- from deltacat.storage.rivulet.fs.file_provider import FileProvider
9
+ from deltacat.experimental.storage.rivulet.metastore.delta import (
10
+ ManifestIO,
11
+ DeltacatManifestIO,
12
+ )
13
+
14
+ from deltacat.experimental.storage.rivulet import Schema
15
+ from deltacat.experimental.storage.rivulet.metastore.json_sst import JsonSstWriter
16
+ from deltacat.experimental.storage.rivulet.serializer import (
17
+ MEMTABLE_DATA,
18
+ DataSerializer,
19
+ )
20
+ from deltacat.experimental.storage.rivulet.serializer_factory import (
21
+ DataSerializerFactory,
22
+ )
23
+ from deltacat.experimental.storage.rivulet.writer.dataset_writer import (
24
+ DatasetWriter,
25
+ DATA,
26
+ )
27
+ from deltacat.experimental.storage.rivulet.metastore.sst import SSTWriter
28
+ from deltacat.experimental.storage.rivulet.fs.file_provider import FileProvider
18
29
 
19
30
  INPUT_ROW = TypeVar("INPUT_ROW")
20
31
 
@@ -97,7 +97,7 @@ def read_deltacat(
97
97
  >>> # Read the Iceberg stream of the latest active DeltaCAT table version,
98
98
  >>> import deltacat as dc
99
99
  >>> dc.io.read_deltacat("dc://my_catalog/my_namespace/my_table/default/iceberg")
100
- >>> # Or, if `my_catalog is the default catalog, this is equivalent to:
100
+ >>> # Or, if `my_catalog` is the default catalog, this is equivalent to:
101
101
  >>> dc.io.read_deltacat("namespace://my_namespace/my_table/default/iceberg")
102
102
  >>> # Or, if `my_namespace` is the default namespace, this is equivalent to:
103
103
  >>> dc.io.read_deltacat("table://my_table/default/iceberg")
@@ -1,7 +1,9 @@
1
1
  from abc import abstractmethod
2
2
  from typing import Iterable, Optional, Protocol, TypeVar, Union
3
3
 
4
- from deltacat.storage.rivulet.reader.dataset_metastore import DatasetMetastore
4
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
5
+ DatasetMetastore,
6
+ )
5
7
 
6
8
  # TODO: Add type validation in dataset/schema classes
7
9
  T = TypeVar("T", bound=Union[int, str])
@@ -34,7 +36,9 @@ class ShardingStrategy(Protocol):
34
36
  return: ShardingStrategy class.
35
37
  """
36
38
  if strategy == "range":
37
- from deltacat.storage.rivulet.shard.range_shard import RangeShardingStrategy
39
+ from deltacat.experimental.storage.rivulet.shard.range_shard import (
40
+ RangeShardingStrategy,
41
+ )
38
42
 
39
43
  return RangeShardingStrategy()
40
44
  else:
@@ -0,0 +1,130 @@
1
+ import shutil
2
+
3
+ from deltacat.catalog import get_catalog_properties
4
+ from deltacat.exceptions import NamespaceAlreadyExistsError
5
+ import pytest
6
+ import tempfile
7
+ import deltacat.catalog.main.impl as catalog
8
+
9
+
10
+ class TestCatalogNamespaceOperations:
11
+ temp_dir = None
12
+ property_catalog = None
13
+ catalog = None
14
+
15
+ @classmethod
16
+ def setup_class(cls):
17
+ cls.temp_dir = tempfile.mkdtemp()
18
+ cls.catalog_properties = get_catalog_properties(root=cls.temp_dir)
19
+
20
+ @classmethod
21
+ def teardown_class(cls):
22
+ shutil.rmtree(cls.temp_dir)
23
+
24
+ def test_create_namespace(self):
25
+ """Test creating a namespace with properties"""
26
+ namespace = "test_create_namespace"
27
+ properties = {"description": "Test Namespace", "owner": "test-user"}
28
+
29
+ # Create namespace
30
+ catalog.create_namespace(
31
+ namespace=namespace, properties=properties, inner=self.catalog_properties
32
+ )
33
+
34
+ # Verify namespace exists
35
+ assert catalog.namespace_exists(namespace, inner=self.catalog_properties)
36
+
37
+ # Get namespace and verify properties
38
+ namespace = catalog.get_namespace(namespace, inner=self.catalog_properties)
39
+ assert namespace.namespace == "test_create_namespace"
40
+ assert namespace.properties["description"] == "Test Namespace"
41
+
42
+ def test_get_namespace(self):
43
+ """Test getting namespace properties"""
44
+ namespace = "test_get_namespace"
45
+ properties = {"description": "foo", "created_by": "bar"}
46
+
47
+ # Create namespace
48
+ catalog.create_namespace(
49
+ namespace=namespace, properties=properties, inner=self.catalog_properties
50
+ )
51
+
52
+ # Get namespace properties
53
+ namespace = catalog.get_namespace(namespace, inner=self.catalog_properties)
54
+
55
+ # Verify properties
56
+ assert namespace.namespace == "test_get_namespace"
57
+ assert namespace.properties["created_by"] == "bar"
58
+
59
+ def test_namespace_exists(self):
60
+ """Test checking if a namespace exists"""
61
+ existing_namespace = "test_namespace_exists"
62
+ non_existing_namespace = "non_existing_namespace"
63
+
64
+ # Create namespace
65
+ catalog.create_namespace(
66
+ namespace=existing_namespace, properties={}, inner=self.catalog_properties
67
+ )
68
+
69
+ # Check existing namespace
70
+ assert catalog.namespace_exists(
71
+ existing_namespace, inner=self.catalog_properties
72
+ )
73
+
74
+ # Check non-existing namespace
75
+ assert not catalog.namespace_exists(
76
+ non_existing_namespace, inner=self.catalog_properties
77
+ )
78
+
79
+ def test_create_namespace_already_exists(self):
80
+ """Test creating a namespace that already exists should fail"""
81
+ namespace = "test_create_namespace_already_exists"
82
+ properties = {"description": "Test namespace", "owner": "test-user"}
83
+
84
+ # Create namespace first time
85
+ catalog.create_namespace(
86
+ namespace=namespace,
87
+ properties=properties,
88
+ inner=self.catalog_properties,
89
+ )
90
+
91
+ # Verify namespace exists
92
+ assert catalog.namespace_exists(namespace, inner=self.catalog_properties)
93
+
94
+ # Try to create the same namespace again, should raise NamespaceAlreadyExistsError
95
+ with pytest.raises(NamespaceAlreadyExistsError, match=namespace):
96
+ catalog.create_namespace(
97
+ namespace=namespace,
98
+ properties=properties,
99
+ inner=self.catalog_properties,
100
+ )
101
+
102
+ def test_drop_namespace(self):
103
+ """Test dropping a namespace"""
104
+ namespace = "test_drop_namespace"
105
+ properties = {"description": "Test Namespace", "owner": "test-user"}
106
+
107
+ # Create namespace
108
+ catalog.create_namespace(
109
+ namespace=namespace,
110
+ properties=properties,
111
+ inner=self.catalog_properties,
112
+ )
113
+
114
+ # Verify namespace exists
115
+ assert catalog.namespace_exists(
116
+ namespace,
117
+ inner=self.catalog_properties,
118
+ )
119
+
120
+ # Drop namespace
121
+ catalog.drop_namespace(
122
+ namespace,
123
+ inner=self.catalog_properties,
124
+ )
125
+
126
+ # Verify namespace does not exist
127
+ assert not catalog.namespace_exists(
128
+ namespace,
129
+ inner=self.catalog_properties,
130
+ )