lance-namespace 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lance_namespace-0.0.5/.gitignore +120 -0
- lance_namespace-0.0.5/PKG-INFO +14 -0
- lance_namespace-0.0.5/README.md +0 -0
- lance_namespace-0.0.5/pyproject.toml +32 -0
- lance_namespace-0.0.5/src/lance_namespace/__init__.py +104 -0
- lance_namespace-0.0.5/src/lance_namespace/dir.py +348 -0
- lance_namespace-0.0.5/src/lance_namespace/namespace.py +183 -0
- lance_namespace-0.0.5/src/lance_namespace/py.typed +0 -0
- lance_namespace-0.0.5/src/lance_namespace/rest.py +322 -0
- lance_namespace-0.0.5/tests/__init__.py +1 -0
- lance_namespace-0.0.5/tests/test_dir.py +269 -0
- lance_namespace-0.0.5/tests/test_namespace.py +73 -0
- lance_namespace-0.0.5/tests/test_rest.py +847 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
2
|
+
# you may not use this file except in compliance with the License.
|
|
3
|
+
# You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
# Prerequisites
|
|
14
|
+
*.d
|
|
15
|
+
|
|
16
|
+
# Compiled Object files
|
|
17
|
+
*.slo
|
|
18
|
+
*.lo
|
|
19
|
+
*.o
|
|
20
|
+
*.obj
|
|
21
|
+
|
|
22
|
+
# Precompiled Headers
|
|
23
|
+
*.gch
|
|
24
|
+
*.pch
|
|
25
|
+
|
|
26
|
+
# Compiled Dynamic libraries
|
|
27
|
+
*.so
|
|
28
|
+
*.dylib
|
|
29
|
+
*.dll
|
|
30
|
+
|
|
31
|
+
# Fortran module files
|
|
32
|
+
*.mod
|
|
33
|
+
*.smod
|
|
34
|
+
|
|
35
|
+
# Compiled Static libraries
|
|
36
|
+
*.lai
|
|
37
|
+
*.la
|
|
38
|
+
*.a
|
|
39
|
+
*.lib
|
|
40
|
+
|
|
41
|
+
# Executables
|
|
42
|
+
*.exe
|
|
43
|
+
*.out
|
|
44
|
+
*.app
|
|
45
|
+
|
|
46
|
+
# Tracing files
|
|
47
|
+
trace-*.json
|
|
48
|
+
|
|
49
|
+
**/*~
|
|
50
|
+
**/__pycache__
|
|
51
|
+
build/
|
|
52
|
+
dist/
|
|
53
|
+
*.egg-info/
|
|
54
|
+
.python-version
|
|
55
|
+
|
|
56
|
+
.idea
|
|
57
|
+
cmake-build-*
|
|
58
|
+
.vscode
|
|
59
|
+
.DS_Store
|
|
60
|
+
|
|
61
|
+
python/lance/_*.cpp
|
|
62
|
+
|
|
63
|
+
bin/
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
*.parquet
|
|
67
|
+
*.parq
|
|
68
|
+
|
|
69
|
+
python/thirdparty/arrow/
|
|
70
|
+
python/wheels
|
|
71
|
+
python/benchmark_data
|
|
72
|
+
|
|
73
|
+
logs
|
|
74
|
+
*.ckpt
|
|
75
|
+
|
|
76
|
+
docs/_build
|
|
77
|
+
docs/api/python
|
|
78
|
+
|
|
79
|
+
**/.ipynb_checkpoints/
|
|
80
|
+
docs/notebooks
|
|
81
|
+
|
|
82
|
+
notebooks/sift
|
|
83
|
+
notebooks/image_data/data
|
|
84
|
+
benchmarks/sift/sift
|
|
85
|
+
benchmarks/sift/sift.lance
|
|
86
|
+
benchmarks/sift/lance_ivf*.csv
|
|
87
|
+
**/sift.tar.gz
|
|
88
|
+
|
|
89
|
+
wheelhouse
|
|
90
|
+
|
|
91
|
+
# pandas testing
|
|
92
|
+
.hypothesis
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
**/df.json
|
|
96
|
+
|
|
97
|
+
# Rust
|
|
98
|
+
target
|
|
99
|
+
**/sccache.log
|
|
100
|
+
|
|
101
|
+
# c++ lsp
|
|
102
|
+
.ccls-cache/
|
|
103
|
+
|
|
104
|
+
python/venv
|
|
105
|
+
test_data/venv
|
|
106
|
+
|
|
107
|
+
**/*.profraw
|
|
108
|
+
*.lance
|
|
109
|
+
|
|
110
|
+
# Environments
|
|
111
|
+
.env
|
|
112
|
+
.venv
|
|
113
|
+
env/
|
|
114
|
+
venv/
|
|
115
|
+
ENV/
|
|
116
|
+
env.bak/
|
|
117
|
+
venv.bak/
|
|
118
|
+
|
|
119
|
+
# Docs
|
|
120
|
+
docs/site
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lance-namespace
|
|
3
|
+
Version: 0.0.5
|
|
4
|
+
Summary: Python client for Lance Namespace API
|
|
5
|
+
Author-email: Jack Ye <yezhaoqin@gmail.com>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: lance-namespace-urllib3-client
|
|
8
|
+
Requires-Dist: opendal>=0.46.0
|
|
9
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
10
|
+
Requires-Dist: pylance>=0.18.0
|
|
11
|
+
Requires-Dist: typing-extensions>=4.0.0
|
|
12
|
+
Provides-Extra: test
|
|
13
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
|
|
14
|
+
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "lance-namespace"
|
|
3
|
+
version = "0.0.5"
|
|
4
|
+
description = "Python client for Lance Namespace API"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Jack Ye", email = "yezhaoqin@gmail.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"lance-namespace-urllib3-client",
|
|
12
|
+
"typing-extensions>=4.0.0",
|
|
13
|
+
"pylance>=0.18.0",
|
|
14
|
+
"pyarrow>=14.0.0",
|
|
15
|
+
"opendal>=0.46.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
test = [
|
|
20
|
+
"pytest>=7.0.0",
|
|
21
|
+
"pytest-cov>=4.0.0",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[tool.pytest.ini_options]
|
|
25
|
+
testpaths = ["tests"]
|
|
26
|
+
python_files = ["test_*.py"]
|
|
27
|
+
python_classes = ["Test*"]
|
|
28
|
+
python_functions = ["test_*"]
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["hatchling"]
|
|
32
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lance Namespace Python Client
|
|
3
|
+
|
|
4
|
+
A Python client for the Lance Namespace API that provides a unified interface
|
|
5
|
+
for managing namespaces and tables across different backend implementations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .namespace import LanceNamespace, connect
|
|
9
|
+
|
|
10
|
+
# Re-export all models from the urllib3 client
|
|
11
|
+
from lance_namespace_urllib3_client.models import (
|
|
12
|
+
ListNamespacesRequest,
|
|
13
|
+
ListNamespacesResponse,
|
|
14
|
+
DescribeNamespaceRequest,
|
|
15
|
+
DescribeNamespaceResponse,
|
|
16
|
+
CreateNamespaceRequest,
|
|
17
|
+
CreateNamespaceResponse,
|
|
18
|
+
DropNamespaceRequest,
|
|
19
|
+
DropNamespaceResponse,
|
|
20
|
+
NamespaceExistsRequest,
|
|
21
|
+
ListTablesRequest,
|
|
22
|
+
ListTablesResponse,
|
|
23
|
+
DescribeTableRequest,
|
|
24
|
+
DescribeTableResponse,
|
|
25
|
+
RegisterTableRequest,
|
|
26
|
+
RegisterTableResponse,
|
|
27
|
+
TableExistsRequest,
|
|
28
|
+
DropTableRequest,
|
|
29
|
+
DropTableResponse,
|
|
30
|
+
DeregisterTableRequest,
|
|
31
|
+
DeregisterTableResponse,
|
|
32
|
+
CountTableRowsRequest,
|
|
33
|
+
CreateTableRequest,
|
|
34
|
+
CreateTableResponse,
|
|
35
|
+
InsertIntoTableRequest,
|
|
36
|
+
InsertIntoTableResponse,
|
|
37
|
+
MergeInsertIntoTableRequest,
|
|
38
|
+
MergeInsertIntoTableResponse,
|
|
39
|
+
UpdateTableRequest,
|
|
40
|
+
UpdateTableResponse,
|
|
41
|
+
DeleteFromTableRequest,
|
|
42
|
+
DeleteFromTableResponse,
|
|
43
|
+
QueryTableRequest,
|
|
44
|
+
CreateTableIndexRequest,
|
|
45
|
+
CreateTableIndexResponse,
|
|
46
|
+
ListTableIndicesRequest,
|
|
47
|
+
ListTableIndicesResponse,
|
|
48
|
+
DescribeTableIndexStatsRequest,
|
|
49
|
+
DescribeTableIndexStatsResponse,
|
|
50
|
+
DescribeTransactionRequest,
|
|
51
|
+
DescribeTransactionResponse,
|
|
52
|
+
AlterTransactionRequest,
|
|
53
|
+
AlterTransactionResponse,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
__all__ = [
|
|
57
|
+
# Main interface and connect function
|
|
58
|
+
"LanceNamespace",
|
|
59
|
+
"connect",
|
|
60
|
+
|
|
61
|
+
# Request/Response models
|
|
62
|
+
"ListNamespacesRequest",
|
|
63
|
+
"ListNamespacesResponse",
|
|
64
|
+
"DescribeNamespaceRequest",
|
|
65
|
+
"DescribeNamespaceResponse",
|
|
66
|
+
"CreateNamespaceRequest",
|
|
67
|
+
"CreateNamespaceResponse",
|
|
68
|
+
"DropNamespaceRequest",
|
|
69
|
+
"DropNamespaceResponse",
|
|
70
|
+
"NamespaceExistsRequest",
|
|
71
|
+
"ListTablesRequest",
|
|
72
|
+
"ListTablesResponse",
|
|
73
|
+
"DescribeTableRequest",
|
|
74
|
+
"DescribeTableResponse",
|
|
75
|
+
"RegisterTableRequest",
|
|
76
|
+
"RegisterTableResponse",
|
|
77
|
+
"TableExistsRequest",
|
|
78
|
+
"DropTableRequest",
|
|
79
|
+
"DropTableResponse",
|
|
80
|
+
"DeregisterTableRequest",
|
|
81
|
+
"DeregisterTableResponse",
|
|
82
|
+
"CountTableRowsRequest",
|
|
83
|
+
"CreateTableRequest",
|
|
84
|
+
"CreateTableResponse",
|
|
85
|
+
"InsertIntoTableRequest",
|
|
86
|
+
"InsertIntoTableResponse",
|
|
87
|
+
"MergeInsertIntoTableRequest",
|
|
88
|
+
"MergeInsertIntoTableResponse",
|
|
89
|
+
"UpdateTableRequest",
|
|
90
|
+
"UpdateTableResponse",
|
|
91
|
+
"DeleteFromTableRequest",
|
|
92
|
+
"DeleteFromTableResponse",
|
|
93
|
+
"QueryTableRequest",
|
|
94
|
+
"CreateTableIndexRequest",
|
|
95
|
+
"CreateTableIndexResponse",
|
|
96
|
+
"ListTableIndicesRequest",
|
|
97
|
+
"ListTableIndicesResponse",
|
|
98
|
+
"DescribeTableIndexStatsRequest",
|
|
99
|
+
"DescribeTableIndexStatsResponse",
|
|
100
|
+
"DescribeTransactionRequest",
|
|
101
|
+
"DescribeTransactionResponse",
|
|
102
|
+
"AlterTransactionRequest",
|
|
103
|
+
"AlterTransactionResponse",
|
|
104
|
+
]
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lance Directory Namespace implementation using OpenDAL.
|
|
3
|
+
"""
|
|
4
|
+
from typing import Dict, List, Optional
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
import opendal
|
|
9
|
+
|
|
10
|
+
import lance
|
|
11
|
+
import pyarrow as pa
|
|
12
|
+
|
|
13
|
+
from lance_namespace.namespace import LanceNamespace
|
|
14
|
+
from lance_namespace_urllib3_client.models import (
|
|
15
|
+
ListNamespacesRequest,
|
|
16
|
+
ListNamespacesResponse,
|
|
17
|
+
DescribeNamespaceRequest,
|
|
18
|
+
DescribeNamespaceResponse,
|
|
19
|
+
CreateNamespaceRequest,
|
|
20
|
+
CreateNamespaceResponse,
|
|
21
|
+
DropNamespaceRequest,
|
|
22
|
+
DropNamespaceResponse,
|
|
23
|
+
NamespaceExistsRequest,
|
|
24
|
+
ListTablesRequest,
|
|
25
|
+
ListTablesResponse,
|
|
26
|
+
CreateTableRequest,
|
|
27
|
+
CreateTableResponse,
|
|
28
|
+
DropTableRequest,
|
|
29
|
+
DropTableResponse,
|
|
30
|
+
DescribeTableRequest,
|
|
31
|
+
DescribeTableResponse,
|
|
32
|
+
JsonArrowSchema,
|
|
33
|
+
JsonArrowField,
|
|
34
|
+
JsonArrowDataType,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DirectoryNamespace(LanceNamespace):
|
|
39
|
+
"""Lance Directory Namespace implementation using OpenDAL."""
|
|
40
|
+
|
|
41
|
+
def __init__(self, **properties):
    """Build a directory-backed namespace from keyword properties.

    Args:
        **properties: Configuration properties, handed unchanged to
            ``DirectoryNamespaceConfig``. When the resulting ``root`` is
            missing or empty, the current working directory is used.
    """
    self.config = DirectoryNamespaceConfig(properties)

    # An unset or empty root falls back to the process working directory.
    root = self.config.root or os.getcwd()

    self.namespace_path = self._parse_path(root)
    self.operator = self._initialize_operator(root)
|
|
58
|
+
|
|
59
|
+
def create_namespace(self, request: CreateNamespaceRequest) -> CreateNamespaceResponse:
    """Reject namespace creation; this implementation never creates namespaces.

    Args:
        request: The creation request (not inspected).

    Raises:
        NotImplementedError: Always.
    """
    message = "Directory namespace only contains a flat list of tables and does not support creating namespaces"
    raise NotImplementedError(message)
|
|
64
|
+
|
|
65
|
+
def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse:
    """Reject namespace listing; this implementation never lists namespaces.

    Args:
        request: The listing request (not inspected).

    Raises:
        NotImplementedError: Always.
    """
    message = "Directory namespace only contains a flat list of tables and does not support listing namespaces"
    raise NotImplementedError(message)
|
|
70
|
+
|
|
71
|
+
def describe_namespace(self, request: DescribeNamespaceRequest) -> DescribeNamespaceResponse:
    """Reject namespace description; this implementation never describes namespaces.

    Args:
        request: The describe request (not inspected).

    Raises:
        NotImplementedError: Always.
    """
    message = "Directory namespace only contains a flat list of tables and does not support describing namespaces"
    raise NotImplementedError(message)
|
|
76
|
+
|
|
77
|
+
def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse:
    """Reject namespace removal; this implementation never drops namespaces.

    Args:
        request: The drop request (not inspected).

    Raises:
        NotImplementedError: Always.
    """
    message = "Directory namespace only contains a flat list of tables and does not support dropping namespaces"
    raise NotImplementedError(message)
|
|
82
|
+
|
|
83
|
+
def namespace_exists(self, request: NamespaceExistsRequest) -> None:
    """Reject namespace existence checks; this implementation never performs them.

    Args:
        request: The existence-check request (not inspected).

    Raises:
        NotImplementedError: Always.
    """
    message = "Directory namespace only contains a flat list of tables and does not support namespace existence checks"
    raise NotImplementedError(message)
|
|
88
|
+
|
|
89
|
+
def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
    """List the Lance tables found directly under the namespace root.

    An entry counts as a table when its path ends with ``.lance`` and
    listing its ``_versions/`` prefix yields at least one object. Table
    names are returned without the ``.lance`` suffix.

    Args:
        request: Listing request; ``request.id`` is checked by
            ``_validate_root_namespace_id`` before anything is listed.

    Returns:
        A ``ListTablesResponse`` holding the discovered table names.

    Raises:
        RuntimeError: If listing fails; the underlying error is chained as
            ``__cause__``.
    """
    self._validate_root_namespace_id(request.id)

    suffix = ".lance"
    try:
        tables = []
        for entry in self.operator.list("", recursive=False):
            path = entry.path.rstrip('/')

            # Require a genuine ".lance" suffix. A substring match would let
            # names such as "x.lance.bak" through and then strip the wrong
            # six characters from them.
            if not path.endswith(suffix):
                continue

            try:
                version_entries = list(self.operator.list(f"{path}/_versions/", limit=1))
            except Exception:
                # Best effort: if _versions cannot be listed, the entry is
                # treated as "not a Lance dataset" and skipped.
                continue

            if version_entries:
                # Report the clean name without the ".lance" suffix.
                tables.append(path[:-len(suffix)])

        return ListTablesResponse(tables=tables)
    except Exception as e:
        raise RuntimeError(f"Failed to list tables: {e}") from e
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def create_table(self, request: CreateTableRequest, request_data: bytes) -> CreateTableResponse:
    """Create an empty Lance dataset for the requested table.

    Args:
        request: Must carry a non-empty ``id`` and a ``var_schema``. If
            ``location`` is set it has to equal the path this namespace
            derives for the table.
        request_data: Not read by this implementation.

    Returns:
        A ``CreateTableResponse`` with the dataset location and ``version=1``.

    Raises:
        ValueError: If the ID or schema is missing, or the requested
            location differs from the derived table path.
    """
    if not request.id:
        raise ValueError("table ID cannot be empty")
    if not request.var_schema:
        raise ValueError("Schema is required in CreateTableRequest")

    table_name = self._normalize_table_id(request.id)
    table_path = self._get_table_path(table_name)

    location_mismatch = bool(request.location) and request.location != table_path
    if location_mismatch:
        raise ValueError(f"Cannot create table {table_name} at location {request.location}, must be at location {table_path}")

    # Translate the JSON schema, then materialise one zero-length column per
    # field so the dataset is written with the schema but no rows.
    schema = self._convert_json_arrow_schema_to_pyarrow(request.var_schema)
    empty_columns = [pa.array([], type=field.type) for field in schema]
    empty_table = pa.Table.from_arrays(empty_columns, schema=schema)

    lance.write_dataset(empty_table, table_path, storage_options=self.config.storage_options)

    return CreateTableResponse(location=table_path, version=1)
|
|
154
|
+
|
|
155
|
+
def drop_table(self, request: DropTableRequest) -> DropTableResponse:
    """Drop a table by removing everything under its ``.lance`` directory.

    Args:
        request: Must carry a non-empty, single-level table ``id``.

    Returns:
        An empty ``DropTableResponse`` once removal succeeds.

    Raises:
        ValueError: If the table ID is empty (or rejected by
            ``_normalize_table_id``).
        RuntimeError: If removal fails; the underlying error is chained as
            ``__cause__``.
    """
    if not request.id:
        raise ValueError("table ID cannot be empty")

    table_name = self._normalize_table_id(request.id)

    try:
        # Paths given to the operator are relative to its root.
        self.operator.remove_all(f"{table_name}.lance/")
    except Exception as e:
        raise RuntimeError(f"Failed to drop table {table_name}: {e}") from e

    return DropTableResponse()
|
|
170
|
+
|
|
171
|
+
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
    """Describe a table by confirming it exists and returning its location.

    A table is considered to exist when listing its ``_versions/`` prefix
    yields at least one object.

    Args:
        request: Must carry a non-empty, single-level table ``id``.

    Returns:
        A ``DescribeTableResponse`` carrying the table location.

    Raises:
        ValueError: If the table ID is empty (or rejected by
            ``_normalize_table_id``).
        RuntimeError: If the table has no version objects, or if listing
            them fails (the underlying error is chained as ``__cause__``).
    """
    if not request.id:
        raise ValueError("table ID cannot be empty")

    table_name = self._normalize_table_id(request.id)
    table_path = self._get_table_path(table_name)

    # Only the listing call is guarded, so the "does not exist" error raised
    # below is not caught and re-wrapped with a duplicated message.
    try:
        version_entries = list(self.operator.list(f"{table_name}.lance/_versions/", limit=1))
    except Exception as e:
        raise RuntimeError(f"Table does not exist: {table_name}: {e}") from e

    if not version_entries:
        raise RuntimeError(f"Table does not exist: {table_name}")

    return DescribeTableResponse(location=table_path)
|
|
190
|
+
|
|
191
|
+
def _normalize_table_id(self, id: List[str]) -> str:
|
|
192
|
+
"""Normalize table ID - only single-level IDs are supported."""
|
|
193
|
+
if not id:
|
|
194
|
+
raise ValueError("Directory namespace table ID cannot be empty")
|
|
195
|
+
|
|
196
|
+
if len(id) != 1:
|
|
197
|
+
raise ValueError(
|
|
198
|
+
f"Directory namespace only supports single-level table IDs, but got: {id}"
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
return id[0]
|
|
202
|
+
|
|
203
|
+
def _validate_root_namespace_id(self, id: Optional[List[str]]) -> None:
|
|
204
|
+
"""Validate that the namespace ID represents a root namespace."""
|
|
205
|
+
if id:
|
|
206
|
+
raise ValueError(
|
|
207
|
+
f"Directory namespace only supports root namespace operations, "
|
|
208
|
+
f"but got namespace ID: {id}. Expected empty ID."
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
def _get_table_path(self, table_name: str) -> str:
|
|
212
|
+
"""Get the full path for a table."""
|
|
213
|
+
root = self.config.root if self.config.root else os.getcwd()
|
|
214
|
+
return f"{root}/{table_name}.lance"
|
|
215
|
+
|
|
216
|
+
def _convert_json_arrow_schema_to_pyarrow(self, json_schema: JsonArrowSchema) -> pa.Schema:
    """Build a PyArrow schema from a ``JsonArrowSchema``.

    Each JSON field becomes a ``pa.field`` with the same name and
    nullability, its type converted by
    ``_convert_json_arrow_type_to_pyarrow``. The schema metadata is passed
    through unchanged.

    Args:
        json_schema: Source schema exposing ``fields`` and ``metadata``.

    Returns:
        The equivalent ``pa.Schema``.
    """
    arrow_fields = [
        pa.field(
            json_field.name,
            self._convert_json_arrow_type_to_pyarrow(json_field.type),
            nullable=json_field.nullable,
        )
        for json_field in json_schema.fields
    ]
    return pa.schema(arrow_fields, metadata=json_schema.metadata)
|
|
225
|
+
|
|
226
|
+
def _convert_json_arrow_type_to_pyarrow(self, json_type: JsonArrowDataType) -> pa.DataType:
|
|
227
|
+
"""Convert JsonArrowDataType to PyArrow DataType."""
|
|
228
|
+
type_name = json_type.type.lower()
|
|
229
|
+
|
|
230
|
+
if type_name == "null":
|
|
231
|
+
return pa.null()
|
|
232
|
+
elif type_name in ["bool", "boolean"]:
|
|
233
|
+
return pa.bool_()
|
|
234
|
+
elif type_name == "int8":
|
|
235
|
+
return pa.int8()
|
|
236
|
+
elif type_name == "uint8":
|
|
237
|
+
return pa.uint8()
|
|
238
|
+
elif type_name == "int16":
|
|
239
|
+
return pa.int16()
|
|
240
|
+
elif type_name == "uint16":
|
|
241
|
+
return pa.uint16()
|
|
242
|
+
elif type_name == "int32":
|
|
243
|
+
return pa.int32()
|
|
244
|
+
elif type_name == "uint32":
|
|
245
|
+
return pa.uint32()
|
|
246
|
+
elif type_name == "int64":
|
|
247
|
+
return pa.int64()
|
|
248
|
+
elif type_name == "uint64":
|
|
249
|
+
return pa.uint64()
|
|
250
|
+
elif type_name == "float32":
|
|
251
|
+
return pa.float32()
|
|
252
|
+
elif type_name == "float64":
|
|
253
|
+
return pa.float64()
|
|
254
|
+
elif type_name == "utf8":
|
|
255
|
+
return pa.utf8()
|
|
256
|
+
elif type_name == "binary":
|
|
257
|
+
return pa.binary()
|
|
258
|
+
else:
|
|
259
|
+
raise ValueError(f"Unsupported Arrow type: {type_name}")
|
|
260
|
+
|
|
261
|
+
def _parse_path(self, path: str) -> str:
|
|
262
|
+
"""Parse the path and convert to a proper URI if needed."""
|
|
263
|
+
parsed = urlparse(path)
|
|
264
|
+
if parsed.scheme:
|
|
265
|
+
return path
|
|
266
|
+
|
|
267
|
+
# Handle absolute and relative POSIX paths
|
|
268
|
+
if path.startswith('/'):
|
|
269
|
+
return f"file://{path}"
|
|
270
|
+
else:
|
|
271
|
+
current_dir = os.getcwd()
|
|
272
|
+
absolute_path = os.path.abspath(os.path.join(current_dir, path))
|
|
273
|
+
return f"file://{absolute_path}"
|
|
274
|
+
|
|
275
|
+
def _normalize_scheme(self, scheme: Optional[str]) -> str:
|
|
276
|
+
"""Normalize scheme with aliases."""
|
|
277
|
+
if scheme is None:
|
|
278
|
+
return 'fs'
|
|
279
|
+
|
|
280
|
+
# Handle scheme aliases
|
|
281
|
+
scheme_lower = scheme.lower()
|
|
282
|
+
if scheme_lower in ['s3a', 's3n']:
|
|
283
|
+
return 's3'
|
|
284
|
+
elif scheme_lower == 'abfs':
|
|
285
|
+
return 'azblob'
|
|
286
|
+
elif scheme_lower == 'file':
|
|
287
|
+
return 'fs'
|
|
288
|
+
else:
|
|
289
|
+
return scheme_lower
|
|
290
|
+
|
|
291
|
+
def _initialize_operator(self, root: str) -> opendal.Operator:
    """Create the OpenDAL operator that matches the root location.

    Args:
        root: A plain path (no ``://``) or a ``scheme://authority/path`` URI.

    Returns:
        An ``fs`` operator for plain paths. For ``s3``/``gcs`` the authority
        is passed as ``bucket``, for ``azblob`` as ``container``. Any other
        scheme gets everything after ``://`` as its root.
    """
    scheme_part, separator, remainder = root.partition("://")

    # No "://" at all: treat the value as a local file system path.
    if not separator:
        return opendal.Operator("fs", root=root)

    scheme = self._normalize_scheme(scheme_part)
    authority, _, path = remainder.partition("/")

    if scheme in ("s3", "gcs"):
        return opendal.Operator(scheme, root=path, bucket=authority)
    if scheme == "azblob":
        return opendal.Operator(scheme, root=path, container=authority)
    return opendal.Operator(scheme, root=remainder)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class DirectoryNamespaceConfig:
    """Settings for ``DirectoryNamespace``, read from a flat property mapping."""

    # Property key that holds the namespace root.
    ROOT = "root"
    # Keys with this prefix are storage options; the prefix is stripped.
    STORAGE_OPTIONS_PREFIX = "storage."

    def __init__(self, properties: Optional[Dict[str, str]] = None):
        """Read the root and the storage options from ``properties``.

        Args:
            properties: Configuration mapping; ``None`` is treated as empty.
        """
        source = {} if properties is None else properties

        self._root = source.get(self.ROOT)
        self._storage_options = self._extract_storage_options(source)

    def _extract_storage_options(self, properties: Dict[str, str]) -> Dict[str, str]:
        """Return the ``storage.``-prefixed entries with the prefix removed."""
        prefix = self.STORAGE_OPTIONS_PREFIX
        return {
            key[len(prefix):]: value
            for key, value in properties.items()
            if key.startswith(prefix)
        }

    @property
    def root(self) -> Optional[str]:
        """The configured namespace root, or ``None`` when not set."""
        return self._root

    @property
    def storage_options(self) -> Dict[str, str]:
        """A fresh copy of the storage options; changing it does not affect the config."""
        return dict(self._storage_options)
|