lance-namespace 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ # Prerequisites
14
+ *.d
15
+
16
+ # Compiled Object files
17
+ *.slo
18
+ *.lo
19
+ *.o
20
+ *.obj
21
+
22
+ # Precompiled Headers
23
+ *.gch
24
+ *.pch
25
+
26
+ # Compiled Dynamic libraries
27
+ *.so
28
+ *.dylib
29
+ *.dll
30
+
31
+ # Fortran module files
32
+ *.mod
33
+ *.smod
34
+
35
+ # Compiled Static libraries
36
+ *.lai
37
+ *.la
38
+ *.a
39
+ *.lib
40
+
41
+ # Executables
42
+ *.exe
43
+ *.out
44
+ *.app
45
+
46
+ # Tracing files
47
+ trace-*.json
48
+
49
+ **/*~
50
+ **/__pycache__
51
+ build/
52
+ dist/
53
+ *.egg-info/
54
+ .python-version
55
+
56
+ .idea
57
+ cmake-build-*
58
+ .vscode
59
+ .DS_Store
60
+
61
+ python/lance/_*.cpp
62
+
63
+ bin/
64
+
65
+
66
+ *.parquet
67
+ *.parq
68
+
69
+ python/thirdparty/arrow/
70
+ python/wheels
71
+ python/benchmark_data
72
+
73
+ logs
74
+ *.ckpt
75
+
76
+ docs/_build
77
+ docs/api/python
78
+
79
+ **/.ipynb_checkpoints/
80
+ docs/notebooks
81
+
82
+ notebooks/sift
83
+ notebooks/image_data/data
84
+ benchmarks/sift/sift
85
+ benchmarks/sift/sift.lance
86
+ benchmarks/sift/lance_ivf*.csv
87
+ **/sift.tar.gz
88
+
89
+ wheelhouse
90
+
91
+ # pandas testing
92
+ .hypothesis
93
+
94
+
95
+ **/df.json
96
+
97
+ # Rust
98
+ target
99
+ **/sccache.log
100
+
101
+ # c++ lsp
102
+ .ccls-cache/
103
+
104
+ python/venv
105
+ test_data/venv
106
+
107
+ **/*.profraw
108
+ *.lance
109
+
110
+ # Environments
111
+ .env
112
+ .venv
113
+ env/
114
+ venv/
115
+ ENV/
116
+ env.bak/
117
+ venv.bak/
118
+
119
+ # Docs
120
+ docs/site
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: lance-namespace
3
+ Version: 0.0.5
4
+ Summary: Python client for Lance Namespace API
5
+ Author-email: Jack Ye <yezhaoqin@gmail.com>
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: lance-namespace-urllib3-client
8
+ Requires-Dist: opendal>=0.46.0
9
+ Requires-Dist: pyarrow>=14.0.0
10
+ Requires-Dist: pylance>=0.18.0
11
+ Requires-Dist: typing-extensions>=4.0.0
12
+ Provides-Extra: test
13
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
14
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
File without changes
@@ -0,0 +1,32 @@
1
+ [project]
2
+ name = "lance-namespace"
3
+ version = "0.0.5"
4
+ description = "Python client for Lance Namespace API"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Jack Ye", email = "yezhaoqin@gmail.com" }
8
+ ]
9
+ requires-python = ">=3.10"
10
+ dependencies = [
11
+ "lance-namespace-urllib3-client",
12
+ "typing-extensions>=4.0.0",
13
+ "pylance>=0.18.0",
14
+ "pyarrow>=14.0.0",
15
+ "opendal>=0.46.0",
16
+ ]
17
+
18
+ [project.optional-dependencies]
19
+ test = [
20
+ "pytest>=7.0.0",
21
+ "pytest-cov>=4.0.0",
22
+ ]
23
+
24
+ [tool.pytest.ini_options]
25
+ testpaths = ["tests"]
26
+ python_files = ["test_*.py"]
27
+ python_classes = ["Test*"]
28
+ python_functions = ["test_*"]
29
+
30
+ [build-system]
31
+ requires = ["hatchling"]
32
+ build-backend = "hatchling.build"
@@ -0,0 +1,104 @@
1
+ """
2
+ Lance Namespace Python Client
3
+
4
+ A Python client for the Lance Namespace API that provides a unified interface
5
+ for managing namespaces and tables across different backend implementations.
6
+ """
7
+
8
+ from .namespace import LanceNamespace, connect
9
+
10
+ # Re-export all models from the urllib3 client
11
+ from lance_namespace_urllib3_client.models import (
12
+ ListNamespacesRequest,
13
+ ListNamespacesResponse,
14
+ DescribeNamespaceRequest,
15
+ DescribeNamespaceResponse,
16
+ CreateNamespaceRequest,
17
+ CreateNamespaceResponse,
18
+ DropNamespaceRequest,
19
+ DropNamespaceResponse,
20
+ NamespaceExistsRequest,
21
+ ListTablesRequest,
22
+ ListTablesResponse,
23
+ DescribeTableRequest,
24
+ DescribeTableResponse,
25
+ RegisterTableRequest,
26
+ RegisterTableResponse,
27
+ TableExistsRequest,
28
+ DropTableRequest,
29
+ DropTableResponse,
30
+ DeregisterTableRequest,
31
+ DeregisterTableResponse,
32
+ CountTableRowsRequest,
33
+ CreateTableRequest,
34
+ CreateTableResponse,
35
+ InsertIntoTableRequest,
36
+ InsertIntoTableResponse,
37
+ MergeInsertIntoTableRequest,
38
+ MergeInsertIntoTableResponse,
39
+ UpdateTableRequest,
40
+ UpdateTableResponse,
41
+ DeleteFromTableRequest,
42
+ DeleteFromTableResponse,
43
+ QueryTableRequest,
44
+ CreateTableIndexRequest,
45
+ CreateTableIndexResponse,
46
+ ListTableIndicesRequest,
47
+ ListTableIndicesResponse,
48
+ DescribeTableIndexStatsRequest,
49
+ DescribeTableIndexStatsResponse,
50
+ DescribeTransactionRequest,
51
+ DescribeTransactionResponse,
52
+ AlterTransactionRequest,
53
+ AlterTransactionResponse,
54
+ )
55
+
56
# Public API of the package: the names listed here are what
# ``from lance_namespace import *`` exports. Every entry is imported above,
# either from ``.namespace`` or from ``lance_namespace_urllib3_client.models``.
__all__ = [
    # Main interface and connect function
    "LanceNamespace",
    "connect",

    # Request/Response models
    # -- namespace-level requests and responses
    "ListNamespacesRequest",
    "ListNamespacesResponse",
    "DescribeNamespaceRequest",
    "DescribeNamespaceResponse",
    "CreateNamespaceRequest",
    "CreateNamespaceResponse",
    "DropNamespaceRequest",
    "DropNamespaceResponse",
    "NamespaceExistsRequest",
    # -- table-level requests and responses
    "ListTablesRequest",
    "ListTablesResponse",
    "DescribeTableRequest",
    "DescribeTableResponse",
    "RegisterTableRequest",
    "RegisterTableResponse",
    "TableExistsRequest",
    "DropTableRequest",
    "DropTableResponse",
    "DeregisterTableRequest",
    "DeregisterTableResponse",
    "CountTableRowsRequest",
    "CreateTableRequest",
    "CreateTableResponse",
    "InsertIntoTableRequest",
    "InsertIntoTableResponse",
    "MergeInsertIntoTableRequest",
    "MergeInsertIntoTableResponse",
    "UpdateTableRequest",
    "UpdateTableResponse",
    "DeleteFromTableRequest",
    "DeleteFromTableResponse",
    "QueryTableRequest",
    # -- table index requests and responses
    "CreateTableIndexRequest",
    "CreateTableIndexResponse",
    "ListTableIndicesRequest",
    "ListTableIndicesResponse",
    "DescribeTableIndexStatsRequest",
    "DescribeTableIndexStatsResponse",
    # -- transaction requests and responses
    "DescribeTransactionRequest",
    "DescribeTransactionResponse",
    "AlterTransactionRequest",
    "AlterTransactionResponse",
]
@@ -0,0 +1,348 @@
1
+ """
2
+ Lance Directory Namespace implementation using OpenDAL.
3
+ """
4
+ from typing import Dict, List, Optional
5
+ from urllib.parse import urlparse
6
+ import os
7
+
8
+ import opendal
9
+
10
+ import lance
11
+ import pyarrow as pa
12
+
13
+ from lance_namespace.namespace import LanceNamespace
14
+ from lance_namespace_urllib3_client.models import (
15
+ ListNamespacesRequest,
16
+ ListNamespacesResponse,
17
+ DescribeNamespaceRequest,
18
+ DescribeNamespaceResponse,
19
+ CreateNamespaceRequest,
20
+ CreateNamespaceResponse,
21
+ DropNamespaceRequest,
22
+ DropNamespaceResponse,
23
+ NamespaceExistsRequest,
24
+ ListTablesRequest,
25
+ ListTablesResponse,
26
+ CreateTableRequest,
27
+ CreateTableResponse,
28
+ DropTableRequest,
29
+ DropTableResponse,
30
+ DescribeTableRequest,
31
+ DescribeTableResponse,
32
+ JsonArrowSchema,
33
+ JsonArrowField,
34
+ JsonArrowDataType,
35
+ )
36
+
37
+
38
class DirectoryNamespace(LanceNamespace):
    """Lance Directory Namespace implementation using OpenDAL.

    Tables are Lance datasets stored directly under one root location as
    ``<table_name>.lance`` directories. The namespace is flat: only the root
    namespace exists, so every namespace-level operation other than listing
    tables raises ``NotImplementedError``.
    """

    # Directory suffix that marks an entry under the root as a table.
    _TABLE_SUFFIX = ".lance"

    # URI scheme aliases mapped to the OpenDAL service name used for them.
    # "gs" and "az" are the spellings Lance uses for GCS / Azure URIs.
    _SCHEME_ALIASES = {
        "s3a": "s3",
        "s3n": "s3",
        "gs": "gcs",
        "abfs": "azblob",
        "az": "azblob",
        "file": "fs",
    }

    # Lower-cased JSON Arrow type name -> zero-argument PyArrow type factory.
    _ARROW_TYPE_FACTORIES = {
        "null": pa.null,
        "bool": pa.bool_,
        "boolean": pa.bool_,
        "int8": pa.int8,
        "uint8": pa.uint8,
        "int16": pa.int16,
        "uint16": pa.uint16,
        "int32": pa.int32,
        "uint32": pa.uint32,
        "int64": pa.int64,
        "uint64": pa.uint64,
        "float32": pa.float32,
        "float64": pa.float64,
        "utf8": pa.utf8,
        "binary": pa.binary,
    }

    def __init__(self, **properties):
        """Initialize the directory namespace.

        Args:
            **properties: Configuration properties. ``root`` is the root
                directory/URI of the namespace (optional, defaults to the
                current working directory). Keys starting with ``storage.``
                are collected as storage options and passed to Lance when
                writing datasets.
        """
        self.config = DirectoryNamespaceConfig(properties)

        # Use the current directory if root is not specified (None or empty).
        root = self.config.root or os.getcwd()

        self.namespace_path = self._parse_path(root)
        self.operator = self._initialize_operator(root)

    def create_namespace(self, request: CreateNamespaceRequest) -> CreateNamespaceResponse:
        """Create a namespace - not supported for directory namespace."""
        raise NotImplementedError(
            "Directory namespace only contains a flat list of tables and does not support creating namespaces"
        )

    def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse:
        """List namespaces - not supported for directory namespace."""
        raise NotImplementedError(
            "Directory namespace only contains a flat list of tables and does not support listing namespaces"
        )

    def describe_namespace(self, request: DescribeNamespaceRequest) -> DescribeNamespaceResponse:
        """Describe namespace - not supported for directory namespace."""
        raise NotImplementedError(
            "Directory namespace only contains a flat list of tables and does not support describing namespaces"
        )

    def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse:
        """Drop namespace - not supported for directory namespace."""
        raise NotImplementedError(
            "Directory namespace only contains a flat list of tables and does not support dropping namespaces"
        )

    def namespace_exists(self, request: NamespaceExistsRequest) -> None:
        """Check namespace exists - not supported for directory namespace."""
        raise NotImplementedError(
            "Directory namespace only contains a flat list of tables and does not support namespace existence checks"
        )

    def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
        """List all tables in the namespace.

        An entry directly under the root counts as a table when its path ends
        with ``.lance`` and its ``_versions/`` sub-path lists at least one
        entry. Returned names have the ``.lance`` suffix removed.

        Raises:
            ValueError: If ``request.id`` is non-empty (only the root
                namespace can be listed).
            RuntimeError: If listing the root fails.
        """
        self._validate_root_namespace_id(request.id)

        try:
            tables = []
            for entry in self.operator.list("", recursive=False):
                path = entry.path.rstrip('/')

                # Require the suffix at the END of the path; a substring test
                # would accept e.g. "foo.lance.bak" and then mangle its name.
                if not path.endswith(self._TABLE_SUFFIX):
                    continue

                table_name = path[:-len(self._TABLE_SUFFIX)]

                try:
                    is_dataset = self._has_versions(table_name)
                except Exception:
                    # Best effort: if _versions cannot be listed, the entry
                    # is treated as "not a Lance dataset" and skipped.
                    is_dataset = False

                if is_dataset:
                    tables.append(table_name)

            return ListTablesResponse(tables=tables)
        except Exception as e:
            raise RuntimeError(f"Failed to list tables: {e}") from e

    def create_table(self, request: CreateTableRequest, request_data: bytes) -> CreateTableResponse:
        """Create a table using Lance dataset.

        The dataset is written empty, with the schema taken from
        ``request.var_schema``. ``request_data`` is accepted for interface
        compatibility but is not read by this implementation.

        Raises:
            ValueError: If the table ID or schema is missing, the ID is not
                single-level, the schema uses an unsupported type, or
                ``request.location`` differs from the path derived from the
                namespace root.
        """
        if not request.id:
            raise ValueError("table ID cannot be empty")

        if not request.var_schema:
            raise ValueError("Schema is required in CreateTableRequest")

        table_name = self._normalize_table_id(request.id)
        table_path = self._get_table_path(table_name)

        if request.location and request.location != table_path:
            raise ValueError(f"Cannot create table {table_name} at location {request.location}, must be at location {table_path}")

        # Convert JsonArrowSchema to PyArrow Schema
        schema = self._convert_json_arrow_schema_to_pyarrow(request.var_schema)

        # Zero-row table carrying the requested schema.
        empty_table = schema.empty_table()

        # Create Lance dataset
        lance.write_dataset(empty_table, table_path, storage_options=self.config.storage_options)

        return CreateTableResponse(location=table_path, version=1)

    def drop_table(self, request: DropTableRequest) -> DropTableResponse:
        """Drop a table by removing its Lance dataset.

        Raises:
            ValueError: If the table ID is empty or not single-level.
            RuntimeError: If removing the table directory fails.
        """
        if not request.id:
            raise ValueError("table ID cannot be empty")

        table_name = self._normalize_table_id(request.id)

        try:
            # Remove the entire table directory (path relative to the operator root).
            self.operator.remove_all(f"{table_name}{self._TABLE_SUFFIX}/")
            return DropTableResponse()
        except Exception as e:
            raise RuntimeError(f"Failed to drop table {table_name}: {e}") from e

    def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
        """Describe a table by checking its existence and returning location.

        Raises:
            ValueError: If the table ID is empty or not single-level.
            RuntimeError: If the table has no ``_versions/`` entries or the
                existence check itself fails.
        """
        if not request.id:
            raise ValueError("table ID cannot be empty")

        table_name = self._normalize_table_id(request.id)
        table_path = self._get_table_path(table_name)

        try:
            exists = self._has_versions(table_name)
        except Exception as e:
            raise RuntimeError(f"Table does not exist: {table_name}: {e}") from e

        # Raised outside the try block so the message is not wrapped twice.
        if not exists:
            raise RuntimeError(f"Table does not exist: {table_name}")

        return DescribeTableResponse(location=table_path)

    def _has_versions(self, table_name: str) -> bool:
        """Return True if ``<table_name>.lance/_versions/`` lists at least one entry."""
        versions_path = f"{table_name}{self._TABLE_SUFFIX}/_versions/"
        return bool(list(self.operator.list(versions_path, limit=1)))

    def _normalize_table_id(self, id: List[str]) -> str:
        """Normalize table ID - only single-level IDs are supported.

        Returns:
            The single element of ``id``.

        Raises:
            ValueError: If ``id`` is empty or has more than one element.
        """
        if not id:
            raise ValueError("Directory namespace table ID cannot be empty")

        if len(id) != 1:
            raise ValueError(
                f"Directory namespace only supports single-level table IDs, but got: {id}"
            )

        return id[0]

    def _validate_root_namespace_id(self, id: Optional[List[str]]) -> None:
        """Validate that the namespace ID represents a root namespace.

        Raises:
            ValueError: If ``id`` is non-empty.
        """
        if id:
            raise ValueError(
                f"Directory namespace only supports root namespace operations, "
                f"but got namespace ID: {id}. Expected empty ID."
            )

    def _get_table_path(self, table_name: str) -> str:
        """Get the full path for a table (``<root>/<table_name>.lance``)."""
        root = self.config.root or os.getcwd()
        # Avoid a double slash when the configured root already ends with one.
        separator = "" if root.endswith("/") else "/"
        return f"{root}{separator}{table_name}{self._TABLE_SUFFIX}"

    def _convert_json_arrow_schema_to_pyarrow(self, json_schema: JsonArrowSchema) -> pa.Schema:
        """Convert JsonArrowSchema to PyArrow Schema.

        Field order, names, nullability and the schema metadata are carried
        over; each field type goes through
        ``_convert_json_arrow_type_to_pyarrow``.
        """
        fields = [
            pa.field(
                json_field.name,
                self._convert_json_arrow_type_to_pyarrow(json_field.type),
                nullable=json_field.nullable,
            )
            for json_field in json_schema.fields
        ]
        return pa.schema(fields, metadata=json_schema.metadata)

    def _convert_json_arrow_type_to_pyarrow(self, json_type: JsonArrowDataType) -> pa.DataType:
        """Convert JsonArrowDataType to PyArrow DataType.

        The type name is matched case-insensitively against the primitive
        types in ``_ARROW_TYPE_FACTORIES``.

        Raises:
            ValueError: If the type name is not one of the supported types.
        """
        type_name = json_type.type.lower()

        factory = self._ARROW_TYPE_FACTORIES.get(type_name)
        if factory is None:
            raise ValueError(f"Unsupported Arrow type: {type_name}")
        return factory()

    def _parse_path(self, path: str) -> str:
        """Parse the path and convert to a proper URI if needed.

        Paths that already carry a scheme are returned unchanged; anything
        else becomes a ``file://`` URI, with relative paths resolved against
        the current working directory.
        """
        if urlparse(path).scheme:
            return path

        # Handle absolute and relative POSIX paths
        if path.startswith('/'):
            return f"file://{path}"
        return f"file://{os.path.abspath(path)}"

    def _normalize_scheme(self, scheme: Optional[str]) -> str:
        """Normalize scheme with aliases.

        ``None`` maps to ``'fs'``. Known aliases (see ``_SCHEME_ALIASES``) map
        to their OpenDAL service name; any other scheme is lower-cased and
        returned as-is.
        """
        if scheme is None:
            return 'fs'

        scheme_lower = scheme.lower()
        return self._SCHEME_ALIASES.get(scheme_lower, scheme_lower)

    def _initialize_operator(self, root: str) -> opendal.Operator:
        """Initialize the OpenDAL operator based on the root path.

        A root without ``://`` is treated as a local file system path. For
        ``s3``/``gcs`` the URI authority is passed as the bucket and for
        ``azblob`` as the container; for any other scheme everything after
        ``://`` is used as the operator root.
        """
        scheme_split = root.split("://", 1)

        # Local file system path
        if len(scheme_split) < 2:
            return opendal.Operator("fs", root=root)

        scheme = self._normalize_scheme(scheme_split[0])
        authority, _, path = scheme_split[1].partition("/")

        if scheme in ("s3", "gcs"):
            return opendal.Operator(scheme, root=path, bucket=authority)
        if scheme == "azblob":
            return opendal.Operator(scheme, root=path, container=authority)
        return opendal.Operator(scheme, root=scheme_split[1])
310
+
311
+
312
+
313
class DirectoryNamespaceConfig:
    """Parsed view over the raw properties given to DirectoryNamespace.

    Exposes the ``root`` property and the subset of properties whose keys
    start with ``storage.`` (with that prefix stripped).
    """

    ROOT = "root"
    STORAGE_OPTIONS_PREFIX = "storage."

    def __init__(self, properties: Optional[Dict[str, str]] = None):
        """Build the configuration from a properties mapping.

        Args:
            properties: Raw configuration properties; ``None`` is treated as
                an empty mapping.
        """
        props = {} if properties is None else properties

        self._root = props.get(self.ROOT)
        self._storage_options = self._extract_storage_options(props)

    def _extract_storage_options(self, properties: Dict[str, str]) -> Dict[str, str]:
        """Return the ``storage.``-prefixed entries with the prefix removed."""
        prefix = self.STORAGE_OPTIONS_PREFIX
        cut = len(prefix)
        return {
            name[cut:]: setting
            for name, setting in properties.items()
            if name.startswith(prefix)
        }

    @property
    def root(self) -> Optional[str]:
        """Namespace root directory, or ``None`` when it was not configured."""
        return self._root

    @property
    def storage_options(self) -> Dict[str, str]:
        """Storage options as a fresh dict, so callers cannot mutate the config."""
        return dict(self._storage_options)