edgevdb 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edgevdb/__init__.py +401 -0
- edgevdb/embedder.py +7 -0
- edgevdb/lib/README.md +31 -0
- edgevdb/lib/darwin/libedgevdb_shared.dylib +0 -0
- edgevdb/lib/linux/libedgevdb_shared.so +0 -0
- edgevdb/lib/windows/libedgevdb_shared.dll +0 -0
- edgevdb/object_store.py +27 -0
- edgevdb/sync.py +30 -0
- edgevdb/vectordb.py +8 -0
- edgevdb-1.0.0.dist-info/METADATA +666 -0
- edgevdb-1.0.0.dist-info/RECORD +13 -0
- edgevdb-1.0.0.dist-info/WHEEL +5 -0
- edgevdb-1.0.0.dist-info/top_level.txt +1 -0
edgevdb/__init__.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
EdgeVDB Python SDK -- ctypes wrapper for desktop/Raspberry Pi.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import ctypes
|
|
7
|
+
import os
|
|
8
|
+
import platform
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Dict, Any, Optional, Tuple
|
|
12
|
+
|
|
13
|
+
# Library loading
|
|
14
|
+
def _find_library() -> str:
    """Return the path of the first EdgeVDB shared library found on disk.

    Candidate file names depend on ``platform.system()``; any system that is
    neither Windows nor Darwin uses the Linux names and the ``linux``
    sub-directory. Directories are probed in priority order: the packaged
    ``lib/<platform>`` directory, ``lib``, the package directory itself, and
    finally the current working directory plus its ``build`` outputs.

    Raises:
        FileNotFoundError: no candidate file exists in any searched directory.
    """
    host_os = platform.system()

    # platform.system() value -> (candidate file names, lib/ sub-directory)
    per_platform = {
        "Windows": (
            ("libedgevdb_shared.dll", "edgevdb_shared.dll", "edgevdb.dll"),
            "windows",
        ),
        "Darwin": (
            ("libedgevdb_shared.dylib", "libedgevdb.dylib"),
            "darwin",
        ),
    }
    linux_like = (("libedgevdb_shared.so", "libedgevdb.so"), "linux")
    file_names, platform_subdir = per_platform.get(host_os, linux_like)

    package_root = Path(__file__).parent
    working_dir = Path.cwd()
    build_root = working_dir / "build"

    # Search paths — platform-specific lib/ subdirectory first
    search_dirs = [
        package_root / "lib" / platform_subdir,
        package_root / "lib",
        package_root,
        working_dir,
        build_root,
        build_root / "desktop-release" / "core",
        build_root / "desktop-debug" / "core",
    ]

    # Directory order outranks file-name order, hence the outer loop on dirs.
    candidates = (
        folder / file_name
        for folder in search_dirs
        for file_name in file_names
    )
    match = next((path for path in candidates if path.exists()), None)
    if match is not None:
        return str(match)

    raise FileNotFoundError(
        f"Could not find EdgeVDB library. Searched: {[str(d) for d in search_dirs]}"
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class _Lib:
    """Lazy-loaded library singleton.

    The shared library is located with ``_find_library()`` and loaded with
    ``ctypes.CDLL`` on the first call to :meth:`get`; later calls return the
    cached handle.
    """
    _instance = None

    @classmethod
    def get(cls):
        """Return the loaded library, loading and configuring it on first use.

        Raises:
            FileNotFoundError: the shared library could not be located.
            OSError: the shared library could not be loaded.
            AttributeError: the library lacks one of the expected symbols.
        """
        if cls._instance is None:
            lib = ctypes.CDLL(_find_library())
            # Cache the handle only after every prototype is declared. If a
            # symbol is missing, the next call retries instead of handing out
            # a library whose functions have no argtypes/restype set.
            cls._setup_signatures(lib)
            cls._instance = lib
        return cls._instance

    @staticmethod
    def _setup_signatures(lib):
        """Declare ``argtypes``/``restype`` for every native function used.

        Handles are declared as ``c_void_p`` so that pointers are not
        truncated to the default ``c_int`` return type.
        """
        # evdb_default_config
        lib.evdb_default_config.argtypes = [ctypes.c_void_p]
        lib.evdb_default_config.restype = None

        # evdb_open
        lib.evdb_open.argtypes = [ctypes.c_void_p]
        lib.evdb_open.restype = ctypes.c_void_p

        # evdb_close
        lib.evdb_close.argtypes = [ctypes.c_void_p]
        lib.evdb_close.restype = None

        # evdb_save
        lib.evdb_save.argtypes = [ctypes.c_void_p]
        lib.evdb_save.restype = ctypes.c_int

        # evdb_embedder_create
        lib.evdb_embedder_create.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_int]
        lib.evdb_embedder_create.restype = ctypes.c_void_p

        # evdb_embedder_destroy
        lib.evdb_embedder_destroy.argtypes = [ctypes.c_void_p]
        lib.evdb_embedder_destroy.restype = None

        # evdb_embed_text
        lib.evdb_embed_text.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.POINTER(ctypes.c_float)]
        lib.evdb_embed_text.restype = ctypes.c_int

        # evdb_insert_text
        lib.evdb_insert_text.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_char_p,
            ctypes.c_uint32, ctypes.c_uint32, ctypes.POINTER(ctypes.c_uint64)
        ]
        lib.evdb_insert_text.restype = ctypes.c_int

        # evdb_insert_chunk
        lib.evdb_insert_chunk.argtypes = [
            ctypes.c_void_p, ctypes.c_char_p, ctypes.POINTER(ctypes.c_float),
            ctypes.c_uint32, ctypes.c_uint32, ctypes.POINTER(ctypes.c_uint64)
        ]
        lib.evdb_insert_chunk.restype = ctypes.c_int

        # evdb_remove_chunk
        lib.evdb_remove_chunk.argtypes = [ctypes.c_void_p, ctypes.c_uint64]
        lib.evdb_remove_chunk.restype = ctypes.c_int

        # evdb_query_text
        lib.evdb_query_text.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_char_p,
            ctypes.c_int, ctypes.c_int
        ]
        lib.evdb_query_text.restype = ctypes.c_void_p

        # evdb_query_vector
        lib.evdb_query_vector.argtypes = [
            ctypes.c_void_p, ctypes.POINTER(ctypes.c_float),
            ctypes.c_char_p, ctypes.c_int
        ]
        lib.evdb_query_vector.restype = ctypes.c_void_p

        # evdb_result_count / text / score / chunk_id / page / context
        lib.evdb_result_count.argtypes = [ctypes.c_void_p]
        lib.evdb_result_count.restype = ctypes.c_int

        lib.evdb_result_text.argtypes = [ctypes.c_void_p, ctypes.c_int]
        lib.evdb_result_text.restype = ctypes.c_char_p

        lib.evdb_result_score.argtypes = [ctypes.c_void_p, ctypes.c_int]
        lib.evdb_result_score.restype = ctypes.c_float

        lib.evdb_result_chunk_id.argtypes = [ctypes.c_void_p, ctypes.c_int]
        lib.evdb_result_chunk_id.restype = ctypes.c_uint64

        lib.evdb_result_page.argtypes = [ctypes.c_void_p, ctypes.c_int]
        lib.evdb_result_page.restype = ctypes.c_uint32

        lib.evdb_result_context_string.argtypes = [ctypes.c_void_p]
        lib.evdb_result_context_string.restype = ctypes.c_char_p

        lib.evdb_query_free.argtypes = [ctypes.c_void_p]
        lib.evdb_query_free.restype = None

        # Object store
        lib.evdb_object_put.argtypes = [
            ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p,
            ctypes.POINTER(ctypes.c_uint64)
        ]
        lib.evdb_object_put.restype = ctypes.c_int

        lib.evdb_object_get.argtypes = [
            ctypes.c_void_p, ctypes.c_uint64, ctypes.c_char_p, ctypes.c_int
        ]
        lib.evdb_object_get.restype = ctypes.c_int

        lib.evdb_object_remove.argtypes = [ctypes.c_void_p, ctypes.c_uint64]
        lib.evdb_object_remove.restype = ctypes.c_int

        # Relations
        lib.evdb_relation_add.argtypes = [
            ctypes.c_void_p, ctypes.c_char_p, ctypes.c_uint64, ctypes.c_uint64
        ]
        lib.evdb_relation_add.restype = ctypes.c_int

        # Version
        lib.evdb_version_string.argtypes = []
        lib.evdb_version_string.restype = ctypes.c_char_p

        # Log level
        lib.evdb_set_log_level.argtypes = [ctypes.c_int]
        lib.evdb_set_log_level.restype = None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class EvdbConfig(ctypes.Structure):
    """ctypes layout of the configuration struct handed to the native library.

    ``EdgeVDB.__init__`` passes an instance by reference to
    ``evdb_default_config`` and then to ``evdb_open``.

    NOTE(review): field order and types presumably mirror the config struct
    declared in the native C header — confirm against that header before
    reordering, adding, or retyping any field.
    """
    _fields_ = [
        # Database directory as bytes (EdgeVDB.__init__ stores storage_dir.encode()).
        ("storage_dir", ctypes.c_char_p),
        # HNSW index parameters (package README: M, ef_construction, ef_search).
        ("hnsw_M", ctypes.c_int),
        ("hnsw_ef_construction", ctypes.c_int),
        ("hnsw_ef_search", ctypes.c_int),
        # Ranker weights (package README: cosine, page proximity, keyword).
        ("ranker_alpha", ctypes.c_float),
        ("ranker_beta", ctypes.c_float),
        ("ranker_gamma", ctypes.c_float),
        # Package README: max tokens in the assembled context.
        ("token_budget", ctypes.c_int),
        # Package README: ONNX thread count.
        ("embedding_threads", ctypes.c_int),
        # Documented as booleans in the package README; stored as C ints here.
        ("enable_knowledge_graph", ctypes.c_int),
        ("enable_sync", ctypes.c_int),
        # Package README: device ID for sync; a c_char_p field, so it needs bytes.
        ("device_id", ctypes.c_char_p),
    ]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class ChunkResult:
    """Plain value holder for one chunk returned by a query.

    Attributes:
        chunk_id: identifier of the chunk.
        text: chunk text.
        score: score reported for the chunk.
        page_number: page the chunk belongs to.
        doc_id: document identifier; stays 0 unless the caller supplies it.
    """

    def __init__(self, chunk_id: int, text: str, score: float,
                 page_number: int, doc_id: int = 0):
        self.chunk_id, self.text, self.score = chunk_id, text, score
        self.page_number, self.doc_id = page_number, doc_id

    def __repr__(self):
        # Compact debugging form: text and doc_id are deliberately left out.
        return f"ChunkResult(id={self.chunk_id}, score={self.score:.3f}, page={self.page_number})"
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class QueryResults:
    """Query results with lazy access to individual chunks.

    Wraps a native query handle. Every accessor calls into the shared library
    at access time; nothing is copied up front. The handle is released by
    :meth:`free`, which also runs from ``__del__``.

    NOTE(review): ``count`` and ``context_string`` still forward a ``None``
    handle to the native library after :meth:`free`; how the library treats a
    NULL handle is not visible from this wrapper — confirm before relying on it.
    """

    def __init__(self, handle):
        # Resolve the library before storing the handle: if loading fails,
        # __del__ must not find a handle it has no library to free with.
        self._lib = _Lib.get()
        self._handle = handle

    @property
    def count(self) -> int:
        """Number of results reported by the native library."""
        return self._lib.evdb_result_count(self._handle)

    def __getitem__(self, index: int) -> ChunkResult:
        """Return the result at ``index``; negative indexes count from the end.

        Raises:
            IndexError: ``index`` is outside ``[-count, count)``. The index is
                validated here so an out-of-range value never reaches native
                code, and so the legacy sequence protocol terminates cleanly.
        """
        total = self.count
        position = index + total if index < 0 else index
        if not 0 <= position < total:
            raise IndexError("QueryResults index out of range")

        # A c_char_p restype yields None for a NULL pointer; treat it as empty
        # text instead of failing on None.decode().
        raw_text = self._lib.evdb_result_text(self._handle, position)
        return ChunkResult(
            chunk_id=self._lib.evdb_result_chunk_id(self._handle, position),
            text=raw_text.decode("utf-8") if raw_text else "",
            score=self._lib.evdb_result_score(self._handle, position),
            page_number=self._lib.evdb_result_page(self._handle, position),
        )

    def __len__(self) -> int:
        return self.count

    def __iter__(self):
        for i in range(self.count):
            yield self[i]

    @property
    def context_string(self) -> str:
        """Context string assembled by the native library ("" when NULL)."""
        raw = self._lib.evdb_result_context_string(self._handle)
        return raw.decode("utf-8") if raw else ""

    def to_list(self) -> List[ChunkResult]:
        """Materialise every result as a list of ``ChunkResult``."""
        return list(self)

    def free(self):
        """Release the native query handle; safe to call more than once."""
        handle = getattr(self, "_handle", None)
        if handle:
            # Clear first so a failure inside the native call cannot lead to
            # a second free of the same handle from __del__.
            self._handle = None
            self._lib.evdb_query_free(handle)

    def __del__(self):
        self.free()
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class Embedder:
    """ONNX embedding model wrapper.

    Owns a native embedder handle created by ``evdb_embedder_create`` and
    released by :meth:`destroy` (also invoked from ``__del__``).
    """

    # Length of the float buffer handed to evdb_embed_text.
    EMBEDDING_DIM = 384

    def __init__(self, model_path: str, vocab_path: str, threads: int = 2):
        """Create the native embedder.

        Args:
            model_path: path passed to the native library as the model file.
            vocab_path: path passed to the native library as the vocabulary.
            threads: thread count forwarded to ``evdb_embedder_create``.

        Raises:
            RuntimeError: the native library returned a NULL handle.
        """
        # Defined before anything can fail so destroy()/__del__ stay safe when
        # loading the library or creating the embedder raises.
        self._handle = None
        self._lib = _Lib.get()
        self._handle = self._lib.evdb_embedder_create(
            model_path.encode(), vocab_path.encode(), threads)
        if not self._handle:
            raise RuntimeError("Failed to create embedder")

    @property
    def handle(self):
        """Raw native handle (``None`` once destroyed)."""
        return self._handle

    def embed(self, text: str) -> List[float]:
        """Embed ``text`` and return ``EMBEDDING_DIM`` floats.

        Raises:
            RuntimeError: the embedder was destroyed, or the native call
                returned a non-zero status.
        """
        if not self._handle:
            # Never hand a NULL handle to native code.
            raise RuntimeError("Embedder has been destroyed")
        out = (ctypes.c_float * self.EMBEDDING_DIM)()
        err = self._lib.evdb_embed_text(self._handle, text.encode(), out)
        if err != 0:
            raise RuntimeError(f"Embedding failed: {err}")
        return list(out)

    def destroy(self):
        """Release the native embedder; safe to call more than once."""
        # getattr: the instance may be only partially constructed.
        handle = getattr(self, "_handle", None)
        if handle:
            self._handle = None
            self._lib.evdb_embedder_destroy(handle)

    def __del__(self):
        self.destroy()
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class EdgeVDB:
    """Main EdgeVDB database class.

    Wraps the native database handle returned by ``evdb_open``. Failures
    reported by the native library surface as ``RuntimeError``. Used as a
    context manager, the database is saved and then closed on exit.
    """

    # Length of the float array marshalled for insert_chunk / query_vector.
    EMBEDDING_DIM = 384
    # Status code from evdb_object_get meaning "no such object".
    _ERR_NOT_FOUND = 4
    # Size in bytes of the output buffer handed to evdb_object_get.
    _OBJECT_BUFFER_SIZE = 4096

    def __init__(self, storage_dir: str, **kwargs):
        """Open (creating the directory if needed) the database at ``storage_dir``.

        Args:
            storage_dir: directory for the database files.
            **kwargs: overrides for ``EvdbConfig`` fields by name. ``str``
                values for ``c_char_p`` fields (e.g. ``device_id``) are
                encoded to bytes. Names that are not config fields are ignored.

        Raises:
            RuntimeError: the native library returned a NULL handle.
        """
        # Defined before anything can fail so close()/__del__ stay safe when
        # loading the library or opening the database raises.
        self._handle = None
        self._lib = _Lib.get()

        config = EvdbConfig()
        self._lib.evdb_default_config(ctypes.byref(config))
        config.storage_dir = storage_dir.encode()

        field_types = dict(EvdbConfig._fields_)
        for key, value in kwargs.items():
            field_type = field_types.get(key)
            if field_type is None:
                continue  # not a config field; ignored
            if field_type is ctypes.c_char_p and isinstance(value, str):
                # c_char_p fields only accept bytes; a str would raise TypeError.
                value = value.encode()
            setattr(config, key, value)

        os.makedirs(storage_dir, exist_ok=True)
        self._handle = self._lib.evdb_open(ctypes.byref(config))
        if not self._handle:
            raise RuntimeError(f"Failed to open EdgeVDB at {storage_dir}")

    def _require_open(self):
        """Return the native handle, refusing to pass NULL to native code."""
        handle = getattr(self, "_handle", None)
        if not handle:
            raise RuntimeError("EdgeVDB is closed")
        return handle

    def close(self):
        """Release the native database handle; safe to call more than once."""
        handle = getattr(self, "_handle", None)
        if handle:
            self._handle = None
            self._lib.evdb_close(handle)

    def save(self):
        """Ask the native library to persist the database.

        Raises:
            RuntimeError: the database is closed or the native call failed.
        """
        err = self._lib.evdb_save(self._require_open())
        if err != 0:
            raise RuntimeError(f"Save failed: {err}")

    # Vector store
    def insert_text(self, embedder: Embedder, text: str,
                    doc_id: int = 0, page_number: int = 0) -> int:
        """Insert ``text``, embedding it with ``embedder``; return the chunk id.

        Raises:
            RuntimeError: the database is closed or the native call failed.
        """
        handle = self._require_open()
        chunk_id = ctypes.c_uint64(0)
        err = self._lib.evdb_insert_text(
            handle, embedder.handle, text.encode(),
            doc_id, page_number, ctypes.byref(chunk_id))
        if err != 0:
            raise RuntimeError(f"Insert failed: {err}")
        return chunk_id.value

    def insert_chunk(self, text: str, embedding: List[float],
                     doc_id: int = 0, page_number: int = 0) -> int:
        """Insert ``text`` with a pre-computed ``embedding``; return the chunk id.

        ``embedding`` is copied into a fixed ``EMBEDDING_DIM``-float array.
        ctypes zero-fills the tail when fewer values are given and raises
        ``IndexError`` when more are given.

        Raises:
            RuntimeError: the database is closed or the native call failed.
        """
        handle = self._require_open()
        emb_arr = (ctypes.c_float * self.EMBEDDING_DIM)(*embedding)
        chunk_id = ctypes.c_uint64(0)
        err = self._lib.evdb_insert_chunk(
            handle, text.encode(), emb_arr,
            doc_id, page_number, ctypes.byref(chunk_id))
        if err != 0:
            raise RuntimeError(f"Insert failed: {err}")
        return chunk_id.value

    def remove_chunk(self, chunk_id: int):
        """Remove the chunk with ``chunk_id``.

        Raises:
            RuntimeError: the database is closed or the native call failed.
        """
        err = self._lib.evdb_remove_chunk(self._require_open(), chunk_id)
        if err != 0:
            raise RuntimeError(f"Remove failed: {err}")

    # Query
    def query_text(self, embedder: Embedder, query: str,
                   top_k: int = 5, use_kg_expansion: bool = False) -> QueryResults:
        """Run a text query, embedding ``query`` with ``embedder``.

        Raises:
            RuntimeError: the database is closed or the native call returned
                a NULL result handle.
        """
        handle = self._require_open()
        qh = self._lib.evdb_query_text(
            handle, embedder.handle, query.encode(),
            top_k, 1 if use_kg_expansion else 0)
        if not qh:
            raise RuntimeError("Query failed")
        return QueryResults(qh)

    def query_vector(self, embedding: List[float], query_text: str = "",
                     top_k: int = 5) -> QueryResults:
        """Run a query with a pre-computed ``embedding``.

        ``embedding`` is marshalled exactly as in :meth:`insert_chunk`.

        Raises:
            RuntimeError: the database is closed or the native call returned
                a NULL result handle.
        """
        handle = self._require_open()
        emb_arr = (ctypes.c_float * self.EMBEDDING_DIM)(*embedding)
        qh = self._lib.evdb_query_vector(
            handle, emb_arr, query_text.encode(), top_k)
        if not qh:
            raise RuntimeError("Query failed")
        return QueryResults(qh)

    # Object store
    def put_object(self, type_name: str, properties: Dict[str, Any]) -> int:
        """Store ``properties`` (serialised as JSON) under ``type_name``.

        Returns:
            The object id written by the native library.

        Raises:
            RuntimeError: the database is closed or the native call failed.
        """
        handle = self._require_open()
        json_str = json.dumps(properties)
        out_id = ctypes.c_uint64(0)
        err = self._lib.evdb_object_put(
            handle, type_name.encode(), json_str.encode(),
            ctypes.byref(out_id))
        if err != 0:
            raise RuntimeError(f"Object put failed: {err}")
        return out_id.value

    def get_object(self, object_id: int) -> Optional[Dict[str, Any]]:
        """Return the stored object decoded from JSON, or ``None`` if not found.

        NOTE(review): the output buffer is fixed at ``_OBJECT_BUFFER_SIZE``
        bytes; what the native call does for larger objects is not visible
        from this wrapper — confirm against the C API.

        Raises:
            RuntimeError: the database is closed or the native call failed.
        """
        handle = self._require_open()
        buf = ctypes.create_string_buffer(self._OBJECT_BUFFER_SIZE)
        err = self._lib.evdb_object_get(
            handle, object_id, buf, self._OBJECT_BUFFER_SIZE)
        if err == self._ERR_NOT_FOUND:
            return None
        if err != 0:
            raise RuntimeError(f"Object get failed: {err}")
        return json.loads(buf.value.decode())

    def remove_object(self, object_id: int):
        """Remove the object with ``object_id``.

        Raises:
            RuntimeError: the database is closed or the native call failed.
        """
        err = self._lib.evdb_object_remove(self._require_open(), object_id)
        if err != 0:
            raise RuntimeError(f"Object remove failed: {err}")

    # Relations
    def add_relation(self, name: str, from_id: int, to_id: int):
        """Add a relation called ``name`` from ``from_id`` to ``to_id``.

        Raises:
            RuntimeError: the database is closed or the native call failed.
        """
        err = self._lib.evdb_relation_add(
            self._require_open(), name.encode(), from_id, to_id)
        if err != 0:
            raise RuntimeError(f"Relation add failed: {err}")

    # Context manager
    def __enter__(self):
        return self

    def __exit__(self, *args):
        try:
            # Skip the save when the body already closed the database.
            if self._handle:
                self.save()
        finally:
            # Always release the handle, even when save() raises.
            self.close()

    def __del__(self):
        self.close()
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def version() -> str:
    """Return the native library's version string, decoded to ``str``."""
    raw_version = _Lib.get().evdb_version_string()
    return raw_version.decode()
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def set_log_level(level: int):
    """Forward ``level`` to the native library's ``evdb_set_log_level``."""
    _Lib.get().evdb_set_log_level(level)
|
edgevdb/embedder.py
ADDED
edgevdb/lib/README.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# EdgeVDB Native Libraries
|
|
2
|
+
|
|
3
|
+
Platform-specific shared libraries for the EdgeVDB Python SDK.
|
|
4
|
+
|
|
5
|
+
## Structure
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
lib/
|
|
9
|
+
windows/ → edgevdb_shared.dll, libedgevdb_shared.dll
|
|
10
|
+
linux/ → libedgevdb_shared.so
|
|
11
|
+
darwin/ → libedgevdb_shared.dylib
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Building
|
|
15
|
+
|
|
16
|
+
Copy the built shared library from `build/desktop-release/core/` into the
|
|
17
|
+
appropriate platform directory:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Linux
|
|
21
|
+
cp build/desktop-release/core/libedgevdb_shared.so python/edgevdb/lib/linux/
|
|
22
|
+
|
|
23
|
+
# macOS
|
|
24
|
+
cp build/desktop-release/core/libedgevdb_shared.dylib python/edgevdb/lib/darwin/
|
|
25
|
+
|
|
26
|
+
# Windows (PowerShell)
|
|
27
|
+
copy build\desktop-release\core\edgevdb_shared.dll python\edgevdb\lib\windows\
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
The Python SDK (`__init__.py`) automatically detects the host platform and
|
|
31
|
+
loads the correct library from the matching subdirectory.
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
edgevdb/object_store.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EdgeVDB Python object_store module — re-exported from __init__.py.
|
|
3
|
+
Provides convenience methods for the relational object store.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from edgevdb import EdgeVDB
|
|
7
|
+
|
|
8
|
+
__all__ = ["ObjectStoreHelper"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ObjectStoreHelper:
    """Thin facade over the object-store methods of an ``EdgeVDB`` instance.

    Every method forwards its arguments unchanged to the wrapped database.
    """

    def __init__(self, db: EdgeVDB):
        self._db = db

    def put(self, type_name: str, properties: dict) -> int:
        """Store ``properties`` under ``type_name``; return the new object id."""
        object_id = self._db.put_object(type_name, properties)
        return object_id

    def get(self, object_id: int) -> dict:
        """Return whatever ``EdgeVDB.get_object`` yields for ``object_id``.

        That is the decoded object, or ``None`` when it is not found.
        """
        stored = self._db.get_object(object_id)
        return stored

    def remove(self, object_id: int):
        """Remove the object with ``object_id``."""
        database = self._db
        database.remove_object(object_id)

    def add_relation(self, name: str, from_id: int, to_id: int):
        """Add a relation called ``name`` from ``from_id`` to ``to_id``."""
        database = self._db
        database.add_relation(name, from_id, to_id)
|
edgevdb/sync.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EdgeVDB Python sync module — file-based sync utilities.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from edgevdb import EdgeVDB
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SyncHelper:
    """File-based sync helper for Python SDK.

    Both operations are placeholders: neither touches the database or the
    file system yet.
    """

    def __init__(self, db: EdgeVDB, device_id: str = "python-desktop"):
        self._db, self._device_id = db, device_id

    def export_to_file(self, path: str, since_clock: int = 0) -> bool:
        """Export sync delta to a JSON file.

        Placeholder: always reports failure by returning ``False``.
        """
        # Note: Requires sync engine handle — this is a placeholder
        # In full implementation, would call evdb_sync_export_to_file
        exported = False
        return exported

    def import_from_file(self, path: str) -> dict:
        """Import sync delta from a JSON file.

        Placeholder: always reports that nothing was applied or skipped.
        """
        summary = {"applied": 0, "skipped": 0}
        return summary
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
__all__ = ["SyncHelper"]
|
edgevdb/vectordb.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EdgeVDB Python vectordb module — re-exported from __init__.py.
|
|
3
|
+
This module provides the core VectorDB functionality as a standalone import.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from edgevdb import EdgeVDB, Embedder, QueryResults, ChunkResult
|
|
7
|
+
|
|
8
|
+
__all__ = ["EdgeVDB", "Embedder", "QueryResults", "ChunkResult"]
|
|
@@ -0,0 +1,666 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: edgevdb
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: EdgeVDB — On-device vector database with HNSW, hybrid retrieval, knowledge graph, and CRDT sync
|
|
5
|
+
Author-email: XformAI <contact@xformai.in>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/XformAI/EDGEVDB
|
|
8
|
+
Project-URL: Documentation, https://xformai.github.io/EDGEVDB/
|
|
9
|
+
Project-URL: Repository, https://github.com/XformAI/EDGEVDB
|
|
10
|
+
Project-URL: Issues, https://github.com/XformAI/EDGEVDB/issues
|
|
11
|
+
Keywords: vector-database,hnsw,embedding,rag,on-device,edge-ai,semantic-search
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
23
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
24
|
+
Classifier: Operating System :: MacOS
|
|
25
|
+
Classifier: Topic :: Database
|
|
26
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
27
|
+
Requires-Python: >=3.8
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# EdgeVDB Python SDK
|
|
31
|
+
|
|
32
|
+
> **Python wrapper for EdgeVDB on-device vector database with ctypes FFI binding.**
|
|
33
|
+
|
|
34
|
+
The EdgeVDB Python SDK provides a Pythonic interface to the EdgeVDB C++ core library using ctypes. It enables Python applications to use EdgeVDB's vector database capabilities on desktop and Raspberry Pi platforms.
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
- **ctypes FFI Binding** — Direct calls to C API with no Python dependencies
|
|
39
|
+
- **Context Manager Support** — Automatic resource cleanup with `with` statements
|
|
40
|
+
- **Type Hints** — Full type annotations for IDE support
|
|
41
|
+
- **Zero Python Dependencies** — Uses only the Python standard library (`ctypes`)
|
|
42
|
+
- **Cross-Platform** — Linux, macOS, Windows, Raspberry Pi
|
|
43
|
+
- **Flexible Embedding** — Use any embedding provider or built-in ONNX embedder
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
### From Source
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Build the C++ core first
|
|
51
|
+
cd ..
|
|
52
|
+
cmake --preset desktop-release
|
|
53
|
+
cmake --build build/desktop-release
|
|
54
|
+
|
|
55
|
+
# Copy shared library to Python package (platform-specific)
|
|
56
|
+
# Linux:
|
|
57
|
+
cp build/desktop-release/core/libedgevdb_shared.so python/edgevdb/lib/linux/
|
|
58
|
+
# macOS:
|
|
59
|
+
# cp build/desktop-release/core/libedgevdb_shared.dylib python/edgevdb/lib/darwin/
|
|
60
|
+
# Windows:
|
|
61
|
+
# copy build\desktop-release\core\edgevdb_shared.dll python\edgevdb\lib\windows\
|
|
62
|
+
|
|
63
|
+
# Install in development mode
|
|
64
|
+
cd python
|
|
65
|
+
pip install -e .
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### From PyPI (after publishing)
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install edgevdb
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
### Without ONNX (Recommended)
|
|
77
|
+
|
|
78
|
+
Use embeddings from any provider (OpenAI, Cohere, sentence-transformers, etc.):
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from edgevdb import EdgeVDB
|
|
82
|
+
|
|
83
|
+
# Open database
|
|
84
|
+
db = EdgeVDB("./my_database")
|
|
85
|
+
|
|
86
|
+
# Get embeddings from your preferred provider
|
|
87
|
+
# Example with sentence-transformers:
|
|
88
|
+
from sentence_transformers import SentenceTransformer
|
|
89
|
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
90
|
+
embedding = model.encode("Machine learning finds patterns in data")
|
|
91
|
+
|
|
92
|
+
# Insert with pre-computed embedding
|
|
93
|
+
chunk_id = db.insert_chunk(
|
|
94
|
+
text="Machine learning finds patterns in data",
|
|
95
|
+
embedding=embedding,
|
|
96
|
+
doc_id=1,
|
|
97
|
+
page_number=0
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Query
|
|
101
|
+
query_emb = model.encode("what is ML?")
|
|
102
|
+
results = db.query_vector(query_emb, query_text="what is ML?", top_k=5)
|
|
103
|
+
|
|
104
|
+
for r in results:
|
|
105
|
+
print(f"score={r.score:.3f} text={r.text}")
|
|
106
|
+
|
|
107
|
+
# Object store
|
|
108
|
+
doc_id = db.put_object("Document", {"title": "ML Intro", "author": "Alice"})
|
|
109
|
+
db.add_relation("has_chunk", doc_id, chunk_id)
|
|
110
|
+
|
|
111
|
+
db.save()
|
|
112
|
+
db.close()
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### With Built-in Embedder
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from edgevdb import EdgeVDB, Embedder
|
|
119
|
+
|
|
120
|
+
# Create embedder
|
|
121
|
+
embedder = Embedder(
|
|
122
|
+
model_path="models/model.onnx",
|
|
123
|
+
vocab_path="models/vocab.txt",
|
|
124
|
+
threads=2
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
# Use with context manager
|
|
128
|
+
with EdgeVDB("./my_database") as db:
|
|
129
|
+
# Auto-embed on insert
|
|
130
|
+
chunk_id = db.insert_text(
|
|
131
|
+
embedder,
|
|
132
|
+
"Deep learning uses neural networks",
|
|
133
|
+
doc_id=1,
|
|
134
|
+
page_number=0
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
# Auto-embed on query
|
|
138
|
+
results = db.query_text(embedder, "neural network architecture", top_k=5)
|
|
139
|
+
print(results.context_string)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## API Reference
|
|
143
|
+
|
|
144
|
+
### EdgeVDB
|
|
145
|
+
|
|
146
|
+
Main database class.
|
|
147
|
+
|
|
148
|
+
#### Constructor
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
EdgeVDB(storage_dir: str, **kwargs)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Parameters:**
|
|
155
|
+
- `storage_dir` (str): Directory for database files
|
|
156
|
+
- `hnsw_M` (int): HNSW M parameter (default: 16)
|
|
157
|
+
- `hnsw_ef_construction` (int): HNSW ef_construction (default: 200)
|
|
158
|
+
- `hnsw_ef_search` (int): HNSW ef_search (default: 64)
|
|
159
|
+
- `ranker_alpha` (float): Cosine weight (default: 0.70)
|
|
160
|
+
- `ranker_beta` (float): Page proximity weight (default: 0.20)
|
|
161
|
+
- `ranker_gamma` (float): Keyword weight (default: 0.10)
|
|
162
|
+
- `token_budget` (int): Max tokens in context (default: 3200)
|
|
163
|
+
- `embedding_threads` (int): ONNX thread count (default: 2)
|
|
164
|
+
- `enable_knowledge_graph` (bool): Enable KG (default: True)
|
|
165
|
+
- `enable_sync` (bool): Enable sync (default: False)
|
|
166
|
+
- `device_id` (str): Device ID for sync (default: auto-generated)
|
|
167
|
+
|
|
168
|
+
#### Methods
|
|
169
|
+
|
|
170
|
+
##### Vector Store
|
|
171
|
+
|
|
172
|
+
**insert_chunk(text, embedding, doc_id=0, page_number=0) -> int**
|
|
173
|
+
- Insert text with pre-computed embedding
|
|
174
|
+
- Returns chunk ID
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
chunk_id = db.insert_chunk(
|
|
178
|
+
text="Your text here",
|
|
179
|
+
embedding=[0.1, 0.2, ...], # 384-dim float array
|
|
180
|
+
doc_id=1,
|
|
181
|
+
page_number=0
|
|
182
|
+
)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**insert_text(embedder, text, doc_id=0, page_number=0) -> int**
|
|
186
|
+
- Insert text with auto-embedding via embedder
|
|
187
|
+
- Returns chunk ID
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
chunk_id = db.insert_text(
|
|
191
|
+
embedder,
|
|
192
|
+
"Your text here",
|
|
193
|
+
doc_id=1,
|
|
194
|
+
page_number=0
|
|
195
|
+
)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**remove_chunk(chunk_id)**
|
|
199
|
+
- Remove chunk by ID
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
db.remove_chunk(chunk_id)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
**query_vector(embedding, query_text="", top_k=5) -> QueryResults**
|
|
206
|
+
- Query with pre-computed embedding
|
|
207
|
+
- Returns QueryResults object
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
results = db.query_vector(
|
|
211
|
+
embedding=[0.1, 0.2, ...],
|
|
212
|
+
query_text="search query",
|
|
213
|
+
top_k=5
|
|
214
|
+
)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
**query_text(embedder, query, top_k=5, use_kg_expansion=False) -> QueryResults**
|
|
218
|
+
- Query with auto-embedding via embedder
|
|
219
|
+
- Returns QueryResults object
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
results = db.query_text(
|
|
223
|
+
embedder,
|
|
224
|
+
"search query",
|
|
225
|
+
top_k=5,
|
|
226
|
+
use_kg_expansion=False
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
##### Object Store
|
|
231
|
+
|
|
232
|
+
**put_object(type_name, properties) -> int**
|
|
233
|
+
- Store JSON object
|
|
234
|
+
- Returns object ID
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
doc_id = db.put_object(
|
|
238
|
+
"Document",
|
|
239
|
+
{"title": "My Doc", "author": "Alice"}
|
|
240
|
+
)
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
**get_object(object_id) -> Optional[Dict]**
|
|
244
|
+
- Retrieve object by ID
|
|
245
|
+
- Returns dict or None if not found
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
obj = db.get_object(doc_id)
|
|
249
|
+
if obj:
|
|
250
|
+
print(obj["title"])
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
**remove_object(object_id)**
|
|
254
|
+
- Soft delete object
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
db.remove_object(doc_id)
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
##### Relations
|
|
261
|
+
|
|
262
|
+
**add_relation(name, from_id, to_id)**
|
|
263
|
+
- Add typed edge between objects
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
db.add_relation("has_chunk", doc_id, chunk_id)
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
##### Lifecycle
|
|
270
|
+
|
|
271
|
+
**save()**
|
|
272
|
+
- Flush all data to disk
|
|
273
|
+
|
|
274
|
+
```python
|
|
275
|
+
db.save()
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
**close()**
|
|
279
|
+
- Release native resources
|
|
280
|
+
|
|
281
|
+
```python
|
|
282
|
+
db.close()
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
**Context Manager**
|
|
286
|
+
|
|
287
|
+
```python
|
|
288
|
+
with EdgeVDB("./data") as db:
|
|
289
|
+
# Auto-save and close on exit
|
|
290
|
+
db.insert_chunk("text", embedding, doc_id=1)
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
### Embedder
|
|
294
|
+
|
|
295
|
+
ONNX embedding model wrapper.
|
|
296
|
+
|
|
297
|
+
#### Constructor
|
|
298
|
+
|
|
299
|
+
```python
|
|
300
|
+
Embedder(model_path: str, vocab_path: str, threads: int = 2)
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
**Parameters:**
|
|
304
|
+
- `model_path` (str): Path to ONNX model file
|
|
305
|
+
- `vocab_path` (str): Path to vocabulary file
|
|
306
|
+
- `threads` (int): Number of inference threads (default: 2)
|
|
307
|
+
|
|
308
|
+
#### Methods
|
|
309
|
+
|
|
310
|
+
**embed(text: str) -> List[float]**
|
|
311
|
+
- Embed text to 384-dim vector
|
|
312
|
+
- Returns list of floats
|
|
313
|
+
|
|
314
|
+
```python
|
|
315
|
+
embedding = embedder.embed("Hello world")
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
**destroy()**
|
|
319
|
+
- Release native resources
|
|
320
|
+
|
|
321
|
+
```python
|
|
322
|
+
embedder.destroy()
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### QueryResults
|
|
326
|
+
|
|
327
|
+
Query result container with lazy access.
|
|
328
|
+
|
|
329
|
+
#### Properties
|
|
330
|
+
|
|
331
|
+
**count** (int): Number of results
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
print(f"Found {results.count} results")
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
**context_string** (str): Pre-assembled RAG context
|
|
338
|
+
|
|
339
|
+
```python
|
|
340
|
+
print(results.context_string)
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
#### Methods
|
|
344
|
+
|
|
345
|
+
**`__getitem__(index) -> ChunkResult`**
|
|
346
|
+
- Access individual result by index
|
|
347
|
+
|
|
348
|
+
```python
|
|
349
|
+
result = results[0]
|
|
350
|
+
print(result.text)
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
**`__iter__()`**
|
|
354
|
+
- Iterate over results
|
|
355
|
+
|
|
356
|
+
```python
|
|
357
|
+
for r in results:
|
|
358
|
+
print(f"{r.score}: {r.text}")
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
**to_list() -> List[ChunkResult]**
|
|
362
|
+
- Convert to list
|
|
363
|
+
|
|
364
|
+
```python
|
|
365
|
+
results_list = results.to_list()
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
**free()**
|
|
369
|
+
- Free the native query handle (called automatically by `__del__`)
|
|
370
|
+
|
|
371
|
+
```python
|
|
372
|
+
results.free()
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
### ChunkResult
|
|
376
|
+
|
|
377
|
+
Single query result.
|
|
378
|
+
|
|
379
|
+
#### Attributes
|
|
380
|
+
|
|
381
|
+
- **chunk_id** (int): Unique chunk identifier
|
|
382
|
+
- **text** (str): Chunk text content
|
|
383
|
+
- **score** (float): Hybrid similarity score [0.0, 1.0]
|
|
384
|
+
- **page_number** (int): Page number in document
|
|
385
|
+
- **doc_id** (int): Document identifier
|
|
386
|
+
|
|
387
|
+
```python
|
|
388
|
+
for r in results:
|
|
389
|
+
print(f"ID: {r.chunk_id}")
|
|
390
|
+
print(f"Text: {r.text}")
|
|
391
|
+
print(f"Score: {r.score:.3f}")
|
|
392
|
+
print(f"Page: {r.page_number}")
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
## Examples
|
|
396
|
+
|
|
397
|
+
### RAG Pipeline
|
|
398
|
+
|
|
399
|
+
```python
|
|
400
|
+
from edgevdb import EdgeVDB
|
|
401
|
+
from sentence_transformers import SentenceTransformer
|
|
402
|
+
|
|
403
|
+
# Initialize
|
|
404
|
+
model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
405
|
+
db = EdgeVDB("./rag_database")
|
|
406
|
+
|
|
407
|
+
# Index documents
|
|
408
|
+
documents = [
|
|
409
|
+
{"id": 1, "text": "Python is a high-level programming language."},
|
|
410
|
+
{"id": 2, "text": "Machine learning is a subset of AI."},
|
|
411
|
+
{"id": 3, "text": "Vector databases enable semantic search."},
|
|
412
|
+
]
|
|
413
|
+
|
|
414
|
+
for doc in documents:
|
|
415
|
+
embedding = model.encode(doc["text"])
|
|
416
|
+
db.insert_chunk(doc["text"], embedding, doc_id=doc["id"])
|
|
417
|
+
|
|
418
|
+
# Query
|
|
419
|
+
query = "What is semantic search?"
|
|
420
|
+
query_emb = model.encode(query)
|
|
421
|
+
results = db.query_vector(query_emb, query_text=query, top_k=2)
|
|
422
|
+
|
|
423
|
+
# Assemble context
|
|
424
|
+
context = results.context_string
|
|
425
|
+
print(f"Context: {context}")
|
|
426
|
+
|
|
427
|
+
db.save()
|
|
428
|
+
db.close()
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
### Object Store + Relations
|
|
432
|
+
|
|
433
|
+
```python
|
|
434
|
+
from edgevdb import EdgeVDB
|
|
435
|
+
|
|
436
|
+
db = EdgeVDB("./my_database")
|
|
437
|
+
|
|
438
|
+
# Store documents
|
|
439
|
+
doc1_id = db.put_object("Document", {
|
|
440
|
+
"title": "Introduction to ML",
|
|
441
|
+
"author": "Alice",
|
|
442
|
+
"year": 2024
|
|
443
|
+
})
|
|
444
|
+
|
|
445
|
+
doc2_id = db.put_object("Document", {
|
|
446
|
+
"title": "Advanced Topics",
|
|
447
|
+
"author": "Bob",
|
|
448
|
+
"year": 2024
|
|
449
|
+
})
|
|
450
|
+
|
|
451
|
+
# Store chunks with embeddings (`emb` is a placeholder for a precomputed embedding vector)
|
|
452
|
+
chunk1_id = db.insert_chunk("ML is fascinating", emb, doc_id=doc1_id)
|
|
453
|
+
chunk2_id = db.insert_chunk("Deep learning is powerful", emb, doc_id=doc2_id)
|
|
454
|
+
|
|
455
|
+
# Link chunks to documents
|
|
456
|
+
db.add_relation("has_chunk", doc1_id, chunk1_id)
|
|
457
|
+
db.add_relation("has_chunk", doc2_id, chunk2_id)
|
|
458
|
+
|
|
459
|
+
db.save()
|
|
460
|
+
db.close()
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
### Error Handling
|
|
464
|
+
|
|
465
|
+
```python
|
|
466
|
+
from edgevdb import EdgeVDB, set_log_level
|
|
467
|
+
|
|
468
|
+
# Enable debug logging
|
|
469
|
+
set_log_level(3)
|
|
470
|
+
|
|
471
|
+
try:
|
|
472
|
+
db = EdgeVDB("./my_database")
|
|
473
|
+
|
|
474
|
+
# Operations (`embedding` is a placeholder for a precomputed embedding vector)
|
|
475
|
+
chunk_id = db.insert_chunk("text", embedding, doc_id=1)
|
|
476
|
+
|
|
477
|
+
# Object not found returns None (doesn't throw)
|
|
478
|
+
obj = db.get_object(999)
|
|
479
|
+
if obj is None:
|
|
480
|
+
print("Object not found")
|
|
481
|
+
|
|
482
|
+
db.save()
|
|
483
|
+
db.close()
|
|
484
|
+
|
|
485
|
+
except RuntimeError as e:
|
|
486
|
+
print(f"EdgeVDB error: {e}")
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
## Library Discovery
|
|
490
|
+
|
|
491
|
+
The Python SDK automatically searches for the EdgeVDB shared library in the following locations:
|
|
492
|
+
|
|
493
|
+
1. Platform-specific directory (`edgevdb/lib/<platform>/`) — **preferred**
|
|
494
|
+
2. Package lib directory (`edgevdb/lib/`)
|
|
495
|
+
3. Package directory (`edgevdb/`)
|
|
496
|
+
4. Current working directory
|
|
497
|
+
5. `build/desktop-release/core/`
|
|
498
|
+
6. `build/desktop-debug/core/`
|
|
499
|
+
|
|
500
|
+
**Library Layout:**
|
|
501
|
+
```
|
|
502
|
+
python/edgevdb/lib/
|
|
503
|
+
linux/ → libedgevdb_shared.so
|
|
504
|
+
darwin/ → libedgevdb_shared.dylib
|
|
505
|
+
windows/ → edgevdb_shared.dll, libedgevdb_shared.dll
|
|
506
|
+
```
|
|
507
|
+
|
|
508
|
+
## Performance Considerations
|
|
509
|
+
|
|
510
|
+
### Embedding Provider Choice
|
|
511
|
+
|
|
512
|
+
| Provider | Speed | Quality | Offline | Cost |
|
|
513
|
+
|----------|-------|--------|---------|------|
|
|
514
|
+
| sentence-transformers | Fast | Good | ✅ | Free |
|
|
515
|
+
| OpenAI API | Slow | Excellent | ❌ | Paid |
|
|
516
|
+
| Cohere API | Medium | Good | ❌ | Paid |
|
|
517
|
+
| Built-in ONNX | Medium | Good | ✅ | Free |
|
|
518
|
+
|
|
519
|
+
### Batch Operations
|
|
520
|
+
|
|
521
|
+
For large-scale operations, consider batching:
|
|
522
|
+
|
|
523
|
+
```python
|
|
524
|
+
# Batch insert
|
|
525
|
+
embeddings = model.encode(texts)
|
|
526
|
+
for text, emb in zip(texts, embeddings):
|
|
527
|
+
db.insert_chunk(text, emb, doc_id=doc_id)
|
|
528
|
+
|
|
529
|
+
db.save() # Save once after all inserts
|
|
530
|
+
```
|
|
531
|
+
|
|
532
|
+
### Memory Management
|
|
533
|
+
|
|
534
|
+
- Query results hold native handles; call `results.free()` or use a context manager
|
|
535
|
+
- Embedders hold native resources; call `embedder.destroy()` when done
|
|
536
|
+
- Database handles are released by `close()` or by a context manager
|
|
537
|
+
|
|
538
|
+
## Platform-Specific Notes
|
|
539
|
+
|
|
540
|
+
### Linux
|
|
541
|
+
|
|
542
|
+
```bash
|
|
543
|
+
# Build
|
|
544
|
+
cmake --preset desktop-release
|
|
545
|
+
cmake --build build/desktop-release
|
|
546
|
+
|
|
547
|
+
# Install
|
|
548
|
+
cp build/desktop-release/core/libedgevdb_shared.so python/edgevdb/lib/linux/
|
|
549
|
+
pip install -e python/
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
### macOS
|
|
553
|
+
|
|
554
|
+
```bash
|
|
555
|
+
# Build
|
|
556
|
+
cmake --preset desktop-release
|
|
557
|
+
cmake --build build/desktop-release
|
|
558
|
+
|
|
559
|
+
# Install
|
|
560
|
+
cp build/desktop-release/core/libedgevdb_shared.dylib python/edgevdb/lib/darwin/
|
|
561
|
+
pip install -e python/
|
|
562
|
+
```
|
|
563
|
+
|
|
564
|
+
### Windows
|
|
565
|
+
|
|
566
|
+
```powershell
|
|
567
|
+
# Build
|
|
568
|
+
cmake --preset desktop-release
|
|
569
|
+
cmake --build build/desktop-release
|
|
570
|
+
|
|
571
|
+
# Install
|
|
572
|
+
copy build\desktop-release\core\edgevdb_shared.dll python\edgevdb\lib\windows\
|
|
573
|
+
pip install -e python\
|
|
574
|
+
```
|
|
575
|
+
|
|
576
|
+
### Raspberry Pi
|
|
577
|
+
|
|
578
|
+
```bash
|
|
579
|
+
# Build with NEON support
|
|
580
|
+
cmake --preset desktop-release
|
|
581
|
+
cmake --build build/desktop-release
|
|
582
|
+
|
|
583
|
+
# Install
|
|
584
|
+
cp build/desktop-release/core/libedgevdb_shared.so python/edgevdb/lib/linux/
|
|
585
|
+
pip install -e python/
|
|
586
|
+
```
|
|
587
|
+
|
|
588
|
+
## Testing
|
|
589
|
+
|
|
590
|
+
```bash
|
|
591
|
+
cd python
|
|
592
|
+
|
|
593
|
+
# Run tests
|
|
594
|
+
python -m unittest tests.test_edgevdb -v
|
|
595
|
+
|
|
596
|
+
# Or with pytest
|
|
597
|
+
pytest tests/ -v
|
|
598
|
+
```
|
|
599
|
+
|
|
600
|
+
## Troubleshooting
|
|
601
|
+
|
|
602
|
+
### Library Not Found
|
|
603
|
+
|
|
604
|
+
**Error:** `FileNotFoundError: Could not find EdgeVDB library`
|
|
605
|
+
|
|
606
|
+
**Solution:**
|
|
607
|
+
1. Build the C++ core: `cmake --preset desktop-release && cmake --build build/desktop-release`
|
|
608
|
+
2. Copy the shared library to `python/edgevdb/lib/<platform>/`
|
|
609
|
+
3. Verify the library name matches your platform
|
|
610
|
+
|
|
611
|
+
### Import Errors
|
|
612
|
+
|
|
613
|
+
**Error:** `ImportError: dynamic module does not define init function`
|
|
614
|
+
|
|
615
|
+
**Solution:**
|
|
616
|
+
- Ensure the shared library was built for your platform
|
|
617
|
+
- Check that the Python architecture matches the library's (32-bit vs 64-bit)
|
|
618
|
+
- Rebuild the C++ core for your platform
|
|
619
|
+
|
|
620
|
+
### Segmentation Faults
|
|
621
|
+
|
|
622
|
+
**Error:** Python crashes with segmentation fault
|
|
623
|
+
|
|
624
|
+
**Solution:**
|
|
625
|
+
- Ensure you're using the correct library version
|
|
626
|
+
- Check that you're not accessing freed handles
|
|
627
|
+
- Verify embedding dimensions are exactly 384
|
|
628
|
+
- Enable debug logging: `set_log_level(3)`
|
|
629
|
+
|
|
630
|
+
## Contributing
|
|
631
|
+
|
|
632
|
+
### Development Setup
|
|
633
|
+
|
|
634
|
+
```bash
|
|
635
|
+
# Build C++ core in debug mode
|
|
636
|
+
cmake --preset desktop-debug
|
|
637
|
+
cmake --build build/desktop-debug
|
|
638
|
+
|
|
639
|
+
# Copy debug library
|
|
640
|
+
cp build/desktop-debug/core/libedgevdb_shared.so python/edgevdb/
|
|
641
|
+
|
|
642
|
+
# Install in development mode
|
|
643
|
+
cd python
|
|
644
|
+
pip install -e .
|
|
645
|
+
```
|
|
646
|
+
|
|
647
|
+
### Running Tests
|
|
648
|
+
|
|
649
|
+
```bash
|
|
650
|
+
cd python
|
|
651
|
+
python -m unittest tests.test_edgevdb -v
|
|
652
|
+
```
|
|
653
|
+
|
|
654
|
+
### Code Style
|
|
655
|
+
|
|
656
|
+
- Follow PEP 8
|
|
657
|
+
- Use type hints
|
|
658
|
+
- Add docstrings for public APIs
|
|
659
|
+
- Run black and flake8
|
|
660
|
+
|
|
661
|
+
## See Also
|
|
662
|
+
|
|
663
|
+
- [../README.md](../README.md) — Project overview
|
|
664
|
+
- [../../DEVELOPER_GUIDE.md](../../DEVELOPER_GUIDE.md) — Build and integration guide
|
|
665
|
+
- [../../docs/python_integration.md](../../docs/python_integration.md) — Python integration guide
|
|
666
|
+
- [examples/](examples/) — Example scripts
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
edgevdb/__init__.py,sha256=fGSvoYmn5r5JGQQdZkwVKLbMLLP78q1tNNdm5aoPyLI,13423
|
|
2
|
+
edgevdb/embedder.py,sha256=K5pOgqAcgUULaM2WMXmnLfsBJ8Uh6bOok1Mnv_1nc7c,127
|
|
3
|
+
edgevdb/object_store.py,sha256=fn_ydPEJvgAibYJDJNVSMqkbMVHieAq5Kh6y-Lh0aQU,755
|
|
4
|
+
edgevdb/sync.py,sha256=Nmm4KY_YvUnihg8KP4-32hXotWjlVLDYdGAQgTRnhL4,800
|
|
5
|
+
edgevdb/vectordb.py,sha256=Sg26gLffFDye0GyheF82pBAbE4bFKPb1IFgtCr3RgQo,282
|
|
6
|
+
edgevdb/lib/README.md,sha256=j__VETKWC5fBAtgwnEHXdVmST_xZO4yVx-Q2RzGSvVs,802
|
|
7
|
+
edgevdb/lib/darwin/libedgevdb_shared.dylib,sha256=yzBAcrwA_9FKT0fJEtH-t0YW5UyTfNGhnALqugF9Nlk,296664
|
|
8
|
+
edgevdb/lib/linux/libedgevdb_shared.so,sha256=ozzQc31T_6zKMCcKOZhVgeZJykl8ZkUfDhi6l4u3gZA,439272
|
|
9
|
+
edgevdb/lib/windows/libedgevdb_shared.dll,sha256=uJgOO3pvQHUBOB9ROP_uKTR1hYe6bLbBPDTl1r3uuAk,586681
|
|
10
|
+
edgevdb-1.0.0.dist-info/METADATA,sha256=rjtMuSBg0OseEkpuRRWiVyD1ms6W6NDmG5xtcpoUUwY,15029
|
|
11
|
+
edgevdb-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
12
|
+
edgevdb-1.0.0.dist-info/top_level.txt,sha256=BAh4GWbR3XbJXfgJyjfJf4rTaw9WQISnbwdVMKT2ZF4,8
|
|
13
|
+
edgevdb-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
edgevdb
|