beaver-db 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of beaver-db might be problematic. Click here for more details.
- beaver/core.py +205 -59
- {beaver_db-0.3.0.dist-info → beaver_db-0.4.0.dist-info}/METADATA +1 -1
- beaver_db-0.4.0.dist-info/RECORD +6 -0
- beaver_db-0.3.0.dist-info/RECORD +0 -6
- {beaver_db-0.3.0.dist-info → beaver_db-0.4.0.dist-info}/WHEEL +0 -0
- {beaver_db-0.3.0.dist-info → beaver_db-0.4.0.dist-info}/top_level.txt +0 -0
beaver/core.py
CHANGED
|
@@ -29,56 +29,82 @@ class BeaverDB:
|
|
|
29
29
|
self._create_kv_table()
|
|
30
30
|
self._create_list_table()
|
|
31
31
|
self._create_collections_table()
|
|
32
|
+
self._create_fts_table() # <-- Nueva llamada
|
|
33
|
+
|
|
34
|
+
def _create_fts_table(self):
|
|
35
|
+
"""Creates the virtual FTS table for full text search."""
|
|
36
|
+
with self._conn:
|
|
37
|
+
self._conn.execute(
|
|
38
|
+
"""
|
|
39
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS beaver_fts_index USING fts5(
|
|
40
|
+
collection,
|
|
41
|
+
item_id,
|
|
42
|
+
field_path,
|
|
43
|
+
field_content,
|
|
44
|
+
tokenize = 'porter'
|
|
45
|
+
)
|
|
46
|
+
"""
|
|
47
|
+
)
|
|
32
48
|
|
|
33
49
|
def _create_pubsub_table(self):
|
|
34
50
|
"""Creates the pub/sub log table if it doesn't exist."""
|
|
35
51
|
with self._conn:
|
|
36
|
-
self._conn.execute(
|
|
52
|
+
self._conn.execute(
|
|
53
|
+
"""
|
|
37
54
|
CREATE TABLE IF NOT EXISTS beaver_pubsub_log (
|
|
38
55
|
timestamp REAL PRIMARY KEY,
|
|
39
56
|
channel_name TEXT NOT NULL,
|
|
40
57
|
message_payload TEXT NOT NULL
|
|
41
58
|
)
|
|
42
|
-
"""
|
|
43
|
-
|
|
59
|
+
"""
|
|
60
|
+
)
|
|
61
|
+
self._conn.execute(
|
|
62
|
+
"""
|
|
44
63
|
CREATE INDEX IF NOT EXISTS idx_pubsub_channel_timestamp
|
|
45
64
|
ON beaver_pubsub_log (channel_name, timestamp)
|
|
46
|
-
"""
|
|
65
|
+
"""
|
|
66
|
+
)
|
|
47
67
|
|
|
48
68
|
def _create_kv_table(self):
|
|
49
69
|
"""Creates the key-value store table if it doesn't exist."""
|
|
50
70
|
with self._conn:
|
|
51
|
-
self._conn.execute(
|
|
71
|
+
self._conn.execute(
|
|
72
|
+
"""
|
|
52
73
|
CREATE TABLE IF NOT EXISTS _beaver_kv_store (
|
|
53
74
|
key TEXT PRIMARY KEY,
|
|
54
75
|
value TEXT NOT NULL
|
|
55
76
|
)
|
|
56
|
-
"""
|
|
77
|
+
"""
|
|
78
|
+
)
|
|
57
79
|
|
|
58
80
|
def _create_list_table(self):
|
|
59
81
|
"""Creates the lists table if it doesn't exist."""
|
|
60
82
|
with self._conn:
|
|
61
|
-
self._conn.execute(
|
|
83
|
+
self._conn.execute(
|
|
84
|
+
"""
|
|
62
85
|
CREATE TABLE IF NOT EXISTS beaver_lists (
|
|
63
86
|
list_name TEXT NOT NULL,
|
|
64
87
|
item_order REAL NOT NULL,
|
|
65
88
|
item_value TEXT NOT NULL,
|
|
66
89
|
PRIMARY KEY (list_name, item_order)
|
|
67
90
|
)
|
|
68
|
-
"""
|
|
91
|
+
"""
|
|
92
|
+
)
|
|
69
93
|
|
|
70
94
|
def _create_collections_table(self):
|
|
71
95
|
"""Creates the collections table if it doesn't exist."""
|
|
72
96
|
with self._conn:
|
|
73
|
-
self._conn.execute(
|
|
97
|
+
self._conn.execute(
|
|
98
|
+
"""
|
|
74
99
|
CREATE TABLE IF NOT EXISTS beaver_collections (
|
|
75
100
|
collection TEXT NOT NULL,
|
|
76
101
|
item_id TEXT NOT NULL,
|
|
77
|
-
item_vector BLOB
|
|
102
|
+
item_vector BLOB,
|
|
78
103
|
metadata TEXT,
|
|
79
104
|
PRIMARY KEY (collection, item_id)
|
|
80
105
|
)
|
|
81
|
-
"""
|
|
106
|
+
"""
|
|
107
|
+
)
|
|
82
108
|
|
|
83
109
|
def close(self):
|
|
84
110
|
"""Closes the database connection."""
|
|
@@ -110,7 +136,7 @@ class BeaverDB:
|
|
|
110
136
|
with self._conn:
|
|
111
137
|
self._conn.execute(
|
|
112
138
|
"INSERT OR REPLACE INTO _beaver_kv_store (key, value) VALUES (?, ?)",
|
|
113
|
-
(key, json_value)
|
|
139
|
+
(key, json_value),
|
|
114
140
|
)
|
|
115
141
|
|
|
116
142
|
def get(self, key: str) -> Any:
|
|
@@ -136,7 +162,7 @@ class BeaverDB:
|
|
|
136
162
|
cursor.close()
|
|
137
163
|
|
|
138
164
|
if result:
|
|
139
|
-
return json.loads(result[
|
|
165
|
+
return json.loads(result["value"])
|
|
140
166
|
return None
|
|
141
167
|
|
|
142
168
|
# --- List Methods ---
|
|
@@ -173,16 +199,14 @@ class BeaverDB:
|
|
|
173
199
|
except TypeError as e:
|
|
174
200
|
raise TypeError("Message payload must be JSON-serializable.") from e
|
|
175
201
|
|
|
176
|
-
await asyncio.to_thread(
|
|
177
|
-
self._write_publish_to_db, channel_name, json_payload
|
|
178
|
-
)
|
|
202
|
+
await asyncio.to_thread(self._write_publish_to_db, channel_name, json_payload)
|
|
179
203
|
|
|
180
204
|
def _write_publish_to_db(self, channel_name, json_payload):
|
|
181
205
|
"""The synchronous part of the publish operation."""
|
|
182
206
|
with self._conn:
|
|
183
207
|
self._conn.execute(
|
|
184
208
|
"INSERT INTO beaver_pubsub_log (timestamp, channel_name, message_payload) VALUES (?, ?, ?)",
|
|
185
|
-
(time.time(), channel_name, json_payload)
|
|
209
|
+
(time.time(), channel_name, json_payload),
|
|
186
210
|
)
|
|
187
211
|
|
|
188
212
|
def subscribe(self, channel_name: str) -> "Subscriber":
|
|
@@ -202,7 +226,9 @@ class ListWrapper:
|
|
|
202
226
|
def __len__(self) -> int:
|
|
203
227
|
"""Returns the number of items in the list (e.g., `len(my_list)`)."""
|
|
204
228
|
cursor = self._conn.cursor()
|
|
205
|
-
cursor.execute(
|
|
229
|
+
cursor.execute(
|
|
230
|
+
"SELECT COUNT(*) FROM beaver_lists WHERE list_name = ?", (self._name,)
|
|
231
|
+
)
|
|
206
232
|
count = cursor.fetchone()[0]
|
|
207
233
|
cursor.close()
|
|
208
234
|
return count
|
|
@@ -223,9 +249,9 @@ class ListWrapper:
|
|
|
223
249
|
cursor = self._conn.cursor()
|
|
224
250
|
cursor.execute(
|
|
225
251
|
"SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT ? OFFSET ?",
|
|
226
|
-
(self._name, limit, start)
|
|
252
|
+
(self._name, limit, start),
|
|
227
253
|
)
|
|
228
|
-
results = [json.loads(row[
|
|
254
|
+
results = [json.loads(row["item_value"]) for row in cursor.fetchall()]
|
|
229
255
|
cursor.close()
|
|
230
256
|
return results
|
|
231
257
|
|
|
@@ -239,11 +265,11 @@ class ListWrapper:
|
|
|
239
265
|
cursor = self._conn.cursor()
|
|
240
266
|
cursor.execute(
|
|
241
267
|
"SELECT item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
|
|
242
|
-
(self._name, offset)
|
|
268
|
+
(self._name, offset),
|
|
243
269
|
)
|
|
244
270
|
result = cursor.fetchone()
|
|
245
271
|
cursor.close()
|
|
246
|
-
return json.loads(result[
|
|
272
|
+
return json.loads(result["item_value"]) if result else None
|
|
247
273
|
|
|
248
274
|
else:
|
|
249
275
|
raise TypeError("List indices must be integers or slices.")
|
|
@@ -253,7 +279,7 @@ class ListWrapper:
|
|
|
253
279
|
cursor = self._conn.cursor()
|
|
254
280
|
cursor.execute(
|
|
255
281
|
"SELECT item_order FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1 OFFSET ?",
|
|
256
|
-
(self._name, index)
|
|
282
|
+
(self._name, index),
|
|
257
283
|
)
|
|
258
284
|
result = cursor.fetchone()
|
|
259
285
|
cursor.close()
|
|
@@ -267,26 +293,32 @@ class ListWrapper:
|
|
|
267
293
|
"""Pushes an item to the end of the list."""
|
|
268
294
|
with self._conn:
|
|
269
295
|
cursor = self._conn.cursor()
|
|
270
|
-
cursor.execute(
|
|
296
|
+
cursor.execute(
|
|
297
|
+
"SELECT MAX(item_order) FROM beaver_lists WHERE list_name = ?",
|
|
298
|
+
(self._name,),
|
|
299
|
+
)
|
|
271
300
|
max_order = cursor.fetchone()[0] or 0.0
|
|
272
301
|
new_order = max_order + 1.0
|
|
273
302
|
|
|
274
303
|
cursor.execute(
|
|
275
304
|
"INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
|
|
276
|
-
(self._name, new_order, json.dumps(value))
|
|
305
|
+
(self._name, new_order, json.dumps(value)),
|
|
277
306
|
)
|
|
278
307
|
|
|
279
308
|
def prepend(self, value: Any):
|
|
280
309
|
"""Prepends an item to the beginning of the list."""
|
|
281
310
|
with self._conn:
|
|
282
311
|
cursor = self._conn.cursor()
|
|
283
|
-
cursor.execute(
|
|
312
|
+
cursor.execute(
|
|
313
|
+
"SELECT MIN(item_order) FROM beaver_lists WHERE list_name = ?",
|
|
314
|
+
(self._name,),
|
|
315
|
+
)
|
|
284
316
|
min_order = cursor.fetchone()[0] or 0.0
|
|
285
317
|
new_order = min_order - 1.0
|
|
286
318
|
|
|
287
319
|
cursor.execute(
|
|
288
320
|
"INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
|
|
289
|
-
(self._name, new_order, json.dumps(value))
|
|
321
|
+
(self._name, new_order, json.dumps(value)),
|
|
290
322
|
)
|
|
291
323
|
|
|
292
324
|
def insert(self, index: int, value: Any):
|
|
@@ -308,7 +340,7 @@ class ListWrapper:
|
|
|
308
340
|
with self._conn:
|
|
309
341
|
self._conn.execute(
|
|
310
342
|
"INSERT INTO beaver_lists (list_name, item_order, item_value) VALUES (?, ?, ?)",
|
|
311
|
-
(self._name, new_order, json.dumps(value))
|
|
343
|
+
(self._name, new_order, json.dumps(value)),
|
|
312
344
|
)
|
|
313
345
|
|
|
314
346
|
def pop(self) -> Any:
|
|
@@ -317,14 +349,16 @@ class ListWrapper:
|
|
|
317
349
|
cursor = self._conn.cursor()
|
|
318
350
|
cursor.execute(
|
|
319
351
|
"SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order DESC LIMIT 1",
|
|
320
|
-
(self._name,)
|
|
352
|
+
(self._name,),
|
|
321
353
|
)
|
|
322
354
|
result = cursor.fetchone()
|
|
323
355
|
if not result:
|
|
324
356
|
return None
|
|
325
357
|
|
|
326
358
|
rowid_to_delete, value_to_return = result
|
|
327
|
-
cursor.execute(
|
|
359
|
+
cursor.execute(
|
|
360
|
+
"DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
|
|
361
|
+
)
|
|
328
362
|
return json.loads(value_to_return)
|
|
329
363
|
|
|
330
364
|
def deque(self) -> Any:
|
|
@@ -333,14 +367,16 @@ class ListWrapper:
|
|
|
333
367
|
cursor = self._conn.cursor()
|
|
334
368
|
cursor.execute(
|
|
335
369
|
"SELECT rowid, item_value FROM beaver_lists WHERE list_name = ? ORDER BY item_order ASC LIMIT 1",
|
|
336
|
-
(self._name,)
|
|
370
|
+
(self._name,),
|
|
337
371
|
)
|
|
338
372
|
result = cursor.fetchone()
|
|
339
373
|
if not result:
|
|
340
374
|
return None
|
|
341
375
|
|
|
342
376
|
rowid_to_delete, value_to_return = result
|
|
343
|
-
cursor.execute(
|
|
377
|
+
cursor.execute(
|
|
378
|
+
"DELETE FROM beaver_lists WHERE rowid = ?", (rowid_to_delete,)
|
|
379
|
+
)
|
|
344
380
|
return json.loads(value_to_return)
|
|
345
381
|
|
|
346
382
|
|
|
@@ -350,7 +386,9 @@ class Subscriber(AsyncIterator):
|
|
|
350
386
|
Designed to be used with 'async with'.
|
|
351
387
|
"""
|
|
352
388
|
|
|
353
|
-
def __init__(
|
|
389
|
+
def __init__(
|
|
390
|
+
self, conn: sqlite3.Connection, channel_name: str, poll_interval: float = 0.1
|
|
391
|
+
):
|
|
354
392
|
self._conn = conn
|
|
355
393
|
self._channel = channel_name
|
|
356
394
|
self._poll_interval = poll_interval
|
|
@@ -362,9 +400,7 @@ class Subscriber(AsyncIterator):
|
|
|
362
400
|
"""Background task that polls the database for new messages."""
|
|
363
401
|
while True:
|
|
364
402
|
try:
|
|
365
|
-
new_messages = await asyncio.to_thread(
|
|
366
|
-
self._fetch_new_messages_from_db
|
|
367
|
-
)
|
|
403
|
+
new_messages = await asyncio.to_thread(self._fetch_new_messages_from_db)
|
|
368
404
|
if new_messages:
|
|
369
405
|
for msg in new_messages:
|
|
370
406
|
payload = json.loads(msg["message_payload"])
|
|
@@ -382,7 +418,7 @@ class Subscriber(AsyncIterator):
|
|
|
382
418
|
cursor = self._conn.cursor()
|
|
383
419
|
cursor.execute(
|
|
384
420
|
"SELECT timestamp, message_payload FROM beaver_pubsub_log WHERE channel_name = ? AND timestamp > ? ORDER BY timestamp ASC",
|
|
385
|
-
(self._channel, self._last_seen_timestamp)
|
|
421
|
+
(self._channel, self._last_seen_timestamp),
|
|
386
422
|
)
|
|
387
423
|
results = cursor.fetchall()
|
|
388
424
|
cursor.close()
|
|
@@ -409,12 +445,21 @@ class Subscriber(AsyncIterator):
|
|
|
409
445
|
|
|
410
446
|
class Document:
|
|
411
447
|
"""A data class for a vector and its metadata, with a unique ID."""
|
|
412
|
-
def __init__(self, embedding: list[float], id: str|None = None, **metadata):
|
|
413
|
-
if not isinstance(embedding, list) or not all(isinstance(x, (int, float)) for x in embedding):
|
|
414
|
-
raise TypeError("Embedding must be a list of numbers.")
|
|
415
448
|
|
|
449
|
+
def __init__(
|
|
450
|
+
self, embedding: list[float] | None = None, id: str | None = None, **metadata
|
|
451
|
+
):
|
|
416
452
|
self.id = id or str(uuid.uuid4())
|
|
417
|
-
|
|
453
|
+
|
|
454
|
+
if embedding is None:
|
|
455
|
+
self.embedding = None
|
|
456
|
+
else:
|
|
457
|
+
if not isinstance(embedding, list) or not all(
|
|
458
|
+
isinstance(x, (int, float)) for x in embedding
|
|
459
|
+
):
|
|
460
|
+
raise TypeError("Embedding must be a list of numbers.")
|
|
461
|
+
|
|
462
|
+
self.embedding = np.array(embedding, dtype=np.float32)
|
|
418
463
|
|
|
419
464
|
for key, value in metadata.items():
|
|
420
465
|
setattr(self, key, value)
|
|
@@ -423,42 +468,74 @@ class Document:
|
|
|
423
468
|
"""Serializes metadata to a dictionary."""
|
|
424
469
|
metadata = self.__dict__.copy()
|
|
425
470
|
# Exclude internal attributes from the metadata payload
|
|
426
|
-
metadata.pop(
|
|
427
|
-
metadata.pop(
|
|
471
|
+
metadata.pop("embedding", None)
|
|
472
|
+
metadata.pop("id", None)
|
|
428
473
|
return metadata
|
|
429
474
|
|
|
430
475
|
def __repr__(self):
|
|
431
|
-
metadata_str =
|
|
476
|
+
metadata_str = ", ".join(f"{k}={v!r}" for k, v in self.to_dict().items())
|
|
432
477
|
return f"Document(id='{self.id}', {metadata_str})"
|
|
433
478
|
|
|
434
479
|
|
|
435
480
|
class CollectionWrapper:
|
|
436
481
|
"""A wrapper for vector collection operations with upsert logic."""
|
|
482
|
+
|
|
437
483
|
def __init__(self, name: str, conn: sqlite3.Connection):
|
|
438
484
|
self._name = name
|
|
439
485
|
self._conn = conn
|
|
440
486
|
|
|
441
|
-
|
|
442
|
-
"""
|
|
443
|
-
Indexes a Document, performing an upsert based on the document's ID.
|
|
444
|
-
If the ID exists, the record is replaced.
|
|
445
|
-
If the ID is new (or auto-generated), a new record is inserted.
|
|
487
|
+
# Inside the CollectionWrapper class in beaver/core.py
|
|
446
488
|
|
|
447
|
-
|
|
448
|
-
|
|
489
|
+
def _flatten_metadata(self, metadata: dict, prefix: str = "") -> dict[str, str]:
|
|
490
|
+
"""
|
|
491
|
+
Aplana un diccionario anidado y filtra solo los valores de tipo string.
|
|
492
|
+
Ejemplo: {'a': {'b': 'c'}} -> {'a__b': 'c'}
|
|
493
|
+
"""
|
|
494
|
+
flat_dict = {}
|
|
495
|
+
for key, value in metadata.items():
|
|
496
|
+
new_key = f"{prefix}__{key}" if prefix else key
|
|
497
|
+
if isinstance(value, dict):
|
|
498
|
+
flat_dict.update(self._flatten_metadata(value, new_key))
|
|
499
|
+
elif isinstance(value, str):
|
|
500
|
+
flat_dict[new_key] = value
|
|
501
|
+
return flat_dict
|
|
502
|
+
|
|
503
|
+
def index(self, document: "Document", *, fts: bool = True):
    """
    Upsert a document into this collection and refresh its full-text rows.

    The document row is written with INSERT OR REPLACE keyed on
    (collection, item_id). Any full-text rows previously stored for the same
    document are always removed first, so re-indexing with ``fts=False``
    does not leave stale text searchable.

    Args:
        document: The document to store. Its ``embedding`` may be None, in
            which case NULL is stored for the vector.
        fts: When True, every string field of the flattened metadata is
            added to the full-text index.

    Raises:
        TypeError: If the document metadata is not JSON-serializable.
    """
    # Serialize once, before opening the transaction, so a bad payload fails
    # without touching the database.
    metadata = document.to_dict()
    metadata_json = json.dumps(metadata)
    vector_blob = (
        document.embedding.tobytes() if document.embedding is not None else None
    )

    fts_rows = []
    if fts:
        fts_rows = [
            (self._name, document.id, path, content)
            for path, content in self._flatten_metadata(metadata).items()
        ]

    # One transaction: the index rows and the document row change together.
    with self._conn:
        self._conn.execute(
            "DELETE FROM beaver_fts_index WHERE collection = ? AND item_id = ?",
            (self._name, document.id),
        )
        if fts_rows:
            self._conn.executemany(
                "INSERT INTO beaver_fts_index (collection, item_id, field_path, field_content) VALUES (?, ?, ?, ?)",
                fts_rows,
            )
        self._conn.execute(
            "INSERT OR REPLACE INTO beaver_collections (collection, item_id, item_vector, metadata) VALUES (?, ?, ?, ?)",
            (self._name, document.id, vector_blob, metadata_json),
        )
|
|
460
535
|
|
|
461
|
-
def search(
|
|
536
|
+
def search(
|
|
537
|
+
self, vector: list[float], top_k: int = 10
|
|
538
|
+
) -> list[tuple[Document, float]]:
|
|
462
539
|
"""
|
|
463
540
|
Performs a vector search and returns Document objects.
|
|
464
541
|
"""
|
|
@@ -467,7 +544,7 @@ class CollectionWrapper:
|
|
|
467
544
|
cursor = self._conn.cursor()
|
|
468
545
|
cursor.execute(
|
|
469
546
|
"SELECT item_id, item_vector, metadata FROM beaver_collections WHERE collection = ?",
|
|
470
|
-
(self._name,)
|
|
547
|
+
(self._name,),
|
|
471
548
|
)
|
|
472
549
|
|
|
473
550
|
all_docs_data = cursor.fetchall()
|
|
@@ -478,9 +555,12 @@ class CollectionWrapper:
|
|
|
478
555
|
|
|
479
556
|
results = []
|
|
480
557
|
for row in all_docs_data:
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
558
|
+
if row["item_vector"] is None:
|
|
559
|
+
continue # Skip documents without embeddings
|
|
560
|
+
|
|
561
|
+
doc_id = row["item_id"]
|
|
562
|
+
embedding = np.frombuffer(row["item_vector"], dtype=np.float32).tolist()
|
|
563
|
+
metadata = json.loads(row["metadata"])
|
|
484
564
|
|
|
485
565
|
distance = np.linalg.norm(embedding - query_vector)
|
|
486
566
|
|
|
@@ -490,3 +570,69 @@ class CollectionWrapper:
|
|
|
490
570
|
|
|
491
571
|
results.sort(key=lambda x: x[1])
|
|
492
572
|
return results[:top_k]
|
|
573
|
+
|
|
574
|
+
def match(
|
|
575
|
+
self, query: str, on_field: str | None = None, top_k: int = 10
|
|
576
|
+
) -> list[tuple[Document, float]]:
|
|
577
|
+
"""
|
|
578
|
+
Realiza una búsqueda de texto completo en los campos de metadatos indexados.
|
|
579
|
+
|
|
580
|
+
Args:
|
|
581
|
+
query: La expresión de búsqueda (ej. "gato", "perro OR conejo").
|
|
582
|
+
on_field: Opcional, el campo específico donde buscar (ej. "details__title").
|
|
583
|
+
top_k: El número máximo de resultados a devolver.
|
|
584
|
+
|
|
585
|
+
Returns:
|
|
586
|
+
Una lista de tuplas (Documento, puntuación_de_relevancia).
|
|
587
|
+
"""
|
|
588
|
+
cursor = self._conn.cursor()
|
|
589
|
+
|
|
590
|
+
sql_query = """
|
|
591
|
+
SELECT
|
|
592
|
+
t1.item_id, t1.item_vector, t1.metadata, fts.rank
|
|
593
|
+
FROM beaver_collections AS t1
|
|
594
|
+
JOIN (
|
|
595
|
+
SELECT DISTINCT item_id, rank
|
|
596
|
+
FROM beaver_fts_index
|
|
597
|
+
WHERE beaver_fts_index MATCH ?
|
|
598
|
+
ORDER BY rank
|
|
599
|
+
LIMIT ?
|
|
600
|
+
) AS fts ON t1.item_id = fts.item_id
|
|
601
|
+
WHERE t1.collection = ?
|
|
602
|
+
ORDER BY fts.rank
|
|
603
|
+
"""
|
|
604
|
+
|
|
605
|
+
params = []
|
|
606
|
+
field_filter_sql = ""
|
|
607
|
+
|
|
608
|
+
if on_field:
|
|
609
|
+
field_filter_sql = "AND field_path = ?"
|
|
610
|
+
params.append(on_field)
|
|
611
|
+
else:
|
|
612
|
+
# Búsqueda en todos los campos
|
|
613
|
+
params.append(query)
|
|
614
|
+
|
|
615
|
+
sql_query = sql_query.format(field_filter_sql)
|
|
616
|
+
params.extend([top_k, self._name])
|
|
617
|
+
|
|
618
|
+
cursor.execute(sql_query, tuple(params))
|
|
619
|
+
|
|
620
|
+
results = []
|
|
621
|
+
for row in cursor.fetchall():
|
|
622
|
+
doc_id = row["item_id"]
|
|
623
|
+
|
|
624
|
+
if row["item_vector"] is None:
|
|
625
|
+
embedding = None
|
|
626
|
+
else:
|
|
627
|
+
embedding = np.frombuffer(row["item_vector"], dtype=np.float32).tolist()
|
|
628
|
+
|
|
629
|
+
metadata = json.loads(row["metadata"])
|
|
630
|
+
rank = row["rank"]
|
|
631
|
+
|
|
632
|
+
doc = Document(id=doc_id, embedding=embedding, **metadata)
|
|
633
|
+
results.append((doc, rank))
|
|
634
|
+
|
|
635
|
+
results.sort(key=lambda x: x[1])
|
|
636
|
+
cursor.close()
|
|
637
|
+
|
|
638
|
+
return results
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
beaver/__init__.py,sha256=uTPhMNDjw41YTWQN8NTLbovudfp8RIwcqbZ5XtYIuJA,36
|
|
2
|
+
beaver/core.py,sha256=i2rBoUM1rq_j1xM3w4xW4c9e2eI8Ce6BeJ8rE8jQ-fI,21928
|
|
3
|
+
beaver_db-0.4.0.dist-info/METADATA,sha256=7VzqxHKU-Ft1QVAfVvywt4e50C3QWxS7FUpKIaQEJKk,4865
|
|
4
|
+
beaver_db-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
5
|
+
beaver_db-0.4.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
|
|
6
|
+
beaver_db-0.4.0.dist-info/RECORD,,
|
beaver_db-0.3.0.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
beaver/__init__.py,sha256=uTPhMNDjw41YTWQN8NTLbovudfp8RIwcqbZ5XtYIuJA,36
|
|
2
|
-
beaver/core.py,sha256=I-_i8AshcNor1OZxoEtNjzLXCy1Byuxvo84y9K4AV_Q,17518
|
|
3
|
-
beaver_db-0.3.0.dist-info/METADATA,sha256=_Hy3Fq64IDahqm3K0vuPvvZRTmtO0sU-tEGlmEjLNpE,4865
|
|
4
|
-
beaver_db-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
5
|
-
beaver_db-0.3.0.dist-info/top_level.txt,sha256=FxA4XnX5Qm5VudEXCduFriqi4dQmDWpQ64d7g69VQKI,7
|
|
6
|
-
beaver_db-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|