haiku.rag 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- haiku/rag/client.py +7 -2
- haiku/rag/store/models/chunk.py +2 -1
- haiku/rag/store/repositories/chunk.py +11 -3
- haiku/rag/store/repositories/document.py +21 -5
- {haiku_rag-0.4.1.dist-info → haiku_rag-0.4.2.dist-info}/METADATA +1 -1
- {haiku_rag-0.4.1.dist-info → haiku_rag-0.4.2.dist-info}/RECORD +9 -9
- {haiku_rag-0.4.1.dist-info → haiku_rag-0.4.2.dist-info}/WHEEL +0 -0
- {haiku_rag-0.4.1.dist-info → haiku_rag-0.4.2.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.4.1.dist-info → haiku_rag-0.4.2.dist-info}/licenses/LICENSE +0 -0
haiku/rag/client.py
CHANGED
|
@@ -50,7 +50,11 @@ class HaikuRAG:
|
|
|
50
50
|
return False
|
|
51
51
|
|
|
52
52
|
async def create_document(
|
|
53
|
-
self,
|
|
53
|
+
self,
|
|
54
|
+
content: str,
|
|
55
|
+
uri: str | None = None,
|
|
56
|
+
metadata: dict | None = None,
|
|
57
|
+
chunks: list[Chunk] | None = None,
|
|
54
58
|
) -> Document:
|
|
55
59
|
"""Create a new document with optional URI and metadata.
|
|
56
60
|
|
|
@@ -58,6 +62,7 @@ class HaikuRAG:
|
|
|
58
62
|
content: The text content of the document.
|
|
59
63
|
uri: Optional URI identifier for the document.
|
|
60
64
|
metadata: Optional metadata dictionary.
|
|
65
|
+
chunks: Optional list of pre-created chunks to use instead of generating new ones.
|
|
61
66
|
|
|
62
67
|
Returns:
|
|
63
68
|
The created Document instance.
|
|
@@ -67,7 +72,7 @@ class HaikuRAG:
|
|
|
67
72
|
uri=uri,
|
|
68
73
|
metadata=metadata or {},
|
|
69
74
|
)
|
|
70
|
-
return await self.document_repository.create(document)
|
|
75
|
+
return await self.document_repository.create(document, chunks)
|
|
71
76
|
|
|
72
77
|
async def create_document_from_source(
|
|
73
78
|
self, source: str | Path, metadata: dict = {}
|
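haiku/rag/store/models/chunk.py
CHANGED
(The +2 -1 hunk for this file is missing from this extract.)
haiku/rag/store/repositories/chunk.py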
CHANGED
|
@@ -18,6 +18,8 @@ class ChunkRepository(BaseRepository[Chunk]):
|
|
|
18
18
|
"""Create a chunk in the database."""
|
|
19
19
|
if self.store._connection is None:
|
|
20
20
|
raise ValueError("Store connection is not available")
|
|
21
|
+
if entity.document_id is None:
|
|
22
|
+
raise ValueError("Chunk must have a document_id to be created")
|
|
21
23
|
|
|
22
24
|
cursor = self.store._connection.cursor()
|
|
23
25
|
cursor.execute(
|
|
@@ -34,9 +36,15 @@ class ChunkRepository(BaseRepository[Chunk]):
|
|
|
34
36
|
|
|
35
37
|
entity.id = cursor.lastrowid
|
|
36
38
|
|
|
37
|
-
# Generate and store embedding
|
|
38
|
-
embedding
|
|
39
|
-
|
|
39
|
+
# Generate and store embedding - use existing one if provided
|
|
40
|
+
if entity.embedding is not None:
|
|
41
|
+
# Use the provided embedding
|
|
42
|
+
serialized_embedding = self.store.serialize_embedding(entity.embedding)
|
|
43
|
+
else:
|
|
44
|
+
# Generate embedding from content
|
|
45
|
+
embedding = await self.embedder.embed(entity.content)
|
|
46
|
+
serialized_embedding = self.store.serialize_embedding(embedding)
|
|
47
|
+
|
|
40
48
|
cursor.execute(
|
|
41
49
|
"""
|
|
42
50
|
INSERT INTO chunk_embeddings (chunk_id, embedding)
|
|
haiku/rag/store/repositories/document.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
2
3
|
|
|
3
4
|
from haiku.rag.store.models.document import Document
|
|
4
5
|
from haiku.rag.store.repositories.base import BaseRepository
|
|
5
6
|
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from haiku.rag.store.models.chunk import Chunk
|
|
9
|
+
|
|
6
10
|
|
|
7
11
|
class DocumentRepository(BaseRepository[Document]):
|
|
8
12
|
"""Repository for Document database operations."""
|
|
@@ -16,7 +20,9 @@ class DocumentRepository(BaseRepository[Document]):
|
|
|
16
20
|
chunk_repository = ChunkRepository(store)
|
|
17
21
|
self.chunk_repository = chunk_repository
|
|
18
22
|
|
|
19
|
-
async def create(
|
|
23
|
+
async def create(
|
|
24
|
+
self, entity: Document, chunks: list["Chunk"] | None = None
|
|
25
|
+
) -> Document:
|
|
20
26
|
"""Create a document with its chunks and embeddings."""
|
|
21
27
|
if self.store._connection is None:
|
|
22
28
|
raise ValueError("Store connection is not available")
|
|
@@ -46,10 +52,20 @@ class DocumentRepository(BaseRepository[Document]):
|
|
|
46
52
|
assert document_id is not None, "Failed to create document in database"
|
|
47
53
|
entity.id = document_id
|
|
48
54
|
|
|
49
|
-
# Create chunks
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
55
|
+
# Create chunks - either use provided chunks or generate from content
|
|
56
|
+
if chunks is not None:
|
|
57
|
+
# Use provided chunks, but update their document_id and set order from list position
|
|
58
|
+
for order, chunk in enumerate(chunks):
|
|
59
|
+
chunk.document_id = document_id
|
|
60
|
+
# Ensure order is set from list position
|
|
61
|
+
chunk.metadata = chunk.metadata.copy() if chunk.metadata else {}
|
|
62
|
+
chunk.metadata["order"] = order
|
|
63
|
+
await self.chunk_repository.create(chunk, commit=False)
|
|
64
|
+
else:
|
|
65
|
+
# Create chunks and embeddings using ChunkRepository
|
|
66
|
+
await self.chunk_repository.create_chunks_for_document(
|
|
67
|
+
document_id, entity.content, commit=False
|
|
68
|
+
)
|
|
53
69
|
|
|
54
70
|
cursor.execute("COMMIT")
|
|
55
71
|
return entity
|
|
{haiku_rag-0.4.1.dist-info → haiku_rag-0.4.2.dist-info}/RECORD
CHANGED
|
@@ -2,7 +2,7 @@ haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
2
2
|
haiku/rag/app.py,sha256=FpLVyP1-zAq_XPmU8CPVLkuIAeuhBOGvMqhYS8RbN40,7649
|
|
3
3
|
haiku/rag/chunker.py,sha256=MbCtP66OfTFoIBvqmVT9T9c87fozsYYzAQzJJEfPBVI,1812
|
|
4
4
|
haiku/rag/cli.py,sha256=oCj65JcV2MEhzA2okbLHAK1I0FrClIKuYZx2jtbjbqE,5628
|
|
5
|
-
haiku/rag/client.py,sha256=
|
|
5
|
+
haiku/rag/client.py,sha256=MZNIpMm6MS3P6vjLqiCztT2dBOM7-bZOosX5IpbHJbI,12724
|
|
6
6
|
haiku/rag/config.py,sha256=_Ss54kmfxVAJupExLKaYjYUlFxJgb7hEEdbG4-isapY,1662
|
|
7
7
|
haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
|
|
8
8
|
haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
|
|
@@ -27,17 +27,17 @@ haiku/rag/reranking/mxbai.py,sha256=46sVTsTIkzIX9THgM3u8HaEmgY7evvEyB-N54JTHvK8,
|
|
|
27
27
|
haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
|
|
28
28
|
haiku/rag/store/engine.py,sha256=4ouAD0s-TFwEoEHjVVw_KnV6aaw5nwhe9fdT8PRXfok,6061
|
|
29
29
|
haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
|
|
30
|
-
haiku/rag/store/models/chunk.py,sha256=
|
|
30
|
+
haiku/rag/store/models/chunk.py,sha256=9-vIxW75-kMTelIhgVIMd_WhP-Drc1q65vjaWMP8w1E,364
|
|
31
31
|
haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
|
|
32
32
|
haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPvrDIHVhY5T1A,263
|
|
33
33
|
haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
|
|
34
|
-
haiku/rag/store/repositories/chunk.py,sha256=
|
|
35
|
-
haiku/rag/store/repositories/document.py,sha256=
|
|
34
|
+
haiku/rag/store/repositories/chunk.py,sha256=UyvHhKb1ESZePoTp2GneAARdfKoocEdfPOwgWPPQ0v8,16878
|
|
35
|
+
haiku/rag/store/repositories/document.py,sha256=fXIWevJaOe6x2cK4u9cQxiEGD0ntKQb9y3VRqklQypE,7920
|
|
36
36
|
haiku/rag/store/repositories/settings.py,sha256=dme3_ulQdQvyF9daavSjAd-SjZ5hh0MJoxP7iXgap-A,2492
|
|
37
37
|
haiku/rag/store/upgrades/__init__.py,sha256=kKS1YWT_P-CYKhKtokOLTIFNKf9jlfjFFr8lyIMeogM,100
|
|
38
38
|
haiku/rag/store/upgrades/v0_3_4.py,sha256=GLogKZdZ40NX1vBHKdOJju7fFzNUCHoEnjSZg17Hm2U,663
|
|
39
|
-
haiku_rag-0.4.
|
|
40
|
-
haiku_rag-0.4.
|
|
41
|
-
haiku_rag-0.4.
|
|
42
|
-
haiku_rag-0.4.
|
|
43
|
-
haiku_rag-0.4.
|
|
39
|
+
haiku_rag-0.4.2.dist-info/METADATA,sha256=0ctTSGB6uqGl2INUcNxnCphxwrLTlK7KVdKKXXB35mg,4235
|
|
40
|
+
haiku_rag-0.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
41
|
+
haiku_rag-0.4.2.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
|
|
42
|
+
haiku_rag-0.4.2.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
|
|
43
|
+
haiku_rag-0.4.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|