haiku.rag 0.6.0-py3-none-any.whl → 0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic.
- haiku/rag/app.py +4 -4
- haiku/rag/cli.py +38 -27
- haiku/rag/client.py +19 -23
- haiku/rag/config.py +6 -2
- haiku/rag/logging.py +4 -0
- haiku/rag/mcp.py +12 -9
- haiku/rag/migration.py +316 -0
- haiku/rag/reranking/__init__.py +0 -6
- haiku/rag/store/engine.py +173 -141
- haiku/rag/store/models/chunk.py +2 -2
- haiku/rag/store/models/document.py +1 -1
- haiku/rag/store/repositories/__init__.py +6 -2
- haiku/rag/store/repositories/chunk.py +279 -414
- haiku/rag/store/repositories/document.py +171 -205
- haiku/rag/store/repositories/settings.py +115 -49
- haiku/rag/store/upgrades/__init__.py +1 -3
- haiku/rag/utils.py +39 -31
- {haiku_rag-0.6.0.dist-info → haiku_rag-0.7.0.dist-info}/METADATA +21 -16
- haiku_rag-0.7.0.dist-info/RECORD +39 -0
- haiku/rag/reranking/ollama.py +0 -81
- haiku/rag/store/repositories/base.py +0 -40
- haiku/rag/store/upgrades/v0_3_4.py +0 -26
- haiku_rag-0.6.0.dist-info/RECORD +0 -41
- {haiku_rag-0.6.0.dist-info → haiku_rag-0.7.0.dist-info}/WHEEL +0 -0
- {haiku_rag-0.6.0.dist-info → haiku_rag-0.7.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.6.0.dist-info → haiku_rag-0.7.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/store/repositories/document.py
@@ -1,248 +1,214 @@
import json
+from datetime import datetime
from typing import TYPE_CHECKING
+from uuid import uuid4

from docling_core.types.doc.document import DoclingDocument

+from haiku.rag.store.engine import DocumentRecord, Store
from haiku.rag.store.models.document import Document
-from haiku.rag.store.repositories.base import BaseRepository
-from haiku.rag.utils import text_to_docling_document

if TYPE_CHECKING:
    from haiku.rag.store.models.chunk import Chunk


-class DocumentRepository
-    """Repository for Document
+class DocumentRepository:
+    """Repository for Document operations."""

-    def __init__(self, store
-
-
-
+    def __init__(self, store: Store) -> None:
+        self.store = store
+        self._chunk_repository = None
+
+    @property
+    def chunk_repository(self):
+        """Lazy-load ChunkRepository when needed."""
+        if self._chunk_repository is None:
            from haiku.rag.store.repositories.chunk import ChunkRepository

-
-            self.
+            self._chunk_repository = ChunkRepository(self.store)
+        return self._chunk_repository

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        try:
-            # Insert the document
-            cursor.execute(
-                """
-                INSERT INTO documents (content, uri, metadata, created_at, updated_at)
-                VALUES (:content, :uri, :metadata, :created_at, :updated_at)
-                """,
-                {
-                    "content": entity.content,
-                    "uri": entity.uri,
-                    "metadata": json.dumps(entity.metadata),
-                    "created_at": entity.created_at,
-                    "updated_at": entity.updated_at,
-                },
-            )
-
-            document_id = cursor.lastrowid
-            assert document_id is not None, "Failed to create document in database"
-            entity.id = document_id
-
-            # Create chunks - either use provided chunks or generate from content
-            if chunks is not None:
-                # Use provided chunks, but update their document_id and set order from list position
-                for order, chunk in enumerate(chunks):
-                    chunk.document_id = document_id
-                    # Ensure order is set from list position
-                    chunk.metadata = chunk.metadata.copy() if chunk.metadata else {}
-                    chunk.metadata["order"] = order
-                    await self.chunk_repository.create(chunk, commit=False)
-            else:
-                # Create chunks and embeddings using DoclingDocument
-                await self.chunk_repository.create_chunks_for_document(
-                    document_id, docling_document, commit=False
-                )
-
-            cursor.execute("COMMIT")
-            return entity
-
-        except Exception:
-            cursor.execute("ROLLBACK")
-            raise
+    def _record_to_document(self, record: DocumentRecord) -> Document:
+        """Convert a DocumentRecord to a Document model."""
+        return Document(
+            id=record.id,
+            content=record.content,
+            uri=record.uri,
+            metadata=json.loads(record.metadata) if record.metadata else {},
+            created_at=datetime.fromisoformat(record.created_at)
+            if record.created_at
+            else datetime.now(),
+            updated_at=datetime.fromisoformat(record.updated_at)
+            if record.updated_at
+            else datetime.now(),
+        )

    async def create(self, entity: Document) -> Document:
-        """Create a document
-        #
-
+        """Create a document in the database."""
+        # Generate new UUID
+        doc_id = str(uuid4())
+
+        # Create timestamp
+        now = datetime.now().isoformat()
+
+        # Create document record
+        doc_record = DocumentRecord(
+            id=doc_id,
+            content=entity.content,
+            uri=entity.uri,
+            metadata=json.dumps(entity.metadata),
+            created_at=now,
+            updated_at=now,
+        )
+
+        # Add to table
+        self.store.documents_table.add([doc_record])

-
+        entity.id = doc_id
+        entity.created_at = datetime.fromisoformat(now)
+        entity.updated_at = datetime.fromisoformat(now)
+        return entity

-    async def get_by_id(self, entity_id:
+    async def get_by_id(self, entity_id: str) -> Document | None:
        """Get a document by its ID."""
-
-
-
-
-
-            """
-            SELECT id, content, uri, metadata, created_at, updated_at
-            FROM documents WHERE id = :id
-            """,
-            {"id": entity_id},
+        results = list(
+            self.store.documents_table.search()
+            .where(f"id = '{entity_id}'")
+            .limit(1)
+            .to_pydantic(DocumentRecord)
        )

-
-        if row is None:
+        if not results:
            return None

-
-        metadata = json.loads(metadata_json) if metadata_json else {}
-
-        return Document(
-            id=document_id,
-            content=content,
-            uri=uri,
-            metadata=metadata,
-            created_at=created_at,
-            updated_at=updated_at,
-        )
+        return self._record_to_document(results[0])

-    async def
-        """
-
-
-
-
-
-
-
-        ""
-        {
+    async def update(self, entity: Document) -> Document:
+        """Update an existing document."""
+        assert entity.id, "Document ID is required for update"
+
+        # Update timestamp
+        now = datetime.now().isoformat()
+        entity.updated_at = datetime.fromisoformat(now)
+
+        # Update the record
+        self.store.documents_table.update(
+            where=f"id = '{entity.id}'",
+            values={
+                "content": entity.content,
+                "uri": entity.uri,
+                "metadata": json.dumps(entity.metadata),
+                "updated_at": now,
+            },
        )

-
-        if row is None:
-            return None
+        return entity

-
-
+    async def delete(self, entity_id: str) -> bool:
+        """Delete a document by its ID."""
+        # Check if document exists
+        doc = await self.get_by_id(entity_id)
+        if doc is None:
+            return False

-
-
-            content=content,
-            uri=uri,
-            metadata=metadata,
-            created_at=created_at,
-            updated_at=updated_at,
-        )
+        # Delete associated chunks first
+        await self.chunk_repository.delete_by_document_id(entity_id)

-
-    self
-
-        """Update an existing document and regenerate its chunks and embeddings."""
-        if self.store._connection is None:
-            raise ValueError("Store connection is not available")
-        if entity.id is None:
-            raise ValueError("Document ID is required for update")
-
-        cursor = self.store._connection.cursor()
-
-        # Start transaction
-        cursor.execute("BEGIN TRANSACTION")
-
-        try:
-            # Update the document
-            cursor.execute(
-                """
-                UPDATE documents
-                SET content = :content, uri = :uri, metadata = :metadata, updated_at = :updated_at
-                WHERE id = :id
-                """,
-                {
-                    "content": entity.content,
-                    "uri": entity.uri,
-                    "metadata": json.dumps(entity.metadata),
-                    "updated_at": entity.updated_at,
-                    "id": entity.id,
-                },
-            )
+        # Delete the document
+        self.store.documents_table.delete(f"id = '{entity_id}'")
+        return True

-
-
-
-
-
+    async def list_all(
+        self, limit: int | None = None, offset: int | None = None
+    ) -> list[Document]:
+        """List all documents with optional pagination."""
+        query = self.store.documents_table.search()

-
-
+        if offset is not None:
+            query = query.offset(offset)
+        if limit is not None:
+            query = query.limit(limit)

-
-
-            raise
+        results = list(query.to_pydantic(DocumentRecord))
+        return [self._record_to_document(doc) for doc in results]

-    async def
-        """
-
-
+    async def get_by_uri(self, uri: str) -> Document | None:
+        """Get a document by its URI."""
+        results = list(
+            self.store.documents_table.search()
+            .where(f"uri = '{uri}'")
+            .limit(1)
+            .to_pydantic(DocumentRecord)
+        )

-
+        if not results:
+            return None

-
-        """Delete a document and all its associated chunks and embeddings."""
-        # Delete chunks and embeddings first
-        await self.chunk_repository.delete_by_document_id(entity_id)
+        return self._record_to_document(results[0])

-
-
+    async def delete_all(self) -> None:
+        """Delete all documents from the database."""
+        # Delete all chunks first
+        await self.chunk_repository.delete_all()

-
-
+        # Get count before deletion
+        count = len(
+            list(
+                self.store.documents_table.search().limit(1).to_pydantic(DocumentRecord)
+            )
+        )
+        if count > 0:
+            # Drop and recreate table to clear all data
+            self.store.db.drop_table("documents")
+            self.store.documents_table = self.store.db.create_table(
+                "documents", schema=DocumentRecord
+            )

-
-    self
-
+    async def _create_with_docling(
+        self,
+        entity: Document,
+        docling_document: DoclingDocument,
+        chunks: list["Chunk"] | None = None,
+    ) -> Document:
+        """Create a document with its chunks and embeddings."""
+        # Create the document
+        created_doc = await self.create(entity)

-
-
-
-
-
-
+        # Create chunks if not provided
+        if chunks is None:
+            assert created_doc.id is not None, (
+                "Document ID should not be None after creation"
+            )
+            await self.chunk_repository.create_chunks_for_document(
+                created_doc.id, docling_document
+            )
+        else:
+            # Use provided chunks, set order from list position
+            assert created_doc.id is not None, (
+                "Document ID should not be None after creation"
+            )
+            for order, chunk in enumerate(chunks):
+                chunk.document_id = created_doc.id
+                chunk.metadata["order"] = order
+                await self.chunk_repository.create(chunk)

-
-        query = "SELECT id, content, uri, metadata, created_at, updated_at FROM documents ORDER BY created_at DESC"
-        params = {}
+        return created_doc

-
-
-
+    async def _update_with_docling(
+        self, entity: Document, docling_document: DoclingDocument
+    ) -> Document:
+        """Update a document and regenerate its chunks."""
+        # Delete existing chunks
+        assert entity.id is not None, "Document ID is required for update"
+        await self.chunk_repository.delete_by_document_id(entity.id)
+
+        # Update the document
+        updated_doc = await self.update(entity)
+
+        # Create new chunks
+        assert updated_doc.id is not None, "Document ID should not be None after update"
+        await self.chunk_repository.create_chunks_for_document(
+            updated_doc.id, docling_document
+        )

-
-        query += " OFFSET :offset"
-        params["offset"] = offset
-
-        cursor.execute(query, params)
-        rows = cursor.fetchall()
-
-        return [
-            Document(
-                id=document_id,
-                content=content,
-                uri=uri,
-                metadata=json.loads(metadata_json) if metadata_json else {},
-                created_at=created_at,
-                updated_at=updated_at,
-            )
-            for document_id, content, uri, metadata_json, created_at, updated_at in rows
-        ]
+        return updated_doc
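Note: the rewritten repository replaces raw SQLite cursors with a table API (documents_table.add(...), search().where(...).limit(...).to_pydantic(...)) and switches primary keys from SQLite rowids to client-generated UUID strings. A minimal usage sketch under those assumptions — the Store constructor argument shown here is hypothetical; the actual signature lives in haiku/rag/store/engine.py, which this diff does not show:

    import asyncio

    from haiku.rag.store.engine import Store
    from haiku.rag.store.models.document import Document
    from haiku.rag.store.repositories.document import DocumentRepository

    async def main() -> None:
        store = Store("./haiku-rag.db")  # hypothetical path argument
        repo = DocumentRepository(store)

        # IDs are now client-generated UUID strings, assigned inside create().
        doc = await repo.create(Document(content="hello world", uri="file:///tmp/hello.txt"))

        # Lookups go through the table's search() builder rather than SQL.
        fetched = await repo.get_by_id(doc.id)
        assert fetched is not None and fetched.content == "hello world"

    asyncio.run(main())

A design note visible in the diff itself: get_by_id and get_by_uri interpolate values directly into the .where() filter string (f"id = '{entity_id}'"), so the new code has none of the parameter binding the old :id-style SQL placeholders provided.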
haiku/rag/store/repositories/settings.py
@@ -1,77 +1,143 @@
import json
-from typing import Any

-from haiku.rag.
+from haiku.rag.config import Config
+from haiku.rag.store.engine import SettingsRecord, Store


class ConfigMismatchError(Exception):
-    """Raised when
+    """Raised when stored config doesn't match current config."""

    pass


class SettingsRepository:
-
+    """Repository for Settings operations."""
+
+    def __init__(self, store: Store) -> None:
        self.store = store

-    def
-        """
-
-
+    async def create(self, entity: dict) -> dict:
+        """Create settings in the database."""
+        settings_record = SettingsRecord(id="settings", settings=json.dumps(entity))
+        self.store.settings_table.add([settings_record])
+        return entity
+
+    async def get_by_id(self, entity_id: str) -> dict | None:
+        """Get settings by ID."""
+        results = list(
+            self.store.settings_table.search()
+            .where(f"id = '{entity_id}'")
+            .limit(1)
+            .to_pydantic(SettingsRecord)
+        )

-
-
-        if row:
-            return json.loads(row[0])
-        return {}
+        if not results:
+            return None

-
-        """Sync settings from the current AppConfig to database."""
-        if self.store._connection is None:
-            raise ValueError("Store connection is not available")
+        return json.loads(results[0].settings) if results[0].settings else {}

-
+    async def update(self, entity: dict) -> dict:
+        """Update existing settings."""
+        self.store.settings_table.update(
+            where="id = 'settings'", values={"settings": json.dumps(entity)}
+        )
+        return entity
+
+    async def delete(self, entity_id: str) -> bool:
+        """Delete settings by ID."""
+        self.store.settings_table.delete(f"id = '{entity_id}'")
+        return True
+
+    async def list_all(
+        self, limit: int | None = None, offset: int | None = None
+    ) -> list[dict]:
+        """List all settings."""
+        results = list(self.store.settings_table.search().to_pydantic(SettingsRecord))
+        return [
+            json.loads(record.settings) if record.settings else {} for record in results
+        ]

-
+    def get_current_settings(self) -> dict:
+        """Get the current settings."""
+        results = list(
+            self.store.settings_table.search()
+            .where("id = 'settings'")
+            .limit(1)
+            .to_pydantic(SettingsRecord)
+        )
+
+        if not results:
+            return {}
+
+        return json.loads(results[0].settings) if results[0].settings else {}
+
+    def save_current_settings(self) -> None:
+        """Save the current configuration to the database."""
+        current_config = Config.model_dump(mode="json")

-
-
-        (
+        # Check if settings exist
+        existing = list(
+            self.store.settings_table.search()
+            .where("id = 'settings'")
+            .limit(1)
+            .to_pydantic(SettingsRecord)
        )

-
+        if existing:
+            # Update existing settings
+            self.store.settings_table.update(
+                where="id = 'settings'", values={"settings": json.dumps(current_config)}
+            )
+        else:
+            # Create new settings
+            settings_record = SettingsRecord(
+                id="settings", settings=json.dumps(current_config)
+            )
+            self.store.settings_table.add([settings_record])

    def validate_config_compatibility(self) -> None:
-        """
-
-        Raises ConfigMismatchError if there are incompatible differences.
-        If no settings exist, saves current config.
-        """
-        db_settings = self.get()
-        if not db_settings:
-            # No settings in DB, save current config
-            self.save()
-            return
+        """Validate that the current configuration is compatible with stored settings."""
+        stored_settings = self.get_current_settings()

-
+        # If no stored settings, this is a new database - save current config and return
+        if not stored_settings:
+            self.save_current_settings()
+            return

        current_config = Config.model_dump(mode="json")

-        #
-
-
-
-
-
-
+        # Check if embedding provider or model has changed
+        stored_provider = stored_settings.get("EMBEDDINGS_PROVIDER")
+        current_provider = current_config.get("EMBEDDINGS_PROVIDER")
+
+        stored_model = stored_settings.get("EMBEDDINGS_MODEL")
+        current_model = current_config.get("EMBEDDINGS_MODEL")
+
+        stored_vector_dim = stored_settings.get("EMBEDDINGS_VECTOR_DIM")
+        current_vector_dim = current_config.get("EMBEDDINGS_VECTOR_DIM")
+
+        # Check for incompatible changes
+        incompatible_changes = []
+
+        if stored_provider and stored_provider != current_provider:
+            incompatible_changes.append(
+                f"Embedding provider changed from '{stored_provider}' to '{current_provider}'"
+            )
+
+        if stored_model and stored_model != current_model:
+            incompatible_changes.append(
+                f"Embedding model changed from '{stored_model}' to '{current_model}'"
+            )

-
-
-
-
-                f"{setting}: current={current_config.get(setting)}, stored={db_settings.get(setting)}"
-            )
+        if stored_vector_dim and stored_vector_dim != current_vector_dim:
+            incompatible_changes.append(
+                f"Vector dimension changed from {stored_vector_dim} to {current_vector_dim}"
+            )

-        if
-        error_msg =
+        if incompatible_changes:
+            error_msg = (
+                "Database configuration is incompatible with current settings:\n"
+                + "\n".join(f"  - {change}" for change in incompatible_changes)
+            )
+            error_msg += "\n\nPlease rebuild the database using: haiku-rag rebuild"
            raise ConfigMismatchError(error_msg)