haiku.rag 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

haiku/rag/migration.py ADDED
@@ -0,0 +1,316 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Migration script to migrate from SQLite to LanceDB.
4
+
5
+ This script will:
6
+ 1. Read data from an existing SQLite database
7
+ 2. Create a new LanceDB database with the same data
8
+ 3. Preserve all documents, chunks, embeddings, and settings
9
+ """
10
+
11
+ import json
12
+ import sqlite3
13
+ import struct
14
+ from pathlib import Path
15
+ from uuid import uuid4
16
+
17
+ from rich.console import Console
18
+ from rich.progress import Progress, TaskID
19
+
20
+ from haiku.rag.store.engine import Store
21
+
22
+
23
def deserialize_sqlite_embedding(data: bytes) -> list[float]:
    """Deserialize a sqlite-vec embedding blob into a list of floats.

    The blob is interpreted as a packed array of native-order float32 values
    (the ``"f"`` struct format).

    Args:
        data: Raw embedding bytes; ``None`` or an empty value yields ``[]``.

    Returns:
        The decoded floats, one per 4-byte group.

    Raises:
        struct.error: If the blob length is not a whole number of float32
            values.
    """
    if not data:
        return []

    float_size = struct.calcsize("f")
    num_floats, leftover = divmod(len(data), float_size)
    if leftover:
        # Fail with an explicit message instead of the opaque size-mismatch
        # error struct.unpack would raise for a truncated/corrupt blob.
        raise struct.error(
            f"embedding blob length {len(data)} is not a multiple of "
            f"{float_size} bytes"
        )
    return list(struct.unpack(f"{num_floats}f", data))
30
+
31
+
32
class SQLiteToLanceDBMigrator:
    """Migrates documents, chunks (with embeddings) and settings from a
    SQLite database into a LanceDB store.

    Integer primary keys from SQLite are replaced by freshly generated UUID
    strings; chunk rows are re-linked to their documents through the
    old-id -> new-uuid mapping built while migrating documents.
    """

    def __init__(self, sqlite_path: Path, lancedb_path: Path):
        """Remember the source/target paths and create the console used for output.

        Args:
            sqlite_path: Path of the existing SQLite database file.
            lancedb_path: Path handed to ``Store`` for the LanceDB database.
        """
        self.sqlite_path = sqlite_path
        self.lancedb_path = lancedb_path
        self.console = Console()

    def migrate(self) -> bool:
        """Perform the migration.

        Returns:
            True when every step completed, False when the SQLite file is
            missing or any step raised (the error and traceback are printed
            to the console rather than propagated).
        """
        try:
            self.console.print(
                f"[blue]Starting migration from {self.sqlite_path} to {self.lancedb_path}[/blue]"
            )

            # Check if SQLite database exists
            if not self.sqlite_path.exists():
                self.console.print(
                    f"[red]SQLite database not found: {self.sqlite_path}[/red]"
                )
                return False

            sqlite_conn = sqlite3.connect(self.sqlite_path)
            try:
                # Row factory gives name-based column access in the helpers.
                sqlite_conn.row_factory = sqlite3.Row

                # Validation is skipped: the store is being populated here.
                lance_store = Store(self.lancedb_path, skip_validation=True)

                with Progress() as progress:
                    doc_task = progress.add_task(
                        "[green]Migrating documents...", total=None
                    )
                    document_id_mapping = self._migrate_documents(
                        sqlite_conn, lance_store, progress, doc_task
                    )

                    chunk_task = progress.add_task(
                        "[yellow]Migrating chunks and embeddings...", total=None
                    )
                    self._migrate_chunks(
                        sqlite_conn,
                        lance_store,
                        progress,
                        chunk_task,
                        document_id_mapping,
                    )

                    settings_task = progress.add_task(
                        "[blue]Migrating settings...", total=None
                    )
                    self._migrate_settings(
                        sqlite_conn, lance_store, progress, settings_task
                    )
            finally:
                # Always release the SQLite handle, even when a step fails.
                sqlite_conn.close()

            # Optimize the chunks table after migration (best effort: a
            # failure here is reported but does not fail the migration).
            self.console.print("[blue]Optimizing LanceDB...[/blue]")
            try:
                lance_store.chunks_table.optimize()
                self.console.print("[green]✅ Optimization completed[/green]")
            except Exception as e:
                self.console.print(
                    f"[yellow]Warning: Optimization failed: {e}[/yellow]"
                )

            lance_store.close()

            self.console.print("[green]✅ Migration completed successfully![/green]")
            self.console.print(
                f"[green]✅ Migrated {len(document_id_mapping)} documents[/green]"
            )
            return True

        except Exception as e:
            # Top-level boundary: report the failure and signal it via False.
            self.console.print(f"[red]❌ Migration failed: {e}[/red]")
            import traceback

            self.console.print(f"[red]{traceback.format_exc()}[/red]")
            return False

    @staticmethod
    def _normalize_metadata(raw) -> str:
        """Return *raw* JSON re-serialized, or ``"{}"`` when it is empty/NULL."""
        return json.dumps(json.loads(raw) if raw else {})

    def _migrate_documents(
        self,
        sqlite_conn: sqlite3.Connection,
        lance_store: Store,
        progress: Progress,
        task: TaskID,
    ) -> dict[int, str]:
        """Migrate documents from SQLite to LanceDB and return the ID mapping.

        Args:
            sqlite_conn: Open SQLite connection using ``sqlite3.Row`` rows.
            lance_store: Target store; rows are added to ``documents_table``.
            progress: Progress display to update.
            task: Progress task representing this step.

        Returns:
            Mapping from each old integer document id to its new UUID string.
        """
        from haiku.rag.store.engine import DocumentRecord

        cursor = sqlite_conn.cursor()
        cursor.execute(
            "SELECT id, content, uri, metadata, created_at, updated_at FROM documents ORDER BY id"
        )

        id_mapping: dict[int, str] = {}  # Maps old integer ID to new UUID
        doc_records = []
        for row in cursor.fetchall():
            new_uuid = str(uuid4())
            id_mapping[row["id"]] = new_uuid
            doc_records.append(
                DocumentRecord(
                    id=new_uuid,
                    content=row["content"],
                    uri=row["uri"],
                    metadata=self._normalize_metadata(row["metadata"]),
                    # NULL timestamps fall back to the record's "" default
                    # instead of failing validation of the str field.
                    created_at=row["created_at"] or "",
                    updated_at=row["updated_at"] or "",
                )
            )

        # Batch insert documents to LanceDB
        if doc_records:
            lance_store.documents_table.add(doc_records)

        progress.update(task, completed=len(doc_records), total=len(doc_records))
        return id_mapping

    def _load_embedding_blobs(self, cursor: sqlite3.Cursor) -> dict:
        """Read raw embedding blobs keyed by the ``chunk_id`` column.

        Only the first non-empty blob per key is kept. If the tables cannot
        be queried, a warning is printed and whatever was collected (normally
        nothing) is returned so the migration can continue without embeddings.

        NOTE(review): this queries sqlite-vec's internal shadow tables
        directly; confirm that ``chunk_id``/``vectors`` there correspond to
        one application chunk and one embedding per row.
        """
        embeddings_map: dict = {}
        try:
            cursor.execute("""
                SELECT
                    r.chunk_id,
                    v.vectors
                FROM chunk_embeddings_rowids r
                JOIN chunk_embeddings_vector_chunks00 v ON r.rowid = v.rowid
            """)

            for row in cursor.fetchall():
                chunk_id, vectors_blob = row[0], row[1]
                if vectors_blob and chunk_id not in embeddings_map:
                    embeddings_map[chunk_id] = vectors_blob

        except sqlite3.OperationalError as e:
            self.console.print(
                f"[yellow]Warning: Could not extract embeddings: {e}[/yellow]"
            )
            self.console.print(
                "[yellow]Continuing migration without embeddings...[/yellow]"
            )
        return embeddings_map

    def _migrate_chunks(
        self,
        sqlite_conn: sqlite3.Connection,
        lance_store: Store,
        progress: Progress,
        task: TaskID,
        document_id_mapping: dict[int, str],
    ):
        """Migrate chunks and embeddings from SQLite to LanceDB.

        Chunks whose document id is missing from *document_id_mapping* are
        skipped with a warning. Chunks without a usable embedding get a zero
        vector of the store embedder's dimension.

        Args:
            sqlite_conn: Open SQLite connection using ``sqlite3.Row`` rows.
            lance_store: Target store; rows are added to ``chunks_table``.
            progress: Progress display to update.
            task: Progress task representing this step.
            document_id_mapping: Old integer document id -> new UUID string.
        """
        cursor = sqlite_conn.cursor()

        # Get chunks first
        cursor.execute("""
            SELECT id, document_id, content, metadata
            FROM chunks
            ORDER BY id
        """)
        chunks_data = cursor.fetchall()

        # Get embeddings separately to avoid vec0 virtual table issues
        embeddings_map = self._load_embedding_blobs(cursor)

        vector_dim = lance_store.embedder._vector_dim
        chunk_records = []
        for row in chunks_data:
            # Map the old document_id to new UUID
            document_uuid = document_id_mapping.get(row["document_id"])
            if not document_uuid:
                self.console.print(
                    f"[yellow]Warning: Document ID {row['document_id']} not found in mapping for chunk {row['id']}[/yellow]"
                )
                continue

            embedding = None
            embedding_blob = embeddings_map.get(row["id"])
            if embedding_blob:
                try:
                    embedding = deserialize_sqlite_embedding(embedding_blob)
                except Exception as e:
                    self.console.print(
                        f"[yellow]Warning: Failed to deserialize embedding for chunk {row['id']}: {e}[/yellow]"
                    )
            if embedding is None:
                # Missing or unreadable embedding: use a zero vector.
                embedding = [0.0] * vector_dim

            chunk_records.append(
                lance_store.ChunkRecord(
                    id=str(uuid4()),
                    document_id=document_uuid,
                    content=row["content"],
                    metadata=self._normalize_metadata(row["metadata"]),
                    vector=embedding,
                )
            )

        # Batch insert chunks to LanceDB
        if chunk_records:
            lance_store.chunks_table.add(chunk_records)

        progress.update(task, completed=len(chunk_records), total=len(chunk_records))

    def _migrate_settings(
        self,
        sqlite_conn: sqlite3.Connection,
        lance_store: Store,
        progress: Progress,
        task: TaskID,
    ):
        """Migrate settings from SQLite to LanceDB.

        Copies the settings row with ``id = 1`` into the LanceDB record whose
        id is ``'settings'``. A missing settings table is reported and
        treated as "nothing to migrate".

        Args:
            sqlite_conn: Open SQLite connection using ``sqlite3.Row`` rows.
            lance_store: Target store; ``settings_table`` is updated.
            progress: Progress display to update.
            task: Progress task representing this step.
        """
        cursor = sqlite_conn.cursor()

        try:
            cursor.execute("SELECT id, settings FROM settings WHERE id = 1")
            row = cursor.fetchone()
        except sqlite3.OperationalError:
            # Settings table doesn't exist in old SQLite database
            self.console.print(
                "[yellow]No settings table found in SQLite database[/yellow]"
            )
            progress.update(task, completed=0, total=0)
            return

        if not row:
            progress.update(task, completed=0, total=0)
            return

        # Update the existing settings in LanceDB (use string ID)
        lance_store.settings_table.update(
            where="id = 'settings'",
            values={"settings": self._normalize_metadata(row["settings"])},
        )
        progress.update(task, completed=1, total=1)
296
+
297
+
298
async def migrate_sqlite_to_lancedb(
    sqlite_path: Path, lancedb_path: Path | None = None
) -> bool:
    """Run a SQLite -> LanceDB migration for the given database file.

    Args:
        sqlite_path: Location of the SQLite database to read from.
        lancedb_path: Destination for the LanceDB database. When omitted, a
            sibling of *sqlite_path* named ``<stem>.lancedb`` is used.

    Returns:
        Whatever ``SQLiteToLanceDBMigrator.migrate`` reports: True on
        success, False otherwise.
    """
    target = lancedb_path
    if target is None:
        # Derive the default target next to the source database.
        target = sqlite_path.parent / (sqlite_path.stem + ".lancedb")

    return SQLiteToLanceDBMigrator(sqlite_path, target).migrate()
@@ -31,10 +31,4 @@ def get_reranker() -> RerankerBase | None:
31
31
  except ImportError:
32
32
  return None
33
33
 
34
- if Config.RERANK_PROVIDER == "ollama":
35
- from haiku.rag.reranking.ollama import OllamaReranker
36
-
37
- _reranker = OllamaReranker()
38
- return _reranker
39
-
40
34
  return None
haiku/rag/store/engine.py CHANGED
@@ -1,171 +1,203 @@
1
- import sqlite3
2
- import struct
1
+ import json
2
+ import logging
3
3
  from importlib import metadata
4
4
  from pathlib import Path
5
- from typing import Literal
5
+ from uuid import uuid4
6
6
 
7
- import sqlite_vec
8
- from packaging.version import parse
9
- from rich.console import Console
7
+ import lancedb
8
+ from lancedb.pydantic import LanceModel, Vector
9
+ from pydantic import Field
10
10
 
11
11
  from haiku.rag.config import Config
12
12
  from haiku.rag.embeddings import get_embedder
13
- from haiku.rag.store.upgrades import upgrades
14
- from haiku.rag.utils import int_to_semantic_version, semantic_version_to_int
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class DocumentRecord(LanceModel):
    """Row model for the ``documents`` table."""

    # Generated UUID string unless the caller supplies one.
    id: str = Field(default_factory=lambda: str(uuid4()))
    content: str
    uri: str | None = None
    # Metadata is kept as a JSON-encoded string.
    metadata: str = "{}"
    # Timestamps are plain strings and default to empty.
    created_at: str = Field(default="")
    updated_at: str = Field(default="")
24
+
25
+
26
def create_chunk_model(vector_dim: int):
    """Build and return a ``ChunkRecord`` model class for *vector_dim*.

    The vector width is part of the field type, so the class is generated
    per dimension rather than declared once at module level.
    """

    def _zero_vector() -> list[float]:
        # Default embedding: all zeros at the requested width.
        return [0.0] * vector_dim

    class ChunkRecord(LanceModel):
        # Generated UUID string unless the caller supplies one.
        id: str = Field(default_factory=lambda: str(uuid4()))
        document_id: str
        content: str
        # Metadata is kept as a JSON-encoded string.
        metadata: str = "{}"
        vector: Vector(vector_dim) = Field(default_factory=_zero_vector)  # type: ignore

    return ChunkRecord
40
+
41
+
42
class SettingsRecord(LanceModel):
    """Row model for the ``settings`` table."""

    # Record key; defaults to the literal "settings".
    id: str = "settings"
    # Settings payload stored as a JSON-encoded string.
    settings: str = "{}"
15
45
 
16
46
 
17
47
  class Store:
18
- def __init__(
19
- self, db_path: Path | Literal[":memory:"], skip_validation: bool = False
20
- ):
21
- self.db_path: Path | Literal[":memory:"] = db_path
48
+ def __init__(self, db_path: Path, skip_validation: bool = False):
49
+ self.db_path: Path = db_path
50
+ self.embedder = get_embedder()
51
+
52
+ # Create the ChunkRecord model with the correct vector dimension
53
+ self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
54
+
55
+ # Connect to LanceDB
56
+ self.db = self._connect_to_lancedb(db_path)
57
+
58
+ # Initialize tables
22
59
  self.create_or_update_db()
23
60
 
24
61
  # Validate config compatibility after connection is established
25
62
  if not skip_validation:
26
- from haiku.rag.store.repositories.settings import SettingsRepository
63
+ self._validate_configuration()
64
+
65
+ def _connect_to_lancedb(self, db_path: Path):
66
+ """Establish connection to LanceDB (local, cloud, or object storage)."""
67
+ # Check if we have cloud configuration
68
+ if self._has_cloud_config():
69
+ return lancedb.connect(
70
+ uri=Config.LANCEDB_URI,
71
+ api_key=Config.LANCEDB_API_KEY,
72
+ region=Config.LANCEDB_REGION,
73
+ )
74
+ else:
75
+ # Local file system connection
76
+ return lancedb.connect(db_path)
77
+
78
+ def _has_cloud_config(self) -> bool:
79
+ """Check if cloud configuration is complete."""
80
+ return bool(
81
+ Config.LANCEDB_URI and Config.LANCEDB_API_KEY and Config.LANCEDB_REGION
82
+ )
27
83
 
28
- settings_repo = SettingsRepository(self)
29
- settings_repo.validate_config_compatibility()
30
- current_version = metadata.version("haiku.rag")
31
- self.set_user_version(current_version)
84
+ def _validate_configuration(self) -> None:
85
+ """Validate that the configuration is compatible with the database."""
86
+ from haiku.rag.store.repositories.settings import SettingsRepository
87
+
88
+ settings_repo = SettingsRepository(self)
89
+ settings_repo.validate_config_compatibility()
32
90
 
33
91
  def create_or_update_db(self):
34
- """Create the database and tables with sqlite-vec support for embeddings."""
35
- current_version = metadata.version("haiku.rag")
92
+ """Create the database tables."""
36
93
 
37
- db = sqlite3.connect(self.db_path)
38
- db.enable_load_extension(True)
39
- sqlite_vec.load(db)
40
-
41
- # Enable WAL mode for better concurrency (skip for in-memory databases)
42
- if self.db_path != ":memory:":
43
- db.execute("PRAGMA journal_mode=WAL")
44
-
45
- self._connection = db
46
- existing_tables = [
47
- row[0]
48
- for row in db.execute(
49
- "SELECT name FROM sqlite_master WHERE type='table';"
50
- ).fetchall()
51
- ]
52
-
53
- # If we have a db already, perform upgrades and return
54
- if self.db_path != ":memory:" and "documents" in existing_tables:
55
- # Upgrade database
56
- console = Console()
57
- db_version = self.get_user_version()
58
- for version, steps in upgrades:
59
- if parse(current_version) >= parse(version) and parse(version) > parse(
60
- db_version
61
- ):
62
- for step in steps:
63
- step(db)
64
- console.print(
65
- f"[green][b]DB Upgrade: [/b]{step.__doc__}[/green]"
66
- )
67
- return
68
-
69
- # Create documents table
70
- db.execute("""
71
- CREATE TABLE IF NOT EXISTS documents (
72
- id INTEGER PRIMARY KEY AUTOINCREMENT,
73
- content TEXT NOT NULL,
74
- uri TEXT,
75
- metadata TEXT DEFAULT '{}',
76
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
77
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
78
- )
79
- """)
80
- # Create chunks table
81
- db.execute("""
82
- CREATE TABLE IF NOT EXISTS chunks (
83
- id INTEGER PRIMARY KEY AUTOINCREMENT,
84
- document_id INTEGER NOT NULL,
85
- content TEXT NOT NULL,
86
- metadata TEXT DEFAULT '{}',
87
- FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
88
- )
89
- """)
90
- # Create vector table for chunk embeddings
91
- embedder = get_embedder()
92
- db.execute(f"""
93
- CREATE VIRTUAL TABLE IF NOT EXISTS chunk_embeddings USING vec0(
94
- chunk_id INTEGER PRIMARY KEY,
95
- embedding FLOAT[{embedder._vector_dim}]
94
+ # Get list of existing tables
95
+ existing_tables = self.db.table_names()
96
+
97
+ # Create or get documents table
98
+ if "documents" in existing_tables:
99
+ self.documents_table = self.db.open_table("documents")
100
+ else:
101
+ self.documents_table = self.db.create_table(
102
+ "documents", schema=DocumentRecord
96
103
  )
97
- """)
98
- # Create FTS5 table for full-text search
99
- db.execute("""
100
- CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
101
- content,
102
- content='chunks',
103
- content_rowid='id'
104
+
105
+ # Create or get chunks table
106
+ if "chunks" in existing_tables:
107
+ self.chunks_table = self.db.open_table("chunks")
108
+ else:
109
+ self.chunks_table = self.db.create_table("chunks", schema=self.ChunkRecord)
110
+ # Create FTS index on the new table
111
+ self.chunks_table.create_fts_index("content", replace=True)
112
+
113
+ # Create or get settings table
114
+ if "settings" in existing_tables:
115
+ self.settings_table = self.db.open_table("settings")
116
+ else:
117
+ self.settings_table = self.db.create_table(
118
+ "settings", schema=SettingsRecord
104
119
  )
105
- """)
106
- # Create settings table for storing current configuration
107
- db.execute("""
108
- CREATE TABLE IF NOT EXISTS settings (
109
- id INTEGER PRIMARY KEY DEFAULT 1,
110
- settings TEXT NOT NULL DEFAULT '{}'
120
+ # Save current settings to the new database
121
+ settings_data = Config.model_dump(mode="json")
122
+ self.settings_table.add(
123
+ [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
111
124
  )
112
- """)
113
- # Save current settings to the new database
114
- settings_json = Config.model_dump_json()
115
- db.execute(
116
- "INSERT OR IGNORE INTO settings (id, settings) VALUES (1, ?)",
117
- (settings_json,),
118
- )
119
- # Create indexes for better performance
120
- db.execute(
121
- "CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)"
122
- )
123
- db.commit()
124
-
125
- def get_user_version(self) -> str:
126
- """Returns the SQLite user version"""
127
- if self._connection is None:
128
- raise ValueError("Store connection is not available")
129
125
 
130
- cursor = self._connection.execute("PRAGMA user_version;")
131
- version = cursor.fetchone()
132
- return int_to_semantic_version(version[0])
126
+ # Set current version in settings
127
+ current_version = metadata.version("haiku.rag")
128
+ self.set_haiku_version(current_version)
133
129
 
134
- def set_user_version(self, version: str) -> None:
135
- """Updates the SQLite user version"""
136
- if self._connection is None:
137
- raise ValueError("Store connection is not available")
130
+ # Check if we need to perform upgrades
131
+ try:
132
+ existing_settings = list(
133
+ self.settings_table.search().limit(1).to_pydantic(SettingsRecord)
134
+ )
135
+ if existing_settings:
136
+ db_version = self.get_haiku_version() # noqa: F841
137
+ # TODO: Add upgrade logic here similar to SQLite version when needed
138
+ except Exception:
139
+ # Settings table might not exist yet in fresh databases
140
+ pass
141
+
142
+ def get_haiku_version(self) -> str:
143
+ """Returns the user version stored in settings."""
144
+ settings_records = list(
145
+ self.settings_table.search().limit(1).to_pydantic(SettingsRecord)
146
+ )
147
+ if settings_records:
148
+ settings = (
149
+ json.loads(settings_records[0].settings)
150
+ if settings_records[0].settings
151
+ else {}
152
+ )
153
+ return settings.get("version", "0.0.0")
154
+ return "0.0.0"
138
155
 
139
- self._connection.execute(
140
- f"PRAGMA user_version = {semantic_version_to_int(version)};"
156
+ def set_haiku_version(self, version: str) -> None:
157
+ """Updates the user version in settings."""
158
+ settings_records = list(
159
+ self.settings_table.search().limit(1).to_pydantic(SettingsRecord)
141
160
  )
161
+ if settings_records:
162
+ settings = (
163
+ json.loads(settings_records[0].settings)
164
+ if settings_records[0].settings
165
+ else {}
166
+ )
167
+ settings["version"] = version
168
+ # Update the record
169
+ self.settings_table.update(
170
+ where="id = 'settings'", values={"settings": json.dumps(settings)}
171
+ )
172
+ else:
173
+ # Create new settings record
174
+ settings_data = Config.model_dump(mode="json")
175
+ settings_data["version"] = version
176
+ self.settings_table.add(
177
+ [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
178
+ )
142
179
 
143
180
  def recreate_embeddings_table(self) -> None:
144
- """Recreate the embeddings table with current vector dimensions."""
145
- if self._connection is None:
146
- raise ValueError("Store connection is not available")
147
-
148
- # Drop existing embeddings table
149
- self._connection.execute("DROP TABLE IF EXISTS chunk_embeddings")
150
-
151
- # Recreate with current dimensions
152
- embedder = get_embedder()
153
- self._connection.execute(f"""
154
- CREATE VIRTUAL TABLE chunk_embeddings USING vec0(
155
- chunk_id INTEGER PRIMARY KEY,
156
- embedding FLOAT[{embedder._vector_dim}]
157
- )
158
- """)
181
+ """Recreate the chunks table with current vector dimensions."""
182
+ # Drop and recreate chunks table
183
+ try:
184
+ self.db.drop_table("chunks")
185
+ except Exception:
186
+ pass
159
187
 
160
- self._connection.commit()
188
+ # Update the ChunkRecord model with new vector dimension
189
+ self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
190
+ self.chunks_table = self.db.create_table("chunks", schema=self.ChunkRecord)
161
191
 
162
- @staticmethod
163
- def serialize_embedding(embedding: list[float]) -> bytes:
164
- """Serialize a list of floats to bytes for sqlite-vec storage."""
165
- return struct.pack(f"{len(embedding)}f", *embedding)
192
+ # Create FTS index on the new table
193
+ self.chunks_table.create_fts_index("content", replace=True)
166
194
 
167
195
  def close(self):
168
- """Close the database connection if it's an in-memory database."""
169
- if self._connection is not None:
170
- self._connection.close()
171
- self._connection = None
196
+ """Close the database connection."""
197
+ # LanceDB connections are automatically managed
198
+ pass
199
+
200
+ @property
201
+ def _connection(self):
202
+ """Compatibility property for repositories expecting _connection."""
203
+ return self
@@ -6,8 +6,8 @@ class Chunk(BaseModel):
6
6
  Represents a chunk with content, metadata, and optional document information.
7
7
  """
8
8
 
9
- id: int | None = None
10
- document_id: int | None = None
9
+ id: str | None = None
10
+ document_id: str | None = None
11
11
  content: str
12
12
  metadata: dict = {}
13
13
  document_uri: str | None = None