haiku.rag 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/PKG-INFO +1 -1
  2. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/docs/benchmarks.md +2 -1
  3. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/docs/configuration.md +3 -0
  4. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/pyproject.toml +2 -2
  5. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/app.py +1 -1
  6. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/cli.py +18 -1
  7. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/client.py +23 -21
  8. haiku_rag-0.3.4/src/haiku/rag/store/engine.py +166 -0
  9. haiku_rag-0.3.4/src/haiku/rag/store/repositories/settings.py +78 -0
  10. haiku_rag-0.3.4/src/haiku/rag/store/upgrades/__init__.py +3 -0
  11. haiku_rag-0.3.4/src/haiku/rag/store/upgrades/v0_3_4.py +26 -0
  12. haiku_rag-0.3.4/src/haiku/rag/utils.py +80 -0
  13. haiku_rag-0.3.4/tests/test_client.py +451 -0
  14. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_monitor.py +6 -14
  15. haiku_rag-0.3.4/tests/test_rebuild.py +49 -0
  16. haiku_rag-0.3.4/tests/test_settings.py +80 -0
  17. haiku_rag-0.3.4/tests/test_utils.py +15 -0
  18. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/uv.lock +5 -5
  19. haiku_rag-0.3.3/src/haiku/rag/store/engine.py +0 -80
  20. haiku_rag-0.3.3/src/haiku/rag/utils.py +0 -25
  21. haiku_rag-0.3.3/tests/test_client.py +0 -499
  22. haiku_rag-0.3.3/tests/test_rebuild.py +0 -52
  23. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/.github/FUNDING.yml +0 -0
  24. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/.github/workflows/build-docs.yml +0 -0
  25. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/.github/workflows/build-publish.yml +0 -0
  26. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/.gitignore +0 -0
  27. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/.pre-commit-config.yaml +0 -0
  28. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/.python-version +0 -0
  29. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/LICENSE +0 -0
  30. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/README.md +0 -0
  31. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/docs/cli.md +0 -0
  32. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/docs/index.md +0 -0
  33. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/docs/installation.md +0 -0
  34. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/docs/mcp.md +0 -0
  35. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/docs/python.md +0 -0
  36. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/docs/server.md +0 -0
  37. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/mkdocs.yml +0 -0
  38. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/__init__.py +0 -0
  39. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/chunker.py +0 -0
  40. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/config.py +0 -0
  41. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/embeddings/__init__.py +0 -0
  42. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/embeddings/base.py +0 -0
  43. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/embeddings/ollama.py +0 -0
  44. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/embeddings/openai.py +0 -0
  45. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/embeddings/voyageai.py +0 -0
  46. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/logging.py +0 -0
  47. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/mcp.py +0 -0
  48. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/monitor.py +0 -0
  49. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/qa/__init__.py +0 -0
  50. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/qa/anthropic.py +0 -0
  51. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/qa/base.py +0 -0
  52. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/qa/ollama.py +0 -0
  53. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/qa/openai.py +0 -0
  54. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/qa/prompts.py +0 -0
  55. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/reader.py +0 -0
  56. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/store/__init__.py +0 -0
  57. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/store/models/__init__.py +0 -0
  58. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/store/models/chunk.py +0 -0
  59. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/store/models/document.py +0 -0
  60. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/store/repositories/__init__.py +0 -0
  61. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/store/repositories/base.py +0 -0
  62. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/store/repositories/chunk.py +0 -0
  63. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/src/haiku/rag/store/repositories/document.py +0 -0
  64. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/__init__.py +0 -0
  65. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/conftest.py +0 -0
  66. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/generate_benchmark_db.py +0 -0
  67. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/llm_judge.py +0 -0
  68. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_app.py +0 -0
  69. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_chunk.py +0 -0
  70. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_chunker.py +0 -0
  71. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_cli.py +0 -0
  72. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_document.py +0 -0
  73. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_embedder.py +0 -0
  74. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_qa.py +0 -0
  75. {haiku_rag-0.3.3 → haiku_rag-0.3.4}/tests/test_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Retrieval Augmented Generation (RAG) with SQLite
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -14,7 +14,8 @@ The recall obtained is ~0.73 for matching in the top result, raising to ~0.75 fo
14
14
 
15
15
  | Model | Document in top 1 | Document in top 3 |
16
16
  |---------------------------------------|-------------------|-------------------|
17
- | Ollama / `mxbai-embed-large` | 0.73 | 0.75 |
17
+ | Ollama / `mxbai-embed-large` | 0.77 | 0.89 |
18
+ | Ollama / `nomic-embed-text` | 0.74 | 0.88 |
18
19
  | OpenAI / `text-embeddings-3-small` | 0.75 | 0.88 |
19
20
 
20
21
  ## Question/Answer evaluation
@@ -2,6 +2,9 @@
2
2
 
3
3
  Configuration is done through the use of environment variables.
4
4
 
5
+ !!! note
6
+ If you create a db with certain settings and later change them, `haiku.rag` will detect incompatibilities (for example, if you change embedding provider) and will exit. You can **rebuild** the database to apply the new settings, see [Rebuild Database](./cli.md#rebuild-database).
7
+
5
8
  ## File Monitoring
6
9
 
7
10
  Set directories to monitor for automatic indexing:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "haiku.rag"
3
- version = "0.3.3"
3
+ version = "0.3.4"
4
4
  description = "Retrieval Augmented Generation (RAG) with SQLite"
5
5
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
6
6
  license = { text = "MIT" }
@@ -56,7 +56,7 @@ dev = [
56
56
  "mkdocs>=1.6.1",
57
57
  "mkdocs-material>=9.6.14",
58
58
  "pre-commit>=4.2.0",
59
- "pyright>=1.1.402",
59
+ "pyright>=1.1.403",
60
60
  "pytest>=8.4.0",
61
61
  "pytest-asyncio>=1.0.0",
62
62
  "pytest-cov>=6.2.1",
@@ -74,7 +74,7 @@ class HaikuRAGApp:
74
74
  self.console.print(f"[red]Error: {e}[/red]")
75
75
 
76
76
  async def rebuild(self):
77
- async with HaikuRAG(db_path=self.db_path) as client:
77
+ async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
78
78
  try:
79
79
  documents = await client.list_documents()
80
80
  total_docs = len(documents)
@@ -5,7 +5,7 @@ import typer
5
5
  from rich.console import Console
6
6
 
7
7
  from haiku.rag.app import HaikuRAGApp
8
- from haiku.rag.utils import get_default_data_dir
8
+ from haiku.rag.utils import get_default_data_dir, is_up_to_date
9
9
 
10
10
  cli = typer.Typer(
11
11
  context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
@@ -15,6 +15,23 @@ console = Console()
15
15
  event_loop = asyncio.get_event_loop()
16
16
 
17
17
 
18
async def check_version():
    """Print a console warning when the installed haiku.rag is behind the latest release.

    Relies on ``is_up_to_date()`` for the comparison; prints nothing when the
    installed version is reported as current.
    """
    is_current, installed, newest = await is_up_to_date()
    if is_current:
        return

    outdated_notice = f"[yellow]Warning: haiku.rag is outdated. Current: {installed}, Latest: {newest}[/yellow]"
    console.print(outdated_notice)
    console.print("[yellow]Please update.[/yellow]")
26
+
27
+
28
@cli.callback()
def main():
    """haiku.rag CLI - SQLite-based RAG system"""
    # NOTE(review): Typer appears to surface this docstring as the CLI help
    # text, so its wording is part of the user-facing output - confirm before
    # editing it.
    # The callback runs ahead of every sub-command, so the version check is
    # performed once per CLI invocation on the shared module-level event loop.
    version_check = check_version()
    event_loop.run_until_complete(version_check)
33
+
34
+
18
35
  @cli.command("list", help="List all stored documents")
19
36
  def list_documents(
20
37
  db: Path = typer.Option(
@@ -24,12 +24,13 @@ class HaikuRAG:
24
24
  self,
25
25
  db_path: Path | Literal[":memory:"] = Config.DEFAULT_DATA_DIR
26
26
  / "haiku.rag.sqlite",
27
+ skip_validation: bool = False,
27
28
  ):
28
29
  """Initialize the RAG client with a database path."""
29
30
  if isinstance(db_path, Path):
30
31
  if not db_path.parent.exists():
31
32
  Path.mkdir(db_path.parent, parents=True)
32
- self.store = Store(db_path)
33
+ self.store = Store(db_path, skip_validation=skip_validation)
33
34
  self.document_repository = DocumentRepository(self.store)
34
35
  self.chunk_repository = ChunkRepository(self.store)
35
36
 
@@ -165,29 +166,26 @@ class HaikuRAG:
165
166
 
166
167
  # Create a temporary file with the appropriate extension
167
168
  with tempfile.NamedTemporaryFile(
168
- mode="wb", suffix=file_extension, delete=False
169
+ mode="wb", suffix=file_extension
169
170
  ) as temp_file:
170
171
  temp_file.write(response.content)
172
+ temp_file.flush() # Ensure content is written to disk
171
173
  temp_path = Path(temp_file.name)
172
174
 
173
- try:
174
175
  # Parse the content using FileReader
175
176
  content = FileReader.parse_file(temp_path)
176
177
 
177
- # Merge metadata with contentType and md5
178
- metadata.update({"contentType": content_type, "md5": md5_hash})
179
-
180
- if existing_doc:
181
- existing_doc.content = content
182
- existing_doc.metadata = metadata
183
- return await self.update_document(existing_doc)
184
- else:
185
- return await self.create_document(
186
- content=content, uri=url, metadata=metadata
187
- )
188
- finally:
189
- # Clean up temporary file
190
- temp_path.unlink(missing_ok=True)
178
+ # Merge metadata with contentType and md5
179
+ metadata.update({"contentType": content_type, "md5": md5_hash})
180
+
181
+ if existing_doc:
182
+ existing_doc.content = content
183
+ existing_doc.metadata = metadata
184
+ return await self.update_document(existing_doc)
185
+ else:
186
+ return await self.create_document(
187
+ content=content, uri=url, metadata=metadata
188
+ )
191
189
 
192
190
  def _get_extension_from_content_type_or_url(
193
191
  self, url: str, content_type: str
@@ -277,12 +275,16 @@ class HaikuRAG:
277
275
  Yields:
278
276
  int: The ID of the document currently being processed
279
277
  """
280
- documents = await self.list_documents()
278
+ await self.chunk_repository.delete_all()
279
+ self.store.recreate_embeddings_table()
281
280
 
282
- if not documents:
283
- return
281
+ # Update settings to current config
282
+ from haiku.rag.store.repositories.settings import SettingsRepository
284
283
 
285
- await self.chunk_repository.delete_all()
284
+ settings_repo = SettingsRepository(self.store)
285
+ settings_repo.save()
286
+
287
+ documents = await self.list_documents()
286
288
 
287
289
  for doc in documents:
288
290
  if doc.id is not None:
@@ -0,0 +1,166 @@
1
+ import sqlite3
2
+ import struct
3
+ from importlib import metadata
4
+ from pathlib import Path
5
+ from typing import Literal
6
+
7
+ import sqlite_vec
8
+ from packaging.version import parse
9
+ from rich.console import Console
10
+
11
+ from haiku.rag.config import Config
12
+ from haiku.rag.embeddings import get_embedder
13
+ from haiku.rag.store.upgrades import upgrades
14
+ from haiku.rag.utils import int_to_semantic_version, semantic_version_to_int
15
+
16
+
17
+ class Store:
18
+ def __init__(
19
+ self, db_path: Path | Literal[":memory:"], skip_validation: bool = False
20
+ ):
21
+ self.db_path: Path | Literal[":memory:"] = db_path
22
+ self.create_or_update_db()
23
+
24
+ # Validate config compatibility after connection is established
25
+ if not skip_validation:
26
+ from haiku.rag.store.repositories.settings import SettingsRepository
27
+
28
+ settings_repo = SettingsRepository(self)
29
+ settings_repo.validate_config_compatibility()
30
+ current_version = metadata.version("haiku.rag")
31
+ self.set_user_version(current_version)
32
+
33
+ def create_or_update_db(self):
34
+ """Create the database and tables with sqlite-vec support for embeddings."""
35
+ current_version = metadata.version("haiku.rag")
36
+
37
+ db = sqlite3.connect(self.db_path)
38
+ db.enable_load_extension(True)
39
+ sqlite_vec.load(db)
40
+ self._connection = db
41
+ existing_tables = [
42
+ row[0]
43
+ for row in db.execute(
44
+ "SELECT name FROM sqlite_master WHERE type='table';"
45
+ ).fetchall()
46
+ ]
47
+
48
+ # If we have a db already, perform upgrades and return
49
+ if self.db_path != ":memory:" and "documents" in existing_tables:
50
+ # Upgrade database
51
+ console = Console()
52
+ db_version = self.get_user_version()
53
+ for version, steps in upgrades:
54
+ if parse(current_version) >= parse(version) and parse(version) > parse(
55
+ db_version
56
+ ):
57
+ for step in steps:
58
+ step(db)
59
+ console.print(
60
+ f"[green][b]DB Upgrade: [/b]{step.__doc__}[/green]"
61
+ )
62
+ return
63
+
64
+ # Create documents table
65
+ db.execute("""
66
+ CREATE TABLE IF NOT EXISTS documents (
67
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
68
+ content TEXT NOT NULL,
69
+ uri TEXT,
70
+ metadata TEXT DEFAULT '{}',
71
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
72
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
73
+ )
74
+ """)
75
+ # Create chunks table
76
+ db.execute("""
77
+ CREATE TABLE IF NOT EXISTS chunks (
78
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
79
+ document_id INTEGER NOT NULL,
80
+ content TEXT NOT NULL,
81
+ metadata TEXT DEFAULT '{}',
82
+ FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
83
+ )
84
+ """)
85
+ # Create vector table for chunk embeddings
86
+ embedder = get_embedder()
87
+ db.execute(f"""
88
+ CREATE VIRTUAL TABLE IF NOT EXISTS chunk_embeddings USING vec0(
89
+ chunk_id INTEGER PRIMARY KEY,
90
+ embedding FLOAT[{embedder._vector_dim}]
91
+ )
92
+ """)
93
+ # Create FTS5 table for full-text search
94
+ db.execute("""
95
+ CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
96
+ content,
97
+ content='chunks',
98
+ content_rowid='id'
99
+ )
100
+ """)
101
+ # Create settings table for storing current configuration
102
+ db.execute("""
103
+ CREATE TABLE IF NOT EXISTS settings (
104
+ id INTEGER PRIMARY KEY DEFAULT 1,
105
+ settings TEXT NOT NULL DEFAULT '{}'
106
+ )
107
+ """)
108
+ # Save current settings to the new database
109
+ settings_json = Config.model_dump_json()
110
+ db.execute(
111
+ "INSERT OR IGNORE INTO settings (id, settings) VALUES (1, ?)",
112
+ (settings_json,),
113
+ )
114
+ # Create indexes for better performance
115
+ db.execute(
116
+ "CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)"
117
+ )
118
+ db.commit()
119
+
120
+ def get_user_version(self) -> str:
121
+ """Returns the SQLite user version"""
122
+ if self._connection is None:
123
+ raise ValueError("Store connection is not available")
124
+
125
+ cursor = self._connection.execute("PRAGMA user_version;")
126
+ version = cursor.fetchone()
127
+ return int_to_semantic_version(version[0])
128
+
129
+ def set_user_version(self, version: str) -> None:
130
+ """Updates the SQLite user version"""
131
+ if self._connection is None:
132
+ raise ValueError("Store connection is not available")
133
+
134
+ self._connection.execute(
135
+ f"PRAGMA user_version = {semantic_version_to_int(version)};"
136
+ )
137
+
138
+ def recreate_embeddings_table(self) -> None:
139
+ """Recreate the embeddings table with current vector dimensions."""
140
+ if self._connection is None:
141
+ raise ValueError("Store connection is not available")
142
+
143
+ # Drop existing embeddings table
144
+ self._connection.execute("DROP TABLE IF EXISTS chunk_embeddings")
145
+
146
+ # Recreate with current dimensions
147
+ embedder = get_embedder()
148
+ self._connection.execute(f"""
149
+ CREATE VIRTUAL TABLE chunk_embeddings USING vec0(
150
+ chunk_id INTEGER PRIMARY KEY,
151
+ embedding FLOAT[{embedder._vector_dim}]
152
+ )
153
+ """)
154
+
155
+ self._connection.commit()
156
+
157
+ @staticmethod
158
+ def serialize_embedding(embedding: list[float]) -> bytes:
159
+ """Serialize a list of floats to bytes for sqlite-vec storage."""
160
+ return struct.pack(f"{len(embedding)}f", *embedding)
161
+
162
+ def close(self):
163
+ """Close the database connection if it's an in-memory database."""
164
+ if self._connection is not None:
165
+ self._connection.close()
166
+ self._connection = None
@@ -0,0 +1,78 @@
1
+ import json
2
+ from typing import Any
3
+
4
+ from haiku.rag.store.engine import Store
5
+
6
+
7
class ConfigMismatchError(Exception):
    """Signals that the active config conflicts with the settings stored in the database."""
11
+
12
+
13
class SettingsRepository:
    """Reads, writes and validates the configuration snapshot kept in the ``settings`` table."""

    def __init__(self, store: Store):
        self.store = store

    def get(self) -> dict[str, Any]:
        """Return the stored settings as a dict, or ``{}`` when no row exists.

        Raises ValueError if the store has no open connection.
        """
        connection = self.store._connection
        if connection is None:
            raise ValueError("Store connection is not available")

        row = connection.execute("SELECT settings FROM settings LIMIT 1").fetchone()
        return json.loads(row[0]) if row else {}

    def save(self) -> None:
        """Write the current Config to the database, replacing any stored snapshot.

        Raises ValueError if the store has no open connection.
        """
        connection = self.store._connection
        if connection is None:
            raise ValueError("Store connection is not available")

        from haiku.rag.config import Config

        # Upsert on the single row with id 1.
        upsert = "INSERT INTO settings (id, settings) VALUES (1, ?) ON CONFLICT(id) DO UPDATE SET settings = excluded.settings"
        connection.execute(upsert, (Config.model_dump_json(),))
        connection.commit()

    def validate_config_compatibility(self) -> None:
        """Compare the critical settings of the current config with the stored ones.

        When the database holds no settings yet, the current config is saved
        and nothing is compared. Raises ConfigMismatchError listing every
        critical setting whose stored value differs from the current one.
        """
        stored = self.get()
        if not stored:
            # Nothing recorded yet: adopt the current config as the baseline.
            self.save()
            return

        from haiku.rag.config import Config

        current = Config.model_dump(mode="json")

        # Settings that must be identical between config and database.
        critical_settings = (
            "EMBEDDINGS_PROVIDER",
            "EMBEDDINGS_MODEL",
            "EMBEDDINGS_VECTOR_DIM",
            "CHUNK_SIZE",
            "CHUNK_OVERLAP",
        )

        mismatches = [
            f"{key}: current={current.get(key)}, stored={stored.get(key)}"
            for key in critical_settings
            if stored.get(key) != current.get(key)
        ]
        if not mismatches:
            return

        raise ConfigMismatchError(
            f"Config mismatch detected: {'; '.join(mismatches)}. Consider rebuilding the database with the current configuration."
        )
@@ -0,0 +1,3 @@
1
+ from haiku.rag.store.upgrades.v0_3_4 import upgrades as v0_3_4_upgrades
2
+
3
+ upgrades = v0_3_4_upgrades
@@ -0,0 +1,26 @@
1
+ from collections.abc import Callable
2
+ from sqlite3 import Connection
3
+
4
+ from haiku.rag.config import Config
5
+
6
+
7
def add_settings_table(db: Connection) -> None:
    """Create settings table for storing current configuration"""
    # NOTE: the docstring above is printed to the user as the upgrade message
    # (the store prints ``step.__doc__``), so its wording is runtime output.
    #
    # The step is idempotent: IF NOT EXISTS / OR IGNORE match the DDL used for
    # freshly created databases, so re-running it against a database that
    # already has the table neither fails nor overwrites the stored settings.
    db.execute("""
        CREATE TABLE IF NOT EXISTS settings (
            id INTEGER PRIMARY KEY DEFAULT 1,
            settings TEXT NOT NULL DEFAULT '{}'
        )
    """)

    # Seed the single settings row (id 1) with the current configuration.
    settings_json = Config.model_dump_json()
    db.execute(
        "INSERT OR IGNORE INTO settings (id, settings) VALUES (1, ?)",
        (settings_json,),
    )
    db.commit()


# Upgrade registry for this release: (version, steps) pairs, where each step
# is a callable taking the open SQLite connection.
upgrades: list[tuple[str, list[Callable[[Connection], None]]]] = [
    ("0.3.4", [add_settings_table])
]
@@ -0,0 +1,80 @@
1
+ import sys
2
+ from importlib import metadata
3
+ from pathlib import Path
4
+
5
+ import httpx
6
+ from packaging.version import Version, parse
7
+
8
+
9
def get_default_data_dir() -> Path:
    """
    Get the user data directory for the current system platform.

    Linux: ~/.local/share/haiku.rag
    macOS: ~/Library/Application Support/haiku.rag
    Windows: C:/Users/<USER>/AppData/Roaming/haiku.rag

    Any other platform (e.g. the BSDs) falls back to the Linux location
    instead of raising ``KeyError``.

    :return: User Data Path
    :rtype: Path
    """
    home = Path.home()

    if sys.platform == "win32":
        return home / "AppData/Roaming/haiku.rag"
    if sys.platform == "darwin":
        return home / "Library/Application Support/haiku.rag"
    # Linux and every remaining platform share the ~/.local/share layout.
    return home / ".local/share/haiku.rag"
30
+
31
+
32
def semantic_version_to_int(version: str) -> int:
    """
    Convert a semantic version string to an integer.

    The result packs ``major << 16``, ``minor << 8`` and ``patch`` into one
    integer. Only the leading digits of the first three dot-separated
    components are used, so pre/post-release suffixes (``0.3.4rc1``,
    ``0.3.4.post1``) are ignored and missing components count as 0.
    Minor or patch values above 255 spill into the next field and will not
    round-trip through ``int_to_semantic_version``.

    :param version: Semantic version string
    :type version: str
    :return: Integer representation of semantic version
    :rtype: int
    :raises ValueError: If one of the first three components does not start with a digit
    """
    components: list[int] = []
    for part in version.strip().split(".")[:3]:
        # Keep only the leading run of digits ("4rc1" -> "4").
        remainder = part.lstrip("0123456789")
        digits = part[: len(part) - len(remainder)]
        if not digits:
            raise ValueError(f"Invalid semantic version: {version!r}")
        components.append(int(digits))

    # Pad short versions such as "1.0" to three components.
    components += [0] * (3 - len(components))
    major, minor, patch = components
    return (major << 16) + (minor << 8) + patch
46
+
47
+
48
def int_to_semantic_version(version: int) -> str:
    """
    Decode a packed integer into a ``major.minor.patch`` string.

    The low byte is the patch, the next byte the minor, and everything above
    bit 16 the major component.

    :param version: Integer representation of semantic version
    :type version: int
    :return: Semantic version string
    :rtype: str
    """
    patch = version & 0xFF
    minor = (version >> 8) & 0xFF
    major = version >> 16
    return ".".join(str(component) for component in (major, minor, patch))
61
+
62
+
63
async def is_up_to_date() -> tuple[bool, Version, Version]:
    """
    Checks whether haiku.rag is current.

    The latest version is looked up on PyPI. The lookup is best effort: any
    failure (no network, client setup error, non-2xx response, unexpected
    payload) is treated as "up to date" so the caller is never interrupted.

    :return: A tuple containing a boolean indicating whether haiku.rag is current, the running version and the latest version
    :rtype: tuple[bool, Version, Version]
    """
    running_version = parse(metadata.version("haiku.rag"))

    try:
        # The client is created inside the try block so that failures while
        # setting it up are swallowed like network errors.
        async with httpx.AsyncClient() as client:
            response = await client.get("https://pypi.org/pypi/haiku.rag/json")
            # Do not try to interpret an error page as release metadata.
            response.raise_for_status()
            pypi_version = parse(response.json()["info"]["version"])
    except Exception:
        # If no network connection, do not raise alarms.
        pypi_version = running_version

    return running_version >= pypi_version, running_version, pypi_version