PyPI - haiku.rag - Versions diffs - 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

haiku.rag 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of haiku.rag might be problematic. Click here for more details.

Files changed (17)

haiku/rag/app.py +1 -1
haiku/rag/cli.py +18 -1
haiku/rag/client.py +23 -21
haiku/rag/qa/anthropic.py +57 -63
haiku/rag/qa/ollama.py +37 -40
haiku/rag/qa/openai.py +54 -55
haiku/rag/qa/prompts.py +18 -5
haiku/rag/store/engine.py +95 -9
haiku/rag/store/repositories/settings.py +78 -0
haiku/rag/store/upgrades/__init__.py +3 -0
haiku/rag/store/upgrades/v0_3_4.py +26 -0
haiku/rag/utils.py +55 -0
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/METADATA +2 -1
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/RECORD +17 -14
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/WHEEL +0 -0
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/entry_points.txt +0 -0
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/licenses/LICENSE +0 -0

haiku/rag/app.py CHANGED Viewed

@@ -74,7 +74,7 @@ class HaikuRAGApp:
                 self.console.print(f"[red]Error: {e}[/red]")
     async def rebuild(self):
-        async with HaikuRAG(db_path=self.db_path) as client:
+        async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
             try:
                 documents = await client.list_documents()
                 total_docs = len(documents)

haiku/rag/cli.py CHANGED Viewed

@@ -5,7 +5,7 @@ import typer
 from rich.console import Console
 from haiku.rag.app import HaikuRAGApp
-from haiku.rag.utils import get_default_data_dir
+from haiku.rag.utils import get_default_data_dir, is_up_to_date
 cli = typer.Typer(
     context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
@@ -15,6 +15,23 @@ console = Console()
 event_loop = asyncio.get_event_loop()
+async def check_version():
+    """Check if haiku.rag is up to date and show warning if not."""
+    up_to_date, current_version, latest_version = await is_up_to_date()
+    if not up_to_date:
+        console.print(
+            f"[yellow]Warning: haiku.rag is outdated. Current: {current_version}, Latest: {latest_version}[/yellow]"
+        )
+        console.print("[yellow]Please update.[/yellow]")
+@cli.callback()
+def main():
+    """haiku.rag CLI - SQLite-based RAG system"""
+    # Run version check before any command
+    event_loop.run_until_complete(check_version())
 @cli.command("list", help="List all stored documents")
 def list_documents(
     db: Path = typer.Option(

haiku/rag/client.py CHANGED Viewed

@@ -24,12 +24,13 @@ class HaikuRAG:
         self,
         db_path: Path | Literal[":memory:"] = Config.DEFAULT_DATA_DIR
         / "haiku.rag.sqlite",
+        skip_validation: bool = False,
     ):
         """Initialize the RAG client with a database path."""
         if isinstance(db_path, Path):
             if not db_path.parent.exists():
                 Path.mkdir(db_path.parent, parents=True)
-        self.store = Store(db_path)
+        self.store = Store(db_path, skip_validation=skip_validation)
         self.document_repository = DocumentRepository(self.store)
         self.chunk_repository = ChunkRepository(self.store)
@@ -165,29 +166,26 @@ class HaikuRAG:
             # Create a temporary file with the appropriate extension
             with tempfile.NamedTemporaryFile(
-                mode="wb", suffix=file_extension, delete=False
+                mode="wb", suffix=file_extension
             ) as temp_file:
                 temp_file.write(response.content)
+                temp_file.flush()  # Ensure content is written to disk
                 temp_path = Path(temp_file.name)
-            try:
                 # Parse the content using FileReader
                 content = FileReader.parse_file(temp_path)
-                # Merge metadata with contentType and md5
-                metadata.update({"contentType": content_type, "md5": md5_hash})
-                if existing_doc:
-                    existing_doc.content = content
-                    existing_doc.metadata = metadata
-                    return await self.update_document(existing_doc)
-                else:
-                    return await self.create_document(
-                        content=content, uri=url, metadata=metadata
-                    )
-            finally:
-                # Clean up temporary file
-                temp_path.unlink(missing_ok=True)
+            # Merge metadata with contentType and md5
+            metadata.update({"contentType": content_type, "md5": md5_hash})
+            if existing_doc:
+                existing_doc.content = content
+                existing_doc.metadata = metadata
+                return await self.update_document(existing_doc)
+            else:
+                return await self.create_document(
+                    content=content, uri=url, metadata=metadata
+                )
     def _get_extension_from_content_type_or_url(
         self, url: str, content_type: str
@@ -277,12 +275,16 @@ class HaikuRAG:
         Yields:
             int: The ID of the document currently being processed
         """
-        documents = await self.list_documents()
+        await self.chunk_repository.delete_all()
+        self.store.recreate_embeddings_table()
-        if not documents:
-            return
+        # Update settings to current config
+        from haiku.rag.store.repositories.settings import SettingsRepository
-        await self.chunk_repository.delete_all()
+        settings_repo = SettingsRepository(self.store)
+        settings_repo.save()
+        documents = await self.list_documents()
         for doc in documents:
             if doc.id is not None:

haiku/rag/qa/anthropic.py CHANGED Viewed

@@ -37,75 +37,69 @@ try:
             messages: list[MessageParam] = [{"role": "user", "content": question}]
-            response = await anthropic_client.messages.create(
-                model=self._model,
-                max_tokens=4096,
-                system=self._system_prompt,
-                messages=messages,
-                tools=self.tools,
-                temperature=0.0,
-            )
-            if response.stop_reason == "tool_use":
-                messages.append({"role": "assistant", "content": response.content})
-                # Process tool calls
-                tool_results = []
-                for content_block in response.content:
-                    if isinstance(content_block, ToolUseBlock):
-                        if content_block.name == "search_documents":
-                            args = content_block.input
-                            query = (
-                                args.get("query", question)
-                                if isinstance(args, dict)
-                                else question
-                            )
-                            limit = (
-                                int(args.get("limit", 3))
-                                if isinstance(args, dict)
-                                else 3
-                            )
-                            search_results = await self._client.search(
-                                query, limit=limit
-                            )
-                            context_chunks = []
-                            for chunk, score in search_results:
-                                context_chunks.append(
-                                    f"Content: {chunk.content}\nScore: {score:.4f}"
+            max_rounds = 5  # Prevent infinite loops
+            for _ in range(max_rounds):
+                response = await anthropic_client.messages.create(
+                    model=self._model,
+                    max_tokens=4096,
+                    system=self._system_prompt,
+                    messages=messages,
+                    tools=self.tools,
+                    temperature=0.0,
+                )
+                if response.stop_reason == "tool_use":
+                    messages.append({"role": "assistant", "content": response.content})
+                    # Process tool calls
+                    tool_results = []
+                    for content_block in response.content:
+                        if isinstance(content_block, ToolUseBlock):
+                            if content_block.name == "search_documents":
+                                args = content_block.input
+                                query = (
+                                    args.get("query", question)
+                                    if isinstance(args, dict)
+                                    else question
+                                )
+                                limit = (
+                                    int(args.get("limit", 3))
+                                    if isinstance(args, dict)
+                                    else 3
+                                )
+                                search_results = await self._client.search(
+                                    query, limit=limit
+                                )
+                                context_chunks = []
+                                for chunk, score in search_results:
+                                    context_chunks.append(
+                                        f"Content: {chunk.content}\nScore: {score:.4f}"
+                                    )
+                                context = "\n\n".join(context_chunks)
+                                tool_results.append(
+                                    {
+                                        "type": "tool_result",
+                                        "tool_use_id": content_block.id,
+                                        "content": context,
+                                    }
                                 )
-                            context = "\n\n".join(context_chunks)
-                            tool_results.append(
-                                {
-                                    "type": "tool_result",
-                                    "tool_use_id": content_block.id,
-                                    "content": context,
-                                }
-                            )
-                if tool_results:
-                    messages.append({"role": "user", "content": tool_results})
-                    final_response = await anthropic_client.messages.create(
-                        model=self._model,
-                        max_tokens=4096,
-                        system=self._system_prompt,
-                        messages=messages,
-                        temperature=0.0,
-                    )
-                    if final_response.content:
-                        first_content = final_response.content[0]
+                    if tool_results:
+                        messages.append({"role": "user", "content": tool_results})
+                else:
+                    # No tool use, return the response
+                    if response.content:
+                        first_content = response.content[0]
                         if isinstance(first_content, TextBlock):
                             return first_content.text
                     return ""
-            if response.content:
-                first_content = response.content[0]
-                if isinstance(first_content, TextBlock):
-                    return first_content.text
+            # If we've exhausted max rounds, return empty string
             return ""
 except ImportError:

haiku/rag/qa/ollama.py CHANGED Viewed

@@ -14,54 +14,51 @@ class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
     async def answer(self, question: str) -> str:
         ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
-        # Define the search tool
         messages = [
             {"role": "system", "content": self._system_prompt},
             {"role": "user", "content": question},
         ]
-        # Initial response with tool calling
-        response = await ollama_client.chat(
-            model=self._model,
-            messages=messages,
-            tools=self.tools,
-            options=OLLAMA_OPTIONS,
-            think=False,
-        )
+        max_rounds = 5  # Prevent infinite loops
-        if response.get("message", {}).get("tool_calls"):
-            for tool_call in response["message"]["tool_calls"]:
-                if tool_call["function"]["name"] == "search_documents":
-                    args = tool_call["function"]["arguments"]
-                    query = args.get("query", question)
-                    limit = int(args.get("limit", 3))
+        for _ in range(max_rounds):
+            response = await ollama_client.chat(
+                model=self._model,
+                messages=messages,
+                tools=self.tools,
+                options=OLLAMA_OPTIONS,
+                think=False,
+            )
-                    search_results = await self._client.search(query, limit=limit)
+            if response.get("message", {}).get("tool_calls"):
+                messages.append(response["message"])
-                    context_chunks = []
-                    for chunk, score in search_results:
-                        context_chunks.append(
-                            f"Content: {chunk.content}\nScore: {score:.4f}"
-                        )
+                for tool_call in response["message"]["tool_calls"]:
+                    if tool_call["function"]["name"] == "search_documents":
+                        args = tool_call["function"]["arguments"]
+                        query = args.get("query", question)
+                        limit = int(args.get("limit", 3))
-                    context = "\n\n".join(context_chunks)
+                        search_results = await self._client.search(query, limit=limit)
-                    messages.append(response["message"])
-                    messages.append(
-                        {
-                            "role": "tool",
-                            "content": context,
-                            "tool_call_id": tool_call.get("id", "search_tool"),
-                        }
-                    )
+                        context_chunks = []
+                        for chunk, score in search_results:
+                            context_chunks.append(
+                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            )
-            final_response = await ollama_client.chat(
-                model=self._model,
-                messages=messages,
-                think=False,
-                options=OLLAMA_OPTIONS,
-            )
-            return final_response["message"]["content"]
-        else:
-            return response["message"]["content"]
+                        context = "\n\n".join(context_chunks)
+                        messages.append(
+                            {
+                                "role": "tool",
+                                "content": context,
+                                "tool_call_id": tool_call.get("id", "search_tool"),
+                            }
+                        )
+            else:
+                # No tool calls, return the response
+                return response["message"]["content"]
+        # If we've exhausted max rounds, return empty string
+        return ""

haiku/rag/qa/openai.py CHANGED Viewed

@@ -24,8 +24,6 @@ try:
         async def answer(self, question: str) -> str:
             openai_client = AsyncOpenAI()
-            # Define the search tool
             messages: list[ChatCompletionMessageParam] = [
                 ChatCompletionSystemMessageParam(
                     role="system", content=self._system_prompt
@@ -33,69 +31,70 @@ try:
                 ChatCompletionUserMessageParam(role="user", content=question),
             ]
-            # Initial response with tool calling
-            response = await openai_client.chat.completions.create(
-                model=self._model,
-                messages=messages,
-                tools=self.tools,
-                temperature=0.0,
-            )
-            response_message = response.choices[0].message
-            if response_message.tool_calls:
-                messages.append(
-                    ChatCompletionAssistantMessageParam(
-                        role="assistant",
-                        content=response_message.content,
-                        tool_calls=[
-                            {
-                                "id": tc.id,
-                                "type": "function",
-                                "function": {
-                                    "name": tc.function.name,
-                                    "arguments": tc.function.arguments,
-                                },
-                            }
-                            for tc in response_message.tool_calls
-                        ],
-                    )
+            max_rounds = 5  # Prevent infinite loops
+            for _ in range(max_rounds):
+                response = await openai_client.chat.completions.create(
+                    model=self._model,
+                    messages=messages,
+                    tools=self.tools,
+                    temperature=0.0,
                 )
-                for tool_call in response_message.tool_calls:
-                    if tool_call.function.name == "search_documents":
-                        import json
+                response_message = response.choices[0].message
+                if response_message.tool_calls:
+                    messages.append(
+                        ChatCompletionAssistantMessageParam(
+                            role="assistant",
+                            content=response_message.content,
+                            tool_calls=[
+                                {
+                                    "id": tc.id,
+                                    "type": "function",
+                                    "function": {
+                                        "name": tc.function.name,
+                                        "arguments": tc.function.arguments,
+                                    },
+                                }
+                                for tc in response_message.tool_calls
+                            ],
+                        )
+                    )
-                        args = json.loads(tool_call.function.arguments)
-                        query = args.get("query", question)
-                        limit = int(args.get("limit", 3))
+                    for tool_call in response_message.tool_calls:
+                        if tool_call.function.name == "search_documents":
+                            import json
-                        search_results = await self._client.search(query, limit=limit)
+                            args = json.loads(tool_call.function.arguments)
+                            query = args.get("query", question)
+                            limit = int(args.get("limit", 3))
-                        context_chunks = []
-                        for chunk, score in search_results:
-                            context_chunks.append(
-                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            search_results = await self._client.search(
+                                query, limit=limit
                             )
-                        context = "\n\n".join(context_chunks)
+                            context_chunks = []
+                            for chunk, score in search_results:
+                                context_chunks.append(
+                                    f"Content: {chunk.content}\nScore: {score:.4f}"
+                                )
+                            context = "\n\n".join(context_chunks)
-                        messages.append(
-                            ChatCompletionToolMessageParam(
-                                role="tool",
-                                content=context,
-                                tool_call_id=tool_call.id,
+                            messages.append(
+                                ChatCompletionToolMessageParam(
+                                    role="tool",
+                                    content=context,
+                                    tool_call_id=tool_call.id,
+                                )
                             )
-                        )
+                else:
+                    # No tool calls, return the response
+                    return response_message.content or ""
-                final_response = await openai_client.chat.completions.create(
-                    model=self._model,
-                    messages=messages,
-                    temperature=0.0,
-                )
-                return final_response.choices[0].message.content or ""
-            else:
-                return response_message.content or ""
+            # If we've exhausted max rounds, return empty string
+            return ""
 except ImportError:
     pass

haiku/rag/qa/prompts.py CHANGED Viewed

@@ -1,7 +1,20 @@
 SYSTEM_PROMPT = """
-You are a helpful assistant that uses a RAG library to answer the user's prompt.
-Your task is to provide a concise and accurate answer based on the provided context.
-You should ask the provided tools to find relevant documents and then use the content of those documents to answer the question.
-Never make up information, always use the context to answer the question.
-If the context does not contain enough information to answer the question, respond with "I cannot answer that based on the provided context."
+You are a knowledgeable assistant that helps users find information from a document knowledge base.
+Your process:
+1. When a user asks a question, use the search_documents tool to find relevant information
+2. Search with specific keywords and phrases from the user's question
+3. Review the search results and their relevance scores
+4. If you need additional context, perform follow-up searches with different keywords
+5. Provide a comprehensive answer based only on the retrieved documents
+Guidelines:
+- Base your answers strictly on the provided document content
+- Quote or reference specific information when possible
+- If multiple documents contain relevant information, synthesize them coherently
+- Indicate when information is incomplete or when you need to search for additional context
+- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+- For complex questions, consider breaking them down and performing multiple searches
+Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
 """

haiku/rag/store/engine.py CHANGED Viewed

@@ -1,23 +1,65 @@
 import sqlite3
 import struct
+from importlib import metadata
 from pathlib import Path
 from typing import Literal
 import sqlite_vec
+from packaging.version import parse
+from rich.console import Console
+from haiku.rag.config import Config
 from haiku.rag.embeddings import get_embedder
+from haiku.rag.store.upgrades import upgrades
+from haiku.rag.utils import int_to_semantic_version, semantic_version_to_int
 class Store:
-    def __init__(self, db_path: Path | Literal[":memory:"]):
+    def __init__(
+        self, db_path: Path | Literal[":memory:"], skip_validation: bool = False
+    ):
         self.db_path: Path | Literal[":memory:"] = db_path
-        self._connection = self.create_db()
+        self.create_or_update_db()
-    def create_db(self) -> sqlite3.Connection:
+        # Validate config compatibility after connection is established
+        if not skip_validation:
+            from haiku.rag.store.repositories.settings import SettingsRepository
+            settings_repo = SettingsRepository(self)
+            settings_repo.validate_config_compatibility()
+        current_version = metadata.version("haiku.rag")
+        self.set_user_version(current_version)
+    def create_or_update_db(self):
         """Create the database and tables with sqlite-vec support for embeddings."""
+        current_version = metadata.version("haiku.rag")
         db = sqlite3.connect(self.db_path)
         db.enable_load_extension(True)
         sqlite_vec.load(db)
+        self._connection = db
+        existing_tables = [
+            row[0]
+            for row in db.execute(
+                "SELECT name FROM sqlite_master WHERE type='table';"
+            ).fetchall()
+        ]
+        # If we have a db already, perform upgrades and return
+        if self.db_path != ":memory:" and "documents" in existing_tables:
+            # Upgrade database
+            console = Console()
+            db_version = self.get_user_version()
+            for version, steps in upgrades:
+                if parse(current_version) >= parse(version) and parse(version) > parse(
+                    db_version
+                ):
+                    for step in steps:
+                        step(db)
+                        console.print(
+                            f"[green][b]DB Upgrade: [/b]{step.__doc__}[/green]"
+                        )
+            return
         # Create documents table
         db.execute("""
@@ -30,7 +72,6 @@ class Store:
                 updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
             )
         """)
         # Create chunks table
         db.execute("""
             CREATE TABLE IF NOT EXISTS chunks (
@@ -41,7 +82,6 @@ class Store:
                 FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
             )
         """)
         # Create vector table for chunk embeddings
         embedder = get_embedder()
         db.execute(f"""
@@ -50,7 +90,6 @@ class Store:
                 embedding FLOAT[{embedder._vector_dim}]
             )
         """)
         # Create FTS5 table for full-text search
         db.execute("""
             CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
@@ -59,14 +98,61 @@ class Store:
                 content_rowid='id'
             )
         """)
+        # Create settings table for storing current configuration
+        db.execute("""
+            CREATE TABLE IF NOT EXISTS settings (
+                id INTEGER PRIMARY KEY DEFAULT 1,
+                settings TEXT NOT NULL DEFAULT '{}'
+            )
+        """)
+        # Save current settings to the new database
+        settings_json = Config.model_dump_json()
+        db.execute(
+            "INSERT OR IGNORE INTO settings (id, settings) VALUES (1, ?)",
+            (settings_json,),
+        )
         # Create indexes for better performance
         db.execute(
             "CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)"
         )
         db.commit()
-        return db
+    def get_user_version(self) -> str:
+        """Returns the SQLite user version"""
+        if self._connection is None:
+            raise ValueError("Store connection is not available")
+        cursor = self._connection.execute("PRAGMA user_version;")
+        version = cursor.fetchone()
+        return int_to_semantic_version(version[0])
+    def set_user_version(self, version: str) -> None:
+        """Updates the SQLite user version"""
+        if self._connection is None:
+            raise ValueError("Store connection is not available")
+        self._connection.execute(
+            f"PRAGMA user_version = {semantic_version_to_int(version)};"
+        )
+    def recreate_embeddings_table(self) -> None:
+        """Recreate the embeddings table with current vector dimensions."""
+        if self._connection is None:
+            raise ValueError("Store connection is not available")
+        # Drop existing embeddings table
+        self._connection.execute("DROP TABLE IF EXISTS chunk_embeddings")
+        # Recreate with current dimensions
+        embedder = get_embedder()
+        self._connection.execute(f"""
+            CREATE VIRTUAL TABLE chunk_embeddings USING vec0(
+                chunk_id INTEGER PRIMARY KEY,
+                embedding FLOAT[{embedder._vector_dim}]
+            )
+        """)
+        self._connection.commit()
     @staticmethod
     def serialize_embedding(embedding: list[float]) -> bytes:

haiku/rag/store/repositories/settings.py ADDED Viewed

@@ -0,0 +1,78 @@
+import json
+from typing import Any
+from haiku.rag.store.engine import Store
+class ConfigMismatchError(Exception):
+    """Raised when current config doesn't match stored settings."""
+    pass
+class SettingsRepository:
+    def __init__(self, store: Store):
+        self.store = store
+    def get(self) -> dict[str, Any]:
+        """Get all settings from the database."""
+        if self.store._connection is None:
+            raise ValueError("Store connection is not available")
+        cursor = self.store._connection.execute("SELECT settings FROM settings LIMIT 1")
+        row = cursor.fetchone()
+        if row:
+            return json.loads(row[0])
+        return {}
+    def save(self) -> None:
+        """Sync settings from the current AppConfig to database."""
+        if self.store._connection is None:
+            raise ValueError("Store connection is not available")
+        from haiku.rag.config import Config
+        settings_json = Config.model_dump_json()
+        self.store._connection.execute(
+            "INSERT INTO settings (id, settings) VALUES (1, ?) ON CONFLICT(id) DO UPDATE SET settings = excluded.settings",
+            (settings_json,),
+        )
+        self.store._connection.commit()
+    def validate_config_compatibility(self) -> None:
+        """Check if current config is compatible with stored settings.
+        Raises ConfigMismatchError if there are incompatible differences.
+        If no settings exist, saves current config.
+        """
+        db_settings = self.get()
+        if not db_settings:
+            # No settings in DB, save current config
+            self.save()
+            return
+        from haiku.rag.config import Config
+        current_config = Config.model_dump(mode="json")
+        # Critical settings that must match
+        critical_settings = [
+            "EMBEDDINGS_PROVIDER",
+            "EMBEDDINGS_MODEL",
+            "EMBEDDINGS_VECTOR_DIM",
+            "CHUNK_SIZE",
+            "CHUNK_OVERLAP",
+        ]
+        errors = []
+        for setting in critical_settings:
+            if db_settings.get(setting) != current_config.get(setting):
+                errors.append(
+                    f"{setting}: current={current_config.get(setting)}, stored={db_settings.get(setting)}"
+                )
+        if errors:
+            error_msg = f"Config mismatch detected: {'; '.join(errors)}. Consider rebuilding the database with the current configuration."
+            raise ConfigMismatchError(error_msg)

haiku/rag/store/upgrades/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from haiku.rag.store.upgrades.v0_3_4 import upgrades as v0_3_4_upgrades
+upgrades = v0_3_4_upgrades

haiku/rag/store/upgrades/v0_3_4.py ADDED Viewed

@@ -0,0 +1,26 @@
+from collections.abc import Callable
+from sqlite3 import Connection
+from haiku.rag.config import Config
+def add_settings_table(db: Connection) -> None:
+    """Create settings table for storing current configuration"""
+    db.execute("""
+        CREATE TABLE settings (
+            id INTEGER PRIMARY KEY DEFAULT 1,
+            settings TEXT NOT NULL DEFAULT '{}'
+        )
+    """)
+    settings_json = Config.model_dump_json()
+    db.execute(
+        "INSERT INTO settings (id, settings) VALUES (1, ?)",
+        (settings_json,),
+    )
+    db.commit()
+upgrades: list[tuple[str, list[Callable[[Connection], None]]]] = [
+    ("0.3.4", [add_settings_table])
+]

haiku/rag/utils.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import sys
+from importlib import metadata
 from pathlib import Path
+import httpx
+from packaging.version import Version, parse
 def get_default_data_dir() -> Path:
     """
@@ -23,3 +27,54 @@ def get_default_data_dir() -> Path:
     data_path = system_paths[sys.platform]
     return data_path
+def semantic_version_to_int(version: str) -> int:
+    """
+    Convert a semantic version string to an integer.
+    :param version: Semantic version string
+    :type version: str
+    :return: Integer representation of semantic version
+    :rtype: int
+    """
+    major, minor, patch = version.split(".")
+    major = int(major) << 16
+    minor = int(minor) << 8
+    patch = int(patch)
+    return major + minor + patch
+def int_to_semantic_version(version: int) -> str:
+    """
+    Convert an integer to a semantic version string.
+    :param version: Integer representation of semantic version
+    :type version: int
+    :return: Semantic version string
+    :rtype: str
+    """
+    major = version >> 16
+    minor = (version >> 8) & 255
+    patch = version & 255
+    return f"{major}.{minor}.{patch}"
+async def is_up_to_date() -> tuple[bool, Version, Version]:
+    """
+    Checks whether haiku.rag is current.
+    :return: A tuple containing a boolean indicating whether haiku.rag is current, the running version and the latest version
+    :rtype: tuple[bool, Version, Version]
+    """
+    async with httpx.AsyncClient() as client:
+        running_version = parse(metadata.version("haiku.rag"))
+        try:
+            response = await client.get("https://pypi.org/pypi/haiku.rag/json")
+            data = response.json()
+            pypi_version = parse(data["info"]["version"])
+        except Exception:
+            # If no network connection, do not raise alarms.
+            pypi_version = running_version
+    return running_version >= pypi_version, running_version, pypi_version

{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.3.2
+Version: 0.3.4
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -116,3 +116,4 @@ Full documentation at: https://ggozad.github.io/haiku.rag/
 - [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
 - [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
 - [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
+- [Benchmarks](https://ggozad.github.io/haiku.rag/benchmarks/) - Performance Benchmarks

{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/RECORD RENAMED Viewed

@@ -1,27 +1,27 @@
 haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-haiku/rag/app.py,sha256=Foi_K-sAqHWsIAAaxY2Tb0hyXnMCi06LqIFCPiBS5n0,7627
+haiku/rag/app.py,sha256=FpLVyP1-zAq_XPmU8CPVLkuIAeuhBOGvMqhYS8RbN40,7649
 haiku/rag/chunker.py,sha256=lSSPWgNAe7gNZL_yNLmDtqxJix4YclOiG7gbARcEpV8,1871
-haiku/rag/cli.py,sha256=9F64IIm2c1nBKn7p9D5yYkVZr8HcjDemrzjF9SRGIY8,5017
-haiku/rag/client.py,sha256=qoVgdsP_MH8wVcDTvPIcMgW7323tTjOXH8JKugz5snY,10847
+haiku/rag/cli.py,sha256=8PC7r5odIVLyksSm_BXor2rznIZ2KDug-YhzqbFPvms,5605
+haiku/rag/client.py,sha256=AeRXw67E1dr6ICI6EJE1q0WwZgA6ezwFw55v6QVydYk,11014
 haiku/rag/config.py,sha256=ctD_pu7nDOieirJofhNMO-OJIONLC5myvcru9iTm_ps,1433
 haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
 haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
 haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
 haiku/rag/reader.py,sha256=S7-Z72pDvSHedvgt4-RkTOwZadG88Oed9keJ69SVITk,962
-haiku/rag/utils.py,sha256=6xVM6z2OmhzB4FEDlPbMsr_ZBBmCbMQb83nP6E2UdxY,629
+haiku/rag/utils.py,sha256=flQqO12OIqApINYAfkg8VDXBgRDFVR_HRaIaydk_OBQ,2310
 haiku/rag/embeddings/__init__.py,sha256=4jUPe2FyIf8BGZ7AncWSlBdNXG3URejBbnkhQf3JiD0,1505
 haiku/rag/embeddings/base.py,sha256=PTAWKTU-Q-hXIhbRK1o6pIdpaW7DFdzJXQ0Nzc6VI-w,379
 haiku/rag/embeddings/ollama.py,sha256=hWdrTiuJwNSRYCqP0WP-z6XXA3RBGkAiknZMsPLH0qU,441
 haiku/rag/embeddings/openai.py,sha256=reh8AykG2f9f5hhRDmqSsjiuCPi9SsXfe2YEZFlxXk8,550
 haiku/rag/embeddings/voyageai.py,sha256=jc0JywdLJD3Ee1MUv1m8MhWCEo0enNnVcrIBtUvD-Ss,534
 haiku/rag/qa/__init__.py,sha256=oso98Ypti7mBLTJ6Zk71YaSJ9Rgc89QXp9RSB6zSpYs,1501
-haiku/rag/qa/anthropic.py,sha256=lzHRQxpEv6Qd6iBIqexUgWnq-ITqytppwkfOuRGWdDs,4556
+haiku/rag/qa/anthropic.py,sha256=6I6cf6ySNkYbmDFdy22sA8r3GO5moiiH75tJnHcgJQA,4448
 haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
-haiku/rag/qa/ollama.py,sha256=poShrse-RgLTwa5gbVzoERNTrn5QRpovJCZKYkIpOZI,2393
-haiku/rag/qa/openai.py,sha256=yBbSjGlG4Lo5p2B2NOTa5C6JceX0OJ1jXar_ABFZYYI,3849
-haiku/rag/qa/prompts.py,sha256=dAz2HjD4eJ8tcW534Tx7EuFOs6pSv2kPr7yrHnHtS0E,535
+haiku/rag/qa/ollama.py,sha256=-UtNFErYlA_66g3WLU6lK38a1Y5zhAL6s_uZ5AP0TFs,2381
+haiku/rag/qa/openai.py,sha256=dF32sGgVt8mZi5oVxByaeECs9NqLjvDiZnnpJBsrHm8,3968
+haiku/rag/qa/prompts.py,sha256=578LJGZJ0LQ_q7ccyj5hLabtHo8Zcfw5-DiLGN9lC-w,1200
 haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
-haiku/rag/store/engine.py,sha256=BeYZRZ08zaYeeu375ysnAL3tGz4roA3GzP7WRNwznCo,2603
+haiku/rag/store/engine.py,sha256=4ouAD0s-TFwEoEHjVVw_KnV6aaw5nwhe9fdT8PRXfok,6061
 haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
 haiku/rag/store/models/chunk.py,sha256=lmbPOOTz-N4PXhrA5XCUxyRcSTZBo135fqkV1mwnGcE,309
 haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
@@ -29,8 +29,11 @@ haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPv
 haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
 haiku/rag/store/repositories/chunk.py,sha256=gik7ZPOK3gCoG6tU1pGueAZBPmJxIb7obYFUhwINrYg,16497
 haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
-haiku_rag-0.3.2.dist-info/METADATA,sha256=0A8BVZDp38i_xLznvkrZBq3f3OYtWPtqBx_U2eHRIow,3931
-haiku_rag-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-haiku_rag-0.3.2.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
-haiku_rag-0.3.2.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
-haiku_rag-0.3.2.dist-info/RECORD,,
+haiku/rag/store/repositories/settings.py,sha256=dme3_ulQdQvyF9daavSjAd-SjZ5hh0MJoxP7iXgap-A,2492
+haiku/rag/store/upgrades/__init__.py,sha256=kKS1YWT_P-CYKhKtokOLTIFNKf9jlfjFFr8lyIMeogM,100
+haiku/rag/store/upgrades/v0_3_4.py,sha256=GLogKZdZ40NX1vBHKdOJju7fFzNUCHoEnjSZg17Hm2U,663
+haiku_rag-0.3.4.dist-info/METADATA,sha256=9FEVS2pZkPrRYVGd1qaMmfjyxr4fc9sHx1NTeyCbTo0,4019
+haiku_rag-0.3.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+haiku_rag-0.3.4.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+haiku_rag-0.3.4.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+haiku_rag-0.3.4.dist-info/RECORD,,

{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

haiku.rag 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

Potentially problematic release.

haiku.rag 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl