haiku.rag 0.9.2__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haiku/rag/app.py +50 -14
- haiku/rag/cli.py +16 -4
- haiku/rag/client.py +3 -5
- haiku/rag/reranking/mxbai.py +1 -1
- haiku/rag/research/__init__.py +10 -27
- haiku/rag/research/common.py +53 -0
- haiku/rag/research/dependencies.py +5 -3
- haiku/rag/research/graph.py +29 -0
- haiku/rag/research/models.py +70 -0
- haiku/rag/research/nodes/evaluate.py +80 -0
- haiku/rag/research/nodes/plan.py +63 -0
- haiku/rag/research/nodes/search.py +91 -0
- haiku/rag/research/nodes/synthesize.py +51 -0
- haiku/rag/research/prompts.py +97 -113
- haiku/rag/research/state.py +25 -0
- haiku/rag/store/engine.py +42 -17
- haiku/rag/store/models/chunk.py +1 -0
- haiku/rag/store/repositories/chunk.py +60 -39
- haiku/rag/store/repositories/document.py +2 -2
- haiku/rag/store/repositories/settings.py +12 -5
- haiku/rag/store/upgrades/__init__.py +60 -1
- haiku/rag/store/upgrades/v0_9_3.py +112 -0
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.10.0.dist-info}/METADATA +37 -1
- haiku_rag-0.10.0.dist-info/RECORD +53 -0
- haiku/rag/research/base.py +0 -130
- haiku/rag/research/evaluation_agent.py +0 -42
- haiku/rag/research/orchestrator.py +0 -300
- haiku/rag/research/presearch_agent.py +0 -34
- haiku/rag/research/search_agent.py +0 -65
- haiku/rag/research/synthesis_agent.py +0 -40
- haiku_rag-0.9.2.dist-info/RECORD +0 -50
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.10.0.dist-info}/WHEEL +0 -0
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.10.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.10.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/research/nodes/synthesize.py
ADDED
@@ -0,0 +1,51 @@
+from dataclasses import dataclass
+
+from pydantic_ai import Agent
+from pydantic_graph import BaseNode, End, GraphRunContext
+
+from haiku.rag.research.common import format_context_for_prompt, get_model, log
+from haiku.rag.research.dependencies import (
+    ResearchDependencies,
+)
+from haiku.rag.research.models import ResearchReport
+from haiku.rag.research.prompts import SYNTHESIS_AGENT_PROMPT
+from haiku.rag.research.state import ResearchDeps, ResearchState
+
+
+@dataclass
+class SynthesizeNode(BaseNode[ResearchState, ResearchDeps, ResearchReport]):
+    provider: str
+    model: str
+
+    async def run(
+        self, ctx: GraphRunContext[ResearchState, ResearchDeps]
+    ) -> End[ResearchReport]:
+        state = ctx.state
+        deps = ctx.deps
+
+        log(
+            deps.console,
+            "\n[bold cyan]📝 Generating final research report...[/bold cyan]",
+        )
+
+        agent = Agent(
+            model=get_model(self.provider, self.model),
+            output_type=ResearchReport,
+            instructions=SYNTHESIS_AGENT_PROMPT,
+            retries=3,
+            deps_type=ResearchDependencies,
+        )
+
+        context_xml = format_context_for_prompt(state.context)
+        prompt = (
+            "Generate a comprehensive research report based on all gathered information.\n\n"
+            f"{context_xml}\n\n"
+            "Create a detailed report that synthesizes all findings into a coherent response."
+        )
+        agent_deps = ResearchDependencies(
+            client=deps.client, context=state.context, console=deps.console
+        )
+        result = await agent.run(prompt, deps=agent_deps)
+
+        log(deps.console, "[bold green]✅ Research complete![/bold green]")
+        return End(result.output)
haiku/rag/research/prompts.py
CHANGED
@@ -1,129 +1,113 @@
-SEARCH_AGENT_PROMPT = """You are a search and question
-1. Search the knowledge base for relevant
-2. Analyze
-3. Provide an
-Output format:
-- You must return a SearchAnswer model with fields:
-- query: the question being answered (echo the user query)
-- answer: your final answer based only on the provided context
-- context: list[str] of only the minimal set of verbatim snippet texts you
-used to justify the answer (do not include unrelated text; do not invent)
-- sources: list[str] of document_uri values corresponding to the snippets you
-actually used in the answer (one URI per context snippet, order aligned)
+PLAN_PROMPT = """You are the research orchestrator for a focused, iterative
+workflow.
+
+Responsibilities:
+1. Understand and decompose the main question
+2. Propose a minimal, high‑leverage plan
+3. Coordinate specialized agents to gather evidence
+4. Iterate based on gaps and new findings
+
+Plan requirements:
+- Produce at most 3 sub_questions that together cover the main question.
+- Each sub_question must be a standalone, self‑contained query that can run
+without extra context. Include concrete entities, scope, timeframe, and any
+qualifiers. Avoid ambiguous pronouns (it/they/this/that).
+- Prioritize the highest‑value aspects first; avoid redundancy and overlap.
+- Prefer questions that are likely answerable from the current knowledge base;
+if coverage is uncertain, make scopes narrower and specific.
+- Order sub_questions by execution priority (most valuable first)."""
+
+SEARCH_AGENT_PROMPT = """You are a search and question‑answering specialist.
+
+Tasks:
+1. Search the knowledge base for relevant evidence.
+2. Analyze retrieved snippets.
+3. Provide an answer strictly grounded in that evidence.

 Tool usage:
-- Always call
-- The tool returns
-`document_uri` it came from.
+- Always call search_and_answer before drafting any answer.
+- The tool returns snippets with verbatim `text`, a relevance `score`, and the
+originating `document_uri`.
 - You may call the tool multiple times to refine or broaden context, but do not
-exceed 3 total
+exceed 3 total calls. Favor precision over volume.
 - Use scores to prioritize evidence, but include only the minimal subset of
-snippet texts (verbatim) in SearchAnswer.context.
-- Set SearchAnswer.sources to the
-used (one URI per snippet
-- If no relevant information is found, say so and return an empty
+snippet texts (verbatim) in SearchAnswer.context (typically 1‑4).
+- Set SearchAnswer.sources to the corresponding document_uris for the snippets
+you used (one URI per snippet; same order as context). Context must be text‑only.
+- If no relevant information is found, clearly say so and return an empty
+context list and sources list.
+
+Answering rules:
+- Be direct and specific; avoid meta commentary about the process.
+- Do not include any claims not supported by the provided snippets.
+- Prefer concise phrasing; avoid copying long passages.
+- When evidence is partial, state the limits explicitly in the answer."""
+
+EVALUATION_AGENT_PROMPT = """You are an analysis and evaluation specialist for
+the research workflow.
+
+Inputs available:
+- Original research question
+- Question–answer pairs produced by search
+- Raw search results and source metadata
 - Previously identified insights

-Your dual role is to:
 ANALYSIS:
-1. Extract
-2. Identify patterns and
-3.
-4. Focus on the most important discoveries
+1. Extract the most important, non‑obvious insights from the collected evidence.
+2. Identify patterns, agreements, and disagreements across sources.
+3. Note material uncertainties and assumptions.

 EVALUATION:
-1.
-2.
-- Coverage of the main question
-- Quality and
-- Depth of
-3.
-4.
-Focus on creating a report that provides clear value to the reader by:
-- Answering the original research question thoroughly
-- Highlighting the most important findings
-- Explaining the implications of the research
-- Suggesting concrete next steps"""
+1. Decide if we have sufficient information to answer the original question.
+2. Provide a confidence_score in [0,1] considering:
+- Coverage of the main question’s aspects
+- Quality, consistency, and diversity of sources
+- Depth and specificity of evidence
+3. List concrete gaps that still need investigation.
+4. Propose up to 3 new sub_questions that would close the highest‑value gaps.
+
+Strictness:
+- Only mark research as sufficient when all major aspects are addressed with
+consistent, reliable evidence and no critical gaps remain.
+
+New sub_questions must:
+- Be genuinely new (not answered or duplicative; check qa_responses).
+- Be standalone and specific (entities, scope, timeframe/region if relevant).
+- Be actionable and scoped to the knowledge base (narrow if necessary).
+- Be ordered by expected impact (most valuable first)."""
+
+SYNTHESIS_AGENT_PROMPT = """You are a synthesis specialist producing the final
+research report.
+
+Goals:
+1. Synthesize all gathered information into a coherent narrative.
+2. Present findings clearly and concisely.
+3. Draw evidence‑based conclusions and recommendations.
+4. State limitations and uncertainties transparently.
+
+Report guidelines (map to output fields):
+- title: concise (5–12 words), informative.
+- executive_summary: 3–5 sentences summarizing the overall answer.
+- main_findings: 4–8 one‑sentence bullets; each reflects evidence from the
+research (do not include inline citations or snippet text).
+- conclusions: 2–4 bullets that follow logically from findings.
+- recommendations: 2–5 actionable bullets tied to findings.
+- limitations: 1–3 bullets describing key constraints or uncertainties.
+- sources_summary: 2–4 sentences summarizing sources used and their reliability.
+
+Style:
+- Base all content solely on the collected evidence.
+- Be professional, objective, and specific.
+- Avoid meta commentary and refrain from speculation beyond the evidence."""

 PRESEARCH_AGENT_PROMPT = """You are a rapid research surveyor.

 Task:
-- Call
-- Read that context and produce a
+- Call gather_context once on the main question to obtain relevant text from
+the knowledge base (KB).
+- Read that context and produce a short natural‑language summary of what the
+KB appears to contain relative to the question.

 Rules:
 - Base the summary strictly on the provided text; do not invent.
-- Output only the summary as plain text (one short paragraph).
-"""
+- Output only the summary as plain text (one short paragraph)."""
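The prompts above reference structured outputs (SearchAnswer, EvaluationResult, ResearchReport) that the new haiku/rag/research/models.py (+70 lines) presumably defines as Pydantic models. That file's contents are not shown in this diff; the following is a minimal sketch whose field names are inferred only from the prompt text, so treat every definition as an assumption rather than the shipped code.

# Hypothetical sketch only -- field names are inferred from the prompts above,
# not copied from haiku/rag/research/models.py.
from pydantic import BaseModel, Field


class SearchAnswer(BaseModel):
    query: str               # the question being answered (echoed back)
    answer: str              # grounded strictly in the provided context
    context: list[str] = []  # minimal verbatim snippet texts used as evidence
    sources: list[str] = []  # one document_uri per context snippet, same order


class EvaluationResult(BaseModel):
    sufficient: bool                                 # enough information gathered?
    confidence_score: float = Field(ge=0.0, le=1.0)  # coverage/quality/depth in [0,1]
    gaps: list[str] = []                             # concrete gaps still open
    new_sub_questions: list[str] = []                # at most 3, ordered by impact


class ResearchReport(BaseModel):
    title: str
    executive_summary: str
    main_findings: list[str]
    conclusions: list[str]
    recommendations: list[str]
    limitations: list[str]
    sources_summary: str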
haiku/rag/research/state.py
ADDED
@@ -0,0 +1,25 @@
+from dataclasses import dataclass, field
+
+from rich.console import Console
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.research.dependencies import ResearchContext
+from haiku.rag.research.models import EvaluationResult
+
+
+@dataclass
+class ResearchDeps:
+    client: HaikuRAG
+    console: Console | None = None
+
+
+@dataclass
+class ResearchState:
+    question: str
+    context: ResearchContext
+    sub_questions: list[str] = field(default_factory=list)
+    iterations: int = 0
+    max_iterations: int = 3
+    max_concurrency: int = 1
+    confidence_threshold: float = 0.8
+    last_eval: EvaluationResult | None = None
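ResearchState and ResearchDeps are the state and dependency types that parametrize the new pydantic_graph nodes; the removed orchestrator.py is replaced by graph.py (+29 lines), which this diff does not show. The sketch below is speculative: only SynthesizeNode is confirmed above, while PlanNode, SearchNode, EvaluateNode, and the ResearchContext() constructor are assumptions based on the added module names.

# Speculative sketch of haiku/rag/research/graph.py; class names other than
# SynthesizeNode and all constructor signatures are assumptions.
from pydantic_graph import Graph

from haiku.rag.research.dependencies import ResearchContext
from haiku.rag.research.nodes.evaluate import EvaluateNode
from haiku.rag.research.nodes.plan import PlanNode
from haiku.rag.research.nodes.search import SearchNode
from haiku.rag.research.nodes.synthesize import SynthesizeNode
from haiku.rag.research.state import ResearchDeps, ResearchState

# The graph is declared once from its node classes; a run starts from a concrete
# node instance and threads ResearchState / ResearchDeps through each node's
# GraphRunContext until some node returns End(...).
research_graph = Graph(nodes=(PlanNode, SearchNode, EvaluateNode, SynthesizeNode))


async def run_research(question: str, client, provider: str, model: str):
    state = ResearchState(question=question, context=ResearchContext())
    deps = ResearchDeps(client=client)
    result = await research_graph.run(
        PlanNode(provider=provider, model=model), state=state, deps=deps
    )
    return result.output  # the ResearchReport delivered via End(...)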
haiku/rag/store/engine.py
CHANGED
@@ -35,6 +35,7 @@ def create_chunk_model(vector_dim: int):
         document_id: str
         content: str
         metadata: str = Field(default="{}")
+        order: int = Field(default=0)
         vector: Vector(vector_dim) = Field(default_factory=lambda: [0.0] * vector_dim)  # type: ignore

     return ChunkRecord
@@ -117,8 +118,10 @@ class Store:
             self.chunks_table = self.db.open_table("chunks")
         else:
             self.chunks_table = self.db.create_table("chunks", schema=self.ChunkRecord)
-            # Create FTS index on the new table
-            self.chunks_table.create_fts_index(
+            # Create FTS index on the new table with phrase query support
+            self.chunks_table.create_fts_index(
+                "content", replace=True, with_position=True, remove_stop_words=False
+            )

         # Create or get settings table
         if "settings" in existing_tables:
@@ -133,21 +136,41 @@ class Store:
                 [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
             )

-        #
-        current_version = metadata.version("haiku.rag")
-        self.set_haiku_version(current_version)
-        # Check if we need to perform upgrades
+        # Run pending upgrades based on stored version and package version
         try:
+            from haiku.rag.store.upgrades import run_pending_upgrades
+
+            current_version = metadata.version("haiku.rag")
+            db_version = self.get_haiku_version()
+
+            run_pending_upgrades(self, db_version, current_version)
+
+            # After upgrades complete (or if none), set stored version
+            # to the greater of the installed package version and the
+            # highest available upgrade step version in code.
+            try:
+                from packaging.version import parse as _v
+
+                from haiku.rag.store.upgrades import upgrades as _steps
+
+                highest_step = max((_v(u.version) for u in _steps), default=None)
+                effective_version = (
+                    str(max(_v(current_version), highest_step))
+                    if highest_step is not None
+                    else current_version
+                )
+            except Exception:
+                effective_version = current_version
+
+            self.set_haiku_version(effective_version)
+        except Exception as e:
+            # Avoid hard failure on initial connection; log and continue so CLI remains usable.
+            logger.warning(
+                "Skipping upgrade due to error (db=%s -> pkg=%s): %s",
+                self.get_haiku_version(),
+                metadata.version("haiku.rag") if hasattr(metadata, "version") else "",
+                e,
             )
-        if existing_settings:
-            db_version = self.get_haiku_version()  # noqa: F841
-            # TODO: Add upgrade logic here similar to SQLite version when needed
-        except Exception:
-            # Settings table might not exist yet in fresh databases
-            pass

     def get_haiku_version(self) -> str:
         """Returns the user version stored in settings."""
@@ -201,8 +224,10 @@ class Store:
         self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
         self.chunks_table = self.db.create_table("chunks", schema=self.ChunkRecord)

-        # Create FTS index on the new table
-        self.chunks_table.create_fts_index(
+        # Create FTS index on the new table with phrase query support
+        self.chunks_table.create_fts_index(
+            "content", replace=True, with_position=True, remove_stop_words=False
+        )

     def close(self):
         """Close the database connection."""
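The FTS index is now built with with_position=True and remove_stop_words=False, which is what LanceDB needs to serve quoted phrase queries over the content column (stop words stay indexed so phrases containing them still match). A minimal usage sketch follows; the database path and query strings are illustrative only.

# Minimal sketch, assuming a LanceDB database whose "chunks" table was indexed
# as in the diff above (with_position=True, remove_stop_words=False).
import lancedb

db = lancedb.connect("./example.lancedb")  # illustrative path
chunks = db.open_table("chunks")

# Plain keyword search over the content column.
hits = chunks.search("retrieval augmented generation", query_type="fts").limit(5).to_list()

# Exact phrase match; double-quoted phrases need the positional index.
phrase_hits = chunks.search('"retrieval augmented generation"', query_type="fts").limit(5).to_list()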
haiku/rag/store/repositories/chunk.py
CHANGED
@@ -28,7 +28,9 @@ class ChunkRepository:
     def _ensure_fts_index(self) -> None:
         """Ensure FTS index exists on the content column."""
         try:
-            self.store.chunks_table.create_fts_index(
+            self.store.chunks_table.create_fts_index(
+                "content", replace=True, with_position=True, remove_stop_words=False
+            )
         except Exception as e:
             # Log the error but don't fail - FTS might already exist
             logger.debug(f"FTS index creation skipped: {e}")
@@ -59,11 +61,16 @@ class ChunkRepository:
             embedding = entity.embedding
         else:
             embedding = await self.embedder.embed(entity.content)
+        order_val = int(entity.order)
+
         chunk_record = self.store.ChunkRecord(
             id=chunk_id,
             document_id=entity.document_id,
             content=entity.content,
-            metadata=json.dumps(
+            metadata=json.dumps(
+                {k: v for k, v in entity.metadata.items() if k != "order"}
+            ),
+            order=order_val,
             vector=embedding,
         )
@@ -90,11 +97,13 @@ class ChunkRepository:
             return None

         chunk_record = results[0]
+        md = json.loads(chunk_record.metadata)
         return Chunk(
             id=chunk_record.id,
             document_id=chunk_record.document_id,
             content=chunk_record.content,
-            metadata=
+            metadata=md,
+            order=chunk_record.order,
         )

     async def update(self, entity: Chunk) -> Chunk:
@@ -102,13 +111,17 @@ class ChunkRepository:
         assert entity.id, "Chunk ID is required for update"

         embedding = await self.embedder.embed(entity.content)
+        order_val = int(entity.order)

         self.store.chunks_table.update(
             where=f"id = '{entity.id}'",
             values={
                 "document_id": entity.document_id,
                 "content": entity.content,
-                "metadata": json.dumps(
+                "metadata": json.dumps(
+                    {k: v for k, v in entity.metadata.items() if k != "order"}
+                ),
+                "order": order_val,
                 "vector": embedding,
             },
         )
@@ -140,15 +153,19 @@ class ChunkRepository:

         results = list(query.to_pydantic(self.store.ChunkRecord))

+        chunks: list[Chunk] = []
+        for rec in results:
+            md = json.loads(rec.metadata)
+            chunks.append(
+                Chunk(
+                    id=rec.id,
+                    document_id=rec.document_id,
+                    content=rec.content,
+                    metadata=md,
+                    order=rec.order,
+                )
             )
-        ]
+        return chunks

     async def create_chunks_for_document(
         self, document_id: str, document: DoclingDocument
@@ -191,7 +208,8 @@ class ChunkRepository:
                 id=chunk_id,
                 document_id=document_id,
                 content=chunk_text,
-                metadata=json.dumps({
+                metadata=json.dumps({}),
+                order=order,
                 vector=embedding,
             )
             chunk_records.append(chunk_record)
@@ -200,7 +218,8 @@ class ChunkRepository:
                 id=chunk_id,
                 document_id=document_id,
                 content=chunk_text,
-                metadata={
+                metadata={},
+                order=order,
             )
             created_chunks.append(chunk)

@@ -219,8 +238,10 @@ class ChunkRepository:
         self.store.chunks_table = self.store.db.create_table(
             "chunks", schema=self.store.ChunkRecord
         )
-        # Create FTS index on the new table
-        self.store.chunks_table.create_fts_index(
+        # Create FTS index on the new table with phrase query support
+        self.store.chunks_table.create_fts_index(
+            "content", replace=True, with_position=True, remove_stop_words=False
+        )

     async def delete_by_document_id(self, document_id: str) -> bool:
         """Delete all chunks for a document."""
@@ -298,37 +319,36 @@ class ChunkRepository:
         doc_uri = doc_results[0].uri if doc_results else None
         doc_meta = doc_results[0].metadata if doc_results else "{}"

+        chunks: list[Chunk] = []
+        for rec in results:
+            md = json.loads(rec.metadata)
+            chunks.append(
+                Chunk(
+                    id=rec.id,
+                    document_id=rec.document_id,
+                    content=rec.content,
+                    metadata=md,
+                    order=rec.order,
+                    document_uri=doc_uri,
+                    document_meta=json.loads(doc_meta),
+                )
             )
-        for chunk in results
-        ]

-        chunks.sort(key=lambda c: c.
+        chunks.sort(key=lambda c: c.order)
         return chunks

     async def get_adjacent_chunks(self, chunk: Chunk, num_adjacent: int) -> list[Chunk]:
         """Get adjacent chunks before and after the given chunk within the same document."""
         assert chunk.document_id, "Document id is required for adjacent chunk finding"

-        chunk_order = chunk.
-        if chunk_order is None:
-            return []
+        chunk_order = chunk.order

-        #
+        # Fetch chunks for the same document and filter by order proximity
        all_chunks = await self.get_by_document_id(chunk.document_id)

-        adjacent_chunks = []
+        adjacent_chunks: list[Chunk] = []
        for c in all_chunks:
-            c_order = c.
+            c_order = c.order
            if c.id != chunk.id and abs(c_order - chunk_order) <= num_adjacent:
                adjacent_chunks.append(c)

@@ -380,15 +400,16 @@ class ChunkRepository:
         doc_uri = doc.uri if doc else None
         doc_meta = doc.metadata if doc else "{}"

+        md = json.loads(chunk_record.metadata)
+
         chunk = Chunk(
             id=chunk_record.id,
             document_id=chunk_record.document_id,
             content=chunk_record.content,
-            metadata=
-            else {},
+            metadata=md,
+            order=chunk_record.order,
             document_uri=doc_uri,
-            document_meta=json.loads(doc_meta)
+            document_meta=json.loads(doc_meta),
         )

         # Get score from arrow result
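The chunk order now lives in a dedicated order column instead of metadata["order"], and the repository strips that key whenever it writes metadata. The upgrade that migrates existing rows ships in haiku/rag/store/upgrades/v0_9_3.py (+112 lines), which this diff does not show; the sketch below only illustrates the kind of backfill such a step has to perform, every helper name in it is hypothetical, and it assumes the new integer column already exists on the chunks table.

# Hypothetical backfill sketch -- not the shipped v0_9_3 upgrade. Assumes older
# rows carried their position in the JSON metadata blob as metadata["order"].
import json


def backfill_chunk_order(store) -> None:
    table = store.chunks_table
    for row in table.to_arrow().to_pylist():
        md = json.loads(row["metadata"]) if row["metadata"] else {}
        if "order" not in md:
            continue
        order = int(md.pop("order"))
        # Move the value into the dedicated column and drop it from metadata.
        table.update(
            where=f"id = '{row['id']}'",
            values={"metadata": json.dumps(md), "order": order},
        )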
haiku/rag/store/repositories/document.py
CHANGED
@@ -34,7 +34,7 @@ class DocumentRepository:
             id=record.id,
             content=record.content,
             uri=record.uri,
-            metadata=json.loads(record.metadata)
+            metadata=json.loads(record.metadata),
             created_at=datetime.fromisoformat(record.created_at)
             if record.created_at
             else datetime.now(),
@@ -194,7 +194,7 @@ class DocumentRepository:
         )
         for order, chunk in enumerate(chunks):
             chunk.document_id = created_doc.id
-            chunk.
+            chunk.order = order
             await self.chunk_repository.create(chunk)

         return created_doc
haiku/rag/store/repositories/settings.py
CHANGED
@@ -84,11 +84,18 @@ class SettingsRepository:
         )

         if existing:
-            #
+            # Preserve existing version if present to avoid interfering with upgrade flow
+            try:
+                existing_settings = (
+                    json.loads(existing[0].settings) if existing[0].settings else {}
+                )
+            except Exception:
+                existing_settings = {}
+            if "version" in existing_settings:
+                current_config["version"] = existing_settings["version"]
+
+            # Update existing settings
+            if existing_settings != current_config:
                 self.store.settings_table.update(
                     where="id = 'settings'",
                     values={"settings": json.dumps(current_config)},