raggity 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
raggity/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
raggity/answerer.py ADDED
@@ -0,0 +1,101 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from abc import ABC, abstractmethod
5
+
6
+ from claude_agent_sdk import query, ClaudeAgentOptions, AssistantMessage
7
+
8
+ from .models import Answer, Chunk
9
+ from .prompts import (SYSTEM_PROMPT, ABSTAIN_MESSAGE, build_user_prompt,
10
+ verify_citations)
11
+ from .registry import register
12
+
13
+
14
class Answerer(ABC):
    """Abstract interface: turn a question plus retrieved chunks into an Answer."""

    @abstractmethod
    async def answer(self, question: str, chunks: list[Chunk]) -> Answer:
        """Return the Answer for *question* given the retrieved *chunks*."""
17
+
18
+
19
class ClaudeAgentAnswerer(Answerer):
    """Answerer that sends the grounded prompt to Claude via the Claude Agent SDK.

    ``auth`` is compared against ``"subscription"`` and ``"api_key"``; any other
    value (including the default ``"auto"``) leaves the environment untouched.
    """

    def __init__(self, model: str = "claude-opus-4-8", auth: str = "auto") -> None:
        self.model = model
        self.auth = auth

    def _options(self) -> ClaudeAgentOptions:
        """Build the SDK options for one query according to ``self.auth``.

        Raises:
            RuntimeError: in ``api_key`` mode when ANTHROPIC_API_KEY is unset
                or empty.
        """
        settings = {
            "system_prompt": SYSTEM_PROMPT,
            "model": self.model,
            "allowed_tools": [],
            "permission_mode": "dontAsk",
        }
        if self.auth == "subscription":
            # Subscription-primary: hand the SDK a copy of os.environ without
            # ANTHROPIC_API_KEY so it cannot fall back to a per-token API key
            # and has to use the `claude login` subscription session.
            settings["env"] = {
                name: value
                for name, value in os.environ.items()
                if name != "ANTHROPIC_API_KEY"
            }
        elif self.auth == "api_key":
            # api_key mode: the SDK reads ANTHROPIC_API_KEY from the environment,
            # so fail early with an actionable message when it is missing.
            if not os.environ.get("ANTHROPIC_API_KEY"):
                raise RuntimeError(
                    "auth='api_key' but ANTHROPIC_API_KEY is not set. "
                    "Set the key, or use auth='subscription' after `claude login`."
                )
        # Otherwise ("auto"): env is left untouched; per the original note the
        # SDK resolves key-first, then the subscription session.
        return ClaudeAgentOptions(**settings)

    @staticmethod
    def _build_answer(fragments: list[str], chunks: list[Chunk]) -> Answer:
        """Join the collected text fragments and wrap them in an Answer.

        The answer counts as abstained when the stripped text equals
        ABSTAIN_MESSAGE exactly; citations are verified only otherwise.
        """
        full_text = "".join(fragments).strip()
        gave_up = full_text == ABSTAIN_MESSAGE
        cited = [] if gave_up else verify_citations(full_text, chunks)
        return Answer(text=full_text, citations=cited, abstained=gave_up)

    async def answer_stream(self, question: str, chunks: list[Chunk]):
        """Yield text-delta str items as they arrive, then a final Answer."""
        if not chunks:
            # Nothing retrieved: abstain without calling the model.
            yield ABSTAIN_MESSAGE
            yield Answer(text=ABSTAIN_MESSAGE, citations=[], abstained=True)
            return
        user_prompt = build_user_prompt(question, chunks)
        options = self._options()
        try:
            # Best-effort: if the options object rejects the attribute, carry on
            # without partial messages.
            options.include_partial_messages = True
        except Exception:
            pass
        fragments: list[str] = []
        async for message in query(prompt=user_prompt, options=options):
            if not isinstance(message, AssistantMessage):
                continue
            for block in message.content:
                # Only blocks exposing a non-empty ``text`` attribute contribute.
                fragment = getattr(block, "text", None)
                if fragment:
                    fragments.append(fragment)
                    yield fragment
        yield self._build_answer(fragments, chunks)

    async def answer(self, question: str, chunks: list[Chunk]) -> Answer:
        """Run one buffered query and return the complete Answer."""
        if not chunks:
            return Answer(text=ABSTAIN_MESSAGE, citations=[], abstained=True)
        user_prompt = build_user_prompt(question, chunks)
        fragments: list[str] = []
        async for message in query(prompt=user_prompt, options=self._options()):
            if not isinstance(message, AssistantMessage):
                continue
            for block in message.content:
                fragment = getattr(block, "text", None)
                if fragment:
                    fragments.append(fragment)
        return self._build_answer(fragments, chunks)
99
+
100
+
101
+ register("answerer", "claude", "raggity.answerer:ClaudeAgentAnswerer")
raggity/cache.py ADDED
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+ import hashlib
3
+ import json
4
+ import logging
5
+ import os
6
+ from pathlib import Path
7
+
8
+ from .models import Answer, Citation
9
+
10
+ log = logging.getLogger("raggity.cache")
11
+
12
+
13
def cache_key(question: str, chunk_ids: list[str], model: str) -> str:
    """Return the SHA-256 hex digest identifying (question, chunk set, model).

    Chunk ids are sorted before hashing, so their order does not affect the key.
    """
    joined_ids = "|".join(sorted(chunk_ids))
    raw = f"{question}|{joined_ids}|{model}"
    digest = hashlib.sha256(raw.encode("utf-8"))
    return digest.hexdigest()
16
+
17
+
18
def load(path: str) -> dict:
    """Read the JSON answer cache stored at *path*.

    Best-effort: a missing, unreadable, or malformed cache is never fatal.

    Returns:
        The cached mapping, or ``{}`` when the file does not exist, cannot be
        read or parsed, or does not contain a JSON object.
    """
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except Exception as exc:
        # Deliberately broad: any failure just means starting with an empty cache.
        log.warning("ignoring unreadable answer cache %s: %s", path, exc)
        return {}
    if not isinstance(data, dict):
        # Valid JSON of the wrong shape (e.g. a list) would break callers that
        # rely on the declared dict return type.
        log.warning("ignoring unreadable answer cache %s: %s", path,
                    f"expected a JSON object, got {type(data).__name__}")
        return {}
    return data
27
+
28
+
29
def save(path: str, data: dict) -> None:
    """Write *data* as JSON to *path*, creating parent directories as needed.

    The JSON is written to a sibling temporary file first and then moved over
    *path* with ``os.replace``, so a failed serialization or write leaves any
    previously saved cache intact instead of a truncated file.

    Raises:
        TypeError / ValueError: if *data* is not JSON-serializable.
        OSError: if the directory or file cannot be written.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # The pid suffix keeps concurrent writers from sharing a temp file.
    scratch = target.with_name(f"{target.name}.{os.getpid()}.tmp")
    try:
        with open(scratch, "w", encoding="utf-8") as fh:
            json.dump(data, fh)
        os.replace(scratch, target)
    except BaseException:
        # Do not leave a half-written temp file behind.
        scratch.unlink(missing_ok=True)
        raise
33
+
34
+
35
def answer_to_dict(a: Answer) -> dict:
    """Flatten an Answer into a plain dict with text, abstained and citations."""
    payload = {"text": a.text, "abstained": a.abstained}
    serialized = []
    for cite in a.citations:
        serialized.append({
            "chunk_id": cite.chunk_id,
            "source_path": cite.source_path,
            "title": cite.title,
            "supported": cite.supported,
        })
    payload["citations"] = serialized
    return payload
40
+
41
+
42
def answer_from_dict(d: dict) -> Answer:
    """Rebuild an Answer from the dict layout written by ``answer_to_dict``.

    ``text`` is required; ``citations`` defaults to an empty list and
    ``abstained`` to False when absent.

    Raises:
        KeyError: if ``text`` or any per-citation field is missing.
    """
    text = d["text"]
    restored = []
    for entry in d.get("citations", []):
        restored.append(
            Citation(entry["chunk_id"], entry["source_path"],
                     entry["title"], entry["supported"])
        )
    return Answer(text=text, citations=restored, abstained=d.get("abstained", False))
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import logging
6
+ import os
7
+ from pathlib import Path
8
+
9
+ from .embedder import Embedder
10
+
11
+ log = logging.getLogger("raggity.cached_embedder")
12
+
13
+
14
class CachedEmbedder(Embedder):
    """Embedder wrapper that memoizes document embeddings in a JSON file.

    Document vectors are keyed by the SHA-256 of the document text. Query
    embeddings and ``dim`` are delegated to the wrapped embedder uncached.

    NOTE(review): the key is derived from the text only, not from the inner
    embedder's model, so reusing one cache_path across models would return
    stale vectors; confirm callers use a per-model cache path.
    """

    def __init__(self, inner: Embedder, cache_path: str) -> None:
        self._inner = inner
        self._path = cache_path
        self._cache = self._load()

    def _load(self) -> dict:
        """Read the cache file; return ``{}`` if it is missing or unusable."""
        if not os.path.isfile(self._path):
            return {}
        try:
            with open(self._path, encoding="utf-8") as fh:
                data = json.load(fh)
        except Exception as exc:
            # Best-effort: a broken cache only costs re-embedding.
            log.warning("ignoring unreadable embed cache %s: %s", self._path, exc)
            return {}
        if not isinstance(data, dict):
            # Valid JSON of the wrong shape would break key lookups later.
            log.warning("ignoring unreadable embed cache %s: %s", self._path,
                        f"expected a JSON object, got {type(data).__name__}")
            return {}
        return data

    def _save(self) -> None:
        """Persist the cache via a temp file so a failed write keeps the old one."""
        target = Path(self._path)
        target.parent.mkdir(parents=True, exist_ok=True)
        scratch = target.with_name(f"{target.name}.{os.getpid()}.tmp")
        try:
            with open(scratch, "w", encoding="utf-8") as fh:
                json.dump(self._cache, fh)
            os.replace(scratch, target)
        except BaseException:
            scratch.unlink(missing_ok=True)
            raise

    @property
    def dim(self) -> int:
        return self._inner.dim

    def embed_query(self, text: str) -> list[float]:
        """Embed a query with the wrapped embedder (never cached)."""
        return self._inner.embed_query(text)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Return one vector per text, embedding only texts not yet cached.

        Identical texts within the batch are embedded once. The cache file is
        rewritten whenever at least one new vector was computed.

        Raises:
            ValueError: if the wrapped embedder returns a different number of
                vectors than the number of texts it was given.
        """
        keys = [hashlib.sha256(t.encode("utf-8")).hexdigest() for t in texts]
        # key -> text for every distinct uncached text, in first-seen order.
        pending: dict[str, str] = {}
        for key, text in zip(keys, texts):
            if key not in self._cache and key not in pending:
                pending[key] = text
        if pending:
            vectors = self._inner.embed_documents(list(pending.values()))
            # strict=True reports a short or long result here instead of a
            # KeyError in the final lookup.
            for key, vector in zip(pending, vectors, strict=True):
                self._cache[key] = vector
            self._save()
        return [self._cache[key] for key in keys]
raggity/chunker.py ADDED
@@ -0,0 +1,130 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import re
5
+
6
+ from .models import Chunk, Document
7
+
8
+ _HEADER_RE = re.compile(r"^(#{1,6})\s+(.*)$")
9
+
10
+
11
def estimate_tokens(text: str) -> int:
    """Estimate the token count of *text* as len(text) // 4, with a floor of 1."""
    # Dependency-free heuristic; the original author notes tiktoken is wrong
    # for Claude and unneeded here.
    approx = len(text) // 4
    return approx if approx > 1 else 1
14
+
15
+
16
+ def _split_into_sections(text: str) -> list[tuple[str, str]]:
17
+ """Return (heading_path, section_body) preserving markdown header hierarchy."""
18
+ sections: list[tuple[str, str]] = []
19
+ stack: list[str] = []
20
+ buf: list[str] = []
21
+
22
+ def flush():
23
+ body = "\n".join(buf).strip()
24
+ if body:
25
+ sections.append((" > ".join(stack), body))
26
+
27
+ for line in text.splitlines():
28
+ m = _HEADER_RE.match(line)
29
+ if m:
30
+ flush()
31
+ buf = []
32
+ level = len(m.group(1))
33
+ title = m.group(2).strip()
34
+ stack = stack[: level - 1]
35
+ while len(stack) < level - 1:
36
+ stack.append("")
37
+ stack.append(title)
38
+ else:
39
+ buf.append(line)
40
+ flush()
41
+ if not sections:
42
+ sections.append(("", text.strip()))
43
+ return sections
44
+
45
+
46
+ def _split_body(body: str, target_tokens: int, overlap_tokens: int) -> list[str]:
47
+ paras = [p.strip() for p in re.split(r"\n\s*\n", body) if p.strip()]
48
+ pieces: list[str] = []
49
+ cur: list[str] = []
50
+ cur_tok = 0
51
+ for para in paras:
52
+ ptok = estimate_tokens(para)
53
+ if cur and cur_tok + ptok > target_tokens:
54
+ pieces.append("\n\n".join(cur))
55
+ # overlap: carry tail paragraphs until ~overlap_tokens
56
+ carry: list[str] = []
57
+ ctok = 0
58
+ for prev in reversed(cur):
59
+ carry.insert(0, prev)
60
+ ctok += estimate_tokens(prev)
61
+ if ctok >= overlap_tokens:
62
+ break
63
+ cur = carry[:]
64
+ cur_tok = sum(estimate_tokens(x) for x in cur)
65
+ cur.append(para)
66
+ cur_tok += ptok
67
+ if cur:
68
+ pieces.append("\n\n".join(cur))
69
+ return pieces or [body]
70
+
71
+
72
def _chunk_flat(doc: Document, target_tokens: int, overlap_tokens: int) -> list[Chunk]:
    """Chunk *doc* section by section into one flat list of Chunks.

    Each chunk's text is prefixed with "<title> > <heading path>" (when that is
    non-empty), and its id is the SHA-256 of "<path>|<ordinal>|<piece>".
    """
    result: list[Chunk] = []
    for heading_path, body in _split_into_sections(doc.text):
        if heading_path:
            prefix = f"{doc.title} > {heading_path}"
        else:
            prefix = doc.title
        for piece in _split_body(body, target_tokens, overlap_tokens):
            # The ordinal is simply the chunk's position in the output list.
            position = len(result)
            digest = hashlib.sha256(
                f"{doc.path}|{position}|{piece}".encode("utf-8")
            ).hexdigest()
            result.append(
                Chunk(
                    text=f"{prefix}\n\n{piece}" if prefix else piece,
                    source_path=doc.path,
                    title=doc.title,
                    heading_path=heading_path or doc.title,
                    ordinal=position,
                    chunk_id=digest,
                )
            )
    return result
95
+
96
+
97
def _chunk_parent(doc: Document, parent_tokens: int, child_tokens: int,
                  overlap_tokens: int) -> list[Chunk]:
    """Chunk *doc* into child chunks that each carry their parent piece.

    Every section is first split into parent pieces of about *parent_tokens*;
    each parent piece is split again into child pieces of about *child_tokens*.
    One Chunk is emitted per child, holding the parent's id and full text.
    The parent id hashes "<path>|parent|<parent index>".
    """
    result: list[Chunk] = []
    parent_no = 0
    for heading_path, body in _split_into_sections(doc.text):
        prefix = f"{doc.title} > {heading_path}" if heading_path else doc.title

        def with_prefix(piece: str) -> str:
            # Prepend the heading path unless it is empty.
            return f"{prefix}\n\n{piece}" if prefix else piece

        for parent_piece in _split_body(body, parent_tokens, overlap_tokens):
            parent_text = with_prefix(parent_piece)
            parent_id = hashlib.sha256(
                f"{doc.path}|parent|{parent_no}".encode("utf-8")
            ).hexdigest()
            for child_piece in _split_body(parent_piece, child_tokens, overlap_tokens):
                # Ordinals run across the whole document, not per parent.
                position = len(result)
                child_id = hashlib.sha256(
                    f"{doc.path}|{position}|{child_piece}".encode("utf-8")
                ).hexdigest()
                result.append(Chunk(
                    text=with_prefix(child_piece),
                    source_path=doc.path,
                    title=doc.title,
                    heading_path=heading_path or doc.title,
                    ordinal=position,
                    chunk_id=child_id,
                    parent_id=parent_id,
                    parent_text=parent_text,
                ))
            parent_no += 1
    return result
122
+
123
+
124
def chunk_document(doc: Document, target_tokens: int = 512,
                   overlap_tokens: int = 64, parent_document: bool = False,
                   parent_target_tokens: int = 1024,
                   child_target_tokens: int = 256) -> list[Chunk]:
    """Chunk *doc* with the flat strategy or, if requested, the parent/child one.

    ``target_tokens`` applies to flat chunking only; ``parent_target_tokens``
    and ``child_target_tokens`` apply only when ``parent_document`` is true.
    ``overlap_tokens`` is used by both strategies.
    """
    if parent_document:
        return _chunk_parent(doc, parent_target_tokens, child_target_tokens,
                             overlap_tokens)
    return _chunk_flat(doc, target_tokens, overlap_tokens)
raggity/cli.py ADDED
@@ -0,0 +1,169 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ import typer
5
+ from rich.console import Console
6
+
7
+ from .config import load_config
8
+ from .core import Raggity
9
+ from .evaluate import evaluate, load_golden
10
+
11
+ app = typer.Typer(help="raggity — local-first RAG over your notes, answered by Claude.")
12
+ console = Console()
13
+
14
+
15
def _rag(config: str | None) -> Raggity:
    """Load the configuration (optionally from *config*) and build a Raggity."""
    cfg = load_config(config)
    return Raggity(cfg)
17
+
18
+
19
@app.command()
def ingest(config: str = typer.Option(None, "--config")):
    """Incrementally index configured source folders."""
    rag = _rag(config)
    report = rag.ingest()
    # One-line summary of what the ingest run changed.
    summary = (
        f"[green]Indexed.[/green] added={report.added} updated={report.updated} "
        f"deleted={report.deleted} unchanged={report.unchanged}"
    )
    console.print(summary)
27
+
28
+
29
@app.command()
def ask(question: str, config: str = typer.Option(None, "--config"),
        plain: bool = typer.Option(False, "--plain"),
        expand: bool = typer.Option(False, "--expand"),
        hyde: bool = typer.Option(False, "--hyde"),
        step_back: bool = typer.Option(False, "--step-back"),
        no_stream: bool = typer.Option(False, "--no-stream"),
        decompose: bool = typer.Option(False, "--decompose"),
        no_cache: bool = typer.Option(False, "--no-cache")):
    """Ask a question against your knowledge base."""
    import asyncio
    from rich.markup import escape

    rag = _rag(config)
    transforms_requested = expand or hyde or step_back

    def _show(text: str) -> None:
        # Answer text comes from the model and may contain square brackets
        # (markdown links, citation markers). Print it literally: with Rich
        # markup on, "[word]" is swallowed as a style tag and a stray
        # "[/word]" raises MarkupError.
        if plain:
            typer.echo(text)
        else:
            console.print(text, markup=False)

    if decompose:
        if transforms_requested:
            typer.echo("note: --decompose overrides other query transforms", err=True)
        typer.echo("Decomposing query (+model calls)…", err=True)
        answer = rag.ask_decompose(question)
        _show(answer.text)
    else:
        if transforms_requested:
            typer.echo("Query transforms enabled (+model calls)…", err=True)
        # A set flag is passed as True, an unset one as None (not False).
        expand_arg = True if expand else None
        hyde_arg = True if hyde else None
        step_back_arg = True if step_back else None
        use_cache_arg = False if no_cache else None
        if plain or no_stream:
            # Buffered path — honors cache (unless --no-cache)
            answer = rag.ask(question, expand=expand_arg, hyde=hyde_arg,
                             step_back=step_back_arg, use_cache=use_cache_arg)
            _show(answer.text)
        else:
            # Streaming path — default; always calls the model (cache is buffered-only)
            async def _stream():
                final = None
                async for piece in rag.aask_stream(question, expand=expand_arg,
                                                   hyde=hyde_arg,
                                                   step_back=step_back_arg):
                    if isinstance(piece, str):
                        print(piece, end="", flush=True)
                    else:
                        # The non-str item is the final Answer object.
                        final = piece
                print()
                return final

            answer = asyncio.run(_stream())

    if answer is None or not answer.citations or plain:
        return
    console.print("\n[dim]Sources:[/dim]")
    seen = set()
    for c in answer.citations:
        if not c.supported or c.source_path in seen:
            continue
        seen.add(c.source_path)
        # Escape the path so brackets in file names are shown, not parsed as tags.
        console.print(f" [dim]- {escape(c.source_path)}[/dim]")
85
+
86
+
87
@app.command()
def status(config: str = typer.Option(None, "--config")):
    """Show index statistics."""
    stats = _rag(config).status()
    # One "key: value" line per statistic, in the order the mapping yields them.
    for name, value in stats.items():
        line = f"{name}: {value}"
        console.print(line)
93
+
94
+
95
@app.command()
def reindex(config: str = typer.Option(None, "--config"),
            force: bool = typer.Option(False, "--force")):
    """Rebuild the index from scratch."""
    settings = load_config(config)
    if force:
        # Only --force deletes the on-disk index directory before ingesting;
        # a missing directory is not an error.
        shutil.rmtree(settings.index.path, ignore_errors=True)
    rag = Raggity(settings)
    report = rag.ingest()
    console.print(f"[green]Reindexed.[/green] added={report.added}")
104
+
105
+
106
@app.command(name="eval")
def eval_cmd(golden: str = typer.Argument(...),
             config: str = typer.Option(None, "--config"),
             k: int = typer.Option(5, "--k"),
             llm_judge: bool = typer.Option(False, "--llm-judge")):
    """Run free CPU retrieval metrics against a golden.jsonl set."""
    rag = _rag(config)
    if not llm_judge:
        # Default: retrieval-only metrics at cutoff k.
        res = evaluate(rag.retriever, load_golden(golden), k=k)
        console.print(f"Hit@{k}={res.hit_rate:.3f} MRR={res.mrr:.3f} "
                      f"Recall@{k}={res.recall:.3f} (n={res.n})")
        return

    typer.echo("Running LLM-judge eval (+2 model calls per question)…", err=True)
    import asyncio
    from .evaluate import llm_judge as run_judge

    # The judge runs with the same model and auth mode as answer generation.
    generation = rag.cfg.generation
    judging = run_judge(rag, load_golden(golden),
                        model=generation.model,
                        auth=generation.auth)
    res = asyncio.run(judging)
    console.print(f"Faithfulness={res.faithfulness:.3f} "
                  f"AnswerRelevance={res.answer_relevance:.3f} (n={res.n})")
    console.print("(note: self-assessed — same model family generates and grades)")
127
+
128
+
129
@app.command()
def watch(config: str = typer.Option(None, "--config"),
          debounce: float = typer.Option(2.0, "--debounce")):
    """Watch source folders and re-index on change (Ctrl-C to stop)."""
    import time

    rag = _rag(config)
    try:
        from .watch import run_watch
    except ImportError:
        # "\[" keeps Rich from parsing "[watch]" as a style tag and dropping it
        # from the printed pip command.
        console.print("[red]watch needs extra deps:[/red] pip install raggity\\[watch]")
        raise typer.Exit(1)
    try:
        observer = run_watch(rag, rag.cfg.sources.include, debounce)
    except RuntimeError as exc:
        # The exception text is arbitrary, so print it without markup parsing.
        console.print(str(exc), style="red", markup=False)
        raise typer.Exit(1)
    console.print(f"[green]Watching[/green] {len(rag.cfg.sources.include)} source pattern(s). Ctrl-C to stop.")
    try:
        # Idle in the foreground until the user interrupts.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
152
+
153
+
154
@app.command()
def serve(config: str = typer.Option(None, "--config"),
          host: str = typer.Option("127.0.0.1", "--host"),
          port: int = typer.Option(8000, "--port")):
    """Run the local HTTP API server."""
    try:
        import uvicorn
        from .server import create_app
    except ImportError:
        # "\[" keeps Rich from parsing "[server]" as a style tag and dropping it
        # from the printed pip command.
        console.print("[red]The server needs extra deps:[/red] pip install raggity\\[server]")
        raise typer.Exit(1)
    application = create_app(load_config(config))
    uvicorn.run(application, host=host, port=port)
166
+
167
+
168
+ if __name__ == "__main__":
169
+ app()
raggity/config.py ADDED
@@ -0,0 +1,87 @@
1
+ from __future__ import annotations
2
+
3
+ import tomllib
4
+ from pathlib import Path
5
+
6
+ import platformdirs
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class SourcesConfig(BaseModel):
    # Source patterns to index; empty by default. The CLI `watch` command
    # reports these as "source pattern(s)".
    include: list[str] = Field(default_factory=list)
12
+
13
+
14
class EmbeddingConfig(BaseModel):
    # Embedding model identifier.
    model: str = "BAAI/bge-small-en-v1.5"
    # Execution provider: cpu | cuda | directml | rocm
    provider: str = "cpu"
    batch_size: int = 256
    # NOTE(review): meaning of 0 is not visible here; confirm against the embedder.
    parallel: int = 0
    # Embedding cache switch; off by default.
    cache: bool = False
20
+
21
+
22
class RetrievalConfig(BaseModel):
    hybrid: bool = True
    rrf_k: int = 60
    candidates: int = 30
    rerank: bool = True
    rerank_model: str = "Xenova/ms-marco-MiniLM-L-6-v2"
    top_k: int = 5
    dedup_cosine: float = 0.92
    # Dense-cosine sufficiency floor, which governs abstention: abstain when
    # max_dense is below this value. Per the original note the signal is
    # reliable (~0.6–0.8 for relevant, ~0.43–0.47 for off-topic).
    sufficiency_floor: float = 0.5
    # OPTIONAL secondary rerank-score filter; 0.0 (the default) turns it off.
    # Applied only when rerank=True and relevance_floor > 0. Off by default
    # because the cross-encoder's absolute score is unreliable for abstention.
    relevance_floor: float = 0.0
    # Parent/child chunking switch and its two token targets.
    parent_document: bool = False
    parent_target_tokens: int = 1024
    child_target_tokens: int = 256
    # Query-transform defaults; the CLI `ask` flags can enable them per call.
    expand: bool = False
    expand_n: int = 3
    hyde: bool = False
    step_back: bool = False
44
+
45
+
46
class GenerationConfig(BaseModel):
    # Authentication mode: auto | subscription | api_key
    auth: str = "auto"
    # Model used for answer generation.
    model: str = "claude-opus-4-8"
    # Answer cache switch; off by default.
    cache: bool = False
50
+
51
+
52
class IndexConfig(BaseModel):
    # Index directory; `reindex --force` deletes it.
    path: str = ".raggity/index"
    # Vector store backend name.
    backend: str = "lancedb"
    # NOTE(review): presumably the row count at which an ANN index is built; confirm.
    ann_threshold: int = 50000
    # Qdrant-specific settings.
    qdrant_location: str = ":memory:"
    qdrant_collection: str = "raggity"
    qdrant_api_key: str | None = None
59
+
60
+
61
class RaggityConfig(BaseModel):
    # Root configuration: one sub-model per top-level key, each falling back
    # to its own defaults when that key is absent.
    sources: SourcesConfig = Field(default_factory=SourcesConfig)
    embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
    retrieval: RetrievalConfig = Field(default_factory=RetrievalConfig)
    generation: GenerationConfig = Field(default_factory=GenerationConfig)
    index: IndexConfig = Field(default_factory=IndexConfig)
67
+
68
+
69
+ def _find_config_path(explicit: str | None) -> Path | None:
70
+ if explicit:
71
+ return Path(explicit)
72
+ local = Path.cwd() / "raggity.toml"
73
+ if local.is_file():
74
+ return local
75
+ user = Path(platformdirs.user_config_dir("raggity")) / "raggity.toml"
76
+ if user.is_file():
77
+ return user
78
+ return None
79
+
80
+
81
def load_config(path: str | None = None) -> RaggityConfig:
    """Load and validate the raggity configuration.

    Uses ``_find_config_path`` to pick the file; when none is found an
    all-defaults RaggityConfig is returned. Otherwise the TOML file is parsed
    and validated into a RaggityConfig.
    """
    located = _find_config_path(path)
    if located is None:
        return RaggityConfig()
    # tomllib needs a binary file handle.
    with located.open("rb") as handle:
        parsed = tomllib.load(handle)
    return RaggityConfig.model_validate(parsed)