raggity 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raggity/__init__.py +1 -0
- raggity/answerer.py +101 -0
- raggity/cache.py +46 -0
- raggity/cached_embedder.py +50 -0
- raggity/chunker.py +130 -0
- raggity/cli.py +169 -0
- raggity/config.py +87 -0
- raggity/core.py +144 -0
- raggity/embedder.py +54 -0
- raggity/evaluate.py +118 -0
- raggity/indexer.py +103 -0
- raggity/loader.py +80 -0
- raggity/models.py +38 -0
- raggity/prompts.py +69 -0
- raggity/qdrant_store.py +121 -0
- raggity/query_transform.py +64 -0
- raggity/registry.py +30 -0
- raggity/reranker.py +31 -0
- raggity/retriever.py +136 -0
- raggity/server.py +48 -0
- raggity/store.py +185 -0
- raggity/watch.py +49 -0
- raggity-0.1.0.dist-info/METADATA +467 -0
- raggity-0.1.0.dist-info/RECORD +27 -0
- raggity-0.1.0.dist-info/WHEEL +4 -0
- raggity-0.1.0.dist-info/entry_points.txt +3 -0
- raggity-0.1.0.dist-info/licenses/LICENSE +661 -0
raggity/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
raggity/answerer.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
|
|
6
|
+
from claude_agent_sdk import query, ClaudeAgentOptions, AssistantMessage
|
|
7
|
+
|
|
8
|
+
from .models import Answer, Chunk
|
|
9
|
+
from .prompts import (SYSTEM_PROMPT, ABSTAIN_MESSAGE, build_user_prompt,
|
|
10
|
+
verify_citations)
|
|
11
|
+
from .registry import register
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Answerer(ABC):
    """Abstract interface for components that answer a question from chunks."""

    @abstractmethod
    async def answer(self, question: str, chunks: list[Chunk]) -> Answer:
        """Return an ``Answer`` to ``question`` built from ``chunks``.

        Subclasses must override this coroutine; the base implementation
        does nothing.
        """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ClaudeAgentAnswerer(Answerer):
    """Answerer that sends the retrieved chunks to Claude via the Agent SDK.

    ``auth`` selects how credentials are steered:

    * ``"subscription"`` - hand the SDK a copy of the environment without
      ``ANTHROPIC_API_KEY``.
    * ``"api_key"`` - require ``ANTHROPIC_API_KEY`` to be set.
    * any other value (``"auto"`` by default) - leave the environment alone.
    """

    def __init__(self, model: str = "claude-opus-4-8", auth: str = "auto") -> None:
        self.model = model
        self.auth = auth

    def _options(self) -> ClaudeAgentOptions:
        """Build the SDK options for the configured auth mode.

        Raises:
            RuntimeError: ``auth`` is ``"api_key"`` but ``ANTHROPIC_API_KEY``
                is unset or empty.
        """
        # Settings shared by every auth mode; only ``env`` differs.
        kwargs = {
            "system_prompt": SYSTEM_PROMPT,
            "model": self.model,
            "allowed_tools": [],
            "permission_mode": "dontAsk",
        }
        if self.auth == "subscription":
            # Subscription-primary: pass os.environ minus ANTHROPIC_API_KEY so
            # the Agent SDK cannot fall back to a per-token API key and must use
            # the `claude login` subscription session.
            # NOTE(review): this only works if the SDK uses ``env`` as the full
            # child environment rather than merging it over os.environ -
            # confirm against the installed SDK version.
            kwargs["env"] = {
                k: v for k, v in os.environ.items() if k != "ANTHROPIC_API_KEY"
            }
        elif self.auth == "api_key":
            # api_key mode: the SDK reads ANTHROPIC_API_KEY from the environment.
            if not os.environ.get("ANTHROPIC_API_KEY"):
                raise RuntimeError(
                    "auth='api_key' but ANTHROPIC_API_KEY is not set. "
                    "Set the key, or use auth='subscription' after `claude login`."
                )
        # Any other value behaves like "auto": environment left untouched.
        return ClaudeAgentOptions(**kwargs)

    @staticmethod
    def _text_blocks(message) -> list[str]:
        """Return the non-empty ``text`` of each content block of an AssistantMessage."""
        if not isinstance(message, AssistantMessage):
            return []
        texts = (getattr(block, "text", None) for block in message.content)
        return [text for text in texts if text]

    @staticmethod
    def _finalize(parts: list[str], chunks: list[Chunk]) -> Answer:
        """Join the collected text and turn it into an ``Answer``.

        The answer counts as abstained when the stripped text equals
        ``ABSTAIN_MESSAGE``; otherwise citations come from ``verify_citations``.
        """
        text = "".join(parts).strip()
        abstained = text == ABSTAIN_MESSAGE
        citations = [] if abstained else verify_citations(text, chunks)
        return Answer(text=text, citations=citations, abstained=abstained)

    async def answer_stream(self, question: str, chunks: list[Chunk]):
        """Yield each text block as a ``str`` as it arrives, then a final ``Answer``.

        With no chunks, yields ``ABSTAIN_MESSAGE`` followed by an abstained
        ``Answer`` without calling the model.
        """
        if not chunks:
            yield ABSTAIN_MESSAGE
            yield Answer(text=ABSTAIN_MESSAGE, citations=[], abstained=True)
            return
        prompt = build_user_prompt(question, chunks)
        opts = self._options()
        try:
            opts.include_partial_messages = True
        except (AttributeError, TypeError, ValueError):
            # Best effort: an options object that rejects the attribute just
            # runs without partial messages.
            pass
        parts: list[str] = []
        async for message in query(prompt=prompt, options=opts):
            for text in self._text_blocks(message):
                parts.append(text)
                yield text
        yield self._finalize(parts, chunks)

    async def answer(self, question: str, chunks: list[Chunk]) -> Answer:
        """Return the complete ``Answer`` for ``question``.

        With no chunks, returns an abstained ``Answer`` without calling the
        model.
        """
        if not chunks:
            return Answer(text=ABSTAIN_MESSAGE, citations=[], abstained=True)
        prompt = build_user_prompt(question, chunks)
        parts: list[str] = []
        async for message in query(prompt=prompt, options=self._options()):
            parts.extend(self._text_blocks(message))
        return self._finalize(parts, chunks)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
register("answerer", "claude", "raggity.answerer:ClaudeAgentAnswerer")
|
raggity/cache.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .models import Answer, Citation
|
|
9
|
+
|
|
10
|
+
log = logging.getLogger("raggity.cache")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def cache_key(question: str, chunk_ids: list[str], model: str) -> str:
    """Return the SHA-256 hex digest identifying a (question, chunks, model) triple.

    Chunk ids are sorted first, so the key does not depend on their order.
    """
    joined_ids = "|".join(sorted(chunk_ids))
    payload = "|".join([question, joined_ids, model])
    digest = hashlib.sha256(payload.encode("utf-8"))
    return digest.hexdigest()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load(path: str) -> dict:
    """Load the answer cache stored as JSON at ``path``.

    Returns an empty dict when the file does not exist, cannot be read or
    parsed, or holds valid JSON whose top-level value is not an object.
    Every case except a missing file logs a warning. Never raises for a bad
    cache file.
    """
    if not os.path.isfile(path):
        return {}
    try:
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except Exception as exc:
        # Deliberately best-effort: a broken cache must not break answering.
        log.warning("ignoring unreadable answer cache %s: %s", path, exc)
        return {}
    if not isinstance(data, dict):
        # Valid JSON of the wrong shape (e.g. a list) would break callers
        # that use the result as a mapping.
        log.warning("ignoring unreadable answer cache %s: %s", path,
                    "top-level JSON value is not an object")
        return {}
    return data
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def save(path: str, data: dict) -> None:
    """Write ``data`` as JSON to ``path``, creating parent directories.

    The JSON is written to a temporary sibling file and then moved over
    ``path`` with ``os.replace``. A failed or interrupted dump therefore
    leaves any existing cache file untouched instead of truncated.

    Raises whatever ``json.dump`` or the file operations raise.
    """
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    # The pid keeps concurrent writers from sharing one temp file.
    tmp_path = f"{path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as fh:
            json.dump(data, fh)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave a half-written temp file behind.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def answer_to_dict(a: Answer) -> dict:
    """Convert an ``Answer`` into a plain dict suitable for JSON storage."""
    serialized_citations = []
    for cite in a.citations:
        serialized_citations.append({
            "chunk_id": cite.chunk_id,
            "source_path": cite.source_path,
            "title": cite.title,
            "supported": cite.supported,
        })
    return {
        "text": a.text,
        "abstained": a.abstained,
        "citations": serialized_citations,
    }
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def answer_from_dict(d: dict) -> Answer:
    """Rebuild an ``Answer`` from a dict shaped like ``answer_to_dict`` output.

    ``text`` is required; ``citations`` defaults to an empty list and
    ``abstained`` to ``False``.
    """
    text = d["text"]
    citations = []
    for entry in d.get("citations", []):
        citations.append(
            Citation(entry["chunk_id"], entry["source_path"],
                     entry["title"], entry["supported"])
        )
    abstained = d.get("abstained", False)
    return Answer(text=text, citations=citations, abstained=abstained)
|
raggity/cached_embedder.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .embedder import Embedder
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger("raggity.cached_embedder")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class CachedEmbedder(Embedder):
    """Embedder wrapper that keeps document embeddings in a JSON file.

    Vectors are keyed by the SHA-256 of the document text. Only
    ``embed_documents`` is cached; ``embed_query`` and ``dim`` are passed
    straight to the wrapped embedder.
    """

    def __init__(self, inner: Embedder, cache_path: str) -> None:
        self._inner = inner
        self._path = cache_path
        self._cache = self._load()

    def _load(self) -> dict:
        """Read the cache file; return ``{}`` if it is missing or unusable."""
        if not os.path.isfile(self._path):
            return {}
        try:
            with open(self._path, encoding="utf-8") as fh:
                data = json.load(fh)
        except Exception as exc:
            # Best effort: a broken cache only costs re-embedding.
            log.warning("ignoring unreadable embed cache %s: %s", self._path, exc)
            return {}
        if not isinstance(data, dict):
            # Valid JSON of the wrong shape would break key lookups later.
            log.warning("ignoring unreadable embed cache %s: %s", self._path,
                        "top-level JSON value is not an object")
            return {}
        return data

    def _save(self) -> None:
        """Persist the cache via a temp file and ``os.replace``.

        A failed dump leaves the previous cache file intact.
        """
        Path(self._path).parent.mkdir(parents=True, exist_ok=True)
        tmp_path = f"{self._path}.{os.getpid()}.tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as fh:
                json.dump(self._cache, fh)
            os.replace(tmp_path, self._path)
        except BaseException:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise

    @property
    def dim(self) -> int:
        """Embedding dimensionality reported by the wrapped embedder."""
        return self._inner.dim

    def embed_query(self, text: str) -> list[float]:
        """Embed a query with the wrapped embedder (not cached)."""
        return self._inner.embed_query(text)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Return one vector per text, embedding only texts not yet cached.

        Identical uncached texts in the same call are embedded once. The
        cache file is rewritten whenever new vectors were computed.
        """
        keys = [hashlib.sha256(t.encode("utf-8")).hexdigest() for t in texts]
        # key -> text for everything that still needs embedding; a dict keeps
        # first-seen order and collapses duplicates.
        pending: dict[str, str] = {}
        for key, text in zip(keys, texts):
            if key not in self._cache and key not in pending:
                pending[key] = text
        if pending:
            vecs = self._inner.embed_documents(list(pending.values()))
            for key, vec in zip(pending, vecs):
                self._cache[key] = vec
            self._save()
        return [self._cache[k] for k in keys]
|
raggity/chunker.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
from .models import Chunk, Document
|
|
7
|
+
|
|
8
|
+
_HEADER_RE = re.compile(r"^(#{1,6})\s+(.*)$")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def estimate_tokens(text: str) -> int:
    """Estimate the token count of ``text`` as one token per four characters.

    The result is never less than 1, even for an empty string.
    """
    # Dependency-free heuristic; tiktoken is wrong for Claude and unneeded here.
    approx = len(text) // 4
    return approx if approx >= 1 else 1
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _split_into_sections(text: str) -> list[tuple[str, str]]:
    """Return (heading_path, section_body) preserving markdown header hierarchy.

    ``heading_path`` joins the enclosing heading titles with `` > ``. Heading
    lines are not part of the body, and sections whose body is blank are
    dropped. If nothing is produced, the whole stripped text is returned as
    one section with an empty heading path.

    Lines inside fenced code blocks (opened by a line starting with three
    backticks or three tildes) are never treated as headings, so a
    ``# comment`` in a code sample stays in the body.
    """
    sections: list[tuple[str, str]] = []
    stack: list[str] = []
    buf: list[str] = []
    fence: str | None = None  # opening marker while inside a fenced block

    def flush() -> None:
        body = "\n".join(buf).strip()
        if body:
            # Skipped levels are padded with "" in the stack; leave those out
            # so the path never contains empty segments like "A >  > C".
            sections.append((" > ".join(t for t in stack if t), body))

    for line in text.splitlines():
        stripped = line.lstrip(" ")
        # A fence may be indented by at most three spaces.
        marker = stripped[:3] if len(line) - len(stripped) <= 3 else ""
        if marker in ("```", "~~~"):
            if fence is None:
                fence = marker
            elif marker == fence:
                fence = None
            buf.append(line)
            continue

        m = _HEADER_RE.match(line) if fence is None else None
        if m is None:
            buf.append(line)
            continue

        flush()
        buf = []
        level = len(m.group(1))
        title = m.group(2).strip()
        # Drop deeper or sibling headings, pad skipped levels, then push.
        stack = stack[: level - 1]
        while len(stack) < level - 1:
            stack.append("")
        stack.append(title)
    flush()
    if not sections:
        sections.append(("", text.strip()))
    return sections
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _split_body(body: str, target_tokens: int, overlap_tokens: int) -> list[str]:
    """Pack the paragraphs of ``body`` into pieces of about ``target_tokens``.

    Paragraphs are separated by blank lines and are never split, so one
    paragraph larger than ``target_tokens`` becomes an oversized piece. When a
    piece is closed, its trailing paragraphs are carried into the next piece
    until at least ``overlap_tokens`` estimated tokens are carried. With
    ``overlap_tokens <= 0`` nothing is carried.

    Returns ``[body]`` when ``body`` contains no non-blank paragraph.
    """
    paras = [p.strip() for p in re.split(r"\n\s*\n", body) if p.strip()]
    pieces: list[str] = []
    cur: list[str] = []
    cur_tok = 0
    for para in paras:
        ptok = estimate_tokens(para)
        if cur and cur_tok + ptok > target_tokens:
            pieces.append("\n\n".join(cur))
            # overlap: carry tail paragraphs until ~overlap_tokens
            carry: list[str] = []
            ctok = 0
            if overlap_tokens > 0:
                for prev in reversed(cur):
                    carry.append(prev)
                    ctok += estimate_tokens(prev)
                    if ctok >= overlap_tokens:
                        break
                carry.reverse()  # collected tail-first; restore document order
            cur = carry
            cur_tok = ctok
        cur.append(para)
        cur_tok += ptok
    if cur:
        pieces.append("\n\n".join(cur))
    return pieces or [body]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _chunk_flat(doc: Document, target_tokens: int, overlap_tokens: int) -> list[Chunk]:
    """Split ``doc`` into a flat list of chunks, one per body piece.

    Each chunk's text is prefixed with the document title and heading path.
    The chunk id hashes the document path, the ordinal and the piece text.
    """
    result: list[Chunk] = []
    for heading_path, body in _split_into_sections(doc.text):
        if heading_path:
            prefix = f"{doc.title} > {heading_path}"
        else:
            prefix = doc.title
        for piece in _split_body(body, target_tokens, overlap_tokens):
            # The ordinal is the chunk's position in the document-wide list.
            position = len(result)
            text = f"{prefix}\n\n{piece}" if prefix else piece
            digest = hashlib.sha256(f"{doc.path}|{position}|{piece}".encode("utf-8"))
            result.append(
                Chunk(
                    text=text,
                    source_path=doc.path,
                    title=doc.title,
                    heading_path=heading_path or doc.title,
                    ordinal=position,
                    chunk_id=digest.hexdigest(),
                )
            )
    return result
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _chunk_parent(doc: Document, parent_tokens: int, child_tokens: int,
                  overlap_tokens: int) -> list[Chunk]:
    """Split ``doc`` into child chunks that each reference a larger parent.

    Every section body is cut into parent pieces, and every parent piece into
    child pieces. One chunk is emitted per child, carrying the parent's id
    and full text. The parent id hashes the document path and a running
    parent index; the child id hashes path, ordinal and child text.
    """
    result: list[Chunk] = []
    parents_seen = 0
    for heading_path, body in _split_into_sections(doc.text):
        if heading_path:
            prefix = f"{doc.title} > {heading_path}"
        else:
            prefix = doc.title
        for parent_piece in _split_body(body, parent_tokens, overlap_tokens):
            parent_text = f"{prefix}\n\n{parent_piece}" if prefix else parent_piece
            parent_key = f"{doc.path}|parent|{parents_seen}"
            parent_id = hashlib.sha256(parent_key.encode("utf-8")).hexdigest()
            for child_piece in _split_body(parent_piece, child_tokens, overlap_tokens):
                # The ordinal is the chunk's position in the document-wide list.
                position = len(result)
                child_text = f"{prefix}\n\n{child_piece}" if prefix else child_piece
                child_key = f"{doc.path}|{position}|{child_piece}"
                result.append(Chunk(
                    text=child_text,
                    source_path=doc.path,
                    title=doc.title,
                    heading_path=heading_path or doc.title,
                    ordinal=position,
                    chunk_id=hashlib.sha256(child_key.encode("utf-8")).hexdigest(),
                    parent_id=parent_id,
                    parent_text=parent_text,
                ))
            parents_seen += 1
    return result
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def chunk_document(doc: Document, target_tokens: int = 512,
                   overlap_tokens: int = 64, parent_document: bool = False,
                   parent_target_tokens: int = 1024,
                   child_target_tokens: int = 256) -> list[Chunk]:
    """Chunk ``doc`` with the flat or the parent/child strategy.

    With ``parent_document`` false, ``target_tokens`` sizes the chunks. With
    it true, ``parent_target_tokens`` and ``child_target_tokens`` are used and
    ``target_tokens`` is ignored. ``overlap_tokens`` applies to both.
    """
    if parent_document:
        return _chunk_parent(doc, parent_target_tokens, child_target_tokens,
                             overlap_tokens)
    return _chunk_flat(doc, target_tokens, overlap_tokens)
|
raggity/cli.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
import typer
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
|
|
7
|
+
from .config import load_config
|
|
8
|
+
from .core import Raggity
|
|
9
|
+
from .evaluate import evaluate, load_golden
|
|
10
|
+
|
|
11
|
+
app = typer.Typer(help="raggity — local-first RAG over your notes, answered by Claude.")
|
|
12
|
+
console = Console()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _rag(config: str | None) -> Raggity:
    # Load the configuration from the optional path and build a Raggity on it.
    settings = load_config(config)
    return Raggity(settings)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@app.command()
def ingest(config: str = typer.Option(None, "--config")):
    """Incrementally index configured source folders."""
    # Run one ingest pass and print the counters from its report.
    report = _rag(config).ingest()
    summary = (
        f"[green]Indexed.[/green] added={report.added} updated={report.updated} "
        f"deleted={report.deleted} unchanged={report.unchanged}"
    )
    console.print(summary)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@app.command()
def ask(question: str, config: str = typer.Option(None, "--config"),
        plain: bool = typer.Option(False, "--plain"),
        expand: bool = typer.Option(False, "--expand"),
        hyde: bool = typer.Option(False, "--hyde"),
        step_back: bool = typer.Option(False, "--step-back"),
        no_stream: bool = typer.Option(False, "--no-stream"),
        decompose: bool = typer.Option(False, "--decompose"),
        no_cache: bool = typer.Option(False, "--no-cache")):
    """Ask a question against your knowledge base."""
    import asyncio

    rag = _rag(config)
    transforms_requested = expand or hyde or step_back

    def _show(text: str) -> None:
        # --plain echoes the text verbatim; otherwise it goes through rich.
        if plain:
            typer.echo(text)
        else:
            console.print(text)

    if decompose:
        if transforms_requested:
            typer.echo("note: --decompose overrides other query transforms", err=True)
        typer.echo("Decomposing query (+model calls)…", err=True)
        answer = rag.ask_decompose(question)
        _show(answer.text)
    else:
        if transforms_requested:
            typer.echo("Query transforms enabled (+model calls)…", err=True)
        # An unset flag is forwarded as None rather than False.
        # NOTE(review): presumably None lets the configured default apply -
        # confirm in Raggity.ask / Raggity.aask_stream.
        expand_arg = True if expand else None
        hyde_arg = True if hyde else None
        step_back_arg = True if step_back else None
        use_cache_arg = False if no_cache else None
        if plain or no_stream:
            # Buffered path — honors cache (unless --no-cache)
            answer = rag.ask(question, expand=expand_arg, hyde=hyde_arg,
                             step_back=step_back_arg, use_cache=use_cache_arg)
            _show(answer.text)
        else:
            # Streaming path — default; always calls the model (cache is buffered-only)
            async def _stream():
                final = None
                async for piece in rag.aask_stream(question, expand=expand_arg,
                                                   hyde=hyde_arg,
                                                   step_back=step_back_arg):
                    if isinstance(piece, str):
                        print(piece, end="", flush=True)
                    else:
                        # The non-str item is the final answer object.
                        final = piece
                print()
                return final

            answer = asyncio.run(_stream())

    if answer is None or plain:
        return
    # Distinct supported sources in first-seen order. Collect them before
    # printing so the header is not shown when no citation is supported.
    sources = list(dict.fromkeys(
        c.source_path for c in (answer.citations or []) if c.supported
    ))
    if not sources:
        return
    console.print("\n[dim]Sources:[/dim]")
    for source in sources:
        console.print(f" [dim]- {source}[/dim]")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@app.command()
def status(config: str = typer.Option(None, "--config")):
    """Show index statistics."""
    # Print each entry of the status mapping on its own line.
    stats = _rag(config).status()
    for name, value in stats.items():
        console.print(f"{name}: {value}")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@app.command()
def reindex(config: str = typer.Option(None, "--config"),
            force: bool = typer.Option(False, "--force")):
    """Rebuild the index from scratch."""
    settings = load_config(config)
    # The existing index directory is only deleted with --force; without it
    # this is a plain ingest pass over the current index.
    if force:
        shutil.rmtree(settings.index.path, ignore_errors=True)
    rag = Raggity(settings)
    report = rag.ingest()
    console.print(f"[green]Reindexed.[/green] added={report.added}")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@app.command(name="eval")
def eval_cmd(golden: str = typer.Argument(...),
             config: str = typer.Option(None, "--config"),
             k: int = typer.Option(5, "--k"),
             llm_judge: bool = typer.Option(False, "--llm-judge")):
    """Run free CPU retrieval metrics against a golden.jsonl set."""
    rag = _rag(config)

    if not llm_judge:
        # Default: retrieval-only metrics.
        res = evaluate(rag.retriever, load_golden(golden), k=k)
        console.print(f"Hit@{k}={res.hit_rate:.3f} MRR={res.mrr:.3f} "
                      f"Recall@{k}={res.recall:.3f} (n={res.n})")
        return

    # --llm-judge: have the configured generation model grade the answers.
    typer.echo("Running LLM-judge eval (+2 model calls per question)…", err=True)
    import asyncio
    from .evaluate import llm_judge as run_judge

    generation = rag.cfg.generation
    judge_run = run_judge(rag, load_golden(golden),
                          model=generation.model,
                          auth=generation.auth)
    res = asyncio.run(judge_run)
    console.print(f"Faithfulness={res.faithfulness:.3f} "
                  f"AnswerRelevance={res.answer_relevance:.3f} (n={res.n})")
    console.print("(note: self-assessed — same model family generates and grades)")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@app.command()
def watch(config: str = typer.Option(None, "--config"),
          debounce: float = typer.Option(2.0, "--debounce")):
    """Watch source folders and re-index on change (Ctrl-C to stop)."""
    import time

    rag = _rag(config)
    try:
        from .watch import run_watch
    except ImportError:
        # The backslash stops rich from parsing "[watch]" as a markup tag,
        # which would drop it from the printed install hint.
        console.print("[red]watch needs extra deps:[/red] pip install raggity\\[watch]")
        raise typer.Exit(1)
    try:
        observer = run_watch(rag, rag.cfg.sources.include, debounce)
    except RuntimeError as exc:
        console.print(f"[red]{exc}[/red]")
        raise typer.Exit(1)
    console.print(f"[green]Watching[/green] {len(rag.cfg.sources.include)} source pattern(s). Ctrl-C to stop.")
    try:
        # Idle here until interrupted; run_watch's observer does the work.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        # Stop the observer on any exit path, not only on Ctrl-C.
        observer.stop()
        observer.join()
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@app.command()
def serve(config: str = typer.Option(None, "--config"),
          host: str = typer.Option("127.0.0.1", "--host"),
          port: int = typer.Option(8000, "--port")):
    """Run the local HTTP API server."""
    try:
        import uvicorn
        from .server import create_app
    except ImportError:
        # The backslash stops rich from parsing "[server]" as a markup tag,
        # which would drop it from the printed install hint.
        console.print("[red]The server needs extra deps:[/red] pip install raggity\\[server]")
        raise typer.Exit(1)
    # Build the app from the loaded configuration and serve it with uvicorn.
    uvicorn.run(create_app(load_config(config)), host=host, port=port)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
if __name__ == "__main__":
|
|
169
|
+
app()
|
raggity/config.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tomllib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import platformdirs
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SourcesConfig(BaseModel):
    # [sources] section: which source patterns to index.
    # Defaults to an empty list, i.e. nothing is indexed until configured.
    include: list[str] = Field(default_factory=list)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class EmbeddingConfig(BaseModel):
    # [embedding] section: embedding model and how it is run.
    model: str = "BAAI/bge-small-en-v1.5"
    provider: str = "cpu"  # cpu | cuda | directml | rocm
    batch_size: int = 256
    # NOTE(review): the meaning of 0 is not visible in this file - confirm
    # against the embedder implementation.
    parallel: int = 0
    # Embedding cache switch; off by default.
    cache: bool = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RetrievalConfig(BaseModel):
    # [retrieval] section: search, reranking, abstention and query-transform
    # settings.
    hybrid: bool = True
    rrf_k: int = 60
    candidates: int = 30
    rerank: bool = True
    rerank_model: str = "Xenova/ms-marco-MiniLM-L-6-v2"
    top_k: int = 5
    dedup_cosine: float = 0.92
    # Dense-cosine sufficiency floor: governs abstention. Reliable signal (~0.6–0.8
    # for relevant, ~0.43–0.47 for off-topic). When max_dense < this value, abstain.
    sufficiency_floor: float = 0.5
    # OPTIONAL secondary rerank-score filter. 0.0 = off (default). Only applied when
    # rerank=True and relevance_floor > 0. Cross-encoder absolute score is unreliable
    # for abstention, so this is off by default.
    relevance_floor: float = 0.0
    # Parent/child chunking; the two token targets below only matter when
    # parent_document is enabled.
    parent_document: bool = False
    parent_target_tokens: int = 1024
    child_target_tokens: int = 256
    # Query transforms, all off by default.
    expand: bool = False
    expand_n: int = 3
    hyde: bool = False
    step_back: bool = False
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class GenerationConfig(BaseModel):
    # [generation] section: which model answers and how it authenticates.
    auth: str = "auto"  # auto | subscription | api_key
    model: str = "claude-opus-4-8"
    # Answer cache switch; off by default.
    cache: bool = False
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class IndexConfig(BaseModel):
    # [index] section: where the index lives and which backend stores it.
    path: str = ".raggity/index"
    backend: str = "lancedb"
    ann_threshold: int = 50000
    # Settings for the qdrant backend.
    qdrant_location: str = ":memory:"
    qdrant_collection: str = "raggity"
    qdrant_api_key: str | None = None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class RaggityConfig(BaseModel):
    # Root configuration: one field per section, each falling back to that
    # section's defaults when absent.
    sources: SourcesConfig = Field(default_factory=SourcesConfig)
    embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
    retrieval: RetrievalConfig = Field(default_factory=RetrievalConfig)
    generation: GenerationConfig = Field(default_factory=GenerationConfig)
    index: IndexConfig = Field(default_factory=IndexConfig)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _find_config_path(explicit: str | None) -> Path | None:
    """Locate the configuration file to load.

    An explicit path is returned as given, without checking that it exists.
    Otherwise ``raggity.toml`` is looked up in the current working directory
    and then in the per-user config directory. Returns ``None`` if neither
    file exists.
    """
    if explicit:
        return Path(explicit)
    # Callables keep the lookup lazy: the user config directory is only
    # resolved when the working directory has no raggity.toml.
    search_dirs = (
        Path.cwd,
        lambda: Path(platformdirs.user_config_dir("raggity")),
    )
    for locate in search_dirs:
        candidate = locate() / "raggity.toml"
        if candidate.is_file():
            return candidate
    return None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def load_config(path: str | None = None) -> RaggityConfig:
    """Load and validate the raggity configuration.

    ``path`` is resolved with ``_find_config_path``. When no file is found,
    a ``RaggityConfig`` made entirely of defaults is returned.
    """
    cfg_path = _find_config_path(path)
    if cfg_path is not None:
        # tomllib requires the file to be opened in binary mode.
        with open(cfg_path, "rb") as fh:
            raw = tomllib.load(fh)
        return RaggityConfig.model_validate(raw)
    return RaggityConfig()
|