loki-mode 7.12.0 → 7.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,258 @@
1
+ """wiki_index.py -- dependency-free codebase index for the R5 auto-wiki.
2
+
3
+ Builds a line-anchored chunk index over a project's source files and provides
4
+ deterministic token-overlap retrieval. This is the grounding substrate for
5
+ cited answers: every chunk carries the REAL repo-relative file path and the
6
+ REAL start/end line numbers it came from, so a citation can always be checked
7
+ against the filesystem.
8
+
9
+ Reuse note: the token-overlap scoring (`_tokenize` + overlap weighting) is
10
+ ported from memory/knowledge_graph.py (OrganizationKnowledgeGraph), which scores
11
+ memory patterns the same way. knowledge_graph.py is NOT a code index (it
12
+ aggregates .loki/memory/semantic patterns), so the code scanning/chunking here
13
+ is new. retrieval.py is a memory retriever, not a code indexer, so it is not
14
+ reused for code retrieval.
15
+
16
+ No third-party dependencies. CI-safe (no Docker, no network, no provider).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import hashlib
22
+ import json
23
+ import os
24
+ import re
25
+ import subprocess
26
+ from pathlib import Path
27
+
28
# File extensions treated as indexable source code. Intentionally broad, but
# lockfiles and binary formats are left out.
SOURCE_EXTS = set(
    ".py .js .ts .tsx .jsx .mjs .cjs "
    ".rs .go .rb .java .kt .kts .c .cc .cpp "
    ".h .hpp .cs .php .swift .sh .bash .sql "
    ".vue .svelte .scala .clj .ex .exs .lua .r".split()
)

# Directory names that are never indexed, wherever they appear in a path.
SKIP_DIRS = set(
    "node_modules .git vendor __pycache__ dist build "
    ".next target .venv venv coverage .loki .cache "
    "out .turbo .pytest_cache .mypy_cache".split()
)

# Lines per chunk. Chunks do not overlap and are anchored to real line numbers.
CHUNK_LINES = 60
# Upper bound on indexed files so very large repositories stay cheap.
MAX_FILES = 800
# Files larger than this (typically generated or minified) are skipped.
MAX_FILE_BYTES = 400_000

# Words ignored by the tokenizer: English filler plus common code keywords.
# Ported from memory/knowledge_graph.py (_tokenize / _STOPWORDS).
_STOPWORDS = set(
    "the a an to for of and or with without "
    "is are be up on in by not this that "
    "from as at it if do we my our how "
    "def self return import const let var function".split()
)
54
+
55
+
56
def _tokenize(text):
    """Convert free text into a set of lowercase search tokens.

    The text is lowercased and split on every run of characters other than
    ``a-z``, ``0-9`` and ``_``. Tokens of two characters or fewer and tokens
    listed in ``_STOPWORDS`` are discarded. ``None`` or empty input yields an
    empty set.

    Ported from knowledge_graph.OrganizationKnowledgeGraph._tokenize so wiki
    retrieval scores text the same way memory-pattern retrieval does.
    """
    lowered = str(text or "").lower()
    tokens = set()
    for piece in re.split(r"[^a-z0-9_]+", lowered):
        if len(piece) <= 2 or piece in _STOPWORDS:
            continue
        tokens.add(piece)
    return tokens
64
+
65
+
66
def _git_tracked_files(root):
    """Return the git-tracked file paths under ``root``, or None.

    Runs ``git -C <root> ls-files -z``. The ``-z`` flag makes git emit
    NUL-terminated, unquoted paths. Without it, any name containing a double
    quote, backslash, control character or (with the default core.quotePath)
    non-ASCII byte is printed C-quoted and escaped, so the returned string
    does not name a file that exists on disk. Splitting on NUL also preserves
    leading and trailing spaces in file names.

    Output is captured as bytes and each path is decoded as UTF-8. A path
    that is not valid UTF-8 is skipped rather than aborting the whole listing.

    Returns:
        A non-empty list of path strings as printed by git, or None when git
        is missing, times out, exits non-zero (for example, ``root`` is not
        inside a repository), or lists no files. None tells the caller to
        fall back to a filesystem walk.
    """
    try:
        proc = subprocess.run(
            ["git", "-C", str(root), "ls-files", "-z"],
            capture_output=True, timeout=30,
        )
    except (OSError, subprocess.SubprocessError):
        return None
    if proc.returncode != 0:
        return None

    files = []
    for raw in proc.stdout.split(b"\0"):
        if not raw:
            continue  # trailing terminator leaves one empty field
        try:
            files.append(raw.decode("utf-8"))
        except UnicodeDecodeError:
            continue
    return files or None
79
+
80
+
81
def _walk_files(root):
    """List every file under ``root`` by walking the filesystem.

    Fallback used when ``root`` is not a git repository. Directories whose
    name is in ``SKIP_DIRS`` are pruned and never descended into. Returned
    paths are relative to ``root``, in ``os.walk`` order (unsorted).
    """
    base = Path(root)
    found = []
    for current, subdirs, names in os.walk(base):
        # Prune in place: os.walk only descends into what remains in the list.
        subdirs[:] = [name for name in subdirs if name not in SKIP_DIRS]
        found.extend(
            os.path.relpath(os.path.join(current, name), base)
            for name in names
        )
    return found
91
+
92
+
93
def list_source_files(root):
    """Return the sorted, capped list of indexable source files under ``root``.

    Candidates come from ``git ls-files`` when available (so .gitignore is
    respected), otherwise from a filtered filesystem walk. A candidate is kept
    only if all of the following hold:

    - no component of its path is in ``SKIP_DIRS`` (tracked files can still
      live in noise directories if they were committed);
    - its lowercased extension is in ``SOURCE_EXTS``;
    - it is a regular file no larger than ``MAX_FILE_BYTES``.

    The survivors are sorted and truncated to the first ``MAX_FILES`` entries.
    """
    root = Path(root)
    candidates = _git_tracked_files(root)
    if candidates is None:
        candidates = _walk_files(root)

    def _is_indexable(rel):
        if not SKIP_DIRS.isdisjoint(Path(rel).parts):
            return False
        if os.path.splitext(rel)[1].lower() not in SOURCE_EXTS:
            return False
        path = root / rel
        try:
            return path.is_file() and path.stat().st_size <= MAX_FILE_BYTES
        except OSError:
            # Unreadable or vanished entries are silently left out.
            return False

    return sorted(rel for rel in candidates if _is_indexable(rel))[:MAX_FILES]
124
+
125
+
126
def _read_lines(abs_path):
    """Read a text file and return its lines without line terminators.

    The file is decoded as UTF-8 with undecodable bytes replaced. Lines are
    split only at newlines as seen through Python's universal-newline text
    mode (LF, CRLF and CR), which matches the line count used when citations
    are validated against the file and the numbering shown by git and editors.

    ``str.splitlines()`` is deliberately not used: it also breaks on form
    feed, vertical tab, U+0085, U+2028 and similar characters, so a single
    form feed in a source file would shift every later line number and make
    citations point at the wrong line.

    Returns:
        The list of lines, or an empty list if the file cannot be opened or
        read.
    """
    try:
        with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
            # Each yielded line ends with at most one "\n" (none on the last
            # line of a file without a trailing newline).
            return [line.rstrip("\n") for line in f]
    except OSError:
        return []
132
+
133
+
134
def build_index(root):
    """Build a line-anchored chunk index over the project's source files.

    Each indexed file is cut into consecutive, non-overlapping windows of
    ``CHUNK_LINES`` lines. Windows containing only blank lines are dropped.
    Chunk ids are assigned sequentially from 0 in emission order.

    Returns a dict of the form:
        {
          "root": <resolved absolute root, as str>,
          "files": [<path relative to root>, ...],
          "chunks": [
            {"id": int, "file": <rel>, "start_line": int, "end_line": int,
             "text": <chunk lines joined with newlines>},
            ...
          ],
        }
    Chunk and file paths are relative to root; only "root" itself is
    absolute. Line numbers are 1-based and inclusive.
    """
    root = Path(root).resolve()
    files = list_source_files(root)
    chunks = []
    for rel in files:
        lines = _read_lines(root / rel)
        offset = 0
        while offset < len(lines):
            window = lines[offset:offset + CHUNK_LINES]
            if any(line.strip() for line in window):
                chunks.append({
                    # Ids only advance for emitted chunks, so the current
                    # list length is always the next free id.
                    "id": len(chunks),
                    "file": rel,
                    "start_line": offset + 1,
                    "end_line": offset + len(window),
                    "text": "\n".join(window),
                })
            offset += CHUNK_LINES
    return {"root": str(root), "files": files, "chunks": chunks}
171
+
172
+
173
def retrieve(index, query, k=6):
    """Return the top ``k`` chunks for ``query`` by deterministic token overlap.

    Each chunk in ``index["chunks"]`` is scored as:

    - 3 points per query token that also appears in the chunk text;
    - 2 points per query token that also appears in the chunk's file path
      (file and directory names are strong signals);
    - 4 bonus points when the lowercased query is longer than 3 characters
      and occurs verbatim in the lowercased chunk text.

    Chunks scoring zero are discarded. Results are ordered by descending
    score, with ties broken by ascending chunk id so the ordering is stable
    and reproducible. No LLM and no network are involved.
    """
    query_tokens = _tokenize(query)
    needle = str(query or "").lower()
    wants_verbatim = bool(needle) and len(needle) > 3

    ranked = []
    for chunk in index.get("chunks", []):
        body = chunk.get("text", "")
        points = 3 * len(query_tokens & _tokenize(body))
        points += 2 * len(query_tokens & _tokenize(chunk.get("file", "")))
        if wants_verbatim and needle in body.lower():
            points += 4
        if points > 0:
            # Negated score lets one ascending sort put best matches first.
            ranked.append((-points, chunk["id"], chunk))

    ranked.sort(key=lambda item: item[:2])
    return [chunk for _, _, chunk in ranked[:k]]
201
+
202
+
203
def compute_signature(root):
    """Return a SHA-256 hex signature of the indexed source set.

    The digest covers, in order:

    - ``head:<commit>`` from ``git rev-parse HEAD``, when git succeeds;
    - for every file from ``list_source_files``, its relative path followed
      by the SHA-256 digest of its bytes.

    Callers compare signatures to skip regeneration when nothing changed.
    Because HEAD is part of the input, a new commit changes the signature
    even if no indexed file changed. Files that cannot be read are left out
    of the digest.
    """
    root = Path(root).resolve()
    digest = hashlib.sha256()

    try:
        rev = subprocess.run(
            ["git", "-C", str(root), "rev-parse", "HEAD"],
            capture_output=True, text=True, timeout=15,
        )
    except (OSError, subprocess.SubprocessError):
        rev = None  # git unavailable or timed out: hash file contents only
    if rev is not None and rev.returncode == 0:
        digest.update(b"head:" + rev.stdout.strip().encode())

    for rel in list_source_files(root):
        try:
            content = (root / rel).read_bytes()
        except OSError:
            continue
        digest.update(rel.encode("utf-8"))
        digest.update(hashlib.sha256(content).digest())
    return digest.hexdigest()
230
+
231
+
232
def extract_definitions(root, rel, limit=12):
    """Find definition lines in one file, for code-derived citations.

    Scans ``root / rel`` line by line against a small set of regexes covering
    several languages. For each line, the first pattern that matches wins and
    contributes one entry. Scanning stops once ``limit`` entries have been
    collected.

    Returns:
        A list of ``{"name": str, "line": int}`` where ``line`` is the
        1-based number of the matching line. Only matches actually present in
        the file are emitted; an unreadable file yields an empty list.
    """
    lines = _read_lines(Path(root) / rel)
    patterns = [
        re.compile(source)
        for source in (
            # Python functions and classes (also matches `class` elsewhere).
            r"^\s*def\s+([A-Za-z_][A-Za-z0-9_]*)",
            r"^\s*class\s+([A-Za-z_][A-Za-z0-9_]*)",
            # JavaScript/TypeScript function declarations and arrow consts.
            r"^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)",
            r"^\s*(?:export\s+)?const\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?\(",
            # Rust functions.
            r"^\s*(?:pub\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)",
            # Go functions and methods (optional receiver).
            r"^\s*func\s+(?:\([^)]*\)\s*)?([A-Za-z_][A-Za-z0-9_]*)",
            # Shell functions of the form `name() {`.
            r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*\(\)\s*\{",
        )
    ]

    defs = []
    for lineno, text in enumerate(lines, start=1):
        attempts = (pattern.match(text) for pattern in patterns)
        hit = next((m for m in attempts if m), None)
        if hit is not None:
            defs.append({"name": hit.group(1), "line": lineno})
        if len(defs) >= limit:
            break
    return defs
@@ -0,0 +1,140 @@
1
+ """wiki_llm.py -- stub-aware LLM invocation + citation validation for R5 wiki.
2
+
3
+ Keeps every paid-call and grounding-guarantee concern in one place so the
4
+ generator and the ask script behave identically.
5
+
6
+ LLM stub contract (CI-safe, zero paid calls in tests):
7
+ LOKI_WIKI_LLM_STUB unset -> call the real provider (claude -p / codex / ...)
8
+ via the same mechanism loki docs uses; if no
9
+ provider is on PATH, return None (callers then
10
+ fall back to extractive/template output).
11
+ LOKI_WIKI_LLM_STUB=<file path> -> read the completion from that file.
12
+ LOKI_WIKI_LLM_STUB=<other> -> use the value literally as the completion.
13
+
14
+ No third-party dependencies.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import os
20
+ import re
21
+ import subprocess
22
+ from pathlib import Path
23
+
24
+
25
def invoke_llm(prompt, timeout=120):
    """Return the LLM completion for ``prompt``, or None if unavailable.

    Stub mode: when ``LOKI_WIKI_LLM_STUB`` is set (even to an empty string) no
    provider is called. If the value contains a path separator or ends with
    ``.txt`` and names an existing file, that file's text is returned (an
    empty string if reading fails). Otherwise the value itself is returned.

    Provider mode: the provider name comes from ``LOKI_PROVIDER`` (default
    ``claude``), overridden by a non-empty ``.loki/state/provider`` file in
    the current working directory. The provider CLI is run with the prompt as
    an argument and stdin closed, wrapped in ``timeout``/``gtimeout`` when one
    is on PATH. Returns None when the provider is unknown or not on PATH, when
    the process cannot be run or times out, or when it exits non-zero with no
    output. Non-empty output is returned even on a non-zero exit.
    """
    canned = os.environ.get("LOKI_WIKI_LLM_STUB")
    if canned is not None:
        looks_like_path = os.path.sep in canned or canned.endswith(".txt")
        if looks_like_path and Path(canned).is_file():
            try:
                return Path(canned).read_text(encoding="utf-8", errors="replace")
            except OSError:
                return ""
        # Not a readable file: the value is the completion, taken literally.
        return canned

    provider = os.environ.get("LOKI_PROVIDER", "claude")
    override = Path(".loki/state/provider")
    if override.is_file():
        try:
            provider = override.read_text().strip() or provider
        except OSError:
            pass  # unreadable state file: keep the environment/default value

    argv_by_provider = {
        "claude": ["claude", "-p", prompt],
        "codex": ["codex", "exec", "--full-auto", prompt],
        "cline": ["cline", "-y", prompt],
        "aider": ["aider", "--message", prompt, "--yes-always", "--no-auto-commits"],
    }
    argv = argv_by_provider.get(provider)
    if argv is None or not _which(argv[0]):
        return None

    # Prefer an external timeout wrapper if present (matches the bash docs
    # helper); the subprocess timeout below is a slightly longer backstop.
    wrapper = next((c for c in ("timeout", "gtimeout") if _which(c)), None)
    if wrapper:
        argv = [wrapper, str(timeout)] + argv

    try:
        result = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=timeout + 10,
            stdin=subprocess.DEVNULL,
        )
    except (OSError, subprocess.SubprocessError):
        return None

    failed_silently = result.returncode != 0 and not result.stdout.strip()
    return None if failed_silently else result.stdout
78
+
79
+
80
def _which(name):
    """Return the full path of executable ``name`` found on PATH, or None.

    Delegates to ``shutil.which`` instead of a hand-rolled PATH scan. The
    standard helper applies the platform's own lookup rules: it honours
    PATHEXT on Windows (so ``claude`` finds ``claude.cmd``) and falls back to
    the system default path when PATH is unset. On POSIX with PATH set, the
    result is the first ``<dir>/<name>`` that is an executable regular file.
    """
    import shutil  # local import: keeps this block self-contained

    return shutil.which(name)
86
+
87
+
88
_CITE_RE = re.compile(r"\[(\d+)\]")
# The same marker plus any spaces/tabs directly in front of it, so that a
# dropped marker can take its own leading whitespace with it.
_CITE_WITH_LEAD_RE = re.compile(r"([ \t]*)\[(\d+)\]")


def map_and_validate_citations(answer_text, chunks, root):
    """Map [n] markers in ``answer_text`` to real {file, line} citations.

    ``chunks`` is the numbered chunk list shown to the LLM; ``chunks[n-1]`` is
    the chunk referenced by ``[n]`` (1-based). A citation is kept only if:
      - the index is in range (it references a chunk we actually supplied),
      - the chunk names a file, and that file exists under ``root``, and
      - the chunk's start_line is between 1 and the file's line count.
    Any marker that fails these checks is removed from the text.

    Only the whitespace directly in front of a removed marker is removed with
    it. All other whitespace in the answer, including indentation inside code
    snippets, is left untouched. Each distinct index is resolved against the
    filesystem at most once per call.

    Returns:
        ``(clean_text, citations)``. ``citations`` is a de-duplicated list of
        ``{"file": rel, "line": int}`` in first-appearance order, and
        ``clean_text`` has surviving ``[n]`` markers rewritten to
        ``[file:line]``. ``None`` input is treated as an empty answer.
    """
    root = Path(root)
    citations = []
    seen = set()
    resolved = {}  # idx -> citation dict, or None when it does not resolve

    def _lookup(idx):
        if idx < 1 or idx > len(chunks):
            return None
        ch = chunks[idx - 1]
        rel = ch.get("file")
        if not isinstance(rel, str) or not rel:
            return None  # a chunk without a file cannot be cited
        line = int(ch.get("start_line", 1))
        abs_path = root / rel
        try:
            if not abs_path.is_file():
                return None
            with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
                nlines = sum(1 for _ in f)
        except OSError:
            return None
        # max(..., 1) lets line 1 of an empty file count as valid.
        if line < 1 or line > max(nlines, 1):
            return None
        return {"file": rel, "line": line}

    def _resolve(idx):
        if idx not in resolved:
            resolved[idx] = _lookup(idx)
        return resolved[idx]

    def _sub(m):
        lead, idx = m.group(1), int(m.group(2))
        cite = _resolve(idx)
        if cite is None:
            # Drop the bogus marker together with the whitespace before it,
            # so no doubled space is left behind.
            return ""
        key = (cite["file"], cite["line"])
        if key not in seen:
            seen.add(key)
            citations.append(cite)
        return "%s[%s:%d]" % (lead, cite["file"], cite["line"])

    clean = _CITE_WITH_LEAD_RE.sub(_sub, answer_text or "")
    return clean, citations
package/autonomy/loki CHANGED
@@ -13205,6 +13205,9 @@ main() {
13205
13205
  docs)
13206
13206
  cmd_docs "$@"
13207
13207
  ;;
13208
+ wiki)
13209
+ cmd_wiki "$@"
13210
+ ;;
13208
13211
  magic)
13209
13212
  cmd_magic "$@"
13210
13213
  ;;
@@ -22633,6 +22636,124 @@ run_debate(
22633
22636
  log_info "Debate complete"
22634
22637
  }
22635
22638
 
22639
+ # =============================================================================
22640
+ # loki wiki -- auto-generated, cited per-project codebase wiki + Q&A (R5).
22641
+ #
22642
+ # Loki's answer to Devin DeepWiki: a persistent, queryable wiki built from the
22643
+ # codebase, where every section cites the real source files it came from, plus
22644
+ # a grounded `ask` that returns cited answers (file:line). Heavy work lives in
22645
+ # the Python core (autonomy/lib/wiki-generator.py, wiki-ask.py, wiki_index.py);
22646
+ # this is a thin dispatcher, mirroring how cmd_proof delegates to the proof
22647
+ # generator. Generation is incremental (skips when the codebase is unchanged).
22648
+ # =============================================================================
22649
# Dispatch `loki wiki <command>` to the matching helper, or print usage.
# An empty command, `help`, `-h` and `--help` print the usage text and return
# 0; an unrecognised command logs an error and returns 1. For generate/show/
# ask the helper's exit status is returned.
cmd_wiki() {
    local action="${1:-}"
    shift 2>/dev/null || true

    # The Python backends live next to this script under lib/.
    local lib_dir="${_LOKI_SCRIPT_DIR}/lib"

    case "$action" in
        generate)
            _wiki_generate "$lib_dir" "$@"
            ;;
        show)
            _wiki_show "$@"
            ;;
        ask)
            _wiki_ask "$lib_dir" "$@"
            ;;
        --help|-h|help|"")
            # Only the title line needs -e, for the colour variables.
            echo -e "${BOLD}loki wiki${NC} - Auto-generated, cited codebase wiki + Q&A"
            printf '%s\n' \
                "" \
                "Usage: loki wiki <command> [options]" \
                "" \
                "Commands:" \
                " generate [path] [--force] Build/refresh the cited wiki in .loki/wiki/" \
                " show [section] Print the wiki (or one section: architecture|modules|data-flow)" \
                " ask \"<question>\" Cited answer grounded in the codebase (file:line)" \
                "" \
                "Each wiki section cites the real source files it was built from." \
                "Generation is incremental: it skips when the codebase is unchanged." \
                "" \
                "Examples:" \
                " loki wiki generate # build wiki for current project" \
                " loki wiki show architecture # show one section" \
                " loki wiki ask \"how does the cli dispatch commands\""
            return 0
            ;;
        *)
            log_error "Unknown wiki command: $action"
            echo "Run 'loki wiki --help' for usage."
            return 1
            ;;
    esac
}
22685
+
22686
# Run the Python wiki generator, forwarding all remaining arguments.
#   $1   directory containing wiki-generator.py
#   $2.. passed through to the generator unchanged
# Returns 1 if python3 is not on PATH, otherwise the generator's exit status.
_wiki_generate() {
    local lib_dir="$1"
    shift

    command -v python3 >/dev/null 2>&1 || {
        log_error "python3 is required for 'loki wiki generate'"
        return 1
    }

    python3 "$lib_dir/wiki-generator.py" "$@"
}
22694
+
22695
# Print the generated wiki from ./.loki/wiki: index.md by default, or
# <section>.md when a section name is given. If several positional arguments
# are passed, the last one is used as the section.
# Returns 0 on success or --help, 1 on an unknown option or a missing
# wiki directory/page.
_wiki_show() {
    local target="."
    local section=""
    local arg

    for arg in "$@"; do
        case "$arg" in
            --help|-h)
                echo "Usage: loki wiki show [section]"
                echo "Sections: architecture, modules, data-flow"
                return 0
                ;;
            -*)
                log_error "Unknown option: $arg"
                return 1
                ;;
            *)
                section="$arg"
                ;;
        esac
    done

    local wiki_dir="$target/.loki/wiki"
    if [ ! -d "$wiki_dir" ]; then
        log_error "No wiki found. Run 'loki wiki generate' first."
        return 1
    fi

    # Default to the index page; switch page and error text for a section.
    local page="$wiki_dir/index.md"
    local missing="Wiki index not found. Run 'loki wiki generate'."
    if [ -n "$section" ]; then
        page="$wiki_dir/${section}.md"
        missing="No such section: $section (try: architecture, modules, data-flow)"
    fi

    if [ ! -f "$page" ]; then
        log_error "$missing"
        return 1
    fi
    cat "$page"
}
22730
+
22731
# Ask a grounded question about the codebase via the Python backend.
#   $1   directory containing wiki-ask.py
#   $2.. question words and options: --json, --quiet, --k N, --help/-h
# All non-option arguments are joined with single spaces into one question.
# Returns 0 after --help, 1 when no question was given or python3 is missing,
# otherwise the backend's exit status.
_wiki_ask() {
    local lib_dir="$1"; shift
    local question=""
    local extra=()
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --help|-h)
                echo "Usage: loki wiki ask \"<question>\" [--json] [--k N]"
                return 0
                ;;
            --json|--quiet)
                extra+=("$1")
                shift
                ;;
            --k)
                # When --k is the last argument, `shift 2` fails and shifts
                # nothing, so the loop would never terminate. Consume the flag
                # first and its value only if one is present (default 6).
                extra+=("--k" "${2:-6}")
                shift
                if [[ $# -gt 0 ]]; then
                    shift
                fi
                ;;
            *)
                if [ -z "$question" ]; then
                    question="$1"
                else
                    question="$question $1"
                fi
                shift
                ;;
        esac
    done
    if [ -z "$question" ]; then
        log_error "Provide a question: loki wiki ask \"how does X work\""
        return 1
    fi
    if ! command -v python3 >/dev/null 2>&1; then
        log_error "python3 is required for 'loki wiki ask'"
        return 1
    fi
    # ${extra[@]+...} expands to nothing for an empty array. A plain
    # "${extra[@]}" is an unbound-variable error under `set -u` on bash
    # older than 4.4 (for example the bash 3.2 shipped with macOS).
    python3 "$lib_dir/wiki-ask.py" --question "$question" ${extra[@]+"${extra[@]}"}
}
22756
+
22636
22757
  cmd_magic() {
22637
22758
  local subcmd="${1:-help}"
22638
22759
  shift 2>/dev/null || true
package/bin/loki CHANGED
@@ -116,7 +116,7 @@ fi
116
116
  # Two-token routes (provider show/list, memory list/index) match on the first
117
117
  # token only; the Bun dispatcher handles subcommand routing internally.
118
118
  case "${1:-}" in
119
- version|--version|-v|status|stats|doctor|provider|memory|rollback|internal|kpis|proof)
119
+ version|--version|-v|status|stats|doctor|provider|memory|rollback|internal|kpis|proof|wiki)
120
120
  # v7.5.2: rollback added (wires loki-ts/src/commands/rollback.ts).
121
121
  # v7.5.3: internal added for autonomy/run.sh phase1-hooks calls.
122
122
  # v7.5.28: kpis added (Phase K MVP: read-only KPI snapshot).
@@ -7,7 +7,7 @@ Modules:
7
7
  control: Session control API (start/stop/pause/resume)
8
8
  """
9
9
 
10
- __version__ = "7.12.0"
10
+ __version__ = "7.13.0"
11
11
 
12
12
  # Expose the control app for easy import
13
13
  try:
@@ -7507,6 +7507,114 @@ async def get_proof_html(run_id: str):
7507
7507
  return FileResponse(str(index_html), media_type="text/html")
7508
7508
 
7509
7509
 
7510
+ # ---------------------------------------------------------------------------
7511
+ # R5: Auto-wiki + cited codebase Q&A (Loki's DeepWiki).
7512
+ #
7513
+ # Surfaces the per-project wiki generated by autonomy/lib/wiki-generator.py
7514
+ # (stored under <project>/.loki/wiki/) and the grounded `ask` flow
7515
+ # (autonomy/lib/wiki-ask.py). Citations are file:line and always point at real
7516
+ # code -- the generator/ask scripts validate every citation against the
7517
+ # filesystem before emitting it, so the dashboard never shows a fabricated one.
7518
+ #
7519
+ # The section param is traversal-safe, mirroring _safe_proof_run_dir: only the
7520
+ # known section ids are accepted, so no arbitrary path can be read.
7521
+ # ---------------------------------------------------------------------------
7522
# The only section ids the section endpoint will serve. Anything else is
# rejected before any file is read.
_WIKI_SECTIONS = {
    "architecture",
    "modules",
    "data-flow",
}


def _wiki_dir() -> _Path:
    """Return the wiki directory: ``wiki`` inside the active .loki directory."""
    loki_dir = _get_loki_dir()
    return loki_dir / "wiki"
7527
+
7528
+
7529
def _project_root() -> _Path:
    """Return the active project root, i.e. the parent of the .loki directory."""
    loki_dir = _get_loki_dir()
    return loki_dir.parent
7532
+
7533
+
7534
@app.get("/api/wiki", dependencies=[Depends(auth.require_scope("read"))])
async def get_wiki():
    """Return the wiki summary and section list for the active project.

    When wiki.json does not exist, responds with ``generated: False`` and a
    hint to run the generator. When it exists but does not parse to a JSON
    object, responds with HTTP 500. Otherwise returns project metadata from
    wiki.json, the signature from wiki-manifest.json (if any), and one
    ``{id, title, citation_count}`` entry per section object.
    """
    wiki_dir = _wiki_dir()
    wiki_json = wiki_dir / "wiki.json"
    if not wiki_json.is_file():
        return {
            "generated": False,
            "sections": [],
            "message": "No wiki generated. Run 'loki wiki generate'.",
        }

    data = _safe_json_read(wiki_json, default=None)
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="wiki.json unreadable")

    # A missing or empty manifest simply yields a null signature.
    manifest = _safe_json_read(wiki_dir / "wiki-manifest.json", default={}) or {}

    sections = []
    for entry in data.get("sections", []):
        if not isinstance(entry, dict):
            continue
        sections.append({
            "id": entry.get("id"),
            "title": entry.get("title"),
            "citation_count": len(entry.get("citations") or []),
        })

    summary = {"generated": True}
    for key in ("project", "generated_at", "file_count"):
        summary[key] = data.get(key)
    summary["signature"] = manifest.get("signature")
    summary["sections"] = sections
    return summary
7560
+
7561
+
7562
@app.get("/api/wiki/{section}", dependencies=[Depends(auth.require_scope("read"))])
async def get_wiki_section(section: str):
    """Return one wiki section object from wiki.json.

    ``section`` must be one of ``_WIKI_SECTIONS``; any other value is rejected
    with HTTP 400 before the filesystem is touched. Responds 404 when the wiki
    has not been generated or the section is absent from wiki.json, and 500
    when wiki.json does not parse to a JSON object.
    """
    if section not in _WIKI_SECTIONS:
        raise HTTPException(status_code=400, detail=f"unknown section: {section}")

    wiki_json = _wiki_dir() / "wiki.json"
    if not wiki_json.is_file():
        raise HTTPException(status_code=404, detail="wiki not generated")

    data = _safe_json_read(wiki_json, default=None)
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="wiki.json unreadable")

    # First section object whose id matches wins.
    found = next(
        (
            entry
            for entry in data.get("sections", [])
            if isinstance(entry, dict) and entry.get("id") == section
        ),
        None,
    )
    if found is None:
        raise HTTPException(status_code=404, detail=f"section not found: {section}")
    return JSONResponse(content=found)
7577
+
7578
+
7579
# Request body for POST /api/wiki/ask.
class WikiAskRequest(BaseModel):
    # The question to answer; required, between 1 and 2000 characters.
    question: str = Field(..., min_length=1, max_length=2000)
    # Forwarded to wiki-ask.py as --k; defaults to 6 and must be within 1..20.
    k: int = Field(default=6, ge=1, le=20)
7582
+
7583
+
7584
@app.post("/api/wiki/ask", dependencies=[Depends(auth.require_scope("read"))])
async def post_wiki_ask(req: WikiAskRequest):
    """Grounded, cited codebase Q&A.

    Shells out to autonomy/lib/wiki-ask.py (the single source of truth for the
    grounding + citation-validation contract) and returns its JSON output.

    The child process can run for up to 180 seconds, so it is executed in the
    event loop's default thread-pool executor. Calling ``subprocess.run``
    directly inside this coroutine would block the event loop, and with it
    every other request, until the script finished.

    Responses:
        200 with the script's JSON on success.
        200 with an empty answer and a note when the script exits with
            status 3 (no relevant code found).
        500 when the script fails otherwise or prints invalid JSON.
        503 when the script is missing or cannot be run (including timeout).
    """
    import asyncio
    import functools

    project_root = _project_root()
    # The ask script is located relative to this file: <repo>/autonomy/lib.
    repo_root = _Path(__file__).resolve().parent.parent
    ask_script = repo_root / "autonomy" / "lib" / "wiki-ask.py"
    if not ask_script.is_file():
        raise HTTPException(status_code=503, detail="wiki-ask backend missing")

    run_ask = functools.partial(
        subprocess.run,
        ["python3", str(ask_script), "--root", str(project_root),
         "--question", req.question, "--k", str(req.k), "--json"],
        capture_output=True, text=True, timeout=180,
        cwd=str(project_root),
    )
    try:
        # Exceptions raised in the worker thread are re-raised by the await.
        proc = await asyncio.get_running_loop().run_in_executor(None, run_ask)
    except (OSError, subprocess.SubprocessError) as e:
        raise HTTPException(status_code=503, detail=f"wiki ask failed: {e}") from e

    if proc.returncode == 3:
        return {"question": req.question, "answer": "",
                "citations": [], "note": "no relevant code found"}
    if proc.returncode != 0:
        raise HTTPException(status_code=500,
                            detail=(proc.stderr or "wiki ask error").strip())
    try:
        payload = json.loads(proc.stdout)
    except json.JSONDecodeError as e:
        raise HTTPException(status_code=500, detail="wiki ask returned bad JSON") from e
    return JSONResponse(content=payload)
7616
+
7617
+
7510
7618
  # ---------------------------------------------------------------------------
7511
7619
  # SPA catch-all: serve index.html for any path not matched by API routes
7512
7620
  # or static asset mounts. This lets the dashboard UI handle client-side routing.