loki-mode 7.12.0 → 7.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/lib/wiki-ask.py +137 -0
- package/autonomy/lib/wiki-generator.py +322 -0
- package/autonomy/lib/wiki_index.py +258 -0
- package/autonomy/lib/wiki_llm.py +140 -0
- package/autonomy/loki +121 -0
- package/bin/loki +1 -1
- package/dashboard/__init__.py +1 -1
- package/dashboard/server.py +108 -0
- package/dashboard/static/index.html +394 -329
- package/docs/INSTALLATION.md +1 -1
- package/docs/R5-AUTO-WIKI-DESIGN.md +137 -0
- package/loki-ts/dist/loki.js +224 -198
- package/mcp/__init__.py +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""wiki_index.py -- dependency-free codebase index for the R5 auto-wiki.
|
|
2
|
+
|
|
3
|
+
Builds a line-anchored chunk index over a project's source files and provides
|
|
4
|
+
deterministic token-overlap retrieval. This is the grounding substrate for
|
|
5
|
+
cited answers: every chunk carries the REAL repo-relative file path and the
|
|
6
|
+
REAL start/end line numbers it came from, so a citation can always be checked
|
|
7
|
+
against the filesystem.
|
|
8
|
+
|
|
9
|
+
Reuse note: the token-overlap scoring (`_tokenize` + overlap weighting) is
|
|
10
|
+
ported from memory/knowledge_graph.py (OrganizationKnowledgeGraph), which scores
|
|
11
|
+
memory patterns the same way. knowledge_graph.py is NOT a code index (it
|
|
12
|
+
aggregates .loki/memory/semantic patterns), so the code scanning/chunking here
|
|
13
|
+
is new. retrieval.py is a memory retriever, not a code indexer, so it is not
|
|
14
|
+
reused for code retrieval.
|
|
15
|
+
|
|
16
|
+
No third-party dependencies. CI-safe (no Docker, no network, no provider).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import hashlib
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import re
|
|
25
|
+
import subprocess
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
# Source extensions we index. Kept broad but excludes lockfiles/binaries.
|
|
29
|
+
SOURCE_EXTS = {
    # Python, JavaScript and TypeScript
    ".py", ".js", ".jsx", ".mjs", ".cjs", ".ts", ".tsx",
    # Compiled / systems languages
    ".rs", ".go", ".c", ".cc", ".cpp", ".h", ".hpp", ".swift",
    # JVM and .NET languages
    ".java", ".kt", ".kts", ".scala", ".clj", ".cs",
    # Scripting, shell and query languages
    ".rb", ".php", ".sh", ".bash", ".sql", ".lua", ".r",
    # Component formats and Elixir
    ".vue", ".svelte", ".ex", ".exs",
}

# Directories never worth indexing.
SKIP_DIRS = {
    # Tool and VCS state
    ".git", ".loki", ".cache",
    # Installed dependencies and virtual environments
    "node_modules", "vendor", ".venv", "venv",
    # Build and report output
    "dist", "build", "out", "target", ".next", ".turbo", "coverage",
    # Python tool caches
    "__pycache__", ".pytest_cache", ".mypy_cache",
}

# Number of lines per chunk; chunks never overlap and stay line-anchored.
CHUNK_LINES = 60
# Safety cap on indexed files so huge repos stay cheap.
MAX_FILES = 800
# Files larger than this many bytes are skipped (generated/minified output).
MAX_FILE_BYTES = 400_000
|
|
46
|
+
|
|
47
|
+
# Tokenizer ported from memory/knowledge_graph.py:_tokenize / _STOPWORDS.
|
|
48
|
+
_STOPWORDS = {
    # English filler words.
    "the", "a", "an", "to", "for", "of", "and", "or", "with", "without",
    "is", "are", "be", "up", "on", "in", "by", "not", "this", "that",
    "from", "as", "at", "it", "if", "do", "we", "my", "our", "how",
    # Ubiquitous code keywords that carry no retrieval signal.
    "def", "self", "return", "import", "const", "let", "var", "function",
}


def _tokenize(text):
    """Turn arbitrary text into a set of lowercase search tokens.

    The input is coerced to ``str`` (``None`` and other falsy values become
    the empty string) and lowercased. Tokens are maximal runs of
    ``[a-z0-9_]``; anything else acts as a separator. Tokens of two
    characters or fewer, and tokens listed in ``_STOPWORDS``, are discarded.

    Returns:
        A ``set`` of the surviving tokens (possibly empty).
    """
    lowered = str(text or "").lower()
    tokens = set()
    for word in re.findall(r"[a-z0-9_]+", lowered):
        if len(word) <= 2 or word in _STOPWORDS:
            continue
        tokens.add(word)
    return tokens
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _git_tracked_files(root):
    """Return git-tracked file paths (repo-relative), or None if unavailable.

    Runs ``git -C <root> ls-files -z``. The NUL-separated form is used because
    the default line-oriented output C-quotes any path containing quotes,
    control characters or (by default) non-ASCII bytes; such quoted names do
    not exist on disk and would be dropped by later existence checks.

    Args:
        root: Directory to query; converted with ``str()``.

    Returns:
        A non-empty list of paths exactly as git reports them, or ``None``
        when git cannot be run, exits non-zero, times out (30 s), or lists no
        files. ``None`` tells the caller to fall back to a filesystem walk.
    """
    try:
        proc = subprocess.run(
            ["git", "-C", str(root), "ls-files", "-z"],
            capture_output=True, timeout=30,
        )
    except (OSError, subprocess.SubprocessError):
        return None
    if proc.returncode != 0:
        return None
    # Decode leniently: a name that is not valid UTF-8 becomes a path that
    # does not exist on disk rather than raising here.
    files = [
        raw.decode("utf-8", errors="replace")
        for raw in proc.stdout.split(b"\0")
        if raw
    ]
    return files or None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _walk_files(root):
    """List every file below ``root``, pruning directories in ``SKIP_DIRS``.

    Used when git cannot supply a file list.

    Returns:
        Paths relative to ``root``, in ``os.walk`` order. No extension or
        size filtering is applied here.
    """
    base = Path(root)
    found = []
    for current, subdirs, names in os.walk(base):
        # Prune in place so os.walk never descends into skipped directories.
        subdirs[:] = [name for name in subdirs if name not in SKIP_DIRS]
        found.extend(
            os.path.relpath(os.path.join(current, name), base)
            for name in names
        )
    return found
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def list_source_files(root):
    """Return the sorted, capped list of indexable source files under ``root``.

    Candidates come from ``_git_tracked_files``; when that returns ``None``
    the filtered walk in ``_walk_files`` is used instead. A candidate is kept
    only if no path component is in ``SKIP_DIRS`` (tracked files can still
    live in such directories), its lowercased extension is in
    ``SOURCE_EXTS``, it is a regular file, and it is no larger than
    ``MAX_FILE_BYTES``. Paths that cannot be stat'ed are skipped.

    Returns:
        At most ``MAX_FILES`` root-relative paths, sorted ascending.
    """
    root = Path(root)
    tracked = _git_tracked_files(root)
    candidates = _walk_files(root) if tracked is None else tracked

    def _is_indexable(rel):
        if SKIP_DIRS.intersection(Path(rel).parts):
            return False
        if os.path.splitext(rel)[1].lower() not in SOURCE_EXTS:
            return False
        target = root / rel
        try:
            return target.is_file() and target.stat().st_size <= MAX_FILE_BYTES
        except OSError:
            return False

    return sorted(rel for rel in candidates if _is_indexable(rel))[:MAX_FILES]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _read_lines(abs_path):
    """Read a text file and return its lines without line terminators.

    The file is decoded as UTF-8 with undecodable bytes replaced. Lines are
    split only on real newlines (``\\n``, ``\\r\\n``, ``\\r`` via universal
    newline translation). ``str.splitlines()`` is deliberately avoided: it
    also breaks on form feed, vertical tab, U+2028 and similar characters,
    which would make the 1-based line numbers derived from this list disagree
    with editors, git and a plain line count of the file.

    Args:
        abs_path: Path of the file to read.

    Returns:
        A list of lines, or ``[]`` if the file cannot be opened or read.
    """
    try:
        with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
            # Each iterated line carries at most one trailing "\n".
            return [line.rstrip("\n") for line in f]
    except OSError:
        return []
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def build_index(root):
    """Index the project's source files as fixed-size, line-anchored chunks.

    Every file from ``list_source_files`` is read and cut into consecutive,
    non-overlapping windows of ``CHUNK_LINES`` lines. Windows containing only
    blank lines are dropped. Chunk ids are assigned sequentially from 0 in
    emission order.

    Returns:
        A dict with:
          "root":   the resolved absolute root, as a string
          "files":  the root-relative paths that were considered
          "chunks": a list of {"id", "file", "start_line", "end_line", "text"}
                    where "file" is root-relative and the line numbers are
                    1-based and inclusive.
    """
    root = Path(root).resolve()
    files = list_source_files(root)
    chunks = []
    for rel in files:
        lines = _read_lines(root / rel)
        # An unreadable or empty file yields no windows at all.
        for offset in range(0, len(lines), CHUNK_LINES):
            window = lines[offset:offset + CHUNK_LINES]
            if all(not ln.strip() for ln in window):
                continue
            chunks.append({
                "id": len(chunks),  # next sequential id
                "file": rel,
                "start_line": offset + 1,
                "end_line": offset + len(window),
                "text": "\n".join(window),
            })
    return {"root": str(root), "files": files, "chunks": chunks}
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def retrieve(index, query, k=6):
    """Rank the index's chunks against ``query`` and return the best ``k``.

    Scoring is purely lexical (no LLM, no network):
      * 3 points per query token that also occurs in the chunk text,
      * 2 points per query token that also occurs in the chunk's file path,
      * 4 points if the whole lowercased query (longer than 3 characters)
        appears verbatim in the lowercased chunk text.
    Chunks scoring zero are excluded. Results are ordered by descending
    score, with ties broken by ascending chunk id so output is reproducible.

    Args:
        index: Dict with a "chunks" list, as produced by ``build_index``.
        query: Free-text query; ``None`` is treated as empty.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk dicts (the same objects held by ``index``).
    """
    query_tokens = _tokenize(query)
    needle = str(query or "").lower()
    use_verbatim = len(needle) > 3
    ranked = []
    for chunk in index.get("chunks", []):
        body = chunk.get("text", "")
        points = 3 * len(query_tokens & _tokenize(body))
        # File and directory names are strong relevance signals.
        points += 2 * len(query_tokens & _tokenize(chunk.get("file", "")))
        if use_verbatim and needle in body.lower():
            points += 4
        if points > 0:
            ranked.append((-points, chunk["id"], chunk))
    # Sort on (negated score, id) only; the chunk dict is never compared.
    ranked.sort(key=lambda entry: entry[:2])
    return [entry[2] for entry in ranked[:k]]
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def compute_signature(root):
    """Return a deterministic SHA-256 hex signature of the indexed sources.

    The digest covers, in order: the output of ``git rev-parse HEAD`` (only
    when git succeeds), then for each file from ``list_source_files`` its
    root-relative path followed by the SHA-256 of its bytes. Files that
    cannot be read are left out. Callers can compare signatures to decide
    whether regeneration can be skipped. Because HEAD is included, a new
    commit changes the signature even when no indexed file changed.
    """
    root = Path(root).resolve()
    digest = hashlib.sha256()
    try:
        rev = subprocess.run(
            ["git", "-C", str(root), "rev-parse", "HEAD"],
            capture_output=True, text=True, timeout=15,
        )
    except (OSError, subprocess.SubprocessError):
        rev = None  # git missing or hung: sign file contents only
    if rev is not None and rev.returncode == 0:
        digest.update(b"head:" + rev.stdout.strip().encode())
    for rel in list_source_files(root):
        try:
            content = (root / rel).read_bytes()
        except OSError:
            continue
        digest.update(rel.encode("utf-8"))
        digest.update(hashlib.sha256(content).digest())
    return digest.hexdigest()
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# Compiled once at import. Each pattern captures the defined name in group 1.
_DEFINITION_PATTERNS = (
    # Python def / async def
    re.compile(r"^\s*(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)"),
    # class declarations
    re.compile(r"^\s*class\s+([A-Za-z_][A-Za-z0-9_]*)"),
    # JS/TS function declarations
    re.compile(r"^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)"),
    # JS/TS `const name = (` / `const name = async (`
    re.compile(r"^\s*(?:export\s+)?const\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?\("),
    # Rust fn / pub fn
    re.compile(r"^\s*(?:pub\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)"),
    # Go func, with optional receiver
    re.compile(r"^\s*func\s+(?:\([^)]*\)\s*)?([A-Za-z_][A-Za-z0-9_]*)"),
    # bash funcs: `name() {`
    re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*\(\)\s*\{"),
)


def extract_definitions(root, rel, limit=12):
    """Return definition lines found in a file, for code-derived citations.

    Each line of ``root / rel`` is matched against a small set of
    language-agnostic regexes; the first pattern that matches wins for that
    line. Only names that literally appear in the file are emitted.

    Args:
        root: Project root directory.
        rel: Path of the file relative to ``root``.
        limit: Maximum number of definitions to return. ``0`` or a negative
            value yields an empty list; ``None`` means no cap.

    Returns:
        A list of ``{"name": str, "line": int}`` in file order, where
        ``line`` is 1-based. Empty if the file cannot be read.
    """
    if limit is not None and limit <= 0:
        # Nothing requested; avoid returning the first match anyway.
        return []
    defs = []
    for lineno, line in enumerate(_read_lines(Path(root) / rel), start=1):
        for pat in _DEFINITION_PATTERNS:
            m = pat.match(line)
            if m:
                defs.append({"name": m.group(1), "line": lineno})
                break
        if limit is not None and len(defs) >= limit:
            break
    return defs
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""wiki_llm.py -- stub-aware LLM invocation + citation validation for R5 wiki.
|
|
2
|
+
|
|
3
|
+
Keeps every paid-call and grounding-guarantee concern in one place so the
|
|
4
|
+
generator and the ask script behave identically.
|
|
5
|
+
|
|
6
|
+
LLM stub contract (CI-safe, zero paid calls in tests):
|
|
7
|
+
LOKI_WIKI_LLM_STUB unset -> call the real provider (claude -p / codex / ...)
|
|
8
|
+
via the same mechanism loki docs uses; if no
|
|
9
|
+
provider is on PATH, return None (callers then
|
|
10
|
+
fall back to extractive/template output).
|
|
11
|
+
LOKI_WIKI_LLM_STUB=<file path> -> read the completion from that file.
|
|
12
|
+
LOKI_WIKI_LLM_STUB=<other> -> use the value literally as the completion.
|
|
13
|
+
|
|
14
|
+
No third-party dependencies.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import os
|
|
20
|
+
import re
|
|
21
|
+
import subprocess
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def invoke_llm(prompt, timeout=120):
    """Run the prompt through the configured provider CLI, or a CI stub.

    Stub mode (``LOKI_WIKI_LLM_STUB`` is set, even to an empty string):
      * if the value contains a path separator or ends in ``.txt`` and names
        an existing file, that file's contents are returned (``""`` if the
        read fails);
      * otherwise the value itself is returned as the completion.

    Real mode: the provider name comes from ``LOKI_PROVIDER`` (default
    ``claude``), overridden by a non-empty ``.loki/state/provider`` file in
    the current working directory. The provider CLI is run with stdin closed,
    wrapped in ``timeout``/``gtimeout`` when one is on PATH.

    Args:
        prompt: Prompt text, passed to the CLI as a single argument.
        timeout: Seconds allowed for the provider; the subprocess itself is
            given ten seconds more.

    Returns:
        The provider's stdout, or ``None`` when the provider is unknown or
        not on PATH, the process cannot be run or times out, or it exits
        non-zero with blank stdout.
    """
    canned = os.environ.get("LOKI_WIKI_LLM_STUB")
    if canned is not None:
        looks_like_path = canned.endswith(".txt") or os.path.sep in canned
        if looks_like_path and Path(canned).is_file():
            try:
                return Path(canned).read_text(encoding="utf-8", errors="replace")
            except OSError:
                return ""
        return canned

    provider = os.environ.get("LOKI_PROVIDER", "claude")
    override_file = Path(".loki/state/provider")
    if override_file.is_file():
        try:
            provider = override_file.read_text().strip() or provider
        except OSError:
            pass

    # First available timeout wrapper, if any (matches the bash docs helper).
    timeout_cmd = next(
        (cand for cand in ("timeout", "gtimeout") if _which(cand)), None
    )

    argv_by_provider = {
        "claude": ["claude", "-p", prompt],
        "codex": ["codex", "exec", "--full-auto", prompt],
        "cline": ["cline", "-y", prompt],
        "aider": ["aider", "--message", prompt, "--yes-always", "--no-auto-commits"],
    }
    argv = argv_by_provider.get(provider)
    if argv is None or not _which(argv[0]):
        return None
    if timeout_cmd:
        argv = [timeout_cmd, str(timeout)] + argv

    try:
        result = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=timeout + 10,
            stdin=subprocess.DEVNULL,
        )
    except (OSError, subprocess.SubprocessError):
        return None
    if result.returncode != 0 and not result.stdout.strip():
        return None
    return result.stdout
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _which(name):
    """Return the full path of executable ``name`` found on PATH, else None.

    Delegates to ``shutil.which`` so platform rules are honoured (for
    example PATHEXT on Windows, where a hand-rolled scan would miss
    ``claude.cmd``). The search path is the current ``PATH`` environment
    variable; when it is unset or empty nothing is searched and ``None`` is
    returned.
    """
    import shutil

    return shutil.which(name, path=os.environ.get("PATH", ""))
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Matches a numeric citation marker such as "[3]"; group 1 is the number.
_CITE_RE = re.compile(r"\[(\d+)\]")


def map_and_validate_citations(answer_text, chunks, root):
    """Rewrite ``[n]`` markers to ``[file:line]`` and collect valid citations.

    ``chunks`` is the numbered list that was shown to the LLM; marker ``[n]``
    refers to ``chunks[n-1]``. A marker survives only if n is in range, the
    chunk names a file that exists under ``root``, and the chunk's start line
    is between 1 and the file's line count (an empty file still accepts
    line 1). Every other marker is removed from the text, so a citation that
    does not resolve on disk cannot reach the output.

    Args:
        answer_text: LLM answer containing ``[n]`` markers; ``None`` is
            treated as empty.
        chunks: List of chunk dicts with "file" and "start_line".
        root: Project root used to resolve chunk paths.

    Returns:
        ``(clean_text, citations)``. ``citations`` is a de-duplicated list of
        ``{"file": rel, "line": int}`` in first-appearance order.
        ``clean_text`` has runs of spaces/tabs collapsed to one space.
    """
    root = Path(root)
    citations = []
    seen = set()
    resolved = {}  # marker index -> citation dict or None (memoised)

    def _lookup(idx):
        if idx < 1 or idx > len(chunks):
            return None
        ch = chunks[idx - 1]
        rel = ch.get("file")
        if not isinstance(rel, str) or not rel:
            # A chunk without a usable path can never be verified.
            return None
        try:
            line = int(ch.get("start_line", 1))
        except (TypeError, ValueError):
            return None
        abs_path = root / rel
        try:
            if not abs_path.is_file():
                return None
            with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
                nlines = sum(1 for _ in f)
        except OSError:
            return None
        if line < 1 or line > max(nlines, 1):
            return None
        return {"file": rel, "line": line}

    def _resolve(idx):
        # Answers often repeat a marker; read each cited file only once.
        if idx not in resolved:
            resolved[idx] = _lookup(idx)
        return resolved[idx]

    def _sub(m):
        cite = _resolve(int(m.group(1)))
        if cite is None:
            return ""  # drop a bogus/non-resolving citation marker
        key = (cite["file"], cite["line"])
        if key not in seen:
            seen.add(key)
            citations.append(cite)
        return "[%s:%d]" % (cite["file"], cite["line"])

    clean = _CITE_RE.sub(_sub, answer_text or "")
    # Collapse any double spaces left by dropped markers.
    clean = re.sub(r"[ \t]{2,}", " ", clean)
    return clean, citations
|
package/autonomy/loki
CHANGED
|
@@ -13205,6 +13205,9 @@ main() {
|
|
|
13205
13205
|
docs)
|
|
13206
13206
|
cmd_docs "$@"
|
|
13207
13207
|
;;
|
|
13208
|
+
wiki)
|
|
13209
|
+
cmd_wiki "$@"
|
|
13210
|
+
;;
|
|
13208
13211
|
magic)
|
|
13209
13212
|
cmd_magic "$@"
|
|
13210
13213
|
;;
|
|
@@ -22633,6 +22636,124 @@ run_debate(
|
|
|
22633
22636
|
log_info "Debate complete"
|
|
22634
22637
|
}
|
|
22635
22638
|
|
|
22639
|
+
# =============================================================================
|
|
22640
|
+
# loki wiki -- auto-generated, cited per-project codebase wiki + Q&A (R5).
|
|
22641
|
+
#
|
|
22642
|
+
# Loki's answer to Devin DeepWiki: a persistent, queryable wiki built from the
|
|
22643
|
+
# codebase, where every section cites the real source files it came from, plus
|
|
22644
|
+
# a grounded `ask` that returns cited answers (file:line). Heavy work lives in
|
|
22645
|
+
# the Python core (autonomy/lib/wiki-generator.py, wiki-ask.py, wiki_index.py);
|
|
22646
|
+
# this is a thin dispatcher, mirroring how cmd_proof delegates to the proof
|
|
22647
|
+
# generator. Generation is incremental (skips when the codebase is unchanged).
|
|
22648
|
+
# =============================================================================
|
|
22649
|
+
# Dispatch `loki wiki <command>`: generate, show or ask. With no command, or
# with help/-h/--help, print usage and return 0. Any other command is an
# error (return 1).
cmd_wiki() {
    local subcmd="${1:-}"
    if [[ $# -gt 0 ]]; then
        shift
    fi

    local lib_dir="${_LOKI_SCRIPT_DIR}/lib"

    case "$subcmd" in
        ""|help|-h|--help)
            echo -e "${BOLD}loki wiki${NC} - Auto-generated, cited codebase wiki + Q&A"
            printf '%s\n' \
                "" \
                "Usage: loki wiki <command> [options]" \
                "" \
                "Commands:" \
                " generate [path] [--force] Build/refresh the cited wiki in .loki/wiki/" \
                " show [section] Print the wiki (or one section: architecture|modules|data-flow)" \
                ' ask "<question>" Cited answer grounded in the codebase (file:line)' \
                "" \
                "Each wiki section cites the real source files it was built from." \
                "Generation is incremental: it skips when the codebase is unchanged." \
                "" \
                "Examples:" \
                " loki wiki generate # build wiki for current project" \
                " loki wiki show architecture # show one section" \
                ' loki wiki ask "how does the cli dispatch commands"'
            return 0
            ;;
        generate)
            _wiki_generate "$lib_dir" "$@"
            ;;
        show)
            _wiki_show "$@"
            ;;
        ask)
            _wiki_ask "$lib_dir" "$@"
            ;;
        *)
            log_error "Unknown wiki command: $subcmd"
            echo "Run 'loki wiki --help' for usage."
            return 1
            ;;
    esac
}
|
|
22685
|
+
|
|
22686
|
+
# Run the Python wiki generator. $1 is the lib directory holding
# wiki-generator.py; remaining arguments are forwarded unchanged.
# Returns 1 if python3 is not on PATH, otherwise the generator's exit status.
_wiki_generate() {
    local lib_dir="$1"
    shift

    command -v python3 >/dev/null 2>&1 || {
        log_error "python3 is required for 'loki wiki generate'"
        return 1
    }

    python3 "$lib_dir/wiki-generator.py" "$@"
}
|
|
22694
|
+
|
|
22695
|
+
# Print the generated wiki from ./.loki/wiki: index.md by default, or
# <section>.md when a section name is given (the last positional argument
# wins). Returns 1 on an unknown option, or when the wiki directory, the
# index or the requested section is missing.
_wiki_show() {
    local section=""
    local target="."
    local arg

    for arg in "$@"; do
        case "$arg" in
            --help|-h)
                echo "Usage: loki wiki show [section]"
                echo "Sections: architecture, modules, data-flow"
                return 0
                ;;
            -*)
                log_error "Unknown option: $arg"
                return 1
                ;;
            *)
                section="$arg"
                ;;
        esac
    done

    local wiki_dir="$target/.loki/wiki"
    [ -d "$wiki_dir" ] || {
        log_error "No wiki found. Run 'loki wiki generate' first."
        return 1
    }

    if [ -z "$section" ]; then
        if [ ! -f "$wiki_dir/index.md" ]; then
            log_error "Wiki index not found. Run 'loki wiki generate'."
            return 1
        fi
        cat "$wiki_dir/index.md"
        return
    fi

    local f="$wiki_dir/${section}.md"
    if [ ! -f "$f" ]; then
        log_error "No such section: $section (try: architecture, modules, data-flow)"
        return 1
    fi
    cat "$f"
}
|
|
22730
|
+
|
|
22731
|
+
# Answer a question about the codebase via wiki-ask.py.
#   $1        lib directory containing wiki-ask.py
#   --json, --quiet   forwarded to the script as-is
#   --k N     forwarded as "--k N"; N defaults to 6 when omitted
#   others    joined with single spaces to form the question
# Returns 1 when no question is given or python3 is missing; otherwise the
# script's exit status.
_wiki_ask() {
    local lib_dir="$1"; shift
    local question=""
    local extra=()
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --help|-h)
                echo "Usage: loki wiki ask \"<question>\" [--json] [--k N]"
                return 0
                ;;
            --json|--quiet)
                extra+=("$1")
                shift
                ;;
            --k)
                extra+=("--k" "${2:-6}")
                shift
                # Consume the value only if one was supplied. `shift 2` with
                # a single remaining argument fails WITHOUT shifting, which
                # would leave "--k" in $1 and spin this loop forever.
                if [[ $# -gt 0 ]]; then
                    shift
                fi
                ;;
            *)
                if [ -z "$question" ]; then
                    question="$1"
                else
                    question="$question $1"
                fi
                shift
                ;;
        esac
    done
    if [ -z "$question" ]; then
        log_error "Provide a question: loki wiki ask \"how does X work\""
        return 1
    fi
    if ! command -v python3 >/dev/null 2>&1; then
        log_error "python3 is required for 'loki wiki ask'"
        return 1
    fi
    # ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
    python3 "$lib_dir/wiki-ask.py" --question "$question" ${extra[@]+"${extra[@]}"}
}
|
|
22756
|
+
|
|
22636
22757
|
cmd_magic() {
|
|
22637
22758
|
local subcmd="${1:-help}"
|
|
22638
22759
|
shift 2>/dev/null || true
|
package/bin/loki
CHANGED
|
@@ -116,7 +116,7 @@ fi
|
|
|
116
116
|
# Two-token routes (provider show/list, memory list/index) match on the first
|
|
117
117
|
# token only; the Bun dispatcher handles subcommand routing internally.
|
|
118
118
|
case "${1:-}" in
|
|
119
|
-
version|--version|-v|status|stats|doctor|provider|memory|rollback|internal|kpis|proof)
|
|
119
|
+
version|--version|-v|status|stats|doctor|provider|memory|rollback|internal|kpis|proof|wiki)
|
|
120
120
|
# v7.5.2: rollback added (wires loki-ts/src/commands/rollback.ts).
|
|
121
121
|
# v7.5.3: internal added for autonomy/run.sh phase1-hooks calls.
|
|
122
122
|
# v7.5.28: kpis added (Phase K MVP: read-only KPI snapshot).
|
package/dashboard/__init__.py
CHANGED
package/dashboard/server.py
CHANGED
|
@@ -7507,6 +7507,114 @@ async def get_proof_html(run_id: str):
|
|
|
7507
7507
|
return FileResponse(str(index_html), media_type="text/html")
|
|
7508
7508
|
|
|
7509
7509
|
|
|
7510
|
+
# ---------------------------------------------------------------------------
|
|
7511
|
+
# R5: Auto-wiki + cited codebase Q&A (Loki's DeepWiki).
|
|
7512
|
+
#
|
|
7513
|
+
# Surfaces the per-project wiki generated by autonomy/lib/wiki-generator.py
|
|
7514
|
+
# (stored under <project>/.loki/wiki/) and the grounded `ask` flow
|
|
7515
|
+
# (autonomy/lib/wiki-ask.py). Citations are file:line and always point at real
|
|
7516
|
+
# code -- the generator/ask scripts validate every citation against the
|
|
7517
|
+
# filesystem before emitting it, so the dashboard never shows a fabricated one.
|
|
7518
|
+
#
|
|
7519
|
+
# The section param is traversal-safe, mirroring _safe_proof_run_dir: only the
|
|
7520
|
+
# known section ids are accepted, so no arbitrary path can be read.
|
|
7521
|
+
# ---------------------------------------------------------------------------
|
|
7522
|
+
# Section ids accepted by GET /api/wiki/{section}; anything else gets a 400.
_WIKI_SECTIONS = {
    "architecture",
    "modules",
    "data-flow",
}
|
|
7523
|
+
|
|
7524
|
+
|
|
7525
|
+
def _wiki_dir() -> _Path:
    """Return the ``wiki`` directory inside the active ``.loki`` directory."""
    loki_dir = _get_loki_dir()
    return loki_dir / "wiki"
|
|
7527
|
+
|
|
7528
|
+
|
|
7529
|
+
def _project_root() -> _Path:
    """Return the active project root: the parent of the ``.loki`` directory."""
    loki_dir = _get_loki_dir()
    return loki_dir.parent
|
|
7532
|
+
|
|
7533
|
+
|
|
7534
|
+
@app.get("/api/wiki", dependencies=[Depends(auth.require_scope("read"))])
async def get_wiki():
    """Return the wiki manifest + section list for the active project.

    Reads ``wiki.json`` (and, for the signature, ``wiki-manifest.json``) from
    the wiki directory.

    Returns:
        ``{"generated": False, "sections": [], "message": ...}`` when no
        wiki.json exists; otherwise a dict with ``generated``, ``project``,
        ``generated_at``, ``file_count``, ``signature`` and ``sections``,
        where each section is ``{"id", "title", "citation_count"}``.

    Raises:
        HTTPException(500): wiki.json exists but does not hold a JSON object.
    """
    wiki_dir = _wiki_dir()
    wiki_json = wiki_dir / "wiki.json"
    if not wiki_json.is_file():
        return {"generated": False, "sections": [],
                "message": "No wiki generated. Run 'loki wiki generate'."}
    data = _safe_json_read(wiki_json, default=None)
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="wiki.json unreadable")

    # The manifest is optional; anything that is not a JSON object is ignored.
    manifest = _safe_json_read(wiki_dir / "wiki-manifest.json", default={})
    if not isinstance(manifest, dict):
        manifest = {}

    # Tolerate a missing, null or malformed "sections" value instead of
    # failing with an unhandled TypeError.
    raw_sections = data.get("sections")
    if not isinstance(raw_sections, list):
        raw_sections = []

    sections = []
    for s in raw_sections:
        if not isinstance(s, dict):
            continue
        cites = s.get("citations")
        sections.append({
            "id": s.get("id"),
            "title": s.get("title"),
            "citation_count": len(cites) if isinstance(cites, list) else 0,
        })
    return {
        "generated": True,
        "project": data.get("project"),
        "generated_at": data.get("generated_at"),
        "file_count": data.get("file_count"),
        "signature": manifest.get("signature"),
        "sections": sections,
    }
|
|
7560
|
+
|
|
7561
|
+
|
|
7562
|
+
@app.get("/api/wiki/{section}", dependencies=[Depends(auth.require_scope("read"))])
async def get_wiki_section(section: str):
    """Return one wiki section as stored in wiki.json.

    ``section`` is only ever compared against the fixed ``_WIKI_SECTIONS``
    set and against ids inside wiki.json; it is never used to build a path.

    Raises:
        HTTPException(400): ``section`` is not a known section id.
        HTTPException(404): wiki.json is missing, or has no such section.
        HTTPException(500): wiki.json does not hold a JSON object.
    """
    if section not in _WIKI_SECTIONS:
        raise HTTPException(status_code=400, detail=f"unknown section: {section}")
    wiki_json = _wiki_dir() / "wiki.json"
    if not wiki_json.is_file():
        raise HTTPException(status_code=404, detail="wiki not generated")
    data = _safe_json_read(wiki_json, default=None)
    if not isinstance(data, dict):
        raise HTTPException(status_code=500, detail="wiki.json unreadable")
    # A null or non-list "sections" value means "no sections" (404 below),
    # not an unhandled TypeError.
    raw_sections = data.get("sections")
    if not isinstance(raw_sections, list):
        raw_sections = []
    for s in raw_sections:
        if isinstance(s, dict) and s.get("id") == section:
            return JSONResponse(content=s)
    raise HTTPException(status_code=404, detail=f"section not found: {section}")
|
|
7577
|
+
|
|
7578
|
+
|
|
7579
|
+
class WikiAskRequest(BaseModel):
    """Request body for POST /api/wiki/ask."""

    # The question to answer; required, 1 to 2000 characters.
    question: str = Field(default=..., min_length=1, max_length=2000)
    # Value forwarded to wiki-ask.py as --k; between 1 and 20, default 6.
    k: int = Field(6, ge=1, le=20)
|
|
7582
|
+
|
|
7583
|
+
|
|
7584
|
+
@app.post("/api/wiki/ask", dependencies=[Depends(auth.require_scope("read"))])
async def post_wiki_ask(req: WikiAskRequest):
    """Grounded, cited codebase Q&A.

    Runs ``autonomy/lib/wiki-ask.py`` (located relative to this file) with
    ``--root <project> --question <q> --k <k> --json`` and returns the JSON
    it prints. The child process is run on a worker thread because it may
    take up to 180 seconds; calling ``subprocess.run`` directly in this
    coroutine would stall every other request on the event loop.

    Returns:
        The script's JSON output, or an empty answer with the note
        ``"no relevant code found"`` when the script exits with status 3.

    Raises:
        HTTPException(503): the script is missing, cannot be started, or
            times out.
        HTTPException(500): the script exits with another non-zero status
            (detail carries its stderr) or prints invalid JSON.
    """
    import asyncio
    import functools

    project_root = _project_root()
    repo_root = _Path(__file__).resolve().parent.parent
    ask_script = repo_root / "autonomy" / "lib" / "wiki-ask.py"
    if not ask_script.is_file():
        raise HTTPException(status_code=503, detail="wiki-ask backend missing")

    run_ask = functools.partial(
        subprocess.run,
        ["python3", str(ask_script), "--root", str(project_root),
         "--question", req.question, "--k", str(req.k), "--json"],
        capture_output=True, text=True, timeout=180,
        cwd=str(project_root),
    )
    try:
        # Exceptions raised inside the worker thread resurface at this await.
        proc = await asyncio.get_running_loop().run_in_executor(None, run_ask)
    except (OSError, subprocess.SubprocessError) as e:
        raise HTTPException(status_code=503, detail=f"wiki ask failed: {e}")

    if proc.returncode == 3:
        return {"question": req.question, "answer": "",
                "citations": [], "note": "no relevant code found"}
    if proc.returncode != 0:
        raise HTTPException(status_code=500,
                            detail=(proc.stderr or "wiki ask error").strip())
    try:
        return JSONResponse(content=json.loads(proc.stdout))
    except json.JSONDecodeError:
        raise HTTPException(status_code=500, detail="wiki ask returned bad JSON")
|
|
7616
|
+
|
|
7617
|
+
|
|
7510
7618
|
# ---------------------------------------------------------------------------
|
|
7511
7619
|
# SPA catch-all: serve index.html for any path not matched by API routes
|
|
7512
7620
|
# or static asset mounts. This lets the dashboard UI handle client-side routing.
|