loki-mode 7.7.24 → 7.7.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,159 @@
1
+ #!/usr/bin/env python3
2
+ """Probe provider documentation pages and report new models.
3
+
4
+ Approach kept deliberately conservative: we fetch known docs URLs, look for
5
+ model IDs that match well-defined regex patterns, and compare against the
6
+ current `providers/model_catalog.json`. We do NOT auto-rewrite the catalog;
7
+ we only emit a report (text or JSON) so a maintainer (or the cron-driven PR)
8
+ can review.
9
+
10
+ Run locally:
11
+ python3 tools/probe-model-catalog.py # report only
12
+ python3 tools/probe-model-catalog.py --json # machine-readable
13
+
14
+ In CI: see .github/workflows/model-catalog-probe.yml
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import json
21
+ import re
22
+ import sys
23
+ import urllib.error
24
+ import urllib.request
25
+ from pathlib import Path
26
+
27
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Catalog file whose contents the probed model IDs are compared against.
CATALOG_PATH = REPO_ROOT / "providers" / "model_catalog.json"

# Patterns that look like provider model IDs. Conservative -- only the well
# defined Claude/Codex/Gemini families today. Keys are provider names; each
# maps to the list of compiled patterns applied to that provider's pages.
# Every group is non-capturing, so `findall` yields whole-match strings.
PATTERNS: dict[str, list[re.Pattern[str]]] = {
    "claude": [
        re.compile(r"\bclaude-(?:opus|sonnet|haiku)-\d+(?:-\d+)?(?:-\d{8})?\b"),
    ],
    "codex": [
        re.compile(r"\bgpt-\d+(?:\.\d+)?-codex\b"),
    ],
    "gemini": [
        re.compile(r"\bgemini-\d+(?:\.\d+)?-(?:pro|flash)(?:-(?:preview|exp|latest))?\b"),
    ],
}

# Pages we read, keyed by the same provider names as PATTERNS. These should be
# public documentation. Failure to fetch any single one is non-fatal -- we
# report what we got.
SOURCES: dict[str, list[str]] = {
    "claude": [
        "https://docs.claude.com/en/about-claude/models/overview",
    ],
    "codex": [
        "https://platform.openai.com/docs/models",
    ],
    "gemini": [
        "https://ai.google.dev/gemini-api/docs/models",
    ],
}

# Value of the User-Agent header sent with every request made by fetch().
USER_AGENT = (
    "loki-mode-model-probe/1.0 "
    "(+https://github.com/asklokesh/loki-mode)"
)
62
+
63
+
64
def fetch(url: str, timeout: int = 30) -> str:
    """Download `url` and return the response body as text.

    The request carries the module-level `USER_AGENT` header. The body is
    decoded as UTF-8 and bytes that cannot be decoded are dropped rather than
    raising. Errors raised by `urllib.request.urlopen` propagate to the caller.

    Args:
        url: Address of the page to download.
        timeout: Timeout in seconds passed to `urlopen`.

    Returns:
        The decoded response body.
    """
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
    return raw.decode("utf-8", errors="ignore")
69
+
70
+
71
def load_catalog() -> dict:
    """Read the file at `CATALOG_PATH` as UTF-8 and return the parsed JSON."""
    return json.loads(CATALOG_PATH.read_text(encoding="utf-8"))
74
+
75
+
76
def known_ids(catalog: dict, provider: str) -> set[str]:
    """Collect every model ID the catalog already records for `provider`.

    IDs are gathered from three places under `catalog["providers"][provider]`:
    the `id` of each dict entry in `models`, the values of `latest_planning`,
    `latest_development` and `latest_fast`, and the values of `cli_aliases`.

    Sections that are missing, null, or of an unexpected type are skipped, and
    only non-empty strings are collected, so a partially filled-in catalog
    yields a smaller set instead of raising.

    Args:
        catalog: Parsed catalog JSON.
        provider: Provider key to look up, e.g. "claude".

    Returns:
        The set of known model ID strings (possibly empty).
    """
    providers = catalog.get("providers") if isinstance(catalog, dict) else None
    entry = providers.get(provider) if isinstance(providers, dict) else None
    if not isinstance(entry, dict):
        return set()

    ids: set[str] = set()

    def _collect(value: object) -> None:
        # Guard against null / numeric / list values: they cannot be model
        # IDs, and unhashable ones would make set.add() raise.
        if isinstance(value, str) and value:
            ids.add(value)

    models = entry.get("models")
    if isinstance(models, (list, tuple)):
        for model in models:
            if isinstance(model, dict):
                _collect(model.get("id"))

    for key in ("latest_planning", "latest_development", "latest_fast"):
        _collect(entry.get(key))

    aliases = entry.get("cli_aliases")
    if isinstance(aliases, dict):
        for target in aliases.values():
            _collect(target)

    return ids
91
+
92
+
93
def probe_provider(provider: str) -> tuple[set[str], list[str]]:
    """Fetch every source page for `provider` and extract matching model IDs.

    Each URL listed under `SOURCES[provider]` is downloaded with `fetch` and
    searched with every pattern in `PATTERNS[provider]`. A page that cannot be
    fetched contributes one message to the error list and is skipped; the
    remaining pages are still processed.

    Returns:
        A pair `(found_ids, errors)`.
    """
    found: set[str] = set()
    problems: list[str] = []
    patterns = PATTERNS.get(provider, [])
    for url in SOURCES.get(provider, []):
        try:
            page = fetch(url)
        except urllib.error.URLError as exc:
            problems.append(f"{url}: {exc}")
        except Exception as exc:  # network / parse / etc.
            problems.append(f"{url}: {type(exc).__name__}: {exc}")
        else:
            for pattern in patterns:
                found.update(pattern.findall(page))
    return found, problems
110
+
111
+
112
def main() -> int:
    """Probe every provider, print a report, and return the exit code.

    For each provider in `PATTERNS` the docs pages are probed and the IDs
    found are compared with the IDs already known to the catalog. The report
    is written to stdout, as JSON with `--json` and as plain text otherwise.

    Returns:
        1 when `--strict` was given and at least one new candidate was found,
        otherwise 0. Fetch errors alone never change the exit code.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains pre-formatted usage examples; the
        # default formatter would re-wrap them into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--json", action="store_true", help="emit machine-readable JSON")
    ap.add_argument("--strict", action="store_true", help="exit nonzero if new models are found")
    args = ap.parse_args()

    catalog = load_catalog()
    report: dict[str, dict] = {}
    any_new = False
    for provider in PATTERNS:
        found, errors = probe_provider(provider)
        known = known_ids(catalog, provider)
        # IDs seen in the docs that the catalog does not mention yet.
        new_only = sorted(found - known)
        report[provider] = {
            "known_count": len(known),
            "found_count": len(found),
            "new_candidates": new_only,
            "errors": errors,
        }
        if new_only:
            any_new = True

    if args.json:
        json.dump(report, sys.stdout, indent=2, sort_keys=True)
        sys.stdout.write("\n")
    else:
        for provider, info in report.items():
            new = info["new_candidates"]
            errs = info["errors"]
            print(f"== {provider} ==")
            print(f" known in catalog: {info['known_count']}")
            print(f" found in docs: {info['found_count']}")
            if new:
                print(f" NEW CANDIDATES: {', '.join(new)}")
            else:
                print(" NEW CANDIDATES: (none)")
            for e in errs:
                print(f" ERROR: {e}")
            print()
        if any_new:
            print("To adopt a new model: edit providers/model_catalog.json -> bump latest_<tier>")
            print("and add to models[]. Then re-run this script to confirm it disappears from new_candidates.")

    return 1 if (args.strict and any_new) else 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,188 @@
1
+ #!/usr/bin/env python3
2
+ """Keep docs/architecture/STATE-MACHINES.md line-number references in sync.
3
+
4
+ Many lines in STATE-MACHINES.md reference source locations like:
5
+ Source: `autonomy/run.sh:7380` (council_should_stop)
6
+ | running | ... | `run.sh:7380` |
7
+
8
+ Whenever run.sh / loki / completion-council.sh / server.py grow, those line
9
+ numbers drift and the doc lies. This script checks every reference that has
10
+ an annotated function name `(func_name)` and verifies the line number in the
11
+ referenced file matches the line where that function is defined.
12
+
13
+ Usage:
14
+ python3 tools/regen-state-machine-refs.py # report drift only
15
+ python3 tools/regen-state-machine-refs.py --fix # rewrite stale numbers in place
16
+ python3 tools/regen-state-machine-refs.py --strict # exit nonzero on drift (CI)
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import re
23
+ import sys
24
+ from dataclasses import dataclass
25
+ from pathlib import Path
26
+
27
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# The document whose source references are checked.
DOC_PATH = REPO_ROOT / "docs" / "architecture" / "STATE-MACHINES.md"

# File path aliases used in the doc, mapped to repo-root-relative paths.
FILE_ALIASES = {
    "autonomy/run.sh": "autonomy/run.sh",
    "run.sh": "autonomy/run.sh",
    "autonomy/loki": "autonomy/loki",
    "loki": "autonomy/loki",
    "autonomy/completion-council.sh": "autonomy/completion-council.sh",
    "completion-council.sh": "autonomy/completion-council.sh",
    "dashboard/server.py": "dashboard/server.py",
    "server.py": "dashboard/server.py",
    "autonomy/openspec-adapter.py": "autonomy/openspec-adapter.py",
    "openspec-adapter.py": "autonomy/openspec-adapter.py",
    "memory/engine.py": "memory/engine.py",
    "memory/storage.py": "memory/storage.py",
    "memory/retrieval.py": "memory/retrieval.py",
    "memory/consolidation.py": "memory/consolidation.py",
    "providers/loader.sh": "providers/loader.sh",
    "providers/claude.sh": "providers/claude.sh",
    "providers/codex.sh": "providers/codex.sh",
    "providers/gemini.sh": "providers/gemini.sh",
    "events/bus.py": "events/bus.py",
    "events/emit.sh": "events/emit.sh",
    "mcp/server.py": "mcp/server.py",
}

# Pattern matches `<file>:<num>(-<num>)? (func_name)` where the parenthesised
# function name is optional and may appear with surrounding text.
# Examples it must catch:
#   autonomy/run.sh:7380 (council_should_stop)
#   `run.sh:7380` (council_should_stop)
#   `completion-council.sh:1311` (council_should_stop), ...
#   `loki:120` (cmd_start)
# <file> is either a path ending in .sh/.py or the extensionless `loki`
# script, which FILE_ALIASES lists but a suffix-only pattern can never match.
# The lookbehind keeps the `loki` alternative from matching the tail of a
# longer word or path such as `myloki` or `other/loki`.
REF_RE = re.compile(
    r"`?(?P<file>"
    r"[\w./-]+\.(?:sh|py)"                  # path ending in .sh / .py
    r"|(?<![\w./-])(?:autonomy/)?loki"      # extensionless loki script
    r")"
    r":(?P<line>\d+)"                       # start line
    r"(?:-(?P<line_end>\d+))?"              # optional range end
    r"`?"
    r"(?:\s*\((?P<func>[a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\(\))?\))?"
)
68
+
69
+
70
def find_func_start(source: str, func_name: str) -> int | None:
    """Return the 1-based line number where `func_name` is defined.

    Recognised definition forms:
      * bash:   `name() {`, `name()`, `function name {`, `function name() {`
      * Python: `def name(` and `async def name(`

    Args:
        source: Full text of the source file to search.
        func_name: Function name to look for (matched literally).

    Returns:
        The line number of the single matching definition, or None when the
        function is not found or is defined more than once (ambiguous).
    """
    name = re.escape(func_name)
    # A bare `name` line is a call, not a definition, so the parentheses are
    # only optional when the `function` keyword is present.
    bash_pat = re.compile(
        rf"^\s*(?:function\s+{name}\s*(?:\(\s*\))?|{name}\s*\(\s*\))\s*\{{?\s*$"
    )
    py_pat = re.compile(rf"^\s*(?:async\s+)?def\s+{name}\s*\(")
    matches = [
        line_no
        for line_no, line in enumerate(source.splitlines(), start=1)
        if bash_pat.match(line) or py_pat.match(line)
    ]
    # Zero matches: not found. Several matches: cannot disambiguate.
    return matches[0] if len(matches) == 1 else None
88
+
89
+
90
@dataclass
class Drift:
    """A doc reference whose line number differs from the function's definition line."""

    line_no: int  # 1-based line in STATE-MACHINES.md holding the reference
    original: str  # exact reference text matched in the doc (substring to replace)
    file: str  # source file path as written in the doc (the alias, not the resolved path)
    annotated_line: int  # line number written in the doc
    actual_line: int  # line number where the function actually lives
    func: str  # function name annotated next to the reference
98
+
99
+
100
def scan(doc_text: str, source_cache: dict[str, str]) -> list[Drift]:
    """Find doc references whose line number no longer matches the source.

    Every `REF_RE` match in `doc_text` is examined. A reference is skipped
    when it has no annotated function name, when its file is not listed in
    `FILE_ALIASES`, when the source file does not exist, or when
    `find_func_start` cannot locate a single definition of the function.

    Args:
        doc_text: Full text of the document to check.
        source_cache: Map of repo-relative path to file text; files read here
            are stored in it so each source file is read at most once.

    Returns:
        One `Drift` per reference whose annotated line differs from the
        line where the function is defined, in document order.
    """
    found: list[Drift] = []
    for doc_line_no, text in enumerate(doc_text.splitlines(), start=1):
        for ref in REF_RE.finditer(text):
            func_name = ref.group("func")
            if not func_name:
                # cannot verify without annotated function name
                continue
            alias = ref.group("file")
            rel_path = FILE_ALIASES.get(alias)
            if not rel_path:
                continue
            try:
                doc_number = int(ref.group("line"))
            except (TypeError, ValueError):
                continue
            source_file = REPO_ROOT / rel_path
            if not source_file.exists():
                continue
            if rel_path not in source_cache:
                source_cache[rel_path] = source_file.read_text(
                    encoding="utf-8", errors="ignore"
                )
            real_number = find_func_start(source_cache[rel_path], func_name)
            if real_number is not None and real_number != doc_number:
                found.append(
                    Drift(
                        line_no=doc_line_no,
                        original=ref.group(0),
                        file=alias,
                        annotated_line=doc_number,
                        actual_line=real_number,
                        func=func_name,
                    )
                )
    return found
134
+
135
+
136
def apply_fixes(doc_text: str, drifts: list[Drift]) -> str:
    """Rewrite each stale line number to its current value.

    Each drift is located by its exact matched reference text (`d.original`),
    not by the bare `file:line` pair. Replacing the bare pair can hit the
    wrong reference on the same line, for example `server.py:50` inside
    `mcp/server.py:50`, or `run.sh:738` inside `run.sh:7380`.

    Args:
        doc_text: Full text of the document.
        drifts: Drifts reported for `doc_text`; `line_no` is 1-based.

    Returns:
        The document text with the start line of every drifted reference
        updated. Line endings are preserved. A range end, if present, is
        left untouched.
    """
    if not drifts:
        return doc_text
    lines = doc_text.splitlines(keepends=True)
    for d in drifts:
        stale = f"{d.file}:{d.annotated_line}"
        fresh = f"{d.file}:{d.actual_line}"
        replacement = d.original.replace(stale, fresh, 1)
        # When the reference starts with a path character, require that it is
        # not the tail of a longer path; a leading backtick is already a
        # boundary, so it needs no lookbehind.
        boundary = r"(?<![\w./-])" if re.match(r"[\w./-]", d.original) else ""
        anchored = re.compile(boundary + re.escape(d.original))
        idx = d.line_no - 1
        # A callable replacement keeps backslashes in the text literal.
        updated, count = anchored.subn(
            lambda _m, text=replacement: text, lines[idx], count=1
        )
        if count == 0:
            # Fall back to a plain substring replace rather than silently
            # skipping a reported drift.
            updated = lines[idx].replace(d.original, replacement, 1)
        lines[idx] = updated
    return "".join(lines)
153
+
154
+
155
def main() -> int:
    """Check the doc for drifted references and optionally rewrite them.

    Prints every drifted reference. With `--fix` the document is rewritten
    in place with the corrected line numbers.

    Returns:
        1 when the document is missing, or when drift was found and
        `--strict` was given without `--fix`; otherwise 0.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains pre-formatted usage examples; the
        # default formatter would re-wrap them into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--fix", action="store_true", help="rewrite stale line numbers in place")
    ap.add_argument("--strict", action="store_true", help="exit nonzero if any drift is detected")
    args = ap.parse_args()

    if not DOC_PATH.exists():
        print(f"missing: {DOC_PATH}", file=sys.stderr)
        return 1

    doc_text = DOC_PATH.read_text(encoding="utf-8")
    cache: dict[str, str] = {}
    drifts = scan(doc_text, cache)

    if not drifts:
        print(f"OK -- no drift in {DOC_PATH.relative_to(REPO_ROOT)}")
        return 0

    print(f"Drift detected in {DOC_PATH.relative_to(REPO_ROOT)}: {len(drifts)} reference(s)")
    for d in drifts:
        print(f" doc L{d.line_no}: {d.file}:{d.annotated_line} ({d.func}) -> actual {d.file}:{d.actual_line}")

    if args.fix:
        DOC_PATH.write_text(apply_fixes(doc_text, drifts), encoding="utf-8")
        print(f"Rewrote {len(drifts)} reference(s) in {DOC_PATH.relative_to(REPO_ROOT)}")
        # A successful rewrite counts as success even under --strict.
        return 0

    return 1 if args.strict else 0


if __name__ == "__main__":
    sys.exit(main())