trantor 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deploy/setup.sh +3 -3
- package/engine/LICENSE +21 -0
- package/engine/README.md +5 -0
- package/engine/bin/scrooge +1276 -0
- package/engine/bin/scrooge-capabilities +209 -0
- package/engine/bin/scrooge-diverge +263 -0
- package/engine/bin/scrooge-drift +126 -0
- package/engine/bin/scrooge-verify +190 -0
- package/engine/capabilities.seed.json +112 -0
- package/engine/install.sh +138 -0
- package/engine/lessons.seed.json +17 -0
- package/engine/registry.template.json +329 -0
- package/package.json +3 -2
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
scrooge-capabilities — refresh per-model quality scores for the weighted router.
|
|
4
|
+
|
|
5
|
+
Pulls live model-capability data and writes ~/.token-scrooge/capabilities.json, which
|
|
6
|
+
`scrooge` uses to route a --task to the best value model (quality-for-task ÷ cost, gated
|
|
7
|
+
by difficulty) instead of always the cheapest. Run it weekly (cron) so routing tracks how
|
|
8
|
+
models actually rank as they improve and get retired.
|
|
9
|
+
|
|
10
|
+
Sources (zero new deps — plain HTTPS GET + JSON):
|
|
11
|
+
• Artificial Analysis (https://artificialanalysis.ai) — Intelligence/Coding/Math indices,
|
|
12
|
+
GPQA (reasoning proxy), output tok/s. Needs a FREE key in $AA_API_KEY (or
|
|
13
|
+
$ARTIFICIAL_ANALYSIS_API_KEY); header x-api-key. Attribution: artificialanalysis.ai.
|
|
14
|
+
• OpenRouter (/api/v1/models) — context length + input/output modalities (best-effort,
|
|
15
|
+
uses your existing OpenRouter key if present).
|
|
16
|
+
|
|
17
|
+
Matching: each registry model id maps to an AA slug via its stored `aa_slug` (from the seed),
|
|
18
|
+
else by normalising the id (lowercase, '.'/'_' → '-'). Unmatched models are reported and
|
|
19
|
+
left as-is. Nothing is overwritten destructively — only the score fields are updated.
|
|
20
|
+
|
|
21
|
+
Usage: scrooge-capabilities # refresh from AA (+OpenRouter), show a report
|
|
22
|
+
scrooge-capabilities --dry-run # fetch + match, print, but don't write
|
|
23
|
+
Exit: 0 updated · 1 nothing fetched (no key / network) · 2 error.
|
|
24
|
+
"""
|
|
25
|
+
import sys, os, json, argparse, urllib.request, urllib.error
|
|
26
|
+
|
|
27
|
+
HOME = os.path.expanduser("~")
|
|
28
|
+
SCROOGE_DIR = os.environ.get("SCROOGE_HOME", os.path.join(HOME, ".token-scrooge"))
|
|
29
|
+
REGISTRY = os.path.join(SCROOGE_DIR, "registry.json")
|
|
30
|
+
CAPS = os.path.join(SCROOGE_DIR, "capabilities.json")
|
|
31
|
+
AA_URL = "https://artificialanalysis.ai/api/v2/data/llms/models"
|
|
32
|
+
|
|
33
|
+
def _c(code, s):
    """Wrap `s` in the ANSI SGR sequence `code`, but only when stderr is a TTY."""
    if sys.stderr.isatty():
        return "\033[%sm%s\033[0m" % (code, s)
    return s


# Named colour helpers used for the stderr report.
def GOLD(s):
    """Colour `s` with SGR 38;5;208 when stderr is a TTY."""
    return _c("38;5;208", s)


def DIM(s):
    """Colour `s` with SGR 2 when stderr is a TTY."""
    return _c("2", s)


def OK(s):
    """Colour `s` with SGR 32 when stderr is a TTY."""
    return _c("32", s)


def WARN(s):
    """Colour `s` with SGR 33 when stderr is a TTY."""
    return _c("33", s)


def ERR(s):
    """Colour `s` with SGR 31 when stderr is a TTY."""
    return _c("31", s)
|
|
40
|
+
|
|
41
|
+
def load_env_file(path):
    """Load KEY=VALUE lines from a dotenv-style file into os.environ.

    Blank lines, lines starting with `#` and lines without `=` are skipped.
    A leading `export ` on the key is dropped, and surrounding double and
    single quotes are stripped from the value. Variables already present in
    the environment are never overridden.

    The load is best-effort: a missing, unreadable or undecodable file is
    ignored and nothing is raised.
    """
    try:
        # `with` closes the handle deterministically; the previous version
        # iterated a bare open() and left the close to garbage collection.
        with open(path) as fh:
            for raw in fh:
                line = raw.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, value = line.split("=", 1)
                key = key.strip()
                if key.startswith("export "):
                    key = key[len("export "):].strip()
                value = value.strip().strip('"').strip("'")
                if key and key not in os.environ:
                    os.environ[key] = value
    except (OSError, ValueError):
        # OSError: missing/unreadable path. ValueError: undecodable bytes
        # (UnicodeDecodeError) or a value os.environ rejects.
        return
|
|
58
|
+
|
|
59
|
+
def http_get_json(url, headers, timeout=30):
    """Issue a GET to `url` with `headers` and return the body parsed as JSON.

    Exceptions from urllib (including HTTPError) and from JSON decoding
    propagate to the caller.
    """
    request = urllib.request.Request(url, headers=headers, method="GET")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        body = response.read().decode()
        return json.loads(body)
|
|
63
|
+
|
|
64
|
+
def norm(s):
    """Normalise a model id: lowercase it and turn '.' and '_' into '-'."""
    slug = s.lower()
    for separator in (".", "_"):
        slug = slug.replace(separator, "-")
    return slug
|
|
66
|
+
|
|
67
|
+
def load_json(path):
    """Return the JSON object stored at `path`, or {} on any failure.

    A file that cannot be opened or parsed, or whose top-level value is not
    an object, yields an empty dict.
    """
    try:
        with open(path) as fh:
            loaded = json.load(fh)
    except Exception:
        return {}
    if isinstance(loaded, dict):
        return loaded
    return {}
|
|
74
|
+
|
|
75
|
+
def seed_path():
    """Return the first existing capabilities.seed.json path, or None.

    The copy under SCROOGE_DIR is preferred; the fallback sits one directory
    above this script's resolved location.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    candidates = (
        os.path.join(SCROOGE_DIR, "capabilities.seed.json"),
        os.path.join(script_dir, "..", "capabilities.seed.json"),
    )
    return next((path for path in candidates if os.path.exists(path)), None)
|
|
82
|
+
|
|
83
|
+
def fetch_aa():
    """Fetch Artificial Analysis model rows and index them by `slug`.

    The API key is read from $AA_API_KEY, falling back to
    $ARTIFICIAL_ANALYSIS_API_KEY, and sent in the `x-api-key` header.

    Returns:
        dict mapping slug -> raw model row. Empty when no key is set, when
        the request fails, or when the payload holds no usable rows. A
        diagnostic is written to stderr in each of those cases.
    """
    key = os.environ.get("AA_API_KEY") or os.environ.get("ARTIFICIAL_ANALYSIS_API_KEY")
    if not key:
        sys.stderr.write(WARN(" Artificial Analysis: no $AA_API_KEY set — skipping quality refresh.\n"))
        return {}
    try:
        data = http_get_json(AA_URL, {"x-api-key": key})
    except urllib.error.HTTPError as e:
        sys.stderr.write(ERR(" Artificial Analysis: HTTP %s (%s)\n" % (e.code, "bad key?" if e.code in (401, 403) else "")))
        return {}
    except Exception as e:
        sys.stderr.write(ERR(" Artificial Analysis: %s\n" % str(e)[:80]))
        return {}
    # The rows are taken from the "data" key when the payload is an object,
    # otherwise the payload itself is treated as the row list.
    rows = data.get("data") if isinstance(data, dict) else data
    if not isinstance(rows, list):
        # An unexpected payload shape is treated as "no rows" instead of
        # raising AttributeError on .get further down.
        rows = []
    by_slug = {}
    for row in rows:
        if not isinstance(row, dict):
            continue
        slug = row.get("slug")
        if slug and isinstance(slug, str):
            by_slug[slug] = row
    sys.stderr.write(DIM(" Artificial Analysis: %d model endpoints fetched.\n" % len(by_slug)))
    return by_slug
|
|
104
|
+
|
|
105
|
+
def fetch_openrouter():
    """Fetch context length and input modalities from OpenRouter (best-effort).

    The key comes from the first non-empty environment variable named in the
    registry's `providers.openrouter.env` list (default: OPENROUTER_API_KEY).
    The base URL comes from `providers.openrouter.base_url`.

    Returns:
        dict mapping the normalised last path segment of each model id to
        {"context": ..., "modalities": ...}. Empty when no key is available,
        the request fails, or the payload has an unexpected shape.
    """
    reg = load_json(REGISTRY)
    providers = reg.get("providers") or {}
    prov = providers.get("openrouter") if isinstance(providers, dict) else None
    if not isinstance(prov, dict):
        # Missing or null provider entry: fall back to the defaults below.
        prov = {}
    env_names = prov.get("env", ["OPENROUTER_API_KEY"])
    if not isinstance(env_names, (list, tuple)):
        env_names = ["OPENROUTER_API_KEY"]
    key = None
    for name in env_names:
        if os.environ.get(name):
            key = os.environ[name]
            break
    if not key:
        return {}
    base = (prov.get("base_url") or "https://openrouter.ai/api/v1").rstrip("/")
    try:
        data = http_get_json(base + "/models", {"Authorization": "Bearer " + key})
    except Exception:
        return {}
    # Accept both {"data": [...]} and a bare list, as fetch_aa does, and
    # ignore anything else rather than crashing on data.get.
    rows = data.get("data") if isinstance(data, dict) else data
    if not isinstance(rows, list):
        rows = []
    out = {}
    for m in rows:
        if not isinstance(m, dict):
            continue
        mid = m.get("id")
        if not mid or not isinstance(mid, str):
            continue
        arch = m.get("architecture") or {}
        # Keyed by the id's last path segment, i.e. without any "vendor/" prefix.
        out[norm(mid.split("/")[-1])] = {
            "context": m.get("context_length"),
            "modalities": arch.get("input_modalities") if isinstance(arch, dict) else None,
        }
    sys.stderr.write(DIM(" OpenRouter: %d models fetched (context/modality).\n" % len(out)))
    return out
|
|
131
|
+
|
|
132
|
+
def aa_scores(m):
    """Project one Artificial Analysis model row onto the stored score fields.

    `reasoning` is the row's `gpqa` evaluation multiplied by 100 and rounded
    to one decimal, or None when `gpqa` is not numeric. Every other field is
    copied through as-is and is None when absent.
    """
    evals = m.get("evaluations") or {}
    gpqa = evals.get("gpqa")
    if isinstance(gpqa, (int, float)):
        reasoning = round(gpqa * 100, 1)
    else:
        reasoning = None
    scores = {}
    scores["intelligence"] = evals.get("artificial_analysis_intelligence_index")
    scores["coding"] = evals.get("artificial_analysis_coding_index")
    scores["math"] = evals.get("artificial_analysis_math_index")
    scores["reasoning"] = reasoning
    scores["speed_tps"] = m.get("median_output_tokens_per_second")
    scores["aa_slug"] = m.get("slug")
    return scores
|
|
143
|
+
|
|
144
|
+
def main():
    """Refresh per-model capability scores and write them to CAPS.

    Steps: load SCROOGE_DIR/.env, read the registry, start from the existing
    capabilities file (or the shipped seed), merge Artificial Analysis scores
    and OpenRouter context/modality data per registry model, print a report
    to stderr, then write the result unless --dry-run was given.

    Returns:
        0 after a successful run (including --dry-run), 1 when neither source
        returned data, 2 when the registry has no models.
    """
    import time  # the file-level import line does not bring `time` in

    ap = argparse.ArgumentParser(prog="scrooge-capabilities")
    ap.add_argument("--dry-run", action="store_true", help="fetch + match + print, but don't write")
    args = ap.parse_args()

    load_env_file(os.path.join(SCROOGE_DIR, ".env"))
    reg = load_json(REGISTRY)
    if not reg.get("models"):
        sys.stderr.write(ERR("No registry at %s — run `scrooge setup` first.\n" % REGISTRY))
        return 2

    # base store: existing user caps, else the shipped seed
    caps = load_json(CAPS) or load_json(seed_path() or "")
    if not isinstance(caps.get("_meta"), dict):
        # A missing or malformed _meta would break the .update() below.
        caps["_meta"] = {}

    by_slug = fetch_aa()
    by_or = fetch_openrouter()
    if not by_slug and not by_or:
        sys.stderr.write(WARN("Nothing fetched — set $AA_API_KEY (free at artificialanalysis.ai) and/or an OpenRouter key.\n"))
        return 1

    today = time.strftime("%Y-%m-%d")
    matched, unmatched = [], []
    for mid in reg["models"]:
        existing = caps.get(mid) if isinstance(caps.get(mid), dict) else {}
        # Prefer the stored aa_slug; fall back to the normalised model id.
        slug = (existing.get("aa_slug") or norm(mid))
        m = by_slug.get(slug) or by_slug.get(norm(mid))
        rec = dict(existing)
        if m:
            # None scores never overwrite stored values, except "math", which
            # is always written (presumably to clear a stale score — confirm).
            rec.update({k: v for k, v in aa_scores(m).items() if v is not None or k == "math"})
            rec["updated"] = today
            rec["source"] = "artificialanalysis"
            matched.append(mid)
        elif by_slug:
            # Only report a miss when AA data was actually fetched.
            unmatched.append(mid)
        orx = by_or.get(norm(mid))
        if orx:
            if orx.get("context"):
                rec["context"] = orx["context"]
            if orx.get("modalities"):
                rec["modalities"] = orx["modalities"]
        if rec:
            caps[mid] = rec

    caps["_meta"].update({"source": "artificialanalysis.ai + openrouter", "refreshed": today,
                          "attribution": "https://artificialanalysis.ai/"})

    sys.stderr.write(GOLD("🪙 scrooge-capabilities — %d matched, %d unmatched\n" % (len(matched), len(unmatched))))
    for mid in matched:
        c = caps[mid]
        sys.stderr.write(" %s %-24s intel=%-5s code=%-5s reason=%-5s %st/s\n" % (
            OK("✓"), mid, c.get("intelligence"), c.get("coding"), c.get("reasoning"),
            (str(round(c["speed_tps"])) if isinstance(c.get("speed_tps"), (int, float)) else "?")))
    for mid in unmatched:
        sys.stderr.write(" %s %-24s %s\n" % (WARN("?"), mid,
            DIM("no AA match for slug '%s' — set its aa_slug in capabilities.json" % norm(mid))))

    if args.dry_run:
        sys.stderr.write(DIM(" (--dry-run: not written)\n"))
        return 0
    os.makedirs(SCROOGE_DIR, exist_ok=True)
    # Write to a sibling temp file and rename it over the target, so an
    # interrupted run never leaves a truncated capabilities.json behind.
    tmp_path = CAPS + ".tmp"
    with open(tmp_path, "w") as fh:
        json.dump(caps, fh, indent=2)
        fh.write("\n")
    os.replace(tmp_path, CAPS)
    sys.stderr.write(OK(" wrote %s\n" % CAPS.replace(HOME, "~")))
    return 0
|
|
207
|
+
|
|
208
|
+
if __name__ == "__main__":
|
|
209
|
+
sys.exit(main())
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
diverge — native "diverge then focus" idea generator (claude-adhd pattern).
|
|
4
|
+
|
|
5
|
+
PHASE 1 (Diverge): spawn N isolated ideation calls, each under a DIFFERENT
|
|
6
|
+
cognitive frame, fanned across DIFFERENT cheap model families (DeepSeek / GLM /
|
|
7
|
+
Gemini / Kimi) so branches can't anchor on each other or on one model's bias.
|
|
8
|
+
Runs in parallel via the sibling `scrooge` router (cheap, ~free, transparent).
|
|
9
|
+
|
|
10
|
+
PHASE 2 (Focus): a critic pass scores ideas (novelty/viability/fit), flags
|
|
11
|
+
seductive-but-broken "traps", clusters by approach, and deepens the top-K.
|
|
12
|
+
|
|
13
|
+
• Default : Phase 1 (cheap, parallel) + Phase 2 critic (cheap model).
|
|
14
|
+
• --raw : Phase 1 only — emit aggregated ideas JSON so OPUS
|
|
15
|
+
(the orchestrator) can run the focus pass at full quality.
|
|
16
|
+
This is the preferred flow inside Claude Code.
|
|
17
|
+
|
|
18
|
+
Usage:
|
|
19
|
+
diverge "design a rate limiter that survives leader election"
|
|
20
|
+
diverge "name this function" --frames 4 --ideas 6 --top 2
|
|
21
|
+
diverge "..." --raw # ideas JSON -> Opus does the critic
|
|
22
|
+
diverge "..." --critic kimi # autonomous, pick the critic model
|
|
23
|
+
diverge --list-frames
|
|
24
|
+
|
|
25
|
+
Only for DIVERGENT problems (design, naming, API surface, debug hypotheses,
|
|
26
|
+
"a few ways to X"). Not for convergent/execution work.
|
|
27
|
+
"""
|
|
28
|
+
import sys, os, json, argparse, subprocess, re, concurrent.futures, shutil
|
|
29
|
+
|
|
30
|
+
# Resolve the cheap-model router (sibling `scrooge`), with PATH fallback.
|
|
31
|
+
_HERE = os.path.dirname(os.path.realpath(__file__))
|
|
32
|
+
LLM = os.path.join(_HERE, "scrooge")
|
|
33
|
+
if not os.path.exists(LLM):
|
|
34
|
+
LLM = shutil.which("scrooge") or LLM
|
|
35
|
+
|
|
36
|
+
# Cheap model families to rotate across (maximizes cross-family diversity).
|
|
37
|
+
# Only models that are funded/working belong here; edit freely.
|
|
38
|
+
# Fast, non-thinking models with clean JSON output across diverse families.
|
|
39
|
+
# (Thinking models like gemini-2.5-flash / kimi-k2.6 burn the token budget on
|
|
40
|
+
# reasoning and truncate the JSON — use them explicitly, not in the default fan.)
|
|
41
|
+
DIVERGE_MODELS = ["deepseek-chat", "glm-4.6", "glm-4.5-air", "gemini-2.5-flash-lite"]
|
|
42
|
+
DEFAULT_CRITIC = "deepseek-chat"
|
|
43
|
+
|
|
44
|
+
# --- live-model resolution (only fan across models the user actually has keys for) ---
|
|
45
|
+
def _scrooge_home():
    """Return the scrooge data directory: $SCROOGE_HOME, else ~/.opus-scrooge."""
    # NOTE(review): scrooge-capabilities and scrooge-drift default to
    # ~/.token-scrooge — confirm which default directory is intended here.
    fallback = os.path.join(os.path.expanduser("~"), ".opus-scrooge")
    return os.environ.get("SCROOGE_HOME", fallback)
|
|
47
|
+
|
|
48
|
+
def _load_keys():
    """Load SCROOGE_HOME/.env and $SCROOGE_ENV_FILE into os.environ (no override).

    Each file is read as KEY=VALUE lines. Blank lines, `#` comments and lines
    without `=` are skipped, a leading `export ` is dropped from the key, and
    surrounding quotes are stripped from the value. Existing environment
    variables always win.
    """
    for path in (os.path.join(_scrooge_home(), ".env"), os.environ.get("SCROOGE_ENV_FILE", "")):
        if not path or not os.path.exists(path):
            continue
        # `with` closes the handle; the previous bare open() left it to the GC.
        with open(path) as fh:
            for raw in fh:
                line = raw.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, v = line.split("=", 1)
                k = k.strip()
                if k.startswith("export "):
                    k = k[7:].strip()
                if k and k not in os.environ:
                    os.environ[k] = v.strip().strip('"').strip("'")
|
|
58
|
+
|
|
59
|
+
def live_models(prefer):
    """Filter `prefer` to models whose provider has a live key; fall back to any live model.

    A provider counts as live when at least one environment variable named in
    its registry `env` list is set and non-empty.

    Returns:
        The live subset of `prefer` in its original order; if that is empty,
        every registry model with a live provider; if that is empty too, or
        the registry cannot be read, `prefer` unchanged.
    """
    _load_keys()
    try:
        # Context manager so the registry handle is closed promptly.
        with open(os.path.join(_scrooge_home(), "registry.json")) as fh:
            reg = json.load(fh)
    except Exception:
        return prefer
    if not isinstance(reg, dict):
        return prefer
    provs = reg.get("providers") or {}
    models = reg.get("models") or {}
    if not isinstance(provs, dict) or not isinstance(models, dict):
        return prefer

    def provider_of(model_id):
        # Entries without a usable "provider" are treated as not live
        # instead of raising KeyError.
        entry = models.get(model_id)
        return entry.get("provider") if isinstance(entry, dict) else None

    def has_key(provider):
        if not isinstance(provider, str):
            return False
        entry = provs.get(provider)
        env_names = entry.get("env") if isinstance(entry, dict) else None
        if not isinstance(env_names, (list, tuple)):
            return False
        return any(os.environ.get(n) for n in env_names if isinstance(n, str))

    live = [m for m in prefer if m in models and has_key(provider_of(m))]
    if live:
        return live
    return [m for m in models if has_key(provider_of(m))] or prefer
|
|
72
|
+
|
|
73
|
+
def first_live(prefer_list):
    """Return the first entry of live_models(prefer_list), else prefer_list[0]."""
    candidates = live_models(prefer_list)
    if candidates:
        return candidates[0]
    return prefer_list[0]
|
|
76
|
+
|
|
77
|
+
FRAMES = [
|
|
78
|
+
("first-principles", "Ignore all convention and prior art. Derive solutions purely from the underlying constraints and physics of the problem."),
|
|
79
|
+
("constraint-inversion", "Identify the single biggest assumed constraint, then imagine it removed or reversed. What becomes possible?"),
|
|
80
|
+
("adversary", "Think like an attacker / red-teamer. Design from the angle of how this breaks, gets abused, or fails under hostile conditions — then what design survives that."),
|
|
81
|
+
("minimalist", "Find the absolute simplest thing that could possibly work. Strip every non-essential part. Bias toward less."),
|
|
82
|
+
("extreme-scale", "Assume 1000x the load, users, data, or concurrency. What designs only make sense at extreme scale?"),
|
|
83
|
+
("cross-domain", "Borrow from a completely different field (biology, logistics, games, finance, distributed systems). What analogy transfers?"),
|
|
84
|
+
("user-empathy", "Reason only from the end-user's lived experience and emotional needs. What do THEY actually feel and want?"),
|
|
85
|
+
("temporal", "Think 2 years out. What ages badly, what becomes legacy, what is still good in the long run?"),
|
|
86
|
+
("pragmatic-cost", "Optimize for cheapest and fastest to ship and operate. What is the 80/20 that ships this week?"),
|
|
87
|
+
("composition", "Solve it by combining existing, proven building blocks rather than inventing anything new."),
|
|
88
|
+
("contrarian", "Argue hard for the OPPOSITE of the obvious/popular approach. Make the strongest case for the unconventional path."),
|
|
89
|
+
("second-order", "Focus on downstream and emergent effects. What does this cause two steps later that nobody planned for?"),
|
|
90
|
+
]
|
|
91
|
+
|
|
92
|
+
def frame_lookup():
    """Return a fresh dict mapping each frame name in FRAMES to its vantage text."""
    return dict(FRAMES)
|
|
94
|
+
|
|
95
|
+
def extract_json(text):
    """Pull a JSON object/array out of model output (handles code fences/prose).

    Strategy, in order:
      1. strip Markdown code fences and try to parse the whole text;
      2. decode the first JSON value that starts at the earliest `{` or `[`
         (then at the other opener), ignoring any trailing prose;
      3. salvage every complete flat `{...}` object from truncated output
         and return them as a list.

    Returns:
        The decoded value (dict, list, or a scalar when the whole text is
        one), a list of salvaged objects, or None when nothing parses.
    """
    if not text:
        return None
    t = text.strip()
    t = re.sub(r"^```(?:json)?\s*|\s*```$", "", t, flags=re.MULTILINE).strip()

    # Whole-document parse first, so a bare array of objects comes back as
    # the array. The old scan always tried "{" before "[" and therefore
    # returned only the array's first object.
    try:
        return json.loads(t)
    except (ValueError, RecursionError):
        pass

    # raw_decode parses a single value from a given offset and tolerates
    # trailing text. Unlike manual brace counting, it also handles braces
    # that appear inside string literals.
    decoder = json.JSONDecoder()
    starts = sorted(i for i in (t.find("{"), t.find("[")) if i >= 0)
    for start in starts:
        try:
            value, _end = decoder.raw_decode(t, start)
        except (ValueError, RecursionError):
            continue
        return value

    # Salvage: pull complete {...} objects out of a truncated array.
    objs = []
    for m in re.finditer(r"\{[^{}]*\}", t):
        try:
            objs.append(json.loads(m.group(0)))
        except ValueError:
            continue
    return objs or None
|
|
126
|
+
|
|
127
|
+
def call_llm(model, prompt, system=None, max_tokens=900, want_json=True):
    """Run the `scrooge` router once and return whatever it wrote to stdout.

    The child's stderr is not captured, so its banners go straight to the
    parent's stderr.
    """
    argv = [LLM, "--model", model, "--max-tokens", str(max_tokens)]
    argv.extend(["--json"] if want_json else [])
    argv.extend(["--system", system] if system else [])
    argv.append(prompt)
    # stderr inherits -> the 🔶 EXTERNAL-LLM banners stay visible (transparency).
    completed = subprocess.run(argv, stdout=subprocess.PIPE, stderr=None, text=True)
    return completed.stdout
|
|
137
|
+
|
|
138
|
+
def run_frame(problem, frame_name, vantage, model, ideas_per_frame):
    """Ask `model` for ideas under one cognitive frame and normalise the reply.

    Returns a list of {"title", "sketch", "frame", "model"} dicts. The title
    is cut to 160 characters and the sketch to 600. Entries that are not
    dicts or that have no title are dropped.
    """
    system = ("You are an idea GENERATOR working under a strict cognitive frame. "
              "Generate diverse ideas ONLY — do NOT evaluate, rank, or hedge. "
              "Adopt this lens completely: " + vantage)
    prompt = "".join([
        f'PROBLEM: {problem}\n\n',
        f'Through the "{frame_name}" lens above, produce {ideas_per_frame} DISTINCT ideas. ',
        'Each idea: a short title and a 1-3 sentence sketch. ',
        'Respond as JSON: {"ideas":[{"title":"...","sketch":"..."}]}',
    ])
    reply = extract_json(call_llm(model, prompt, system=system, max_tokens=1400))

    # The model may answer with {"ideas": [...]} or with a bare list.
    if isinstance(reply, dict):
        raw_ideas = reply.get("ideas", []) or []
    elif isinstance(reply, list):
        raw_ideas = reply
    else:
        raw_ideas = []

    return [
        {"title": str(entry.get("title"))[:160], "sketch": str(entry.get("sketch", ""))[:600],
         "frame": frame_name, "model": model}
        for entry in raw_ideas
        if isinstance(entry, dict) and entry.get("title")
    ]
|
|
159
|
+
|
|
160
|
+
def diverge(problem, n_frames, ideas_per_frame, workers):
    """Phase 1: run the first `n_frames` frames in parallel and pool their ideas.

    Frames are assigned round-robin to the models returned by
    live_models(DIVERGE_MODELS). Progress is written to stderr. A frame whose
    call raises is reported and skipped.

    Args:
        problem: the problem statement handed to every frame.
        n_frames: how many entries of FRAMES to use; clamped to
            0..len(FRAMES).
        ideas_per_frame: number of ideas requested from each frame.
        workers: thread-pool size; values below 1 are raised to 1.

    Returns:
        A flat list of idea dicts in completion order.
    """
    # Clamp: a negative count used to slice from the END of FRAMES, silently
    # selecting most frames instead of none.
    n_frames = max(0, min(n_frames, len(FRAMES)))
    chosen = FRAMES[:n_frames]
    # ThreadPoolExecutor raises ValueError for max_workers <= 0.
    workers = max(1, workers)
    pool = live_models(DIVERGE_MODELS)  # only models the user has keys for
    sys.stderr.write("\n\033[1m◆ DIVERGE\033[0m — %d frames × %d ideas, fanned across %d cheap model(s) (parallel)\n" % (len(chosen), ideas_per_frame, len(pool)))
    sys.stderr.write(" frames: %s\n models: %s\n\n" % (", ".join(n for n, _ in chosen), ", ".join(pool)))
    results = []
    if not chosen or not pool:
        # Nothing to run; also avoids the modulo-by-zero on an empty pool.
        return results
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as ex:
        futs = {}
        for i, (fname, vantage) in enumerate(chosen):
            model = pool[i % len(pool)]
            futs[ex.submit(run_frame, problem, fname, vantage, model, ideas_per_frame)] = fname
        for fut in concurrent.futures.as_completed(futs):
            try:
                ideas = fut.result()
                results.extend(ideas)
                sys.stderr.write(" ✓ %-20s %d ideas\n" % (futs[fut], len(ideas)))
            except Exception as e:
                sys.stderr.write(" ✗ %-20s %s\n" % (futs[fut], e))
    return results
|
|
179
|
+
|
|
180
|
+
def focus(problem, ideas, top_k, critic_model):
    """Phase 2: have `critic_model` cluster, trap-flag and deepen the ideas.

    Returns whatever extract_json recovers from the critic's reply.
    """
    # De-identify frames so the critic doesn't bias toward a label.
    numbered = ["%d. %s — %s" % (number, idea["title"], idea["sketch"])
                for number, idea in enumerate(ideas, 1)]
    listing = "\n".join(numbered)
    system = ("You are a sharp, skeptical critic. You evaluate a pool of candidate ideas. "
              "Your highest-value job is spotting SEDUCTIVE-BUT-BROKEN ideas (traps) and saying why. "
              "Be concrete and honest; do not inflate scores.")
    prompt = "".join([
        f'PROBLEM: {problem}\n\nCANDIDATE IDEAS:\n{listing}\n\n',
        'Do all of the following and respond as JSON:\n',
        '1. Cluster the ideas into a few distinct approaches (name each cluster).\n',
        '2. Flag "traps": ideas that look attractive but are broken/risky, WITH the reason.\n',
        f'3. Pick the top {top_k} ideas overall (by novelty+viability+fit) and DEEPEN each into ',
        '{"title","why_it_wins","risks":["..."],"first_steps":["..."]}.\n',
        'JSON shape: {"clusters":[{"name","idea_indexes":[..]}],',
        '"traps":[{"title","why_broken"}],',
        '"top":[{"title","why_it_wins","risks":[],"first_steps":[]}]}',
    ])
    sys.stderr.write("\n\033[1m◆ FOCUS\033[0m — critic pass on %s\n" % critic_model)
    reply = call_llm(critic_model, prompt, system=system, max_tokens=2000)
    return extract_json(reply)
|
|
198
|
+
|
|
199
|
+
def _dict_entries(value):
    """Return the dict elements of `value` if it is a list, else an empty list."""
    if not isinstance(value, list):
        return []
    return [entry for entry in value if isinstance(entry, dict)]


def _as_list(value):
    """Return `value` if it is a list, else an empty list."""
    return value if isinstance(value, list) else []


def render(problem, ideas, report):
    """Print the diverge/focus result to stdout.

    Args:
        problem: the problem statement, shown in the header.
        ideas: idea dicts with "frame", "title" and "sketch" keys.
        report: the critic's parsed reply. Only a non-empty dict is rendered
            as a report; anything else (None, or the list that the JSON
            salvage path can produce) falls back to listing the raw ideas.
    """
    print("=" * 70)
    print("DIVERGE → FOCUS:", problem)
    print("=" * 70)
    print("\nGenerated %d ideas across %d frames.\n" % (len(ideas), len(set(i["frame"] for i in ideas))))
    # A list-shaped report used to reach report.get(...) and raise
    # AttributeError; treat every non-dict as "no parseable result".
    if not report or not isinstance(report, dict):
        print("(critic returned no parseable result — raw ideas below)")
        for it in ideas:
            print(" • [%s] %s — %s" % (it["frame"], it["title"], it["sketch"]))
        return
    # The critic is an LLM, so each section is validated before use:
    # non-list sections and non-dict entries are skipped.
    cl = _dict_entries(report.get("clusters"))
    if cl:
        print("APPROACHES:")
        for c in cl:
            print(" ▸ %s (%d ideas)" % (c.get("name", "?"), len(_as_list(c.get("idea_indexes")))))
    traps = _dict_entries(report.get("traps"))
    if traps:
        print("\n⚠ TRAPS (seductive but broken):")
        for t in traps:
            print(" ✗ %s — %s" % (t.get("title", "?"), t.get("why_broken", "")))
    top = _dict_entries(report.get("top"))
    if top:
        print("\n★ TOP %d (deepened):" % len(top))
        for i, t in enumerate(top, 1):
            print("\n %d. %s" % (i, t.get("title", "?")))
            print(" why: %s" % t.get("why_it_wins", ""))
            for r in _as_list(t.get("risks")):
                print(" risk: %s" % r)
            for s in _as_list(t.get("first_steps")):
                print(" step: %s" % s)
    print()
|
|
228
|
+
|
|
229
|
+
def main():
    """CLI entry point: parse arguments, run the diverge phase, then focus.

    --list-frames prints the frame table and returns. --raw prints the
    aggregated ideas as JSON on stdout and skips the critic. The process
    exits with status 2 when no ideas were generated.
    """
    parser = argparse.ArgumentParser(prog="diverge")
    parser.add_argument("problem", nargs="?")
    parser.add_argument("--frames", type=int, default=6)
    parser.add_argument("--ideas", type=int, default=5, help="ideas per frame")
    parser.add_argument("--top", type=int, default=3)
    parser.add_argument("--workers", type=int, default=6)
    parser.add_argument("--critic", default=DEFAULT_CRITIC)
    parser.add_argument("--raw", action="store_true", help="emit ideas JSON only (Opus does the focus)")
    parser.add_argument("--list-frames", action="store_true")
    opts = parser.parse_args()

    if opts.list_frames:
        for frame_name, vantage in FRAMES:
            print("%-20s %s" % (frame_name, vantage))
        return
    if not opts.problem:
        parser.error("provide a problem, or --list-frames")

    pooled = diverge(opts.problem, opts.frames, opts.ideas, opts.workers)
    if not pooled:
        sys.stderr.write("No ideas generated.\n")
        sys.exit(2)

    if opts.raw:
        # Hand structured ideas to the orchestrator (Opus) for a full-quality focus pass.
        payload = {"problem": opts.problem, "ideas": pooled}
        json.dump(payload, sys.stdout, indent=2)
        sys.stdout.write("\n")
        return

    # ensure the critic model is live
    critic_model = first_live([opts.critic] + DIVERGE_MODELS)
    verdict = focus(opts.problem, pooled, opts.top, critic_model)
    render(opts.problem, pooled, verdict)
|
|
261
|
+
|
|
262
|
+
if __name__ == "__main__":
|
|
263
|
+
main()
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
scrooge-drift — detect when the scrooge registry has fallen behind reality.
|
|
4
|
+
|
|
5
|
+
For every provider that has a live API key, lists the models the provider
|
|
6
|
+
actually serves right now (`scrooge models <provider>`) and diffs that against
|
|
7
|
+
the model ids the registry routes to.
|
|
8
|
+
|
|
9
|
+
Reports:
|
|
10
|
+
DEAD — model id in the registry that the provider no longer serves
|
|
11
|
+
(these calls will FAIL — fix ASAP)
|
|
12
|
+
NEW — current-gen model the provider serves that the registry doesn't know
|
|
13
|
+
(candidate to adopt; filtered to likely chat models)
|
|
14
|
+
|
|
15
|
+
Exit code 0 = registry in sync. Exit code 1 = drift found (DEAD or NEW).
|
|
16
|
+
Run weekly via cron; act on the report next session.
|
|
17
|
+
|
|
18
|
+
Usage: scrooge-drift [--json] [--quiet]
|
|
19
|
+
"""
|
|
20
|
+
import os, sys, json, subprocess, re
|
|
21
|
+
|
|
22
|
+
HOME = os.path.expanduser("~")
|
|
23
|
+
SCROOGE_DIR = os.environ.get("SCROOGE_HOME", os.path.join(HOME, ".token-scrooge"))
|
|
24
|
+
REGISTRY = os.path.join(SCROOGE_DIR, "registry.json")
|
|
25
|
+
SCROOGE = os.environ.get("SCROOGE_BIN", os.path.join(HOME, ".local", "bin", "scrooge"))
|
|
26
|
+
|
|
27
|
+
# Live model ids matching these are noise we'd never route grunt work to:
|
|
28
|
+
# media/audio/embeddings/etc, legacy generations, and dated pin snapshots.
|
|
29
|
+
NOISE = re.compile(
|
|
30
|
+
r"(image|video|audio|tts|embed|whisper|imagine|aqa|computer-use|"
|
|
31
|
+
r"deep-research|antigravity|guard|rerank|moderation|robotics|realtime|"
|
|
32
|
+
r"transcribe|search-preview|sora|veo|lyria|gemma|nano-banana|"
|
|
33
|
+
r"babbage|davinci|instruct|moonshot-v1|"
|
|
34
|
+
r"gpt-3|gpt-4-|gpt-4o|gpt-4\.|gpt-4$|" # superseded OpenAI gens
|
|
35
|
+
r"gemini-2\.0|gemini-1|" # superseded Gemini gens
|
|
36
|
+
r"o1|o3|o4-|" # superseded OpenAI reasoners
|
|
37
|
+
r"-latest$|" # rolling aliases, not pinned ids
|
|
38
|
+
r"-\d{4}-\d{2}-\d{2}|-\d{6}$|-\d{4}$|-preview-\d)", # dated pin snapshots
|
|
39
|
+
re.I,
|
|
40
|
+
)
|
|
41
|
+
NEW_CAP = 10 # don't flood the report; the refresh pass does the real curation
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def load_registry():
    """Read REGISTRY and return its parsed JSON; open/parse errors propagate."""
    with open(REGISTRY) as handle:
        registry = json.load(handle)
    return registry
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def live_models(provider):
    """List the model ids `scrooge models <provider>` reports right now.

    Returns:
        A 2-tuple `(ids, error)`. On success `ids` is a set of model ids,
        each reduced to its last "/"-separated segment, and `error` is None.
        On failure `ids` is None and `error` is a short message: the
        exception text, or the first 200 characters of the command's stderr
        (stdout when stderr is empty).
    """
    try:
        proc = subprocess.run(
            [SCROOGE, "models", provider],
            capture_output=True, text=True, timeout=40,
        )
    except Exception as exc:
        return None, str(exc)
    if proc.returncode != 0:
        message = (proc.stderr or proc.stdout).strip()[:200]
        return None, message
    candidates = (row.strip() for row in proc.stdout.splitlines())
    # Blank rows and rows containing a space are headers / banner lines.
    # Taking the last "/" segment strips the "models/" gemini prefix.
    ids = {row.split("/")[-1] for row in candidates if row and " " not in row}
    return ids, None
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def main():
    """Diff the registry's model ids against what each provider serves.

    For every provider that has at least one model in the registry, the live
    model list is fetched and compared:
      DEAD - registry ids whose last "/" segment is not served any more;
      NEW  - served ids absent from the registry and not matched by NOISE.
    Providers that cannot be listed are recorded under "errors".

    Output is JSON with --json, otherwise a text report. --quiet suppresses
    the text report when there is no drift. Exits 1 when drift was found,
    else 0; listing errors alone do not count as drift.
    """
    as_json = "--json" in sys.argv
    quiet = "--quiet" in sys.argv
    reg = load_registry()

    # provider -> registry model ids
    reg_by_prov = {}
    for mid, m in reg["models"].items():
        reg_by_prov.setdefault(m["provider"], set()).add(mid)

    report = {"dead": {}, "new": {}, "errors": {}}
    for prov in reg["providers"]:
        # Only providers the registry routes to. No key check is done here;
        # a provider that cannot be listed is recorded under "errors".
        if prov not in reg_by_prov:
            continue
        live, err = live_models(prov)
        if live is None:
            report["errors"][prov] = err
            continue
        registered = reg_by_prov[prov]
        # Built once per provider; it used to be rebuilt for every live model.
        known = {r.split("/")[-1] for r in registered}
        dead = sorted(m for m in registered if m.split("/")[-1] not in live)
        new = sorted(m for m in live if m not in known and not NOISE.search(m))
        if dead:
            report["dead"][prov] = dead
        if new:
            report["new"][prov] = new

    drift = bool(report["dead"] or report["new"])

    if as_json:
        print(json.dumps({"drift": drift, **report}, indent=2))
    elif not quiet or drift:
        if report["dead"]:
            print("DEAD (registry routes to retired models — calls will FAIL):")
            for p, ms in report["dead"].items():
                print(" %-10s %s" % (p, ", ".join(ms)))
        if report["new"]:
            print("NEW (current-gen models not yet in registry):")
            for p, ms in report["new"].items():
                # Cap the listing per provider and summarise the remainder.
                shown = ms[:NEW_CAP]
                extra = len(ms) - len(shown)
                tail = " (+%d more)" % extra if extra > 0 else ""
                print(" %-10s %s%s" % (p, ", ".join(shown), tail))
        if report["errors"]:
            print("ERRORS (couldn't list — skipped):")
            for p, e in report["errors"].items():
                print(" %-10s %s" % (p, e))
        if not drift and not report["errors"]:
            print("registry in sync — no drift.")

    sys.exit(1 if drift else 0)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
if __name__ == "__main__":
|
|
126
|
+
main()
|