trantor 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ verify — harness-backed verification gate (Tier 3 executor).
4
+
5
+ Two-part verification, cheapest-first:
6
+ 1. DETERMINISTIC (free, ground truth): detect the toolchain and actually run
7
+ build / typecheck / test, capturing real exit codes + output tails. A
8
+ non-zero exit is an objective FAIL — no LLM can override it.
9
+ 2. JUDGMENT (cheap LLM via the sibling `scrooge` router, --task verify): only if the
10
+ deterministic steps pass AND a --claim is given, ask a cheap model whether
11
+ the evidence actually SUPPORTS the claim and what it does NOT cover
12
+ (e.g. "tests pass but none exercise the new function"). Opus adjudicates last.
13
+
14
+ The agent that wrote the code is never the judge: this re-runs everything itself.
15
+
16
+ Usage:
17
+ verify --dir . --claim "added POST /v1/refunds that 409s on duplicate"
18
+ verify --dir path/to/repo # deterministic only (no claim)
19
+ verify --cmd "npm test" --claim "..." # explicit command instead of autodetect
20
+ verify --no-llm # deterministic only, skip judgment
21
+ verify --judge kimi --json # choose judge model / JSON output
22
+
23
+ Verdict: VERIFIED (built+tested+judgment supports) · FAILED (a step errored or
24
+ judge refutes) · INCONCLUSIVE (passes but judge finds gaps / nothing to run).
25
+ Exit code: 0 VERIFIED, 1 INCONCLUSIVE, 2 FAILED.
26
+ """
27
+ import sys, os, json, argparse, subprocess, shutil
28
+
29
# Locate the cheap-model router: prefer the `scrooge` executable that sits next
# to this script; otherwise use whatever `scrooge` is on PATH. If neither is
# found, keep the sibling path so the eventual failure names a concrete file.
_HERE = os.path.dirname(os.path.realpath(__file__))
_SIBLING = os.path.join(_HERE, "scrooge")
LLM = _SIBLING if os.path.exists(_SIBLING) else (shutil.which("scrooge") or _SIBLING)
34
+
35
def sh(cmd, cwd, timeout=600):
    """Run *cmd* through the shell in *cwd* and capture its combined output.

    Args:
        cmd: Shell command line to execute.
        cwd: Working directory for the command.
        timeout: Seconds to wait before giving up.

    Returns:
        ``(exit_code, output)``. stderr is merged into stdout. A timeout is
        reported as exit code 124 with whatever output was captured so far
        followed by a "(timed out ...)" note; any other failure to launch the
        command is reported as exit code 1 with the error text.
    """
    try:
        # errors="replace": build tools can emit bytes that are not valid in the
        # locale encoding; a decode error must not turn a passing step into a
        # failure.
        r = subprocess.run(cmd, cwd=cwd, shell=True, stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT, text=True, errors="replace",
                           timeout=timeout)
        return r.returncode, r.stdout
    except subprocess.TimeoutExpired as exc:
        note = "(timed out after %ds)" % timeout
        # Keep the partial output: it is usually the only hint about where the
        # command hung. It may arrive as bytes even in text mode.
        partial = exc.stdout or ""
        if isinstance(partial, bytes):
            partial = partial.decode("utf-8", "replace")
        partial = partial.rstrip("\n")
        return 124, (partial + "\n" + note) if partial.strip() else note
    except Exception as e:
        return 1, str(e)
44
+
45
def tail(s, n=30):
    """Return the last *n* lines of *s* joined by newlines.

    Surrounding whitespace is stripped from *s* first; ``None`` or empty input
    yields an empty string, as does a non-positive *n*.
    """
    if n <= 0:
        # lines[-0:] would be the whole list, not an empty tail.
        return ""
    lines = (s or "").strip().splitlines()
    return "\n".join(lines[-n:])
48
+
49
def has(cwd, *names):
    """Report whether at least one of *names* exists directly inside *cwd*."""
    for candidate in names:
        if os.path.exists(os.path.join(cwd, candidate)):
            return True
    return False
51
+
52
def pkg_scripts(cwd):
    """Return the ``scripts`` mapping from ``package.json`` in *cwd*.

    Returns an empty dict when the file is missing, unreadable, not valid JSON,
    or when the manifest or its ``scripts`` entry is not a JSON object.
    """
    path = os.path.join(cwd, "package.json")
    try:
        # utf-8-sig tolerates a leading BOM; the context manager closes the
        # handle even when parsing fails.
        with open(path, encoding="utf-8-sig") as fh:
            manifest = json.load(fh)
    except (OSError, ValueError):
        # ValueError covers both malformed JSON and undecodable bytes.
        return {}
    scripts = manifest.get("scripts") if isinstance(manifest, dict) else None
    return scripts if isinstance(scripts, dict) else {}
57
+
58
def detect_steps(cwd):
    """Detect the project toolchain in *cwd* and plan its verification commands.

    Returns:
        A ``(toolchain, steps)`` tuple. ``toolchain`` is one of ``"node"``,
        ``"rust"``, ``"go"``, ``"python"``, ``"make"`` or ``"unknown"``;
        ``steps`` is a list of ``(name, cmd)`` pairs in execution order and may
        be empty. The first matching marker file wins, checked in that order.
    """
    import re  # local import: this function is the only user in this scope

    if has(cwd, "package.json"):
        sc = pkg_scripts(cwd)
        # Choose the script runner from the lockfile that is present.
        if has(cwd, "pnpm-lock.yaml"):
            runner = "pnpm"
        elif has(cwd, "yarn.lock"):
            runner = "yarn"
        else:
            runner = "npm run"
        steps = []
        if "build" in sc:
            steps.append(("build", "%s build" % runner))
        if "typecheck" in sc:
            steps.append(("typecheck", "%s typecheck" % runner))
        elif "type-check" in sc:
            steps.append(("typecheck", "%s type-check" % runner))
        elif has(cwd, "tsconfig.json") and shutil.which("npx"):
            # No typecheck script, but a TypeScript project: run the compiler directly.
            steps.append(("typecheck", "npx tsc --noEmit"))
        if "test" in sc:
            steps.append(("test", "npm test" if runner == "npm run" else "%s test" % runner))
        return "node", steps

    if has(cwd, "Cargo.toml"):
        return "rust", [("build", "cargo build"), ("test", "cargo test")]

    if has(cwd, "go.mod"):
        return "go", [("build", "go build ./..."), ("test", "go test ./...")]

    if has(cwd, "pyproject.toml", "setup.py", "pytest.ini", "tox.ini"):
        steps = []
        if shutil.which("ruff"):
            steps.append(("lint", "ruff check ."))
        steps.append(("test", "python3 -m pytest -q"))
        return "python", steps

    if has(cwd, "Makefile"):
        try:
            with open(os.path.join(cwd, "Makefile"), encoding="utf-8", errors="replace") as fh:
                mk = fh.read()
        except OSError:
            # Unreadable (or a directory named Makefile): nothing can be planned.
            mk = ""
        steps = []
        for target in ("build", "test"):
            # A rule starts the line with the target name followed by ':';
            # "name := value" is a variable assignment, not a rule.
            if re.search(r"(?m)^%s[ \t]*:(?!=)" % target, mk):
                steps.append((target, "make %s" % target))
        return "make", steps

    return "unknown", []
89
+
90
def _extract_judgment(text):
    """Return the first JSON object in *text* that carries a judgment, else None."""
    decoder = json.JSONDecoder()
    pos = text.find("{")
    while pos != -1:
        try:
            obj, _ = decoder.raw_decode(text, pos)
        except ValueError:
            obj = None
        # Only an object with "supports" or "verdict" is a judgment; anything
        # else (for example an error payload) must not be read as approval.
        if isinstance(obj, dict) and ("supports" in obj or "verdict" in obj):
            return obj
        pos = text.find("{", pos + 1)
    return None


def judge(claim, steps, judge_model, timeout=300):
    """Ask the cheap judge model whether the executed evidence supports *claim*.

    Args:
        claim: The statement of completed work to assess.
        steps: Step records with ``name``, ``cmd``, ``exit`` and ``tail`` keys.
        judge_model: Model name passed to the router via ``--model``.
        timeout: Seconds to wait for the router before giving up.

    Returns:
        The judgment as a dict, or ``None`` when the router could not be run,
        timed out, exited non-zero, or produced no parsable judgment. Problems
        are reported on stderr; nothing is raised.
    """
    ev = "\n".join("- %s: `%s` → exit %d\n output tail:\n%s" %
                   (s["name"], s["cmd"], s["exit"],
                    "\n".join(" " + l for l in s["tail"].splitlines()[-12:]))
                   for s in steps)
    prompt = (
        "You are a skeptical verification judge. A claim of completed work is below, "
        "with the ACTUAL build/test commands that were run and their real output.\n\n"
        "CLAIM: %s\n\nEVIDENCE (commands actually executed):\n%s\n\n"
        "Decide, strictly from the evidence, whether it SUPPORTS the claim. Passing tests "
        "on unrelated code do NOT support a specific claim. If nothing here actually exercises "
        "the claimed behavior, say so. Respond as JSON: "
        '{"supports": true|false, "gaps": ["what is not proven by this evidence"], '
        '"verdict": "VERIFIED|INCONCLUSIVE|FAILED", "reasoning": "one or two sentences"}'
    ) % (claim, ev)
    try:
        # stderr is inherited so the router's own diagnostics reach the user.
        proc = subprocess.run([LLM, "--task", "verify", "--model", judge_model, "--json",
                               "--max-tokens", "700", prompt],
                              stdout=subprocess.PIPE, stderr=None, text=True,
                              errors="replace", timeout=timeout)
    except subprocess.TimeoutExpired:
        sys.stderr.write("[verify] judge error: timed out after %ds\n" % timeout)
        return None
    except Exception as e:
        sys.stderr.write("[verify] judge error: %s\n" % e)
        return None

    if proc.returncode != 0:
        sys.stderr.write("[verify] judge error: router exited %d\n" % proc.returncode)
        return None
    verdict = _extract_judgment(proc.stdout or "")
    if verdict is None:
        sys.stderr.write("[verify] judge error: no JSON judgment in router output\n")
    return verdict
114
+
115
def main():
    """Run the verification gate and exit with the verdict's status code.

    Runs the planned commands (explicit ``--cmd`` values or the autodetected
    toolchain steps), then optionally asks the judge model about ``--claim``.
    The result goes to stdout as JSON with ``--json``, otherwise as a summary on
    stderr. Exit status: 0 VERIFIED, 1 INCONCLUSIVE, 2 FAILED (also 2 when
    ``--dir`` does not exist).
    """
    ap = argparse.ArgumentParser(prog="verify")
    ap.add_argument("--dir", default=".")
    ap.add_argument("--claim")
    ap.add_argument("--cmd", action="append", help="explicit command(s) to run instead of autodetect")
    ap.add_argument("--judge", default="deepseek-chat")
    ap.add_argument("--no-llm", action="store_true")
    ap.add_argument("--json", action="store_true")
    ap.add_argument("--timeout", type=int, default=600)
    args = ap.parse_args()

    cwd = os.path.abspath(args.dir)
    if not os.path.isdir(cwd):
        sys.stderr.write("no such dir: %s\n" % cwd)
        sys.exit(2)

    if args.cmd:
        toolchain, plan = "custom", [("cmd%d" % i, c) for i, c in enumerate(args.cmd, 1)]
    else:
        toolchain, plan = detect_steps(cwd)

    # Deterministic part: run every planned command and record its outcome.
    sys.stderr.write("\033[1m◆ VERIFY\033[0m %s [toolchain: %s]\n" % (cwd, toolchain))
    steps = []
    any_fail = False
    for name, cmd in plan:
        sys.stderr.write(" ▶ %-10s %s\n" % (name, cmd))
        code, out = sh(cmd, cwd, args.timeout)
        ok = code == 0
        any_fail = any_fail or not ok
        steps.append({"name": name, "cmd": cmd, "exit": code, "ok": ok, "tail": tail(out)})
        sys.stderr.write(" %s exit %d\n" % ("✓" if ok else "✗", code))

    # "built" is also true when the plan had no build/typecheck step at all.
    compile_steps = [s for s in steps if s["name"] in ("build", "typecheck")]
    built = any(s["ok"] for s in compile_steps) or not compile_steps
    tested = any(s["name"] == "test" and s["ok"] for s in steps)

    result = {"dir": cwd, "toolchain": toolchain, "steps": steps,
              "built": built, "tested": tested, "ran_anything": bool(steps)}

    if any_fail:
        verdict = "FAILED"
        result["blockingIssues"] = ["%s failed (exit %d)" % (s["name"], s["exit"]) for s in steps if not s["ok"]]
    elif not steps:
        verdict = "INCONCLUSIVE"
        result["blockingIssues"] = ["no build/test commands detected — nothing was actually run"]
    elif args.claim and not args.no_llm:
        # Judgment part: only reached when every deterministic step passed.
        j = judge(args.claim, steps, args.judge)
        result["llm_judgment"] = j
        if not isinstance(j, dict) or not j:
            verdict = "INCONCLUSIVE"
            result["blockingIssues"] = ["judge unavailable; deterministic steps passed but claim not independently assessed"]
        else:
            # Models sometimes return "gaps" as a bare string; normalise it so
            # it is reported as one issue rather than one bullet per character.
            gaps = j.get("gaps") or []
            if isinstance(gaps, str):
                gaps = [gaps]
            elif not isinstance(gaps, list):
                gaps = [str(gaps)]
            if j.get("verdict") == "FAILED" or j.get("supports") is False:
                verdict = "FAILED"
                result["blockingIssues"] = gaps or ["judge refuted the claim"]
            elif j.get("verdict") == "INCONCLUSIVE" or gaps:
                verdict = "INCONCLUSIVE"
                result["blockingIssues"] = gaps
            elif j.get("supports") is True or j.get("verdict") == "VERIFIED":
                verdict = "VERIFIED"
            else:
                # Fail closed: a reply that never affirms the claim is not approval.
                verdict = "INCONCLUSIVE"
                result["blockingIssues"] = ["judge response did not affirm the claim"]
    else:
        verdict = "VERIFIED"  # steps passed, no claim to judge

    result["verdict"] = verdict

    if args.json:
        json.dump(result, sys.stdout, indent=2)
        sys.stdout.write("\n")
    else:
        icon = {"VERIFIED": "✅", "INCONCLUSIVE": "⚠️", "FAILED": "❌"}[verdict]
        sys.stderr.write("\n%s \033[1mVERDICT: %s\033[0m\n" % (icon, verdict))
        for b in result.get("blockingIssues", []):
            sys.stderr.write(" • %s\n" % b)
        # "llm_judgment" is present but None when the judge was unavailable.
        judgment = result.get("llm_judgment") or {}
        if judgment.get("reasoning"):
            sys.stderr.write(" judge: %s\n" % judgment["reasoning"])
    sys.exit({"VERIFIED": 0, "INCONCLUSIVE": 1, "FAILED": 2}[verdict])


if __name__ == "__main__":
    main()
@@ -0,0 +1,112 @@
1
+ {
2
+ "_comment": "Token Scrooge capability seed \u2014 per-model quality scores used by the weighted router (quality-for-task / cost), gated by difficulty. Numbers are Artificial Analysis (artificialanalysis.ai) metrics, snapshot 2026-06-04: intelligence=AA Intelligence Index, coding=AA Coding Index, math=AA Math Index, reasoning=GPQA Diamond x100, speed_tps=median output tokens/sec (all 0-100 except speed). Committed as a starter set; refresh/override the user-local ~/.token-scrooge/capabilities.json weekly via `scrooge-capabilities` (AA + OpenRouter). null = AA had no score for that eval; the router falls back to the intelligence index.",
3
+ "_meta": {
4
+ "source": "artificialanalysis.ai",
5
+ "snapshot": "2026-06-04",
6
+ "attribution": "https://artificialanalysis.ai/"
7
+ },
8
+ "deepseek-v4-flash": {
9
+ "intelligence": 46.5,
10
+ "coding": 38.7,
11
+ "math": null,
12
+ "reasoning": 89.4,
13
+ "speed_tps": 119.722,
14
+ "aa_slug": "deepseek-v4-flash"
15
+ },
16
+ "deepseek-v4-pro": {
17
+ "intelligence": 51.5,
18
+ "coding": 47.5,
19
+ "math": null,
20
+ "reasoning": 88.8,
21
+ "speed_tps": 46.223,
22
+ "aa_slug": "deepseek-v4-pro"
23
+ },
24
+ "kimi-k2.6": {
25
+ "intelligence": 53.9,
26
+ "coding": 47.1,
27
+ "math": null,
28
+ "reasoning": 91.1,
29
+ "speed_tps": 41.575,
30
+ "aa_slug": "kimi-k2-6"
31
+ },
32
+ "glm-4.5-air": {
33
+ "intelligence": 23.2,
34
+ "coding": 23.8,
35
+ "math": 80.7,
36
+ "reasoning": 73.3,
37
+ "speed_tps": 74.495,
38
+ "aa_slug": "glm-4-5-air"
39
+ },
40
+ "glm-4.7": {
41
+ "intelligence": 42.1,
42
+ "coding": 36.3,
43
+ "math": 95,
44
+ "reasoning": 85.9,
45
+ "speed_tps": 79.245,
46
+ "aa_slug": "glm-4-7"
47
+ },
48
+ "glm-5": {
49
+ "intelligence": 49.8,
50
+ "coding": 44.2,
51
+ "math": null,
52
+ "reasoning": 82.0,
53
+ "speed_tps": 79.532,
54
+ "aa_slug": "glm-5"
55
+ },
56
+ "gemini-2.5-flash-lite": {
57
+ "intelligence": 12.7,
58
+ "coding": 7.4,
59
+ "math": 35.3,
60
+ "reasoning": 47.4,
61
+ "speed_tps": 229.515,
62
+ "aa_slug": "gemini-2-5-flash-lite"
63
+ },
64
+ "gemini-2.5-flash": {
65
+ "intelligence": 20.6,
66
+ "coding": 17.8,
67
+ "math": 60.3,
68
+ "reasoning": 68.3,
69
+ "speed_tps": 185.129,
70
+ "aa_slug": "gemini-2-5-flash"
71
+ },
72
+ "gemini-3-flash-preview": {
73
+ "intelligence": 35,
74
+ "coding": 37.8,
75
+ "math": 55.7,
76
+ "reasoning": 81.2,
77
+ "speed_tps": 181.264,
78
+ "aa_slug": "gemini-3-flash"
79
+ },
80
+ "gpt-5-nano": {
81
+ "intelligence": 26.8,
82
+ "coding": 20.3,
83
+ "math": 83.7,
84
+ "reasoning": 67.6,
85
+ "speed_tps": 150.373,
86
+ "aa_slug": "gpt-5-nano"
87
+ },
88
+ "gpt-5-mini": {
89
+ "intelligence": 41.2,
90
+ "coding": 35.3,
91
+ "math": 90.7,
92
+ "reasoning": 82.8,
93
+ "speed_tps": 87.426,
94
+ "aa_slug": "gpt-5-mini"
95
+ },
96
+ "gpt-4.1-mini": {
97
+ "intelligence": 22.9,
98
+ "coding": 18.5,
99
+ "math": 46.3,
100
+ "reasoning": 66.4,
101
+ "speed_tps": 79.254,
102
+ "aa_slug": "gpt-4-1-mini"
103
+ },
104
+ "grok-4.3": {
105
+ "intelligence": 53.2,
106
+ "coding": 41,
107
+ "math": null,
108
+ "reasoning": 90.1,
109
+ "speed_tps": 125.405,
110
+ "aa_slug": "grok-4-3"
111
+ }
112
+ }
@@ -0,0 +1,138 @@
1
+ #!/usr/bin/env bash
2
+ # Token Scrooge installer — make the cheap models do the grunt work.
3
+ # Usage:
4
+ # git clone https://github.com/sashabogi/token-scrooge && cd token-scrooge && ./install.sh
5
+ # curl -fsSL https://raw.githubusercontent.com/sashabogi/token-scrooge/main/install.sh | bash
6
set -euo pipefail

# Overridable locations: repo URL, where the tool symlinks go, and the data dir.
REPO_URL="${SCROOGE_REPO_URL:-https://github.com/sashabogi/token-scrooge}"
BIN_DIR="${SCROOGE_BIN_DIR:-$HOME/.local/bin}"
SCROOGE_HOME="${SCROOGE_HOME:-$HOME/.token-scrooge}"

# Print all arguments as a single line.
say() { printf '%s\n' "$*"; }

# --- prerequisites -------------------------------------------------------
if ! command -v python3 >/dev/null 2>&1; then
  say "✗ python3 is required (3.8+)."
  exit 1
fi

# --- locate the repo (clone if piped via curl) ---------------------------
# Use the directory holding this script when it is a checkout (it contains
# bin/scrooge); otherwise clone or fast-forward a copy under $SCROOGE_HOME/repo.
SRC="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" 2>/dev/null && pwd || true)"
if [ -z "${SRC:-}" ] || [ ! -f "$SRC/bin/scrooge" ]; then
  if ! command -v git >/dev/null 2>&1; then
    say "✗ git is required to bootstrap (or run ./install.sh from a clone)."
    exit 1
  fi
  SRC="$SCROOGE_HOME/repo"
  say "▸ Fetching Token Scrooge into $SRC ..."
  if [ -d "$SRC/.git" ]; then
    git -C "$SRC" pull --ff-only --quiet
  else
    git clone --depth 1 "$REPO_URL" "$SRC" --quiet
  fi
fi

# --- install -------------------------------------------------------------
mkdir -p "$BIN_DIR" "$SCROOGE_HOME"
for exe in scrooge scrooge-diverge scrooge-verify scrooge-drift scrooge-capabilities; do
  chmod +x "$SRC/bin/$exe"
  # Symlink rather than copy, so a later `git pull` keeps the tools current.
  ln -sf "$SRC/bin/$exe" "$BIN_DIR/$exe"
done
32
# --- registry: refresh untouched copies, never clobber local edits ----------
# The last-shipped template is kept at $SCROOGE_HOME/registry.template.json as a
# baseline. A live registry.json that is byte-identical to that baseline was
# never edited, so it is safe to roll forward to the new template. If it
# differs, it was changed locally: it is preserved, and a notice is printed only
# when the shipped template actually changed since the last install.
NEW_TPL="$SRC/registry.template.json"
OLD_TPL="$SCROOGE_HOME/registry.template.json"
REG="$SCROOGE_HOME/registry.json"
if [ ! -f "$REG" ]; then
  cp "$NEW_TPL" "$REG"                     # fresh install
  say "✓ Registry installed."
elif cmp -s "$REG" "$NEW_TPL"; then
  :                                        # already current — nothing to do
elif [ -f "$OLD_TPL" ] && cmp -s "$REG" "$OLD_TPL"; then
  cp "$NEW_TPL" "$REG"                     # untouched copy → roll forward
  say "✓ Registry auto-refreshed to the latest models (no local edits detected)."
elif [ -f "$OLD_TPL" ] && cmp -s "$NEW_TPL" "$OLD_TPL"; then
  :                                        # local edits, but no new template to flag
else
  say "⚠ A newer registry template is available, but your registry.json has local"
  say " edits — leaving it untouched. Compare with:"
  say " diff \"$REG\" \"$NEW_TPL\" (or run: scrooge-drift)"
fi
cp "$NEW_TPL" "$OLD_TPL"                   # update baseline for next run
55
+
56
# --- seeds: keep current copies in $SCROOGE_HOME ------------------------------
# lessons.seed.json      — live-training starter guardrails. The user-local
#                          lessons.json is created from it on first use and is
#                          never touched here.
# capabilities.seed.json — quality scores for the weighted router (refreshed by
#                          scrooge-capabilities).
# A seed missing from the checkout is skipped.
for seed in lessons.seed.json capabilities.seed.json; do
  if [ -f "$SRC/$seed" ]; then
    cp "$SRC/$seed" "$SCROOGE_HOME/$seed"
  fi
done

say "✓ Installed: scrooge, scrooge-diverge, scrooge-verify, scrooge-drift, scrooge-capabilities → $BIN_DIR"
68
+
69
# --- weekly self-maintenance: refresh model quality scores (capability routing) ----------
# macOS uses a user LaunchAgent (no Full Disk Access needed, unlike crontab); Linux uses cron.
# Idempotent and non-fatal — a failure here never blocks the install.
#
# setup_weekly_refresh: schedule scrooge-capabilities for Mondays 09:05.
# Does nothing when the tool is not executable. Output of each scheduled run
# goes to $SCROOGE_HOME/capabilities-refresh.log.
setup_weekly_refresh() {
  local tool="$BIN_DIR/scrooge-capabilities"
  local log="$SCROOGE_HOME/capabilities-refresh.log"
  [ -x "$tool" ] || return 0
  case "$(uname -s)" in
    Darwin)
      local label="com.tokenscrooge.capabilities"
      local plist="$HOME/Library/LaunchAgents/$label.plist"
      local py; py="$(command -v python3 || echo /usr/bin/python3)"
      mkdir -p "$HOME/Library/LaunchAgents"
      # Unquoted heredoc delimiter: $label, $py, $tool and $log are expanded.
      cat > "$plist" <<PLIST
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key><string>$label</string>
  <key>ProgramArguments</key>
  <array><string>$py</string><string>$tool</string></array>
  <key>StartCalendarInterval</key>
  <dict><key>Weekday</key><integer>1</integer><key>Hour</key><integer>9</integer><key>Minute</key><integer>5</integer></dict>
  <key>StandardOutPath</key><string>$log</string>
  <key>StandardErrorPath</key><string>$log</string>
  <key>RunAtLoad</key><false/>
</dict>
</plist>
PLIST
      # Unload any previous copy first so re-running the installer is idempotent.
      launchctl bootout "gui/$(id -u)/$label" 2>/dev/null || true
      if launchctl bootstrap "gui/$(id -u)" "$plist" 2>/dev/null; then
        say "✓ Weekly capability refresh scheduled (LaunchAgent · Mondays 09:05)."
      else
        say "ℹ LaunchAgent written to $plist — load it with: launchctl bootstrap gui/$(id -u) \"$plist\""
      fi
      ;;
    *)
      # Quote both paths: cron passes the command to the shell, so an unquoted
      # path containing whitespace would be split into several words.
      local line="5 9 * * 1 \"$tool\" > \"$log\" 2>&1"
      if command -v crontab >/dev/null 2>&1; then
        if crontab -l 2>/dev/null | grep -q "scrooge-capabilities"; then
          say "✓ Weekly capability refresh already in crontab."
        elif ( crontab -l 2>/dev/null; printf '%s\n' "$line" ) | crontab - 2>/dev/null; then
          say "✓ Weekly capability refresh added to crontab (Mondays 09:05)."
        else
          say "ℹ Could not edit crontab automatically. Add this line yourself:"
          say " $line"
        fi
      else
        say "ℹ No crontab found — schedule '$tool' weekly however you prefer."
      fi
      ;;
  esac
}
setup_weekly_refresh || true
123
+
124
# Warn when $BIN_DIR is not on PATH, and suggest the startup file that matches
# the user's login shell rather than assuming zsh.
case ":$PATH:" in
  *":$BIN_DIR:"*) ;;
  *)
    # The tilde is kept literal on purpose: these values are only printed.
    case "${SHELL:-}" in
      */zsh)  rc_file="~/.zshrc";   reload="source" ;;
      */bash) rc_file="~/.bashrc";  reload="source" ;;
      *)      rc_file="~/.profile"; reload="." ;;
    esac
    say "⚠ $BIN_DIR is not on your PATH. Add it:"
    say " echo 'export PATH=\"$BIN_DIR:\$PATH\"' >> $rc_file && $reload $rc_file" ;;
esac
129
+
130
# --- first-run setup -----------------------------------------------------
# Launch the wizard only for an interactive run that did not pass --no-setup;
# otherwise (piped install, or opted out) just print how to start it later.
say ""
if [ "${1:-}" != "--no-setup" ] && [ -t 0 ]; then
  "$SRC/bin/scrooge" setup
else
  say "Next: run the setup wizard to pick your orchestrator and add API keys:"
  say " scrooge setup"
fi
@@ -0,0 +1,17 @@
1
+ {
2
+ "_comment": "Token Scrooge live-training seed lessons. Short corrective guardrails injected into a cheap model's system prompt at routing time. Shape: { \"<model-id-or-alias>\": { \"<task>\"|\"*\": [\"one-liner\", ...] } }, plus a top-level \"*\" model bucket for lessons that apply to EVERY routed (cheap, execution-only) model. This file is COMMITTED and read-only at runtime; on first use it is copied to the user-local $SCROOGE_HOME/lessons.json, which then overrides/extends it (edit there via `scrooge learn` / `scrooge forget`, or re-merge new seeds with `scrooge learn --seed`). Keys starting with \"_\" are metadata and ignored by the loader.",
3
+ "_generalization_note": "The three deepseek-v4-flash/code lessons come from the 2026-06-04 polymarket dogfood. Decision: the order-book ordering pitfall is kept model-specific (deepseek-v4-flash, where it was observed), while the two model-agnostic correctness rules (absent numerics -> 0.0/schema default, exact schema key names) are promoted to the top-level \"*\"/code bucket so EVERY cheap code model inherits them. Scrooge only ever routes cheap execution models, so a \"*\" lesson never reaches an orchestrator. Exact-text de-dup means deepseek/code still shows all three with no duplication.",
4
+ "deepseek-v4-flash": {
5
+ "code": [
6
+ "Never assume API array ordering — sort order-book bids/asks explicitly by price.",
7
+ "Use 0.0 (or the schema default) for absent numeric values, not None.",
8
+ "Use the exact key names from the provided schema/example; do not invent fields."
9
+ ]
10
+ },
11
+ "*": {
12
+ "code": [
13
+ "Use 0.0 (or the schema default) for absent numeric values, not None.",
14
+ "Use the exact key names from the provided schema/example; do not invent fields."
15
+ ]
16
+ }
17
+ }