trantor 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1276 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ scrooge — make the cheap models do the grunt work.
4
+
5
+ Routes a single task to a CHEAP external model (DeepSeek/Kimi/ZAI-GLM/Gemini/
6
+ OpenAI/xAI/OpenRouter, …) via the OpenAI-compatible chat API. Your expensive
7
+ orchestrator (Claude Opus, etc.) stays in charge; this is ONLY for disconnected,
8
+ individually-scoped execution work — drafts, summaries, extraction, judgment.
9
+
10
+ TRANSPARENCY (the whole point): every call prints a loud banner to STDERR and
11
+ appends to a cost ledger. The model's text goes to STDOUT only.
12
+
13
+ Usage:
14
+ scrooge "prompt" # default = cheapest live model (nothing hardcoded)
15
+ scrooge --latest "prompt" # re-check the provider's live model list now
16
+ scrooge --model kimi "prompt" # force a model (alias or full id)
17
+ scrooge --task code "prompt" # weigh capability×cost for the task (best value)
18
+ scrooge --task code -d hard "prompt" # difficulty floor → escalate hard work off the cheap model
19
+ scrooge --task code --spread 3 "prompt" # fan a batch across the top-3 capable models
20
+ scrooge --json --system "..." "prompt" # JSON-object output + system prompt
21
+ echo "long input" | scrooge --task summarize # prompt from stdin (or - )
22
+ scrooge models <provider> # list live model ids from a provider
23
+ scrooge list # show registry (providers/models/tasks)
24
+ scrooge ledger [--since 24h|7d|all] # usage + cost totals, savings vs Opus
25
+ scrooge watch [--tail N|--all] # LIVE feed of cheap-model calls as they happen
26
+
27
+ Live training (per-model lessons): short corrective guardrails, learned from
28
+ observed failures, are auto-injected into the model's system prompt at routing
29
+ time (the banner shows "+N lessons"; --no-lessons bypasses):
30
+ scrooge learn -m <model> [-t <task>] "lesson" # capture (dedup; -t omitted => "*")
31
+ scrooge learn --seed # merge the shipped seed set
32
+ scrooge lessons [-m <model>] [-t <task>] # show the store
33
+ scrooge forget -m <model> [-t <task>] <index>|--all # remove
34
+
35
+ Config: $SCROOGE_HOME (default ~/.token-scrooge) holds registry.json + calls.jsonl
36
+ + lessons.json (user-local; seeded from the committed lessons.seed.json)
37
+ Keys: read from the environment; optionally also from $SCROOGE_ENV_FILE (KEY=VALUE)
38
+ Exit: 0 ok, non-zero on error (message to stderr).
39
+ """
40
+ import sys, os, json, time, argparse, urllib.request, urllib.error, re, shutil, hashlib
41
+
42
# ---- filesystem layout & reference pricing ------------------------------
HOME = os.path.expanduser("~")
# Every file scrooge persists lives under one directory ($SCROOGE_HOME overrides it).
SCROOGE_DIR = os.environ.get("SCROOGE_HOME", os.path.join(HOME, ".token-scrooge"))
# registry · call ledger · short-lived per-provider /models snapshots ·
# per-model quality scores for the weighted router
REGISTRY, LEDGER, MODELS_CACHE, CAPS = (
    os.path.join(SCROOGE_DIR, _fname)
    for _fname in ("registry.json", "calls.jsonl", "models-cache.json", "capabilities.json")
)
# Optional extra KEY=VALUE file (e.g. a proxy .env); empty string when unset.
ENV_FALLBACK = os.environ.get("SCROOGE_ENV_FILE", "")
# $/1M-token reference prices (input, output) used for the savings math (Opus 4.x).
OPUS_IN = 15.0
OPUS_OUT = 75.0
50
+
51
+ # ---- ANSI (stderr only) -------------------------------------------------
52
def _c(code, s):
    """Colour `s` with the ANSI SGR `code`, but only when stderr is a terminal.

    When stderr is not a tty (piped, captured, redirected) `s` is returned
    unchanged so no escape sequences leak into logs.
    """
    if sys.stderr.isatty():
        return "\033[%sm%s\033[0m" % (code, s)
    return s
54
def ORANGE(s):
    """Render `s` in 256-colour orange (SGR 38;5;208) via `_c`."""
    return _c("38;5;208", s)


def DIM(s):
    """Render `s` dimmed (SGR 2) via `_c`."""
    return _c("2", s)


def RED(s):
    """Render `s` in red (SGR 31) via `_c`."""
    return _c("31", s)
57
+
58
def err(*a):
    """Write all arguments, str()-converted and space-joined, as one line on stderr."""
    message = " ".join(map(str, a))
    sys.stderr.write(message + "\n")
60
+
61
+ # ---- key loading --------------------------------------------------------
62
def _load_env_file(path):
    """Merge KEY=VALUE lines from a file into os.environ (no override of existing).

    Blank lines, `#` comment lines and lines without `=` are skipped. A leading
    `export ` on the key is dropped. The value is split on the FIRST `=` only,
    so values may themselves contain `=`.

    Exactly one pair of *matching* surrounding quotes is removed from the value.
    Unbalanced or mixed quotes are left untouched so that a value which really
    ends in a quote character is not silently corrupted.

    Best-effort: a missing/empty path is a no-op, and any read error is reported
    (dimmed) on stderr rather than raised.
    """
    if not path or not os.path.exists(path):
        return
    try:
        with open(path) as fh:
            for raw in fh:
                line = raw.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, _, value = line.partition("=")
                key = key.strip()
                if key.startswith("export "):
                    key = key[len("export "):].strip()
                value = value.strip()
                # Only strip a balanced pair: "abc" / 'abc'. Chained .strip() calls
                # would also eat unmatched or mixed quotes that belong to the value.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
                    value = value[1:-1]
                # Never override what the real environment already provides.
                if key and key not in os.environ:
                    os.environ[key] = value
    except Exception as e:
        err(DIM("[scrooge] could not read %s: %s" % (path, e)))
81
+
82
def load_env_fallback():
    """Top up os.environ from the optional key files, never overriding existing values.

    Sources, in order: `<SCROOGE_DIR>/.env` (written by `scrooge setup`), then the
    file named by $SCROOGE_ENV_FILE, if any.
    """
    for env_file in (os.path.join(SCROOGE_DIR, ".env"), ENV_FALLBACK):
        _load_env_file(env_file)
87
+
88
def load_registry():
    """Parse and return the JSON document stored at REGISTRY.

    Errors (missing file, invalid JSON) propagate to the caller.
    """
    with open(REGISTRY) as registry_file:
        registry = json.load(registry_file)
    return registry
91
+
92
def provider_key(reg, provider):
    """Return the API key for `provider`, or None when no key is available.

    The provider's registry entry lists candidate environment variable names under
    "env"; the first one that is set to a non-empty value wins. Unknown providers
    and providers without an "env" list yield None.
    """
    cfg = reg["providers"].get(provider) or {}
    candidates = (os.environ[var] for var in cfg.get("env", []) if os.environ.get(var))
    return next(candidates, None)
98
+
99
+ # ---- model resolution ---------------------------------------------------
100
def resolve_model(reg, model=None, task=None, latest=False):
    """Return (model_id, provider) or raise.

    Resolution order:
      1. `model` given: expand an alias, then accept either a registry model id or
         a "provider/model" string whose prefix is a known provider.
      2. `task` given: first registry candidate for the task whose provider has a key.
      3. Neither: the dynamic default from `default_model` (`latest` is forwarded).

    Raises SystemExit with a user-facing message for an unknown model, an unknown
    task, or a task with no keyed provider.
    """
    if model:
        resolved = reg.get("aliases", {}).get(model, model)
        registry_models = reg["models"]
        if resolved in registry_models:
            return resolved, registry_models[resolved]["provider"]
        # provider/model form, e.g. openrouter explicit
        if "/" in resolved:
            prefix = resolved.split("/", 1)[0]
            if prefix in reg["providers"]:
                return resolved, prefix
        raise SystemExit("Unknown model '%s'. Try: scrooge list" % resolved)

    if task:
        task_table = reg.get("tasks", {})
        candidates = task_table.get(task)
        if not candidates:
            raise SystemExit("Unknown task '%s'. Tasks: %s" % (task, ", ".join(task_table)))
        for candidate in candidates:
            owner = reg["models"][candidate]["provider"]
            if provider_key(reg, owner):
                return candidate, owner
        raise SystemExit("No live API key for any model serving task '%s'." % task)

    # No model, no task: derive the default dynamically from what's actually live.
    return default_model(reg, latest=latest)
123
+
124
+ # ---- capability-weighted routing ----------------------------------------
125
+ # Pick the best *value* model for a task: weight each candidate's quality-for-the-task
126
+ # against its price, but first gate by a DIFFICULTY floor so hard work escalates off the
127
+ # cheapest model while easy work stays cheap. Quality data lives in capabilities.json
128
+ # (seeded from the committed capabilities.seed.json; refreshed by `scrooge-capabilities`).
129
# Which capability column scores each task; tasks not listed fall back to "intelligence".
TASK_METRIC = {
    "code": "coding",
    "code-review": "coding",
    "reason": "reasoning",
    "verify": "reasoning",
    "math": "math",
}
# Difficulty → percentile of the candidates' quality scores used as the capability floor.
DIFF_PCTL = {
    "easy": 0.0,
    "medium": 0.5,
    "hard": 0.8,
}
# Tasks that default to 'medium' difficulty when --difficulty is not given.
HARD_TASKS = ("code", "code-review", "reason", "verify")
133
+
134
def _caps_seed_path():
    """Return the first existing capabilities.seed.json, or None.

    Looked up in SCROOGE_DIR first, then one directory above this script's
    real (symlink-resolved) location.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    search = [
        os.path.join(SCROOGE_DIR, "capabilities.seed.json"),
        os.path.join(script_dir, "..", "capabilities.seed.json"),
    ]
    return next((path for path in search if os.path.exists(path)), None)
141
+
142
def load_caps():
    """Per-model quality scores (user store → seed). Missing/malformed → {}.

    The first source that parses to a JSON object is used; its keys starting with
    "_" are dropped. A source that cannot be read, is not valid JSON, or is not an
    object is skipped in favour of the next one.
    """
    for source in (CAPS, _caps_seed_path()):
        if not source:
            continue
        try:
            with open(source) as fh:
                data = json.load(fh)
        except Exception:
            continue
        if isinstance(data, dict):
            return {name: scores for name, scores in data.items()
                    if not name.startswith("_")}
    return {}
155
+
156
def task_metric(task):
    """Name of the capability metric that scores `task` ("intelligence" if unmapped or empty)."""
    lookup = task if task else ""
    return TASK_METRIC.get(lookup, "intelligence")
158
+
159
def infer_difficulty(task, prompt):
    """Guess a difficulty when --difficulty is omitted.

    Tasks in HARD_TASKS start at "medium", everything else at "easy"; a prompt
    longer than 8000 characters bumps the result one notch (easy → medium,
    medium → hard).
    """
    level = "medium" if task in HARD_TASKS else "easy"
    if prompt and len(prompt) > 8000:
        # `level` is only ever "easy" or "medium" at this point.
        level = "hard" if level == "medium" else "medium"
    return level
166
+
167
def model_quality(caps, mid, metric):
    """Quality score of model `mid` for `metric`, as a float.

    Falls back to the model's general "intelligence" score when the requested
    metric is absent or non-numeric, and to 0.0 when neither is usable.
    """
    scores = caps.get(mid) or {}
    for column in (metric, "intelligence"):
        value = scores.get(column)
        if isinstance(value, (int, float)):
            return float(value)
    return 0.0
173
+
174
def blended_cost(reg, mid):
    """Single price figure for a model: 30% input price + 70% output price.

    Missing prices count as 0; the result is clamped to at least 1e-6 so callers
    can divide by it safely.
    """
    entry = reg["models"].get(mid, {})
    price_in = entry.get("cost_in", 0)
    price_out = entry.get("cost_out", 0)
    return max(1e-6, 0.3 * price_in + 0.7 * price_out)
177
+
178
def weigh_candidates(reg, caps, cand_ids, task, difficulty):
    """Apply the difficulty floor, then rank survivors by quality^qw / cost^cw.

    The floor is the DIFF_PCTL[difficulty] percentile (0.5 for an unknown
    difficulty) of the candidates' positive quality scores; candidates below it
    are dropped, unless that would drop everyone. Exponents come from
    reg["routing"] ("q_weight" default 1.5, "c_weight" default 0.5).

    Returns [(model_id, score)] best-first.
    """
    metric = task_metric(task)
    rated = [(mid, model_quality(caps, mid, metric)) for mid in cand_ids]

    # Percentile is taken over models that actually have a score (> 0).
    known_scores = sorted(quality for _, quality in rated if quality > 0)
    percentile = DIFF_PCTL.get(difficulty, 0.5)
    floor = 0.0
    if known_scores and percentile > 0:
        top = len(known_scores) - 1
        floor = known_scores[min(top, int(round(percentile * top)))]

    survivors = [pair for pair in rated if pair[1] >= floor]
    if not survivors:
        survivors = rated

    weights = reg.get("routing") or {}
    qw = weights.get("q_weight", 1.5)
    cw = weights.get("c_weight", 0.5)

    ranked = []
    for mid, quality in survivors:
        # Unscored models get a tiny positive quality so the power stays defined.
        value = (max(quality, 1e-6) ** qw) / (blended_cost(reg, mid) ** cw)
        ranked.append((mid, value))
    ranked.sort(key=lambda pair: -pair[1])
    return ranked
194
+
195
def _spread_index(prompt, k):
    """Deterministic, process-independent bucket in range(k) for `prompt`.

    Used so a parallel batch fans across the top-k models instead of hammering
    one. The built-in hash() is salted per process, so the bucket is derived from
    the first 8 hex digits of the prompt's SHA-1 instead. A None/empty prompt
    hashes as the empty string.

    A non-positive `k` returns 0 (the best-ranked slot) rather than raising
    ZeroDivisionError or producing a negative index that would select from the
    tail of the ranking.
    """
    if k <= 0:
        return 0
    digest = hashlib.sha1((prompt or "").encode("utf-8", "replace")).hexdigest()
    return int(digest[:8], 16) % k
200
+
201
def route_task(reg, caps, task, difficulty, prompt, spread=0):
    """Capability-weighted pick for a --task. Returns (model_id, provider, info).

    Only candidates whose provider has an API key are considered. With no
    capability data at all, the first keyed candidate in registry order is
    returned (legacy cheapest-first) and info is {"difficulty": None,
    "weighed": False}. Otherwise candidates are ranked by `weigh_candidates`
    using `difficulty` (or an inferred one); with `spread`, the pick is a
    deterministic bucket among the top-`spread` models.

    Raises SystemExit for an unknown task or when no candidate has a key.
    """
    task_table = reg.get("tasks", {})
    cands = task_table.get(task)
    if not cands:
        raise SystemExit("Unknown task '%s'. Tasks: %s" % (task, ", ".join(task_table)))

    models = reg["models"]
    live = [m for m in cands if provider_key(reg, models[m]["provider"])]
    if not live:
        raise SystemExit("No live API key for any model serving task '%s'." % task)

    if not caps:  # no quality data → legacy cheapest-first
        first = live[0]
        return first, models[first]["provider"], {"difficulty": None, "weighed": False}

    diff = difficulty or infer_difficulty(task, prompt)
    ranked = [mid for mid, _score in weigh_candidates(reg, caps, live, task, diff)]

    if spread and len(ranked) > 1:
        width = min(spread, len(ranked))
        chosen = ranked[_spread_index(prompt, width)]
    else:
        chosen = ranked[0]

    pool = ranked[:max(spread, 1)] if spread else ranked[:1]
    info = {
        "difficulty": diff, "weighed": True, "metric": task_metric(task),
        "considered": len(live), "pool": pool,
    }
    return chosen, models[chosen]["provider"], info
223
+
224
+ # ---- HTTP ---------------------------------------------------------------
225
def http_post(url, headers, payload, timeout=120):
    """POST `payload` as JSON to `url` and return the decoded JSON response.

    urllib errors (HTTPError, URLError, timeouts) and JSON decoding errors
    propagate to the caller.
    """
    body = json.dumps(payload).encode()
    request = urllib.request.Request(url, data=body, headers=headers, method="POST")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
        return json.loads(raw.decode())
230
+
231
def http_get(url, headers, timeout=30):
    """GET `url` with `headers` and return the decoded JSON response.

    urllib errors and JSON decoding errors propagate to the caller.
    """
    request = urllib.request.Request(url, headers=headers, method="GET")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
        return json.loads(raw.decode())
235
+
236
def list_live_models(reg, provider, ttl=600):
    """Model ids the provider serves right now, from its OpenAI-compatible /models endpoint.

    Results are cached per provider in MODELS_CACHE; a cached entry younger than
    `ttl` seconds is returned without a network call (`ttl=0` always refetches).
    Best-effort: returns [] when the provider has no key or the request fails, so
    callers must tolerate an empty list. A failure to write the cache is ignored.
    """
    # Load the on-disk snapshot; anything unreadable counts as an empty cache.
    try:
        with open(MODELS_CACHE) as fh:
            snapshot = json.load(fh)
    except Exception:
        snapshot = {}
    if not isinstance(snapshot, dict):
        snapshot = {}

    cached = snapshot.get(provider)
    if ttl and isinstance(cached, dict) and isinstance(cached.get("ids"), list):
        age = time.time() - cached.get("ts", 0)
        if age < ttl:
            return cached["ids"]

    key = provider_key(reg, provider)
    if not key:
        return []

    provider_cfg = reg["providers"][provider]
    endpoint = provider_cfg["base_url"].rstrip("/") + "/models"
    headers = {"Authorization": "Bearer " + key}
    headers.update(provider_cfg.get("extra_headers", {}))
    try:
        listing = http_get(endpoint, headers, timeout=15)
        ids = sorted(str(item.get("id")) for item in (listing.get("data") or [])
                     if item.get("id"))
    except Exception:
        return []

    snapshot[provider] = {"ts": int(time.time()), "ids": ids}
    try:
        os.makedirs(SCROOGE_DIR, exist_ok=True)
        with open(MODELS_CACHE, "w") as fh:
            json.dump(snapshot, fh)
    except Exception:
        pass  # the cache is an optimisation only
    return ids
272
+
273
def default_model(reg, latest=False):
    """Pick the default (model_id, provider) dynamically; nothing is hardcoded.

    Registry models whose provider has a key are ordered by cost_in + cost_out
    (cheapest first). The first one that appears in its provider's live /models
    list is returned. An empty live list means the API could not be reached, and
    the registry entry is trusted as-is. If every candidate is missing from the
    live lists, the alphabetically first live id of the cheapest candidate's
    provider is used (pricing unknown). `latest=True` bypasses the /models cache.

    Raises SystemExit when no provider has an API key.
    """
    priced = [
        (mid, cfg["provider"], cfg.get("cost_in", 0) + cfg.get("cost_out", 0))
        for mid, cfg in reg["models"].items()
        if provider_key(reg, cfg["provider"])
    ]
    priced.sort(key=lambda row: row[2])
    if not priced:
        raise SystemExit("No default model available — no API key is set for any provider. "
                         "Run `scrooge setup` (or set a provider key), then `scrooge list`.")

    ttl = 0 if latest else 600
    live_by_provider = {}
    for mid, prov, _cost in priced:
        if prov not in live_by_provider:
            live_by_provider[prov] = set(list_live_models(reg, prov, ttl=ttl))
        serving = live_by_provider[prov]
        # Empty set ⇒ couldn't reach the API (offline/etc.): trust the registry rather than block.
        if not serving or mid in serving:
            return mid, prov

    # Every priced candidate has drifted vs the live list — route to a real live id.
    cheapest_id, cheapest_prov = priced[0][0], priced[0][1]
    live_ids = sorted(live_by_provider.get(cheapest_prov) or [])
    if live_ids:
        err(DIM("[scrooge] registry models for %s look stale; routing to live '%s' "
                "(pricing unknown — run scrooge-drift)" % (cheapest_prov, live_ids[0])))
        return live_ids[0], cheapest_prov
    return cheapest_id, cheapest_prov
303
+
304
+ # ---- ledger -------------------------------------------------------------
305
def project_label(start=None):
    """A stable per-project name so a single shared ledger can be filtered by project.

    Priority: $SCROOGE_PROJECT (explicit, set it per terminal for full control) →
    nearest enclosing git checkout's dir name → the start directory's base name.

    `start` defaults to the current working directory. It is normalised with
    os.path.abspath first, so a relative path or a trailing slash still yields a
    directory *name* rather than an empty basename (which previously made the
    whole path the label).
    """
    env = os.environ.get("SCROOGE_PROJECT")
    if env:
        return env
    base = os.path.abspath(start or os.getcwd())
    cur = base
    for _ in range(40):  # bounded walk towards the filesystem root
        # exists(), not isdir(): in linked worktrees and submodules `.git` is a
        # plain file ("gitdir: …"), not a directory.
        if os.path.exists(os.path.join(cur, ".git")):
            return os.path.basename(cur) or cur
        parent = os.path.dirname(cur)
        if parent == cur:  # reached the root
            break
        cur = parent
    return os.path.basename(base.rstrip("/")) or base
322
+
323
def append_ledger(entry):
    """Append `entry` as one JSON line to LEDGER and return the ledger's line count.

    The count is obtained by re-reading the file after the append, so it is the
    1-based number of the new line only when nothing else appended in between.
    Best-effort: any failure (directory, write, serialisation) returns None.
    """
    try:
        os.makedirs(SCROOGE_DIR, exist_ok=True)
        with open(LEDGER, "a") as sink:
            sink.write(json.dumps(entry) + "\n")
        total = 0
        with open(LEDGER) as source:
            for _line in source:
                total += 1
        return total
    except Exception:
        return None
333
+
334
def cost_usd(reg, model, tin, tout):
    """Dollar cost of a call with `tin` input and `tout` output tokens.

    Prices are $/1M tokens from the registry entry. The entry is looked up by the
    last "/"-separated segment of `model` first, then by the full id; a model
    without an entry costs 0.0, and a missing price counts as 0.
    """
    pricing_table = reg["models"]
    short_id = model.split("/")[-1]
    pricing = pricing_table.get(short_id) or pricing_table.get(model)
    if not pricing:
        return 0.0
    input_cost = (tin / 1e6) * pricing.get("cost_in", 0)
    output_cost = (tout / 1e6) * pricing.get("cost_out", 0)
    return input_cost + output_cost
339
+
340
+ # ---- live training: per-model lessons -----------------------------------
341
+ # A user-local, mutable store of short corrective guardrails learned from
342
+ # observed cheap-model failures. The relevant lessons are auto-injected into a
343
+ # model's system prompt at routing time so recurring bugs are preempted. The
344
+ # store is SEPARATE from the capability/pricing registry. Shape:
345
+ # { "<model-id-or-alias>": { "<task>"|"*": ["one-liner", ...] }, "*": {...} }
346
+ # Keys starting with "_" are metadata (comments) and ignored.
347
# User-local lessons store (lives next to the registry and ledger).
LESSONS = os.path.join(SCROOGE_DIR, "lessons.json")
# Upper bound on lessons taken from a single (model, task) bucket.
LESSON_CAP = 8
# Ceiling on the total characters injected as the guardrail block.
LESSON_CHAR_CAP = 1200
350
+
351
def _is_meta_key(k):
    """True when `k` is a string starting with "_" (a metadata/comment key)."""
    if not isinstance(k, str):
        return False
    return k.startswith("_")
353
+
354
def load_lessons():
    """Read the user-local lessons store.

    Returns the parsed JSON object, or {} when the file is missing, unreadable,
    not valid JSON, or not a JSON object.
    """
    try:
        with open(LESSONS) as fh:
            store = json.load(fh)
    except Exception:
        return {}
    if isinstance(store, dict):
        return store
    return {}
362
+
363
def save_lessons(store):
    """Write `store` to LESSONS as indented JSON with a trailing newline.

    Creates SCROOGE_DIR if needed. I/O and serialisation errors propagate.
    """
    os.makedirs(SCROOGE_DIR, exist_ok=True)
    with open(LESSONS, "w") as out:
        json.dump(store, out, indent=2)
        out.write("\n")
368
+
369
def _seed_path():
    """Return the first existing lessons.seed.json, or None.

    Looked up in SCROOGE_DIR first, then one directory above this script's real
    (symlink-resolved) location.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    search = [
        os.path.join(SCROOGE_DIR, "lessons.seed.json"),
        os.path.join(script_dir, "..", "lessons.seed.json"),
    ]
    return next((path for path in search if os.path.exists(path)), None)
377
+
378
def load_seed():
    """Load the shipped seed lessons.

    Returns the parsed JSON object, or {} when no seed file is found, it cannot
    be read/parsed, or it is not a JSON object.
    """
    seed_file = _seed_path()
    if not seed_file:
        return {}
    try:
        with open(seed_file) as fh:
            seed = json.load(fh)
    except Exception:
        return {}
    if isinstance(seed, dict):
        return seed
    return {}
388
+
389
def merge_seed(store):
    """Merge seed lessons into `store` in place and return how many were added.

    A seed lesson is appended to store[model][task] unless an equal entry is
    already there. Metadata keys (leading "_") and entries of the wrong shape
    (tasks not a dict, lessons not a list) are skipped.
    """
    added = 0
    for model, tasks in load_seed().items():
        if _is_meta_key(model) or not isinstance(tasks, dict):
            continue
        for task, lessons in tasks.items():
            if _is_meta_key(task) or not isinstance(lessons, list):
                continue
            bucket = store.setdefault(model, {}).setdefault(task, [])
            for lesson in lessons:
                if lesson in bucket:
                    continue
                bucket.append(lesson)
                added += 1
    return added
403
+
404
def seed_lessons_if_absent():
    """First-use bootstrap: when no LESSONS file exists, save the shipped seed as the store.

    Does nothing if the store already exists or the seed is empty/unavailable.
    """
    if not os.path.exists(LESSONS):
        seed = load_seed()
        if seed:
            save_lessons(seed)
411
+
412
def aliases_for(reg, model):
    """List every alias in reg["aliases"] whose target equals `model`, in registry order."""
    matching = []
    for alias, target in reg.get("aliases", {}).items():
        if target == model:
            matching.append(alias)
    return matching
415
+
416
def gather_lessons(reg, model, task):
    """Ordered, de-duplicated lessons that apply to `model` running `task`.

    Buckets are drained in this order: the full model id, then each of its
    aliases, and finally the universal "*" model. Within each, the `task` bucket
    (when a task is given) comes before the "*" task bucket. At most LESSON_CAP
    new lessons are taken from any single bucket.

    The store is user-editable JSON, so entries that are not strings (numbers,
    objects, null) are skipped: an unhashable one would crash the de-dup set and
    any non-string would break the later "- " + lesson formatting.

    Returns a list of lesson strings ([] when the store is empty or unreadable).
    """
    store = load_lessons()
    if not store:
        return []
    model_keys = [model] + [a for a in aliases_for(reg, model) if a != model]
    task_keys = ([task] if task else []) + ["*"]
    out, seen = [], set()

    def drain(mkey):
        """Append the not-yet-seen lessons of one model bucket to `out`."""
        mbucket = store.get(mkey)
        if not isinstance(mbucket, dict):
            return
        for tkey in task_keys:
            lst = mbucket.get(tkey)
            if not isinstance(lst, list):
                continue
            taken = 0
            for lesson in lst:
                if taken >= LESSON_CAP:
                    break
                if not isinstance(lesson, str):
                    continue  # malformed entry in a hand-edited store
                if lesson not in seen:
                    seen.add(lesson)
                    out.append(lesson)
                    taken += 1

    for mkey in model_keys:
        drain(mkey)
    drain("*")  # universal lessons (apply to every routed cheap model)
    return out
446
+
447
def build_lessons_block(reg, model, task):
    """Compose the guardrail block for the system prompt.

    Returns (text, count): a "Known pitfalls to avoid:" header followed by one
    "- lesson" line per lesson, and the number of lessons included. Lessons are
    added in order until the next one would push the block past LESSON_CHAR_CAP
    characters (header and newlines included). Returns ("", 0) when there are no
    lessons or not even the first one fits.
    """
    lessons = gather_lessons(reg, model, task)
    if not lessons:
        return "", 0
    header = "Known pitfalls to avoid:"
    block_lines = [header]
    used = len(header)
    for lesson in lessons:
        bullet = "- " + lesson
        needed = 1 + len(bullet)  # +1 for the joining newline
        if used + needed > LESSON_CHAR_CAP:
            break
        block_lines.append(bullet)
        used += needed
    count = len(block_lines) - 1
    if count == 0:
        return "", 0
    return "\n".join(block_lines), count
462
+
463
+ # ---- commands -----------------------------------------------------------
464
def cmd_call(reg, args):
    """Send one prompt to a cheap model, print the reply on stdout, and log the call.

    Steps: read the prompt (argument, or stdin when it is None or "-"); route to
    a (model, provider); build the system message from --system / the JSON-mode
    instruction plus any injected lessons; POST to the provider's
    /chat/completions endpoint; append a ledger entry for success or failure.
    Banners go to stderr, the model's text to stdout.

    Raises SystemExit with a message for an empty prompt or a provider without a
    key, and SystemExit(2) when the HTTP call fails.
    """
    # Read the prompt first — the capability weigher uses it to infer difficulty and to
    # fan a --spread batch deterministically.
    prompt = args.prompt
    if prompt in (None, "-"):
        prompt = sys.stdin.read()
    if not prompt or not prompt.strip():
        raise SystemExit("Empty prompt.")

    # Route: explicit --model wins; a --task is weighed by capability×cost (gated by
    # difficulty); otherwise the dynamic cheapest-live default.
    route_info = {}
    if args.model:
        model, provider = resolve_model(reg, model=args.model)
    elif args.task and not getattr(args, "no_weigh", False):
        model, provider, route_info = route_task(
            reg, load_caps(), args.task, getattr(args, "difficulty", None),
            prompt, spread=getattr(args, "spread", 0) or 0)
    else:
        model, provider = resolve_model(reg, args.model, args.task,
                                        latest=getattr(args, "latest", False))
    key = provider_key(reg, provider)
    if not key:
        raise SystemExit("No API key set for provider '%s' (env: %s)" %
                         (provider, ", ".join(reg["providers"][provider].get("env", []))))

    cwd = os.getcwd()
    proj = project_label(cwd)  # stamped on the ledger so `scrooge watch --here` can filter

    provider_cfg = reg["providers"][provider]
    url = provider_cfg["base_url"].rstrip("/") + "/chat/completions"
    headers = {"Authorization": "Bearer " + key, "Content-Type": "application/json"}
    headers.update(provider_cfg.get("extra_headers", {}))

    # Live training: gather per-model lessons and fold them into the system prompt.
    lessons_block, n_lessons = ("", 0)
    if not args.no_lessons:
        seed_lessons_if_absent()
        lessons_block, n_lessons = build_lessons_block(reg, model, args.task)

    # Compose the system message: user's --system (or the JSON-mode instruction)
    # leads; injected guardrails follow.
    sys_parts = []
    if args.system:
        sys_parts.append(args.system)
    elif args.json:
        sys_parts.append("Respond ONLY with a single valid JSON object. No prose, no code fences.")
    if lessons_block:
        sys_parts.append(lessons_block)
    msgs = []
    if sys_parts:
        msgs.append({"role": "system", "content": "\n\n".join(sys_parts)})
    msgs.append({"role": "user", "content": prompt})

    # Per-model constraint: some models (e.g. kimi-k2.6) require a fixed temperature.
    mcfg = reg["models"].get(model, {})
    temp = mcfg["force_temperature"] if "force_temperature" in mcfg else args.temperature
    # The model id is sent exactly as resolved. The previous code first stripped the
    # "provider/" prefix for OpenRouter and then unconditionally restored the full id,
    # so the full id was always what went on the wire; that net behaviour is kept.
    # NOTE(review): confirm whether a "provider/model" id should be stripped for any provider.
    payload = {"model": model, "messages": msgs, "temperature": temp}
    if args.max_tokens:
        # Some models (OpenAI GPT-5 / reasoning class) reject "max_tokens" and
        # require "max_completion_tokens" — overridable per-model in the registry.
        payload[mcfg.get("token_param", "max_tokens")] = args.max_tokens
    if args.json:
        # (the "JSON object only" system instruction is composed above)
        payload["response_format"] = {"type": "json_object"}

    if args.task and route_info.get("difficulty"):
        spread_n = len(route_info.get("pool") or [])
        sp = " · spread/%d" % spread_n if spread_n > 1 else ""
        label = "[task: %s · %s%s]" % (args.task, route_info["difficulty"], sp)
    else:
        label = ("[task: %s]" % args.task) if args.task else ""
    extra = (" +%d lessons" % n_lessons) if n_lessons else ""
    err(ORANGE("🪙 scrooge ▸ %s/%s %s%s" % (provider, model, label, extra)))

    t0 = time.time()

    def log_failure(error):
        """Record a failed call in the ledger (same fields for every failure kind)."""
        append_ledger({"ts": int(t0), "provider": provider, "model": model, "task": args.task,
                       "project": proj, "cwd": cwd,
                       "ok": False, "error": error,
                       "duration_ms": int((time.time() - t0) * 1000)})

    try:
        resp = http_post(url, headers, payload)
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")[:500]
        err(RED("🪙 scrooge ✗ %s/%s HTTP %s: %s" % (provider, model, e.code, body)))
        log_failure("HTTP %s" % e.code)
        raise SystemExit(2)
    except Exception as e:
        err(RED("🪙 scrooge ✗ %s/%s: %s" % (provider, model, e)))
        log_failure(str(e))
        raise SystemExit(2)

    dt = time.time() - t0
    choice = (resp.get("choices") or [{}])[0]
    msg = choice.get("message", {}) or {}
    # Fall back to the reasoning text when the model returned no regular content.
    text = msg.get("content") or msg.get("reasoning_content") or ""
    usage = resp.get("usage", {}) or {}
    tin = usage.get("prompt_tokens") or 0
    tout = usage.get("completion_tokens") or 0
    c = cost_usd(reg, model, tin, tout)
    # A short, whitespace-collapsed preview so `scrooge watch` can show WHAT each cheap
    # model is doing in real time. Local-only (the ledger is gitignored); opt out with
    # SCROOGE_NO_PREVIEW=1 if you'd rather not write any prompt text to disk.
    if os.environ.get("SCROOGE_NO_PREVIEW", "").lower() in ("1", "true", "yes"):
        preview = ""
    else:
        preview = " ".join(prompt.split())[:100]
    line_no = append_ledger({"ts": int(t0), "provider": provider, "model": model, "task": args.task,
                             "project": proj, "cwd": cwd,
                             "tokens_in": tin, "tokens_out": tout, "cost_usd": round(c, 6),
                             "duration_ms": int(dt * 1000), "ok": True, "prompt_chars": len(prompt),
                             "prompt_preview": preview})
    err(ORANGE("🪙 scrooge ✓ %s/%s · %d→%d tok · ~$%.5f · %.1fs%s" %
               (provider, model, tin, tout, c, dt, (" · ledger#%d" % line_no) if line_no else "")))
    sys.stdout.write(text)
    if not text.endswith("\n"):
        sys.stdout.write("\n")
581
+
582
def cmd_models(reg, args):
    """Print the live model ids of `args.provider`, one per line, on stdout.

    Always fetches a fresh list (ttl=0); a count line goes to stderr first.
    Raises SystemExit for an unknown provider or a provider without a key.
    """
    provider = args.provider
    known = reg["providers"]
    if provider not in known:
        raise SystemExit("Unknown provider. Known: %s" % ", ".join(known))
    if not provider_key(reg, provider):
        raise SystemExit("No key for %s" % provider)
    # always show a fresh list here
    ids = list_live_models(reg, provider, ttl=0)
    err(DIM("[scrooge] %d models from %s:" % (len(ids), provider)))
    for model_id in ids:
        print(model_id)
592
+
593
def cmd_list(reg, args):
    """Print the registry: providers, models (prices, optional capability scores), tasks, aliases.

    A ✓/✗ mark shows whether the provider has an API key available. Capability
    columns are only printed when capability data could be loaded.
    """
    def mark(provider):
        """✓ when the provider has a key, ✗ otherwise."""
        return "✓" if provider_key(reg, provider) else "✗"

    def score(v):
        """Two-wide integer rendering of a numeric score, or a dash placeholder."""
        return ("%2.0f" % v) if isinstance(v, (int, float)) else " -"

    print("PROVIDERS (live = key present):")
    for name, cfg in reg["providers"].items():
        live = mark(name)
        print(" %s %-11s %s" % (live, name, cfg["base_url"]))

    caps = load_caps()
    cap_hdr = "intel/code/reason · " if caps else ""
    print("\nMODELS ($/1M in/out · %strust · good_for):" % cap_hdr)
    for mid, cfg in reg["models"].items():
        live = mark(cfg["provider"])
        cap = ""
        if caps:
            q = caps.get(mid) or {}
            cap = "%s/%s/%s " % (score(q.get("intelligence")), score(q.get("coding")),
                                 score(q.get("reasoning")))
        print(" %s %-24s %5.2f/%-5.2f %s%-9s %s" % (
            live, mid, cfg.get("cost_in", 0), cfg.get("cost_out", 0),
            cap, cfg.get("trust", ""), ",".join(cfg.get("good_for", []))))

    print("\nTASKS (weighed by capability×cost, gated by difficulty):")
    for task, model_ids in reg.get("tasks", {}).items():
        print(" %-13s → %s" % (task, ", ".join(model_ids)))

    alias_pairs = ("%s=%s" % pair for pair in reg.get("aliases", {}).items())
    print("\nALIASES:", ", ".join(alias_pairs))
614
+
615
def parse_since(s):
    """Turn a --since value into a Unix-time lower bound.

    "all" (and anything that does not start with <digits>h or <digits>d) gives 0,
    i.e. no lower bound. "<n>h" / "<n>d" give now minus n hours / days. A missing
    or empty value is treated as "24h".
    """
    if s == "all":
        return 0
    spec = s or "24h"
    hit = re.match(r"(\d+)([hd])", spec)
    if hit is None:
        return 0
    amount = int(hit.group(1))
    unit_seconds = 3600 if hit.group(2) == "h" else 86400
    return time.time() - amount * unit_seconds
623
+
624
def cmd_ledger(reg, args):
    """Print usage and cost totals for successful calls in the --since window.

    Rows can be narrowed to one project (--here uses the current project label,
    otherwise --project). Shows total tokens, what was spent on cheap models,
    what the same tokens would cost on the orchestrator (registry "orchestrator"
    entry, defaulting to the Opus reference prices), and a per-model breakdown
    sorted by cost.

    The ledger is a shared, append-only file, so unreadable lines are skipped:
    invalid JSON, JSON values that are not objects, and null numeric fields are
    tolerated instead of aborting the report.
    """
    since = parse_since(args.since)
    want_proj = project_label(os.getcwd()) if getattr(args, "here", False) else getattr(args, "project", None)
    if not os.path.exists(LEDGER):
        print("No calls logged yet.")
        return

    def num(row, field):
        """Numeric field of a ledger row; missing or null counts as 0."""
        return row.get(field) or 0

    rows = []
    with open(LEDGER) as fh:
        for line in fh:
            try:
                o = json.loads(line)
            except Exception:
                continue
            if not isinstance(o, dict):
                continue  # valid JSON but not a ledger record (e.g. a bare number)
            if want_proj and (o.get("project") or "") != want_proj:
                continue
            if num(o, "ts") >= since and o.get("ok"):
                rows.append(o)

    scope = (" · project=%s" % want_proj) if want_proj else ""
    if not rows:
        print("No successful calls in window '%s'%s." % (args.since, scope))
        return

    orch = reg.get("orchestrator") or {}
    base_in = orch.get("cost_in", OPUS_IN)
    base_out = orch.get("cost_out", OPUS_OUT)
    orch_name = orch.get("name", "Opus")
    total_cost = sum(num(r, "cost_usd") for r in rows)
    tin = sum(num(r, "tokens_in") for r in rows)
    tout = sum(num(r, "tokens_out") for r in rows)
    orch_equiv = (tin / 1e6) * base_in + (tout / 1e6) * base_out

    # Per "provider/model" aggregates.
    by = {}
    for r in rows:
        k = "%s/%s" % (r.get("provider"), r.get("model"))
        d = by.setdefault(k, {"n": 0, "cost": 0, "tin": 0, "tout": 0})
        d["n"] += 1
        d["cost"] += num(r, "cost_usd")
        d["tin"] += num(r, "tokens_in")
        d["tout"] += num(r, "tokens_out")

    print("SCROOGE LEDGER — window: %s%s (%d calls)" % (args.since, scope, len(rows)))
    print(" tokens: %s in / %s out" % (f"{tin:,}", f"{tout:,}"))
    print(" spent on cheap models: $%.4f" % total_cost)
    print(" same tokens on %s: ~$%.2f" % (orch_name, orch_equiv))
    if orch_equiv > 0:  # avoid dividing by zero when no tokens were recorded
        print(" → saved ~$%.2f (%.0f%% cheaper)" % (orch_equiv - total_cost, 100*(1 - total_cost/orch_equiv)))
    print("\n by model:")
    for k, d in sorted(by.items(), key=lambda x: -x[1]["cost"]):
        print(" %-28s %3d calls $%.4f (%s→%s tok)" % (k, d["n"], d["cost"], f"{d['tin']:,}", f"{d['tout']:,}"))
666
+
667
def cmd_watch(reg, args):
    """Live feed of every cheap-model call as it hits the ledger.

    A real-time view of the orchestrator delegating grunt work: every call
    appends one JSON line to the ledger file, and this command renders each
    new line as it appears. Keep it open in a side pane.

    Args:
        reg: loaded registry dict; only its optional "orchestrator" entry
            (name / cost_in / cost_out) is read, for the savings baseline.
        args: parsed options. Reads ``tail``, ``all`` and ``follow`` directly
            and ``here`` / ``project`` / ``cwd`` via ``getattr``.

    Behaviour fixed relative to the previous version:
      * ``--no-follow`` exits when the ledger file does not exist instead of
        polling forever.
      * ``--tail N`` backfills the last N calls that MATCH the project/cwd
        filter rather than filtering the last N lines of the shared ledger.
      * The backfill reads only up to the offset the follower starts from, so
        a line appended in between is not printed twice.
      * A ledger removed between two polls no longer raises from getsize().
    """
    try:
        sys.stdout.reconfigure(line_buffering=True)  # flush each line even when piped/backgrounded
    except Exception:
        pass  # best effort: keep whatever buffering stdout already has
    orch = reg.get("orchestrator") or {}
    base_in, base_out = orch.get("cost_in", OPUS_IN), orch.get("cost_out", OPUS_OUT)
    orch_name = orch.get("name", "Opus")
    tot = {"n": 0, "cost": 0.0, "tin": 0, "tout": 0}  # running totals of successful calls shown

    # ---- per-project filter (many projects share one ledger) ----------------
    want_proj = project_label(os.getcwd()) if getattr(args, "here", False) else getattr(args, "project", None)
    cwd_prefix = os.path.abspath(os.path.expanduser(args.cwd)) if getattr(args, "cwd", None) else None
    single = bool(want_proj or cwd_prefix)  # single-project view → no per-line project tag

    def matches(o):
        """True when ledger entry `o` passes the active project / cwd filter."""
        if want_proj and (o.get("project") or "") != want_proj:
            return False
        if cwd_prefix:
            c = o.get("cwd") or ""
            # exact dir or a true sub-path; the trailing "/" avoids matching sibling dirs
            if not (c == cwd_prefix or c.startswith(cwd_prefix.rstrip("/") + "/")):
                return False
        return True

    def render(o):
        """Format one ledger entry as a single coloured line."""
        ts = time.strftime("%H:%M:%S", time.localtime(o.get("ts", 0)))
        pm = "%s/%s" % (o.get("provider"), o.get("model"))
        tag = AMBER("[%s]" % o["task"]) if o.get("task") else GREYc("[·]")
        pfx = "" if single else GREYc("%-16s " % (o.get("project") or "?")[:16])
        if not o.get("ok", False):
            return "%s%s %s %s %s %s" % (pfx, GREYc(ts), ERRc("✗"), GOLD(pm), tag,
                                         ERRc(str(o.get("error", "error"))))
        tin, tout = o.get("tokens_in", 0), o.get("tokens_out", 0)
        dur = o.get("duration_ms", 0) / 1000.0
        prev = (o.get("prompt_preview") or "").strip()
        return "%s%s %s %s %s %s %s %s%s" % (
            pfx, GREYc(ts), OKc("✓"), GOLD(pm), tag,
            GREYc("%d→%d tok" % (tin, tout)), AMBER("$%.5f" % o.get("cost_usd", 0)),
            GREYc("%.1fs" % dur), (" " + GREYc("· " + prev)) if prev else "")

    def summary(final=False):
        """Print running totals and the savings estimate against the orchestrator."""
        if not tot["n"]:
            if final:
                print(GREYc(" (no calls observed while watching)"))
            return
        equiv = (tot["tin"] / 1e6) * base_in + (tot["tout"] / 1e6) * base_out
        saved = equiv - tot["cost"]
        pct = (100 * (1 - tot["cost"] / equiv)) if equiv > 0 else 0
        print(AMBER(" ── %d calls · $%.4f cheap · ~$%.2f on %s · saved ~$%.2f (%.0f%%) ──"
                    % (tot["n"], tot["cost"], equiv, orch_name, saved, pct)))

    def show(o):
        """Print entry `o` if it passes the filter and fold successful calls into the totals."""
        if not matches(o):
            return
        print(render(o))
        if o.get("ok"):
            tot["n"] += 1
            tot["cost"] += o.get("cost_usd", 0)
            tot["tin"] += o.get("tokens_in", 0)
            tot["tout"] += o.get("tokens_out", 0)
            if tot["n"] % 10 == 0:
                summary()

    def parse(bl):
        """Decode one raw ledger line (bytes) into a Python object."""
        return json.loads(bl.decode("utf-8", "replace"))

    path = LEDGER
    scope = ("project=%s" % want_proj) if want_proj else \
        ("cwd=%s" % cwd_prefix.replace(HOME, "~")) if cwd_prefix else "all projects"
    print(GOLD(BOLD("🪙 scrooge watch")) +
          GREYc(" %s · following %s · Ctrl-C to stop" % (scope, path.replace(HOME, "~"))))
    # Where to start: --all replays the whole ledger; otherwise follow only NEW calls
    # (so you literally watch them happen), with --tail N backfilling recent context.
    last = 0
    if os.path.exists(path):
        last = 0 if args.all else os.path.getsize(path)
    if args.tail and args.tail > 0 and not args.all and os.path.exists(path):
        with open(path, "rb") as fh:
            # Stop at `last`: anything beyond it is picked up by the follow loop below.
            lines = [l for l in fh.read(last).split(b"\n") if l.strip()]
        recent = []
        for bl in reversed(lines):  # newest first, until N matching entries are found
            try:
                o = parse(bl)
                keep = matches(o)
            except Exception:
                continue  # malformed ledger line: skip it (best effort)
            if keep:
                recent.append(o)
                if len(recent) >= args.tail:
                    break
        for o in reversed(recent):  # back to chronological order
            try:
                show(o)
            except Exception:
                pass  # an entry that cannot be rendered must not stop the feed
    try:
        while True:
            try:
                size = os.path.getsize(path)
            except OSError:
                # Ledger not created yet (or removed between polls).
                if not args.follow:
                    break
                time.sleep(0.5)
                continue
            if size < last:  # truncated / rotated
                last = 0
            if size > last:
                with open(path, "rb") as fh:
                    fh.seek(last)
                    raw = fh.read()
                # Only consume complete lines; a partially written tail waits for the next poll.
                cut = raw.rfind(b"\n")
                if cut != -1:
                    chunk = raw[:cut + 1]
                    last += len(chunk)
                    for bl in chunk.split(b"\n"):
                        if not bl.strip():
                            continue
                        try:
                            show(parse(bl))
                        except Exception:
                            pass  # malformed or unrenderable line: skip (best effort)
            if not args.follow:
                break
            time.sleep(0.3)
    except KeyboardInterrupt:
        pass
    print()
    summary(final=True)
774
+
775
+ # ---- live-training subcommands (learn / lessons / forget) ---------------
776
def cmd_learn(reg, args):
    """Record one lesson for a model (exact duplicates are ignored).

    With --seed the shipped seed set is merged into the store instead and
    nothing else happens. Otherwise the lesson text is stored under the
    model's canonical id (aliases resolved through the registry) and the
    given task, or "*" when no task was given.
    """
    if args.seed:
        lessons = load_lessons()
        merged = merge_seed(lessons)
        save_lessons(lessons)
        target = LESSONS.replace(HOME, "~")
        print("%s merged %d seed lesson(s) into %s" % (OKc("✓"), merged, target))
        return

    if not args.model:
        raise SystemExit("learn: -m/--model is required (or use --seed to load the shipped seed set).")
    if not args.text or not args.text.strip():
        raise SystemExit('learn: provide the lesson text, e.g. scrooge learn -m deepseek -t code "Sort explicitly; never assume API ordering."')

    seed_lessons_if_absent()  # first-use bootstrap, regardless of entry point
    # Store under the canonical full id, not the alias the user typed.
    model = reg.get("aliases", {}).get(args.model, args.model)
    task = args.task if args.task else "*"
    lessons = load_lessons()
    per_model = lessons.setdefault(model, {})
    bucket = per_model.setdefault(task, [])
    text = args.text.strip()
    scope = "%s/%s" % (GOLD(model), task)

    if text in bucket:
        print("%s already known for %s" % (GREYc("•"), scope))
        return

    bucket.append(text)
    save_lessons(lessons)
    known = model in reg["models"] or args.model in reg.get("aliases", {})
    if known:
        note = ""
    else:
        note = GREYc(" (note: '%s' isn't a known model/alias — stored anyway)" % model)
    print("%s learned for %s: %s%s" % (OKc("✓"), scope, text, note))
803
+
804
def _print_model_lessons(model, tasks, tfilter):
    """Print the lesson buckets of one model and return the number of lessons printed.

    `tasks` maps task name -> list of lesson strings. Meta keys, buckets that
    do not match `tfilter` (when given) and empty / non-list buckets are
    skipped. The model header is printed once, just before the first bucket
    that is actually shown.
    """
    count = 0
    printed_header = False
    for name in sorted(tasks.keys()):
        if _is_meta_key(name) or (tfilter and name != tfilter):
            continue
        entries = tasks.get(name) or []
        if not (isinstance(entries, list) and entries):
            continue
        if not printed_header:
            print("\n" + GOLD("● " + model))
            printed_header = True
        heading = "* (all tasks)" if name == "*" else name
        print(" " + AMBER(heading))
        for pos, lesson in enumerate(lessons := entries):
            # the number shown is the 0-based index within the bucket
            print(" %s %s" % (GREYc("%d." % pos), lesson))
            count += 1
    return count
822
+
823
def cmd_lessons(reg, args):
    """Pretty-print the lessons store, optionally narrowed by -m (model) and -t (task)."""
    seed_lessons_if_absent()
    store = load_lessons()
    real = {}
    for key, value in store.items():
        if not _is_meta_key(key):
            real[key] = value

    print(GOLD(BOLD("LESSONS")) + GREYc(" (%s)" % LESSONS.replace(HOME, "~")))
    if not real:
        print(GREYc(' none yet — add one: scrooge learn -m <model> -t <task> "…"'))
        return

    mfilter = None
    if args.model:
        mfilter = reg.get("aliases", {}).get(args.model, args.model)

    if mfilter:
        # the model's own buckets, plus the universal "*" bucket that also applies
        sections = [(mfilter, real.get(mfilter, {}))]
        if "*" in real and mfilter != "*":
            sections.append(("* (every cheap model)", real.get("*", {})))
    else:
        sections = [(name, real.get(name, {})) for name in sorted(real.keys())]

    shown = 0
    for label, buckets in sections:
        shown += _print_model_lessons(label, buckets, args.task)
    if not shown:
        print(GREYc(" (nothing matches that filter)"))
843
+
844
def cmd_forget(reg, args):
    """Delete stored lessons for a model.

    Either one lesson, addressed by its 0-based index inside the task bucket
    ("*" when no task is given), or with --all the whole task bucket (when a
    task is given) or every lesson of the model. Containers left empty by the
    removal are dropped from the store. Problems are reported by raising
    SystemExit with a message.
    """
    if not args.model:
        raise SystemExit("forget: -m/--model is required.")
    seed_lessons_if_absent()  # so a fresh store reflects the shipped seed before removal
    model = reg.get("aliases", {}).get(args.model, args.model)
    store = load_lessons()
    tasks = store.get(model)
    if not (isinstance(tasks, dict) and tasks):
        raise SystemExit("No lessons stored for model '%s'." % model)

    if args.all:
        if not args.task:
            store.pop(model, None)
            scope = model
        elif args.task in tasks:
            tasks.pop(args.task, None)
            scope = "%s/%s" % (model, args.task)
        else:
            raise SystemExit("No lessons for %s/%s." % (model, args.task))
        # dropping the last task bucket leaves an empty model entry: remove it too
        if model in store and not store[model]:
            store.pop(model, None)
        save_lessons(store)
        print("%s forgot all lessons for %s" % (OKc("✓"), GOLD(scope)))
        return

    task = args.task if args.task else "*"
    bucket = tasks.get(task)
    if not (isinstance(bucket, list) and bucket):
        raise SystemExit("No lessons for %s/%s." % (model, task))
    try:
        idx = int(args.index)
    except (TypeError, ValueError):
        raise SystemExit("forget: give a 0-based <index> (see `scrooge lessons`) or --all.")
    if not 0 <= idx < len(bucket):
        raise SystemExit("Index %s out of range (0..%d) for %s/%s." % (args.index, len(bucket) - 1, model, task))

    removed = bucket.pop(idx)
    if not bucket:
        tasks.pop(task, None)
    if not tasks:
        store.pop(model, None)
    save_lessons(store)
    print("%s forgot %s/%s[%d]: %s" % (OKc("✓"), GOLD(model), task, idx, removed))
885
+
886
+ # ---- pretty UI (stdlib only: truecolor + box-drawing + arrow menu) ------
887
def _uitty():
    """True when decorated output is appropriate.

    Requires stdout to be a TTY, TERM to be set to something other than
    "dumb", and NO_COLOR to be unset or empty.
    """
    if not sys.stdout.isatty():
        return False
    if os.environ.get("TERM", "") in ("", "dumb"):
        return False
    return not os.environ.get("NO_COLOR")


def _rgb(r, g, b, s):
    """Wrap `s` in a 24-bit foreground colour escape; returns `s` unchanged when `_uitty()` is false."""
    if _uitty():
        return "\033[38;2;%d;%d;%dm%s\033[0m" % (r, g, b, s)
    return s


# Named palette entries; each takes a string and returns it coloured via _rgb.
def GOLD(s):
    return _rgb(240, 196, 80, s)


def AMBER(s):
    return _rgb(190, 145, 45, s)


def GREYc(s):
    return _rgb(128, 128, 138, s)


def OKc(s):
    return _rgb(90, 200, 130, s)


def ERRc(s):
    return _rgb(230, 100, 100, s)


def BOLD(s):
    """Bold `s` via an ANSI escape; returns `s` unchanged when `_uitty()` is false."""
    if _uitty():
        return "\033[1m%s\033[0m" % s
    return s


def GREEN(s):
    """Alias for OKc."""
    return OKc(s)


# Matches the SGR colour/style escape sequences emitted by the helpers above.
_ANSI = re.compile(r"\033\[[0-9;]*m")


def _vlen(s):
    """Length of `s` with ANSI SGR escapes removed (counts code points, not display cells)."""
    return len(_ANSI.sub("", s))


UW = 60 # panel inner width
899
+
900
def _box(lines, color=AMBER, pad=2):
    """Return `lines` framed in a rounded box whose inner width is UW columns.

    Each content line gets `pad` leading spaces and is right-filled with
    spaces up to the border. Width is measured with `_vlen`, so ANSI colour
    escapes are fine but wide (non-ASCII-width) characters will misalign the
    right border. Lines longer than the box are not truncated.
    """
    rule = "─" * UW
    framed = [color("╭" + rule + "╮")]
    for text in lines:
        fill = max(0, UW - pad - _vlen(text))
        framed.append(color("│") + " " * pad + text + " " * fill + color("│"))
    framed.append(color("╰" + rule + "╯"))
    return "\n".join(framed)
908
+
909
def _banner():
    """Print the setup banner: a boxed title when `_uitty()` is true, one plain line otherwise."""
    if _uitty():
        title = GOLD(BOLD("TOKEN SCROOGE")) + GREYc(" $ make the cheap models do the grunt work")
        tagline = GREYc("one orchestrator in charge · cheap labor with receipts")
        print()
        print(_box([title, tagline], color=GOLD))
        return
    print("Token Scrooge — setup")
915
+
916
def _step(n, total, title, sub=""):
    """Print a wizard step heading ("n/total title [sub]") followed by a rule UW columns wide."""
    counter = GOLD(BOLD("%d/%d" % (n, total)))
    suffix = (" " + GREYc(sub)) if sub else ""
    print()
    print(" " + counter + " " + BOLD(title) + suffix)
    print(" " + AMBER("─" * UW))
920
+
921
def _menu(options, default=0):
    """Let the user pick one entry and return its 0-based index.

    Args:
        options: list of (label, hint) pairs.
        default: index that is pre-selected and used as the fallback.

    Interactive mode (stdin is a TTY and `_uitty()` is true): arrow keys or
    j/k move the ❯ pointer, a digit jumps straight to that entry, Enter
    accepts. Otherwise a numbered list is printed and the answer is read with
    `_ask`; out-of-range numbers are clamped and non-numeric input yields
    `default`.

    Returns `default` if drawing or reading raises an Exception. Ctrl-C
    (delivered as "\\x03") raises KeyboardInterrupt. End of input is treated
    like Enter; previously an EOF on stdin made the read loop spin forever
    because `read(1)` keeps returning "" and no branch handled it.
    """
    if not (sys.stdin.isatty() and _uitty()):
        for i, (lab, hint) in enumerate(options, 1):
            print(" %d) %s %s" % (i, lab, GREYc(hint)))
        c = _ask(" Select [1-%d] (default %d): " % (len(options), default + 1), str(default + 1))
        try:
            return max(1, min(len(options), int(c))) - 1
        except ValueError:
            return default

    import termios
    import tty
    idx = default
    fd = sys.stdin.fileno()
    old = termios.tcgetattr(fd)  # saved so the terminal mode can be restored in `finally`

    def draw(first=False):
        """(Re)paint the option list; on repaint, first move the cursor back up over it."""
        if not first:
            sys.stdout.write("\033[%dA" % len(options))
        for i, (lab, hint) in enumerate(options):
            sel = i == idx
            ptr = GOLD("❯") if sel else " "
            txt = GOLD(BOLD(lab)) if sel else lab
            sys.stdout.write("\r\033[K %s %s %s\n" % (ptr, txt, GREYc(hint)))
        sys.stdout.flush()

    try:
        sys.stdout.write(GREYc(" (↑/↓ to move, Enter to select)\n"))
        draw(first=True)
        tty.setcbreak(fd)  # deliver keys one at a time, without waiting for Enter
        while True:
            ch = sys.stdin.read(1)
            if ch == "" or ch in ("\r", "\n"):
                break  # Enter, or stdin closed: accept the highlighted entry
            elif ch == "\x1b":
                # arrow keys arrive as ESC [ A / ESC [ B
                seq = sys.stdin.read(2)
                if seq == "[A":
                    idx = (idx - 1) % len(options)
                    draw()
                elif seq == "[B":
                    idx = (idx + 1) % len(options)
                    draw()
            elif ch == "k":
                idx = (idx - 1) % len(options)
                draw()
            elif ch == "j":
                idx = (idx + 1) % len(options)
                draw()
            elif ch.isdigit() and 1 <= int(ch) <= len(options):
                idx = int(ch) - 1
                draw()
            elif ch == "\x03":
                raise KeyboardInterrupt
    except Exception:
        return default
    finally:
        termios.tcsetattr(fd, termios.TCSADRAIN, old)
    return idx
959
+
960
def _spin(label, fn):
    """Run `fn()` while animating a spinner next to `label`; return `fn()`'s result.

    When `_uitty()` is false, `fn()` is simply called inline. Otherwise `fn`
    runs in a worker thread while this thread draws the spinner.

    An exception raised by `fn` is re-raised here in both modes. Previously
    the threaded path lost it and returned None, which made callers that
    unpack the result fail with an unrelated TypeError. The spinner line is
    also cleared if the wait is interrupted.
    """
    if not _uitty():
        return fn()
    import itertools
    import threading
    import time as _t
    box = {}

    def _worker():
        # Capture the outcome either way so the caller can return it or re-raise it.
        try:
            box["r"] = fn()
        except BaseException as exc:
            box["e"] = exc

    th = threading.Thread(target=_worker)
    th.start()
    try:
        for fr in itertools.cycle("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"):
            if not th.is_alive():
                break
            sys.stdout.write("\r %s %s" % (GOLD(fr), label))
            sys.stdout.flush()
            _t.sleep(0.08)
        th.join()
    finally:
        sys.stdout.write("\r\033[K")  # erase the spinner line
        sys.stdout.flush()
    if "e" in box:
        raise box["e"]
    return box.get("r")
974
+
975
# Orchestrator presets, one tuple per choice:
#     (key, label, input price in $ per 1M tokens, output price in $ per 1M tokens)
# The orchestrator is the model YOU drive your agent with. Scrooge never calls
# it; its prices only set the savings baseline shown by the ledger. The numbers
# are approximate and can be edited in registry.json.
# Listed premium → budget: a cheap flagship can orchestrate for the truly thrifty.
ORCHESTRATORS = [
    # frontier / premium
    ("claude-opus", "Claude Opus", 15.0, 75.0),
    ("claude-sonnet", "Claude Sonnet", 3.0, 15.0),
    ("gpt-flagship", "OpenAI GPT (flagship)", 10.0, 30.0),
    ("gemini-pro", "Gemini Pro", 1.25, 10.0),
    ("grok", "xAI Grok", 3.0, 15.0),
    ("mistral-large", "Mistral Large", 2.0, 6.0),
    # budget flagships (cheap enough to orchestrate on a tight budget)
    ("deepseek", "DeepSeek V3 / R1 · budget", 0.27, 1.10),
    ("kimi", "Kimi K2 (Moonshot) · budget", 0.60, 2.50),
    ("qwen", "Qwen Max · budget", 1.60, 6.40),
    ("glm", "Zhipu GLM-4.6 · budget", 0.60, 2.20),
    # "custom" has no preset price
    ("custom", "Other flagship / custom", 0.0, 0.0),
]
994
+
995
def _ask(prompt, default=""):
    """Prompt on stdin and return the stripped answer.

    Returns `default` when the answer is blank or when input ends (EOFError).
    """
    try:
        answer = input(prompt).strip()
    except EOFError:
        return default
    if answer:
        return answer
    return default
1001
+
1002
def _ask_secret(prompt):
    """Read a secret and return it stripped.

    Uses getpass when stdin is a TTY. Falls back to the plain `_ask` prompt
    when stdin is piped (non-interactive / tests) or when getpass raises.
    """
    if not sys.stdin.isatty():
        return _ask(prompt)  # piped (non-interactive / tests)
    import getpass
    try:
        secret = getpass.getpass(prompt)
        return secret.strip()
    except Exception:
        return _ask(prompt)
1010
+
1011
def _ensure_registry():
    """Create the scrooge directory and, if no registry exists yet, copy a template into place.

    The template is looked up first inside SCROOGE_DIR, then one directory
    above this script. Raises SystemExit when neither location has one.
    """
    os.makedirs(SCROOGE_DIR, exist_ok=True)
    if not os.path.exists(REGISTRY):
        here = os.path.dirname(os.path.realpath(__file__))
        candidates = (
            os.path.join(SCROOGE_DIR, "registry.template.json"),
            os.path.join(here, "..", "registry.template.json"),
        )
        template = next((c for c in candidates if os.path.exists(c)), None)
        if template is None:
            raise SystemExit("No registry template found near %s — reinstall Token Scrooge." % here)
        shutil.copy(template, REGISTRY)
1022
+
1023
def _write_env_file(new_keys):
    """Merge new KEY=VALUE pairs into ~/.token-scrooge/.env, preserving existing, chmod 600.

    Args:
        new_keys: mapping of env-var name -> value. Entries with an empty
            value are ignored, so they never blank out a stored key.

    Returns:
        Path of the .env file that was written.

    Existing entries keep their position and new ones are appended. Comment
    and blank lines of the old file are not carried over; the two header
    lines are rewritten on every call.

    Changes from the previous version: the old file is read through a context
    manager (no leaked handle); the file is created with mode 0600 from the
    start instead of being world-readable until the final chmod; the encoding
    is pinned to UTF-8 because the header contains a non-ASCII dash.
    """
    path = os.path.join(SCROOGE_DIR, ".env")
    os.makedirs(SCROOGE_DIR, exist_ok=True)
    existing = {}
    if os.path.exists(path):
        with open(path, encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if line and not line.startswith("#") and "=" in line:
                    k, v = line.split("=", 1)  # split once: values may contain "="
                    existing[k.strip()] = v.strip()
    existing.update({k: v for k, v in new_keys.items() if v})
    # The mode argument only applies when the file is created here ...
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        fh.write("# Token Scrooge — provider API keys. Loaded automatically by `scrooge`.\n")
        fh.write("# Created by `scrooge setup`. Keep private (this file is chmod 600).\n")
        for k, v in existing.items():
            fh.write("%s=%s\n" % (k, v))
    # ... so tighten a pre-existing file that had looser permissions.
    os.chmod(path, 0o600)
    return path
1040
+
1041
def _ping(reg, provider):
    """Check a provider's key by requesting its model list.

    Returns an (ok, detail) pair: (True, "<n> models") on success, otherwise
    (False, reason), where reason is "no key", "HTTP <code>" or the first 40
    characters of the error text.
    """
    key = provider_key(reg, provider)
    if not key:
        return False, "no key"
    cfg = reg["providers"][provider]
    base = cfg["base_url"].rstrip("/")
    headers = {"Authorization": "Bearer " + key}
    headers.update(cfg.get("extra_headers", {}))
    try:
        payload = http_get(base + "/models", headers, timeout=15)
        models = payload.get("data", []) or []
        return True, "%d models" % len(models)
    except urllib.error.HTTPError as exc:
        return False, "HTTP %s" % exc.code
    except Exception as exc:
        return False, str(exc)[:40]
1057
+
1058
def _install_claude_gate():
    """Opt-in: copy the verification gate (hook + agent + skill) into ~/.claude and
    wire the Stop/SubagentStop hook idempotently. Non-destructive.

    Steps:
      1. Copy the two hook scripts, the agent and the skill from the repo
         (the directory above this script's directory) into ~/.claude.
         Files missing from the repo are reported and skipped.
      2. Merge the hook commands into ~/.claude/settings.json. An entry is
         only appended when no existing command for that event already
         mentions the script, so re-running does not duplicate it.

    Changes from the previous version: a settings.json that cannot be parsed
    (or is not a JSON object) used to be silently replaced by an empty
    object and overwritten, losing the user's settings; it is now copied to
    settings.json.bak first and the user is told. Files are read and written
    through context managers, and ~/.claude is created before settings.json
    is written.
    """
    repo = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    claude = os.path.join(HOME, ".claude")
    copies = [
        (os.path.join(repo, "hooks", "verify-done-gate.py"), os.path.join(claude, "hooks", "verify-done-gate.py")),
        (os.path.join(repo, "hooks", "scrooge-announce.py"), os.path.join(claude, "hooks", "scrooge-announce.py")),
        (os.path.join(repo, "agents", "adversarial-verifier.md"), os.path.join(claude, "agents", "adversarial-verifier.md")),
        (os.path.join(repo, "skills", "diverge", "SKILL.md"), os.path.join(claude, "skills", "diverge", "SKILL.md")),
    ]
    for src, dst in copies:
        if not os.path.exists(src):
            err(RED(" skipped (missing in repo): %s" % src))
            continue
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.copy(src, dst)
        print(" ✓ %s" % dst.replace(HOME, "~"))

    # merge hooks into settings.json
    sp = os.path.join(claude, "settings.json")
    settings = {}
    if os.path.exists(sp):
        try:
            with open(sp) as fh:
                settings = json.load(fh)
        except Exception:
            settings = None
        if not isinstance(settings, dict):
            # Unreadable or not a JSON object: keep the user's file before it is overwritten.
            backup = sp + ".bak"
            shutil.copy(sp, backup)
            err(RED(" settings.json could not be read as a JSON object — original kept at %s"
                    % backup.replace(HOME, "~")))
            settings = {}
    hooks = settings.setdefault("hooks", {})
    gate = {"type": "command", "command": "python3 %s" % os.path.join(claude, "hooks", "verify-done-gate.py")}

    def _has(arr, needle):
        """True when any hook command already registered in `arr` mentions `needle`."""
        return any(any(needle in h.get("command", "") for h in e.get("hooks", [])) for e in arr)

    for ev in ("Stop", "SubagentStop"):
        arr = hooks.setdefault(ev, [])
        if not _has(arr, "verify-done-gate.py"):
            arr.append({"hooks": [gate]})
    # surface scrooge delegations live (PreToolUse on Bash)
    announce = {"type": "command", "command": "python3 %s" % os.path.join(claude, "hooks", "scrooge-announce.py")}
    pre = hooks.setdefault("PreToolUse", [])
    if not _has(pre, "scrooge-announce.py"):
        pre.append({"matcher": "Bash", "hooks": [announce]})

    os.makedirs(claude, exist_ok=True)  # every copy above may have been skipped
    with open(sp, "w") as fh:
        json.dump(settings, fh, indent=2)
        fh.write("\n")
    print(" ✓ wired verify-done-gate.py into Stop + SubagentStop (%s)" % sp.replace(HOME, "~"))
    print(" ✓ wired scrooge-announce.py into PreToolUse(Bash) — marks cheap-model delegations")
    print(DIM(" (disable a block any time with VERIFY_DONE_GATE_OFF=1)"))
1097
+
1098
def cmd_setup(args):
    """Interactive first-run wizard.

    Steps:
      1. Choose the orchestrator (sets the ledger's savings baseline).
      2. Collect provider API keys (and an optional Artificial Analysis key),
         write them to the .env file and save the registry.
      3. Live-test every provider that has a key, offering to re-enter keys
         for the ones that fail.
      4. Optionally install the Claude Code verification gate.

    Args:
        args: unused; accepted so the function matches the other cmd_* handlers.

    Changes from the previous version: menu price hints use %g, so budget
    prices such as 0.27/1.10 are no longer shown as "$0/$1"; a non-numeric
    custom price falls back to the default instead of aborting the wizard
    with ValueError; the registry is written through a context manager.
    """
    _banner()
    _ensure_registry()
    reg = load_registry()
    TOTAL = 4

    def _price(prompt, fallback):
        """Ask for a $/1M price; return `fallback` (with a note) when the answer is not a number."""
        raw = _ask(prompt, str(fallback))
        try:
            return float(raw)
        except ValueError:
            print(" " + GREYc("'%s' is not a number — using %s" % (raw, fallback)))
            return float(fallback)

    # 1) Orchestrator — arrow-key menu
    _step(1, TOTAL, "Your orchestrator", "the expensive model you drive with — sets the savings baseline")
    opts = [(label, ("$%g/$%g per 1M" % (ci, co)) if k != "custom" else "enter your own pricing")
            for (k, label, ci, co) in ORCHESTRATORS]
    idx = _menu(opts, default=0)
    okey, olabel, oci, oco = ORCHESTRATORS[idx]
    if okey == "custom":
        olabel = _ask(" Orchestrator name: ", "Custom")
        oci = _price(" Its input $/1M tokens: ", 15)
        oco = _price(" Its output $/1M tokens: ", 75)
    reg["orchestrator"] = {"name": olabel, "cost_in": oci, "cost_out": oco}
    print(" " + OKc("✓") + " orchestrator: " + GOLD(BOLD(olabel)))

    # 2) Keys — show status dots, prompt only for missing
    _step(2, TOTAL, "Provider API keys", "paste to enable · Enter to skip · detected env keys kept")
    new_keys = {}
    for provider, cfg in reg["providers"].items():
        env_names = cfg.get("env", [])
        already = next((n for n in env_names if os.environ.get(n)), None)
        if already:
            print(" %s %-11s %s" % (GOLD("●"), provider, GREYc("detected · " + already)))
            continue
        print(" %s %-11s %s" % (GREYc("○"), provider, GREYc("needs " + (env_names[0] if env_names else "key"))))
        val = _ask_secret(" ↳ paste key (or Enter to skip): ")
        if val:
            new_keys[(env_names[0] if env_names else provider.upper() + "_API_KEY")] = val
            print(" " + OKc("✓ added"))
    # Optional Artificial Analysis key — powers capability-aware routing's WEEKLY score refresh.
    # Routing already works from the shipped capabilities.seed.json; this just keeps the numbers
    # current as models change. Free key: artificialanalysis.ai (create account → API).
    if os.environ.get("AA_API_KEY") or os.environ.get("ARTIFICIAL_ANALYSIS_API_KEY"):
        print(" %s %-11s %s" % (GOLD("●"), "capability", GREYc("AA key detected · model quality scores will auto-refresh weekly")))
    else:
        print(" %s %-11s %s" % (GREYc("○"), "capability", GREYc("optional · keeps model quality scores fresh for smart routing")))
        print(" " + GREYc("free key at artificialanalysis.ai (account → API). Skip and scrooge still"))
        print(" " + GREYc("routes from the shipped scores — this only enables the weekly refresh."))
        aav = _ask_secret(" ↳ paste Artificial Analysis key (or Enter to skip): ")
        if aav:
            new_keys["AA_API_KEY"] = aav
            print(" " + OKc("✓ added — weekly capability refresh enabled"))
    env_path = _write_env_file(new_keys)
    # Persist the orchestrator choice; run_tests() below reloads the registry from disk.
    with open(REGISTRY, "w") as fh:
        json.dump(reg, fh, indent=2)
        fh.write("\n")
    print(" " + OKc("✓") + " keys saved to " + GOLD(env_path.replace(HOME, "~")) + GREYc(" (chmod 600)"))

    # 3) Live test — spinner per provider, with inline retry for failures
    _step(3, TOTAL, "Testing live providers", "")

    def run_tests():
        """Ping every provider that has a key; return (registry, live count, failed providers)."""
        load_env_fallback()
        r = load_registry()
        livec, failed = 0, []
        for provider in r["providers"]:
            if not provider_key(r, provider):
                continue
            # p=provider binds the current value; a bare closure would see the loop variable late
            ok, detail = _spin("testing %s …" % provider, lambda p=provider: _ping(r, p))
            print(" %s %-11s %s" % (OKc("✓") if ok else ERRc("✗"), provider, (OKc(detail) if ok else ERRc(detail))))
            if ok:
                livec += 1
            else:
                failed.append(provider)
        return r, livec, failed

    reg, live, failed = run_tests()
    # A failure is almost always a wrong paste or a stale/auto-detected key.
    # Offer to re-enter the right key and re-test, in place, until resolved.
    while failed and sys.stdin.isatty():
        print(" " + AMBER("%d provider(s) failed — usually a wrong or stale key." % len(failed)))
        if not _ask(" Re-enter keys for the failed ones now? " + GREYc("[Y/n]") + " ", "y").lower().startswith("y"):
            break
        fixes = {}
        for provider in failed:
            envn = reg["providers"][provider].get("env", [])
            primary = envn[0] if envn else provider.upper() + "_API_KEY"
            val = _ask_secret(" %s → paste %s (Enter to skip): " % (provider, primary))
            if val:
                fixes[primary] = val
                os.environ[primary] = val  # picked up on the next test pass
        if not fixes:
            break
        _write_env_file(fixes)
        print(" " + GREYc("re-testing…"))
        reg, live, failed = run_tests()
    print(" " + (OKc("● %d provider(s) live" % live) if live else ERRc("no working providers yet — add a key and re-run `scrooge setup`")))

    # 4) Optional Claude Code gate
    _step(4, TOTAL, "Claude Code verification gate", "optional")
    print(" " + GREYc("diverge skill + adversarial-verifier agent + a Stop hook that blocks"))
    print(" " + GREYc("'done' claims with no build/test evidence."))
    if _ask(" Install into ~/.claude? " + GREYc("[y/N]") + " ", "n").lower().startswith("y"):
        _install_claude_gate()
    else:
        print(" " + GREYc("skipped — run `scrooge setup` again any time to add it."))

    # Done panel
    print()
    print(_box([OKc(BOLD("✓ Ready.")) + GREYc(" saving baseline: ") + GOLD(olabel),
                "",
                GREYc("try ") + GOLD("scrooge list"),
                GREYc(" ") + GOLD("scrooge --task summarize < file.md"),
                GREYc(" ") + GOLD("scrooge ledger") + GREYc(" # spend + savings")], color=OKc))
    print()
1201
+
1202
def main():
    """Command-line entry point.

    The first argument selects a subcommand (setup, models, list, ledger,
    watch, learn, lessons, forget); anything else is treated as a model call.
    Subcommands are dispatched by hand, each with its own small parser, to
    avoid the argparse subparser vs positional-prompt clash.
    """
    load_env_fallback()
    argv = sys.argv[1:]
    sub = argv[0] if argv else None

    # setup is handled before the registry is loaded
    if sub == "setup":
        return cmd_setup(None)
    reg = load_registry()
    rest = argv[1:]

    if sub == "models":
        parser = argparse.ArgumentParser(prog="scrooge models")
        parser.add_argument("provider")
        return cmd_models(reg, parser.parse_args(rest))

    if sub == "list":
        return cmd_list(reg, None)

    if sub == "ledger":
        parser = argparse.ArgumentParser(prog="scrooge ledger")
        parser.add_argument("--since", default="24h")
        parser.add_argument("--here", action="store_true", help="only this project (cwd's git repo / dir)")
        parser.add_argument("--project", help="only this project name (see SCROOGE_PROJECT)")
        return cmd_ledger(reg, parser.parse_args(rest))

    if sub == "watch":
        parser = argparse.ArgumentParser(prog="scrooge watch")
        parser.add_argument("--tail", type=int, default=3, help="show the last N calls before following")
        parser.add_argument("--all", action="store_true", help="replay the entire ledger, then follow")
        parser.add_argument("--no-follow", action="store_false", dest="follow",
                            help="print matching calls and exit (don't stream)")
        parser.add_argument("--here", action="store_true",
                            help="only THIS project (cwd's git repo / dir) — run it in the project's terminal")
        parser.add_argument("--project", help="only this project name (the SCROOGE_PROJECT / git-dir label)")
        parser.add_argument("--cwd", help="only calls whose working dir is under this path")
        return cmd_watch(reg, parser.parse_args(rest))

    if sub == "learn":
        parser = argparse.ArgumentParser(prog="scrooge learn")
        parser.add_argument("--model", "-m")
        parser.add_argument("--task", "-t")
        parser.add_argument("--seed", action="store_true", help="merge the shipped seed set into your store")
        parser.add_argument("text", nargs="?")
        return cmd_learn(reg, parser.parse_args(rest))

    if sub == "lessons":
        parser = argparse.ArgumentParser(prog="scrooge lessons")
        parser.add_argument("--model", "-m")
        parser.add_argument("--task", "-t")
        return cmd_lessons(reg, parser.parse_args(rest))

    if sub == "forget":
        parser = argparse.ArgumentParser(prog="scrooge forget")
        parser.add_argument("--model", "-m")
        parser.add_argument("--task", "-t")
        parser.add_argument("--all", action="store_true")
        parser.add_argument("index", nargs="?")
        return cmd_forget(reg, parser.parse_args(rest))

    # Default: a model call.
    parser = argparse.ArgumentParser(prog="scrooge")
    parser.add_argument("prompt", nargs="?")
    parser.add_argument("--model", "-m")
    parser.add_argument("--task", "-t")
    parser.add_argument("--system", "-s")
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--max-tokens", type=int, dest="max_tokens")
    parser.add_argument("--temperature", type=float, default=0.3)
    parser.add_argument("--no-lessons", action="store_true", dest="no_lessons",
                        help="skip injecting learned per-model lessons (A/B comparison)")
    parser.add_argument("--latest", action="store_true",
                        help="for the default model, re-check the provider's live model list now (bypass cache)")
    parser.add_argument("--difficulty", "-d", choices=["easy", "medium", "hard"],
                        help="task difficulty → sets the capability floor (else inferred)")
    parser.add_argument("--spread", type=int, default=0, metavar="N",
                        help="fan a batch across the top-N capable models (rate-limit/throughput)")
    parser.add_argument("--no-weigh", action="store_true", dest="no_weigh",
                        help="skip capability weighing; use the registry's cheapest-first task order")
    return cmd_call(reg, parser.parse_args(argv))
1274
+
1275
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()