loki-mode 7.30.0 → 7.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2159,6 +2159,21 @@ class FocusRequest(BaseModel):
2159
2159
  project_dir: str
2160
2160
 
2161
2161
 
2162
# Allowlist of model aliases a live run may be switched to mid-flight.
#
# Keep this tuple identical to the read-side allowlist in run.sh: the content
# of the override file is passed straight to `claude --model`. "fable" is the
# top-tier advisory model at 2x Opus cost (the UI surfaces that). A `None` or
# empty request clears the override instead of selecting a model.
_SESSION_MODEL_ALLOWLIST = (
    "haiku",
    "sonnet",
    "opus",
    "fable",
)
2167
+
2168
+
2169
class SessionModelRequest(BaseModel):
    """Request body for setting or clearing the live run's model override.

    `model` carries the requested alias. It defaults to `None`; the POST
    handler treats `None` or an empty string as "clear the override".
    """

    # The field is literally named "model", which falls inside Pydantic's
    # protected "model_" namespace. Emptying protected_namespaces keeps that
    # field name from emitting a warning.
    model_config = ConfigDict(protected_namespaces=())

    model: str | None = None
2175
+
2176
+
2162
2177
  @app.post("/api/focus", dependencies=[Depends(auth.require_scope("control"))])
2163
2178
  async def set_focus(request: FocusRequest):
2164
2179
  """Set the active project directory for .loki/ resolution.
@@ -2202,6 +2217,287 @@ async def clear_focus():
2202
2217
  return {"project_dir": None, "loki_dir": str(_get_loki_dir())}
2203
2218
 
2204
2219
 
2220
def _model_override_path() -> _Path:
    """Build the project-scoped location of the mid-flight model override file.

    The file lives at `<loki dir>/state/model-override`, where the loki dir is
    whatever `_get_loki_dir()` currently resolves to.
    """
    state_dir = _get_loki_dir() / "state"
    return state_dir / "model-override"
2223
+
2224
+
2225
def _normalize_session_model(raw: str | None) -> str:
    """Normalize a model alias for the override-file / POST path.

    The value is trimmed and lowercased, then accepted only if it is an exact
    member of `_SESSION_MODEL_ALLOWLIST`. Anything else -- `None`, an empty
    string, an unknown name, or a value with interior whitespace such as
    "fab le" -- comes back as "". This is the canonical normalization shared
    with run.sh and the estimator, so all three agree on what a value means.

    Args:
        raw: The alias as read from the request or the override file.

    Returns:
        The canonical alias, or "" when the value is not allowlisted.
    """
    if not raw:
        return ""
    alias = raw.strip().lower()
    if alias in _SESSION_MODEL_ALLOWLIST:
        return alias
    return ""
2234
+
2235
+
2236
# Allowlist for LOKI_SESSION_MODEL pins. Deliberately BROADER than the
# override-file allowlist (_SESSION_MODEL_ALLOWLIST).
#
# run.sh's session-pin case (run.sh:12331) accepts the four model aliases plus
# the three raw tier names (planning|development|fast), as documented at
# skills/model-selection.md:8. A session pin is a tier route, so tier names are
# valid pins. The override file / POST path stays on the narrow allowlist
# because its value goes straight to `claude --model`, where tier names are not
# valid.
_SESSION_PIN_ALLOWLIST = (
    *_SESSION_MODEL_ALLOWLIST,
    "planning",
    "development",
    "fast",
)
2244
+
2245
+
2246
def _normalize_session_pin(raw: str | None) -> str:
    """Normalize a LOKI_SESSION_MODEL pin (model aliases plus raw tier names).

    Mirrors run.sh's session-pin case: the value is trimmed and lowercased and
    must then be one of the entries in `_SESSION_PIN_ALLOWLIST`. Interior
    whitespace is not stripped, so "fab le" stays junk and yields "" -- the
    caller then falls through to its default, like the runner's "*" arm.

    Use this for the session-pin (no-override) derivation; use
    `_normalize_session_model` for the override-file / POST path.

    Args:
        raw: The pin value, typically read from the environment.

    Returns:
        The canonical pin, or "" when the value is not an accepted pin.
    """
    if not raw:
        return ""
    pin = raw.strip().lower()
    if pin in _SESSION_PIN_ALLOWLIST:
        return pin
    return ""
2257
+
2258
+
2259
+ # Provider-config model resolution mirror.
2260
+ #
2261
+ # SYNC: This is a byte-faithful python port of the claude provider's tier->model
2262
+ # resolution in providers/claude.sh (CLAUDE_DEFAULT_FAST / CLAUDE_DEFAULT_DEVELOPMENT
2263
+ # and the PROVIDER_MODEL_FAST / PROVIDER_MODEL_DEVELOPMENT resolution chains,
2264
+ # claude.sh:55-67) plus loki_apply_max_tier_clamp (claude.sh:318). The same port
2265
+ # also lives in the `loki plan` estimator (autonomy/loki, _provider_model_fast /
2266
+ # _provider_model_development / _loki_clamp_alias). All three readers MUST agree;
2267
+ # the agreement is locked by the parity test in tests/test-model-override.sh
2268
+ # ("resolver parity matrix") and the cross-route tests in test-plan-command.sh.
2269
+ # If you change resolution here, change it in claude.sh AND autonomy/loki, and
2270
+ # re-run those tests. The `or` chains mirror bash `:-` empty-string-fallthrough;
2271
+ # allow_haiku uses an exact "true" match to mirror bash `[ "$x" = "true" ]`.
2272
def _allow_haiku() -> bool:
    """Return True only when LOKI_ALLOW_HAIKU is exactly the string "true"."""
    # Unset and empty both collapse to "false"; the comparison is an exact,
    # case-sensitive match, mirroring bash `[ "$x" = "true" ]`.
    flag = os.environ.get("LOKI_ALLOW_HAIKU") or "false"
    return flag == "true"
2274
+
2275
+
2276
def _provider_model_fast() -> str:
    """Resolve the model used for the "fast" tier.

    Precedence (claude.sh:67): LOKI_CLAUDE_MODEL_FAST, then LOKI_MODEL_FAST,
    then the haiku-aware default ("haiku" when haiku is allowed, otherwise
    "sonnet"). An empty variable counts as unset and falls through.
    """
    for env_name in ("LOKI_CLAUDE_MODEL_FAST", "LOKI_MODEL_FAST"):
        configured = os.environ.get(env_name)
        if configured:
            return configured
    return "haiku" if _allow_haiku() else "sonnet"
2283
+
2284
+
2285
def _provider_model_development() -> str:
    """Resolve the model used for the "development" tier.

    Precedence (claude.sh:66): LOKI_CLAUDE_MODEL_DEVELOPMENT, then
    LOKI_MODEL_DEVELOPMENT, then the default ("sonnet" when haiku is allowed,
    otherwise "opus"). An empty variable counts as unset and falls through.
    """
    for env_name in ("LOKI_CLAUDE_MODEL_DEVELOPMENT", "LOKI_MODEL_DEVELOPMENT"):
        configured = os.environ.get(env_name)
        if configured:
            return configured
    return "sonnet" if _allow_haiku() else "opus"
2292
+
2293
+
2294
def _provider_model_planning() -> str:
    """Resolve the model used for the "planning" tier.

    Precedence (claude.sh:65): LOKI_CLAUDE_MODEL_PLANNING, then
    LOKI_MODEL_PLANNING, then "opus". The planning default is always opus:
    LOKI_ALLOW_HAIKU lowers only the development and fast defaults. An empty
    variable counts as unset and falls through.
    """
    candidates = (
        os.environ.get("LOKI_CLAUDE_MODEL_PLANNING"),
        os.environ.get("LOKI_MODEL_PLANNING"),
    )
    # First non-empty override wins; otherwise the fixed planning default.
    return next((value for value in candidates if value), "opus")
2303
+
2304
+
2305
def _clamp_to_max_tier(alias: str) -> str:
    """Apply the operator's LOKI_MAX_TIER ceiling to an override-path alias.

    Mirrors providers/claude.sh `loki_apply_max_tier_clamp`, invoked with the
    alias as both model and tier (the override-path convention used by the
    run.sh mid-flight override clamp). The clamp result is resolved through the
    same provider config the runner uses, so LOKI_ALLOW_HAIKU and the
    LOKI_CLAUDE_MODEL_* / LOKI_MODEL_* overrides are honored:

    - no ceiling (unset/empty) or an unrecognized ceiling: alias unchanged;
    - "haiku" ceiling: everything becomes PROVIDER_MODEL_FAST;
    - "sonnet" ceiling: "fable" becomes PROVIDER_MODEL_DEVELOPMENT, every other
      alias passes through;
    - "opus" ceiling: "fable" becomes "opus", every other alias passes through.

    Args:
        alias: The override alias to clamp.

    Returns:
        The model the run would dispatch for that alias under the ceiling.
    """
    ceiling = os.environ.get("LOKI_MAX_TIER") or ""
    ceiling = ceiling.strip().lower()

    # The haiku cap is the only one that rewrites every alias.
    if ceiling == "haiku":
        return _provider_model_fast()

    # Every remaining ceiling (including none) only ever touches "fable".
    if alias != "fable":
        return alias
    if ceiling == "sonnet":
        return _provider_model_development()
    if ceiling == "opus":
        return "opus"
    return alias
2332
+
2333
+
2334
def _resolve_session_pin(alias: str) -> str:
    """Resolve a session pin the way the runner's NO-OVERRIDE path does.

    A session pin is not passed straight to --model. The runner first maps it
    to an abstract tier (run.sh:12331 -- opus->planning, sonnet->development,
    haiku->fast, fable->fable; raw tier names map to themselves), resolves that
    tier through resolve_model_for_tier (claude.sh:353), and then applies
    loki_apply_max_tier_clamp(model, REAL_tier).

    This differs from `_clamp_to_max_tier` (the override-path clamp): on stock
    config a "sonnet" SESSION pin dispatches opus (development tier ->
    PROVIDER_MODEL_DEVELOPMENT), whereas a "sonnet" OVERRIDE file dispatches
    sonnet. Use this function for the no-override `default`/`effective`
    derivation so the dashboard reports what the default path dispatches.

    SYNC: must stay in step with run.sh's session-pin case, claude.sh
    resolve_model_for_tier + loki_apply_max_tier_clamp, and the estimator's
    _resolve_session_pin in autonomy/loki. Locked by the session-pin parity
    matrix in tests/test-model-override.sh.

    Args:
        alias: The session pin (model alias or raw tier name). Unknown values
            are treated as the development tier.

    Returns:
        The model the run would dispatch for that pin.
    """
    tier_by_pin = {
        # Model aliases route through their tier.
        "opus": "planning",
        "sonnet": "development",
        "haiku": "fast",
        "fable": "fable",
        # Raw tier-name pins (run.sh:12336 passthrough arm) keep their own
        # tier, so pin=fast resolves PROVIDER_MODEL_FAST rather than collapsing
        # onto development.
        "planning": "planning",
        "development": "development",
        "fast": "fast",
    }
    pin_key = (alias or "").strip().lower()
    pin_tier = tier_by_pin.get(pin_key, "development")

    if pin_tier == "fable":
        model = "fable"
    elif pin_tier == "planning":
        model = _provider_model_planning()
    elif pin_tier == "fast":
        model = _provider_model_fast()
    else:
        # development, and the unknown-pin "*" fallthrough
        model = _provider_model_development()

    ceiling = (os.environ.get("LOKI_MAX_TIER") or "").strip().lower()
    if ceiling == "haiku":
        return _provider_model_fast()
    if ceiling == "sonnet":
        # The sonnet cap downgrades planning/fable tiers (or a fable model) to
        # PROVIDER_MODEL_DEVELOPMENT; development and fast pass through.
        needs_downgrade = model == "fable" or pin_tier in ("planning", "fable")
        return _provider_model_development() if needs_downgrade else model
    if ceiling == "opus" and model == "fable":
        return "opus"
    # No ceiling, an unrecognized ceiling, or nothing to clamp.
    return model
2388
+
2389
+
2390
@app.get("/api/session/model", dependencies=[Depends(auth.require_scope("read"))])
async def get_session_model():
    """Report the live run's model override and the effective default.

    Response fields:

    - `override`: the alias currently written to .loki/state/model-override, or
      None when no valid override is active (missing, unreadable, undecodable
      or non-allowlisted content all count as "no override").
    - `default`: the session pin the run falls back to when there is no
      override (LOKI_SESSION_MODEL, or "sonnet").
    - `effective`: the model the next iteration will actually DISPATCH,
      resolved on the same route the runner uses for the active case.
    - `allowed`: the aliases accepted by the POST endpoint.

    Routes used for `effective`:

    - OVERRIDE active: the runner feeds the alias straight to --model via
      loki_apply_max_tier_clamp(alias, alias), so `effective` comes from
      _clamp_to_max_tier. A "sonnet" override dispatches sonnet.
    - NO override (session pin): the runner maps the pin through a tier
      (opus->planning, sonnet->development, haiku->fast), resolves the tier
      through PROVIDER_MODEL_*, then applies the cost-ceiling clamp, so
      `effective` comes from _resolve_session_pin. A "sonnet" pin dispatches
      OPUS on stock config (development tier -> PROVIDER_MODEL_DEVELOPMENT).

    Both routes resolve through the same provider config the runner uses
    (LOKI_ALLOW_HAIKU plus the LOKI_CLAUDE_MODEL_PLANNING/FAST/DEVELOPMENT and
    LOKI_MODEL_* overrides) and the same LOKI_MAX_TIER ceiling, mirroring
    providers/claude.sh. The estimator == dashboard == runner agreement on both
    routes is locked by the cross-route cases and the session-pin parity matrix
    in tests/test-model-override.sh. (Before task 568 the no-override path
    applied the override-path clamp to the pin, so a stock "sonnet" pin
    reported "sonnet" while the run dispatched opus.)

    KNOWN LIMITATION (cross-process env divergence): LOKI_MAX_TIER,
    LOKI_ALLOW_HAIKU, LOKI_SESSION_MODEL and the model-override env vars are
    read from the DASHBOARD process's environment, which is usually a different
    process than the live run. If the run was launched with a different
    environment, the no-override `default`/`effective` may not reflect the
    run's real pin or ceiling. The override alias itself is read from the run's
    own state file, so it is always accurate.
    """
    override = None
    try:
        p = _model_override_path()
        if p.is_file():
            # Decode explicitly as UTF-8 so the result does not depend on the
            # dashboard's locale; the allowlisted aliases are plain ASCII.
            override = _normalize_session_model(p.read_text(encoding="utf-8")) or None
    except (OSError, UnicodeDecodeError):
        # Best-effort read: an unreadable OR undecodable file means "no
        # override". UnicodeDecodeError is a ValueError, not an OSError, so it
        # must be listed explicitly or a corrupt file would surface as a 500.
        override = None
    # Session pin accepts tier names too (run.sh:12336), so use the broader
    # session-pin normalizer here (NOT the narrow override allowlist).
    default = _normalize_session_pin(os.environ.get("LOKI_SESSION_MODEL")) or "sonnet"
    # Resolve on the route the runner will actually take: override-path clamp
    # when an override file is present, session-pin tier route otherwise.
    if override is not None:
        effective = _clamp_to_max_tier(override)
    else:
        effective = _resolve_session_pin(default)
    return {
        "override": override,
        "default": default,
        "effective": effective,
        "allowed": list(_SESSION_MODEL_ALLOWLIST),
    }
2454
+
2455
+
2456
@app.post("/api/session/model", dependencies=[Depends(auth.require_scope("control"))])
async def set_session_model(request: SessionModelRequest):
    """Set (or clear) the model a live Loki run uses, from the NEXT iteration.

    The run reads .loki/state/model-override at the top of each iteration, so a
    switch takes effect when the current iteration finishes and the next
    `claude -p` is spawned (the model is fixed per invocation). The override
    applies to the CURRENT run only: the runner clears a leftover override at
    the start of a fresh run, so a switch does not persist into future runs.

    Body {"model": null} or {"model": ""} clears the override and reverts to
    the tier mapping. Any other value is trimmed, lowercased and validated
    against the allowlist server-side, because the file is fed straight into
    `claude --model`; arbitrary strings are rejected.

    Returns:
        On set: {"model": <alias>, "effective": <model>, "clamped": <bool>},
        where `effective` is the model the next iteration will use after the
        LOKI_MAX_TIER cost ceiling is applied and `clamped` is True when the
        ceiling changed the requested model.
        On clear: {"model": None, "effective": "next_iteration",
        "clamped": False}.

    Raises:
        HTTPException: 400 when the model is not allowlisted; 500 when the
            override file cannot be removed or written.
    """
    requested_raw = (request.model or "").strip().lower()
    override_path = _model_override_path()
    if requested_raw == "":
        # Clear the override; revert to tier mapping.
        try:
            # missing_ok makes the clear idempotent and race-free: the runner
            # may remove the file itself between a separate existence check
            # and the unlink, which would otherwise surface as a spurious 500.
            override_path.unlink(missing_ok=True)
        except OSError as exc:
            raise HTTPException(
                status_code=500, detail=f"Could not clear override: {exc}"
            ) from exc
        return {"model": None, "effective": "next_iteration", "clamped": False}
    model = _normalize_session_model(requested_raw)
    if not model:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid model '{request.model}'. Allowed: {', '.join(_SESSION_MODEL_ALLOWLIST)}",
        )
    try:
        override_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding keeps the bytes independent of the dashboard's
        # locale; the allowlisted aliases are plain ASCII.
        override_path.write_text(model + "\n", encoding="utf-8")
    except OSError as exc:
        raise HTTPException(
            status_code=500, detail=f"Could not write override: {exc}"
        ) from exc
    # Report what the run will really dispatch once the cost ceiling applies.
    effective = _clamp_to_max_tier(model)
    return {"model": model, "effective": effective, "clamped": effective != model}
2499
+
2500
+
2205
2501
  @app.get("/api/running-projects")
2206
2502
  async def list_running_projects():
2207
2503
  """List registered projects enriched with live status for the dashboard
@@ -4389,6 +4685,9 @@ async def stop_session(request: Request):
4389
4685
  # At runtime, overridden by .loki/pricing.json if available
4390
4686
  _DEFAULT_PRICING = {
4391
4687
  # Claude (Anthropic)
4688
+ # Fable 5 is the top-tier advisory model at exactly 2x Opus per token.
4689
+ "fable": {"input": 10.00, "output": 50.00},
4690
+ "claude-fable-5": {"input": 10.00, "output": 50.00},
4392
4691
  "opus": {"input": 5.00, "output": 25.00},
4393
4692
  "sonnet": {"input": 3.00, "output": 15.00},
4394
4693
  "haiku": {"input": 1.00, "output": 5.00},
@@ -7997,4 +8296,30 @@ def run_server(host: str = None, port: int = None) -> None:
7997
8296
 
7998
8297
 
7999
8298
if __name__ == "__main__":
    # Direct module launch (python -m dashboard.server --port N): honor explicit
    # --port/--host flags. The supported `loki dashboard start` path sets
    # LOKI_DASHBOARD_PORT in the environment and passes NO argv flags, so it is
    # unaffected. Previously --port was silently accepted and discarded, which
    # bound the default 57374 and risked colliding with another project's
    # dashboard; an unknown flag now fails loudly via argparse (exit 2).
    import argparse

    parser = argparse.ArgumentParser(
        prog="python -m dashboard.server",
        description=(
            "Loki Mode dashboard server. The supported launcher is "
            "'loki dashboard start' (which uses LOKI_DASHBOARD_PORT / "
            "LOKI_DASHBOARD_HOST); these flags are for direct module launches."
        ),
    )
    # Both flags default to None so run_server receives None when a flag is
    # omitted, exactly as a bare run_server() call would pass.
    parser.add_argument("--port", type=int, default=None, help="Port to bind (default: $LOKI_DASHBOARD_PORT or 57374).")
    parser.add_argument("--host", default=None, help="Host to bind (default: $LOKI_DASHBOARD_HOST or 127.0.0.1).")
    _args = parser.parse_args()
    run_server(host=_args.host, port=_args.port)