loki-mode 7.30.0 → 7.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2159,6 +2159,21 @@ class FocusRequest(BaseModel):
2159
2159
  project_dir: str
2160
2160
 
2161
2161
 
2162
# Aliases a live run is allowed to switch to mid-flight. The override file is
# passed verbatim to `claude --model`, so this tuple MUST remain identical to
# the read-side allowlist in run.sh. "fable" is the top-tier advisory model
# (2x Opus cost, which the UI surfaces). A `None`/empty request clears the
# override instead of selecting an alias.
_SESSION_MODEL_ALLOWLIST = (
    "haiku",
    "sonnet",
    "opus",
    "fable",
)
2167
+
2168
+
2169
class SessionModelRequest(BaseModel):
    """Request body used to set, or clear, the live run's model override."""

    # Pydantic protects the "model_" namespace by default. Emptying
    # protected_namespaces ensures the field literally named "model" below
    # cannot trigger the protected-namespace warning.
    model_config = ConfigDict(protected_namespaces=())

    # Requested model alias; None (the default) carries no alias.
    model: str | None = None
2175
+
2176
+
2162
2177
  @app.post("/api/focus", dependencies=[Depends(auth.require_scope("control"))])
2163
2178
  async def set_focus(request: FocusRequest):
2164
2179
  """Set the active project directory for .loki/ resolution.
@@ -2202,6 +2217,187 @@ async def clear_focus():
2202
2217
  return {"project_dir": None, "loki_dir": str(_get_loki_dir())}
2203
2218
 
2204
2219
 
2220
def _model_override_path() -> _Path:
    """Return the project-scoped location of the mid-flight model override.

    The file is ``state/model-override`` inside the directory returned by
    ``_get_loki_dir()``.
    """
    state_dir = _get_loki_dir() / "state"
    return state_dir / "model-override"
2223
+
2224
+
2225
def _normalize_session_model(raw: str | None) -> str:
    """Reduce a raw model value to an allowlisted alias, or "" if it is not one.

    This is the canonical normalization shared with run.sh and the estimator:
    surrounding whitespace is trimmed, the value is lowercased, and only an
    exact member of ``_SESSION_MODEL_ALLOWLIST`` is accepted. Anything else,
    including ``None``, an empty string, or a value with interior whitespace
    such as "fab le", yields "" so that the dashboard, the runner, and the
    estimator agree on what a value means.
    """
    candidate = "" if not raw else raw
    candidate = candidate.strip().lower()
    if candidate in _SESSION_MODEL_ALLOWLIST:
        return candidate
    return ""
2234
+
2235
+
2236
+ # Provider-config model resolution mirror.
2237
+ #
2238
+ # SYNC: This is a byte-faithful python port of the claude provider's tier->model
2239
+ # resolution in providers/claude.sh (CLAUDE_DEFAULT_FAST / CLAUDE_DEFAULT_DEVELOPMENT
2240
+ # and the PROVIDER_MODEL_FAST / PROVIDER_MODEL_DEVELOPMENT resolution chains,
2241
+ # claude.sh:55-67) plus loki_apply_max_tier_clamp (claude.sh:318). The same port
2242
+ # also lives in the `loki plan` estimator (autonomy/loki, _provider_model_fast /
2243
+ # _provider_model_development / _loki_clamp_alias). All three readers MUST agree;
2244
+ # the agreement is locked by the parity test in tests/test-model-override.sh
2245
+ # ("resolver parity matrix") and the cross-route tests in test-plan-command.sh.
2246
+ # If you change resolution here, change it in claude.sh AND autonomy/loki, and
2247
+ # re-run those tests. The `or` chains mirror bash `:-` empty-string-fallthrough;
2248
+ # allow_haiku uses an exact "true" match to mirror bash `[ "$x" = "true" ]`.
2249
def _allow_haiku() -> bool:
    """Report whether LOKI_ALLOW_HAIKU is set to exactly "true".

    An unset or empty variable is treated as "false"; the comparison is
    case-sensitive, so "TRUE" or "1" do not enable haiku.
    """
    flag = os.environ.get("LOKI_ALLOW_HAIKU", "false")
    if not flag:
        flag = "false"
    return flag == "true"
2251
+
2252
+
2253
def _provider_model_fast() -> str:
    """Resolve the model configured for the provider's fast tier.

    Precedence (claude.sh:67): LOKI_CLAUDE_MODEL_FAST, then LOKI_MODEL_FAST,
    then the haiku-aware default ("haiku" when ``_allow_haiku()`` is true,
    otherwise "sonnet"). An unset and an empty variable both fall through to
    the next candidate.
    """
    for var in ("LOKI_CLAUDE_MODEL_FAST", "LOKI_MODEL_FAST"):
        configured = os.environ.get(var)
        if configured:
            return configured
    if _allow_haiku():
        return "haiku"
    return "sonnet"
2260
+
2261
+
2262
def _provider_model_development() -> str:
    """Resolve the model configured for the provider's development tier.

    Precedence (claude.sh:66): LOKI_CLAUDE_MODEL_DEVELOPMENT, then
    LOKI_MODEL_DEVELOPMENT, then the default ("sonnet" when ``_allow_haiku()``
    is true, otherwise "opus"). An unset and an empty variable both fall
    through to the next candidate.
    """
    for var in ("LOKI_CLAUDE_MODEL_DEVELOPMENT", "LOKI_MODEL_DEVELOPMENT"):
        configured = os.environ.get(var)
        if configured:
            return configured
    if _allow_haiku():
        return "sonnet"
    return "opus"
2269
+
2270
+
2271
def _clamp_to_max_tier(alias: str) -> str:
    """Return the model `alias` resolves to under the LOKI_MAX_TIER ceiling.

    This mirrors providers/claude.sh loki_apply_max_tier_clamp and resolves
    the result through the same provider configuration the runner uses, so
    the dashboard's reported `effective` model matches what the run will
    dispatch when a cost ceiling is set:

    * no ceiling, or an unrecognized one: `alias` is returned unchanged;
    * "haiku": everything is pinned to PROVIDER_MODEL_FAST (sonnet by default,
      haiku when LOKI_ALLOW_HAIKU=true);
    * "sonnet": only fable is lowered, to PROVIDER_MODEL_DEVELOPMENT (opus by
      default, sonnet when LOKI_ALLOW_HAIKU=true);
    * "opus": only fable is lowered, to "opus".

    The LOKI_CLAUDE_MODEL_FAST/DEVELOPMENT and LOKI_MODEL_FAST/DEVELOPMENT
    overrides are honored through the provider helpers. The function is
    called with the alias standing in for both model and tier (the
    override-path convention), matching the run.sh mid-flight override clamp.
    The ceiling value is trimmed and lowercased before comparison.
    """
    ceiling = (os.environ.get("LOKI_MAX_TIER") or "").strip().lower()

    # A haiku ceiling overrides every alias, so handle it before anything else.
    if ceiling == "haiku":
        return _provider_model_fast()

    # Every remaining ceiling only ever lowers fable. The runner's sonnet arm
    # downgrades iff tier/model is planning or fable, which collapses to
    # "alias == fable" when the alias is used as both.
    if alias != "fable":
        return alias
    if ceiling == "sonnet":
        return _provider_model_development()
    if ceiling == "opus":
        return "opus"
    return alias
2298
+
2299
+
2300
@app.get("/api/session/model", dependencies=[Depends(auth.require_scope("read"))])
async def get_session_model():
    """Report the live run's model override and the effective default.

    Returns a dict with:

    * ``override``: the alias currently written to .loki/state/model-override,
      or None when no valid override is active (missing, unreadable,
      undecodable, or non-allowlisted file content all count as "none").
    * ``default``: the session model the run falls back to when there is no
      override (LOKI_SESSION_MODEL if it is an allowlisted alias, else
      "sonnet").
    * ``effective``: the model the next iteration will actually use, after the
      LOKI_MAX_TIER cost ceiling is applied, so the dashboard never reports a
      model the run would clamp down.
    * ``allowed``: the allowlisted aliases, in declaration order.

    The clamp resolves through the SAME provider config the runner uses
    (LOKI_ALLOW_HAIKU plus the LOKI_CLAUDE_MODEL_FAST/DEVELOPMENT and
    LOKI_MODEL_FAST/DEVELOPMENT overrides): _clamp_to_max_tier mirrors
    providers/claude.sh loki_apply_max_tier_clamp (locked by the resolver
    parity matrix in tests/test-model-override.sh). So for the OVERRIDE case,
    the feature this endpoint exists for, the reported `effective` model
    equals the model the runner's mid-flight override path dispatches, given
    the same environment.

    KNOWN LIMITATION (cross-process env divergence): the resolution reads
    LOKI_MAX_TIER, LOKI_ALLOW_HAIKU, LOKI_SESSION_MODEL and the model-override
    env vars from the DASHBOARD process's environment, which is usually a
    different process than the live run. If the run was launched with a
    different environment than the dashboard, the no-override
    `default`/`effective` may not reflect the run's real pinned tier or
    ceiling (e.g. a run pinned to opus still reads "sonnet" here). The
    override case reads the run's own state file, so its alias is always
    accurate and the clamp is exact whenever the dashboard shares the run's
    environment.

    SCOPE NOTE (no-override default path): when there is no override,
    `effective` applies the override-path clamp to the session default. The
    runner's no-override route instead maps a session pin through a tier
    (resolve_model_for_tier: opus->planning, sonnet->development), which can
    differ from the override-path clamp in one cell (e.g. an opus pin under
    sonnet cap + LOKI_ALLOW_HAIKU: the tier route yields sonnet, the
    override-path clamp yields opus). That session-pin modeling gap is
    pre-existing and out of scope here; the override case this endpoint
    serves is exact.
    """
    override = None
    try:
        p = _model_override_path()
        if p.is_file():
            # Decode explicitly and leniently: a corrupt or non-text override
            # file must read as "no override" rather than raising
            # UnicodeDecodeError (which `except OSError` would not catch) and
            # failing this read-only endpoint. Replacement characters can
            # never match an allowlisted alias, so garbage normalizes to "".
            raw = p.read_text(encoding="utf-8", errors="replace")
            override = _normalize_session_model(raw) or None
    except OSError:
        # Best-effort read: an unreadable state file is reported as no override.
        override = None
    default = _normalize_session_model(os.environ.get("LOKI_SESSION_MODEL")) or "sonnet"
    effective = _clamp_to_max_tier(override or default)
    return {
        "override": override,
        "default": default,
        "effective": effective,
        "allowed": list(_SESSION_MODEL_ALLOWLIST),
    }
2354
+
2355
+
2356
@app.post("/api/session/model", dependencies=[Depends(auth.require_scope("control"))])
async def set_session_model(request: SessionModelRequest):
    """Set (or clear) the model a live Loki run uses, applied from the NEXT
    iteration boundary.

    The run reads .loki/state/model-override at the top of each iteration, so a
    switch takes effect when the current iteration finishes and the next
    `claude -p` is spawned (the model is fixed per invocation). The override
    applies to the CURRENT run only: the runner clears a leftover override at
    the start of a fresh run, so a switch does not persist into future runs.
    Body {"model": null} or {"model": ""} clears the override and reverts to
    the tier mapping. The value is allowlist-validated server-side because the
    file is fed straight into `claude --model`; arbitrary strings are rejected.

    Returns:
        On set: ``{"model": <alias>, "effective": <model>, "clamped": <bool>}``
        where `effective` is the model the next iteration will actually use
        after the LOKI_MAX_TIER cost ceiling is applied (e.g. a fable override
        under a sonnet ceiling reports the clamped model) and `clamped` is
        True when the ceiling reduced the requested model.
        On clear: ``{"model": None, "effective": "next_iteration",
        "clamped": False}``.

    Raises:
        HTTPException: 400 when the requested value is not an allowlisted
            alias; 500 when the override file cannot be removed or written.
    """
    requested_raw = (request.model or "").strip().lower()
    override_path = _model_override_path()
    if requested_raw == "":
        # Clear the override; revert to tier mapping.
        try:
            # missing_ok avoids the exists()/unlink() race: the runner may
            # remove the file on its own, and "already gone" is a successful
            # clear, not a server error.
            override_path.unlink(missing_ok=True)
        except OSError as exc:
            raise HTTPException(
                status_code=500, detail=f"Could not clear override: {exc}"
            ) from exc
        return {"model": None, "effective": "next_iteration", "clamped": False}
    model = _normalize_session_model(requested_raw)
    if not model:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid model '{request.model}'. Allowed: {', '.join(_SESSION_MODEL_ALLOWLIST)}",
        )
    try:
        override_path.parent.mkdir(parents=True, exist_ok=True)
        # The alias is plain ASCII; pin the encoding so the bytes on disk do
        # not depend on the dashboard process's locale.
        override_path.write_text(model + "\n", encoding="utf-8")
    except OSError as exc:
        raise HTTPException(
            status_code=500, detail=f"Could not write override: {exc}"
        ) from exc
    effective = _clamp_to_max_tier(model)
    return {"model": model, "effective": effective, "clamped": effective != model}
2399
+
2400
+
2205
2401
  @app.get("/api/running-projects")
2206
2402
  async def list_running_projects():
2207
2403
  """List registered projects enriched with live status for the dashboard
@@ -4389,6 +4585,9 @@ async def stop_session(request: Request):
4389
4585
  # At runtime, overridden by .loki/pricing.json if available
4390
4586
  _DEFAULT_PRICING = {
4391
4587
  # Claude (Anthropic)
4588
+ # Fable 5 is the top-tier advisory model at exactly 2x Opus per token.
4589
+ "fable": {"input": 10.00, "output": 50.00},
4590
+ "claude-fable-5": {"input": 10.00, "output": 50.00},
4392
4591
  "opus": {"input": 5.00, "output": 25.00},
4393
4592
  "sonnet": {"input": 3.00, "output": 15.00},
4394
4593
  "haiku": {"input": 1.00, "output": 5.00},
@@ -7997,4 +8196,30 @@ def run_server(host: str = None, port: int = None) -> None:
7997
8196
 
7998
8197
 
7999
8198
if __name__ == "__main__":
    # Direct module launch (python -m dashboard.server --port N): honor an
    # explicit --port/--host. The supported `loki dashboard start` path sets
    # LOKI_DASHBOARD_PORT in the environment and passes NO argv flags, so it is
    # unaffected. Previously --port was silently accepted and discarded,
    # binding the default 57374 and risking a collision with another project's
    # dashboard; now an unknown flag fails loudly via argparse (exit 2).
    import argparse

    _cli = argparse.ArgumentParser(
        prog="python -m dashboard.server",
        description=(
            "Loki Mode dashboard server. "
            "The supported launcher is 'loki dashboard start' "
            "(which uses LOKI_DASHBOARD_PORT / LOKI_DASHBOARD_HOST); "
            "these flags are for direct module launches."
        ),
    )
    # Both flags default to None so run_server() can apply its own defaults.
    _cli.add_argument(
        "--port",
        type=int,
        default=None,
        help="Port to bind (default: $LOKI_DASHBOARD_PORT or 57374).",
    )
    _cli.add_argument(
        "--host",
        default=None,
        help="Host to bind (default: $LOKI_DASHBOARD_HOST or 127.0.0.1).",
    )
    _args = _cli.parse_args()
    run_server(host=_args.host, port=_args.port)