vskill 0.5.84 → 0.5.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,9 @@ import { testActivation } from "../eval/activation-tester.js";
28
28
  import { detectMcpDependencies, detectSkillDependencies } from "../eval/mcp-detector.js";
29
29
  import { writeActivationRun, listActivationRuns, getActivationRun } from "../eval/activation-history.js";
30
30
  import { AGENTS_REGISTRY, detectInstalledAgents } from "../agents/agents-registry.js";
31
+ import { resolveOllamaBaseUrl } from "../eval/env.js";
32
+ import * as settingsStore from "./settings-store.js";
33
+ import { loadStudioSelection, saveStudioSelection } from "./studio-json.js";
31
34
  /**
32
35
  * Build the response for GET /api/agents/installed.
33
36
  * Returns all known agents with installed flag based on detected agents.
@@ -299,22 +302,44 @@ const PROVIDER_MODELS = {
299
302
  { id: "meta-llama/llama-3.1-70b-instruct", label: "Llama 3.1 70B" },
300
303
  { id: "google/gemini-2.5-pro", label: "Gemini 2.5 Pro (via OpenRouter)" },
301
304
  ],
305
+ // LM Studio's default model list is empty because the actual list depends on
306
+ // what models the user has loaded. The probe at probeLmStudio() populates
307
+ // this dynamically from GET /v1/models.
308
+ "lm-studio": [],
302
309
  };
303
310
  // ---------------------------------------------------------------------------
304
- // Ollama detection cache — avoids 500ms+ probe on every /api/config request.
305
- // Without this, page load blocks on a 2s timeout when Ollama is not running.
311
+ // Local provider detection caches — avoid 500ms+ probes on every /api/config
312
+ // request. Without the caches, page load blocks on the timeout when the
313
+ // local server is not running. TTL is 30s to balance freshness with latency.
314
+ //
315
+ // Both Ollama and LM Studio share the same TTL and silent-failure semantics
316
+ // (probe → non-2xx / throw → `available: false`, no log above debug).
317
+ //
318
+ // Follow-up (out of scope for 0677): Ollama's upstream standard env var is
319
+ // OLLAMA_HOST but this codebase uses OLLAMA_BASE_URL. Do not change here;
320
+ // tracked separately.
306
321
  // ---------------------------------------------------------------------------
322
+ const PROBE_CACHE_TTL = 30_000; // re-probe every 30s
307
323
  let ollamaCache = null;
308
- const OLLAMA_CACHE_TTL = 30_000; // re-probe every 30s
324
+ let lmStudioCache = null;
325
+ export const OPENROUTER_CACHE = new Map();
326
+ export function resetOpenRouterCache() {
327
+ OPENROUTER_CACHE.clear();
328
+ }
329
+ /** Test hook: clear all probe caches so the next detectAvailableProviders() re-probes. */
330
+ export function resetDetectionCache() {
331
+ ollamaCache = null;
332
+ lmStudioCache = null;
333
+ }
309
334
  async function probeOllama() {
310
335
  const now = Date.now();
311
- if (ollamaCache && now - ollamaCache.ts < OLLAMA_CACHE_TTL) {
336
+ if (ollamaCache && now - ollamaCache.ts < PROBE_CACHE_TTL) {
312
337
  return ollamaCache;
313
338
  }
314
339
  let models = PROVIDER_MODELS["ollama"];
315
340
  let available = false;
316
341
  try {
317
- const baseUrl = process.env.OLLAMA_BASE_URL || "http://localhost:11434";
342
+ const baseUrl = resolveOllamaBaseUrl(process.env);
318
343
  const resp = await fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(500) });
319
344
  if (resp.ok) {
320
345
  available = true;
@@ -328,37 +353,128 @@ async function probeOllama() {
328
353
  ollamaCache = { available, models, ts: now };
329
354
  return ollamaCache;
330
355
  }
331
- async function detectAvailableProviders() {
356
+ // ---------------------------------------------------------------------------
357
+ // probeLmStudio — hits GET <base>/models to detect LM Studio and populate the
358
+ // model list from the server's loaded models. Mirrors the Ollama pattern:
359
+ // 500ms AbortSignal timeout, 30s in-memory cache, silent failure on any
360
+ // exception. Base URL is overridable via LM_STUDIO_BASE_URL.
361
+ // ---------------------------------------------------------------------------
362
+ async function probeLmStudio() {
363
+ const now = Date.now();
364
+ if (lmStudioCache && now - lmStudioCache.ts < PROBE_CACHE_TTL) {
365
+ return lmStudioCache;
366
+ }
367
+ let models = PROVIDER_MODELS["lm-studio"];
368
+ let available = false;
369
+ try {
370
+ const baseUrl = process.env.LM_STUDIO_BASE_URL || "http://localhost:1234/v1";
371
+ const resp = await fetch(`${baseUrl}/models`, { signal: AbortSignal.timeout(500) });
372
+ if (resp.ok) {
373
+ available = true;
374
+ const data = await resp.json();
375
+ if (data.data?.length) {
376
+ models = data.data.map((m) => ({ id: m.id, label: m.id }));
377
+ }
378
+ }
379
+ }
380
+ catch { /* lm studio not running */ }
381
+ lmStudioCache = { available, models, ts: now };
382
+ return lmStudioCache;
383
+ }
384
+ const DETECTION_WRAPPER_FOLDERS = [
385
+ ".claude",
386
+ ".cursor",
387
+ ".codex",
388
+ ".gemini",
389
+ ".github",
390
+ ".zed",
391
+ ".specweave",
392
+ ];
393
+ const DETECTION_BINARIES = ["claude", "cursor", "codex", "gemini"];
394
+ let detectionCache = null;
395
+ const DETECTION_CACHE_TTL = 30_000;
396
+ export function resetProjectDetectionCache() {
397
+ detectionCache = null;
398
+ }
399
+ /**
400
+ * Scan the project root for known agent wrapper folders and the system
401
+ * PATH for known agent binaries. Cheap synchronous scan (`existsSync` +
402
+ * `which`) cached for 30 s so repeated `/api/config` polls don't burn CPU.
403
+ */
404
+ export function detectProjectAgents(root) {
405
+ const now = Date.now();
406
+ if (detectionCache && now - detectionCache.ts < DETECTION_CACHE_TTL) {
407
+ return detectionCache.data;
408
+ }
409
+ const wrapperFolders = {};
410
+ for (const folder of DETECTION_WRAPPER_FOLDERS) {
411
+ try {
412
+ wrapperFolders[folder] = existsSync(join(root, folder));
413
+ }
414
+ catch {
415
+ wrapperFolders[folder] = false;
416
+ }
417
+ }
418
+ const binaries = {};
419
+ for (const bin of DETECTION_BINARIES) {
420
+ binaries[bin] = isBinaryOnPath(bin);
421
+ }
422
+ const data = { wrapperFolders, binaries };
423
+ detectionCache = { data, ts: now };
424
+ return data;
425
+ }
426
+ function isBinaryOnPath(name) {
427
+ try {
428
+ const cmd = process.platform === "win32" ? `where ${name}` : `command -v ${name}`;
429
+ execSync(cmd, { stdio: "ignore", timeout: 1000 });
430
+ return true;
431
+ }
432
+ catch {
433
+ return false;
434
+ }
435
+ }
436
+ export async function detectAvailableProviders() {
332
437
  const providers = [];
333
- // Claude CLI — always available for the eval server (runs in a separate terminal)
438
+ // Claude CLI — delegates to the `claude` binary; the CLI owns session auth.
439
+ // See src/eval/llm.ts createClaudeCliClient compliance doc-block.
334
440
  providers.push({
335
441
  id: "claude-cli",
336
- label: "Claude (Max/Pro subscription)",
442
+ label: "Use current Claude Code session",
337
443
  available: true,
338
444
  models: PROVIDER_MODELS["claude-cli"],
339
445
  });
340
- // Anthropic API — available if ANTHROPIC_API_KEY is set
446
+ // Anthropic API — available if ANTHROPIC_API_KEY is set OR a key is in the
447
+ // settings-store (browser tier or Darwin keychain).
341
448
  providers.push({
342
449
  id: "anthropic",
343
- label: "Anthropic API (requires key)",
344
- available: !!process.env.ANTHROPIC_API_KEY,
450
+ label: "Anthropic API",
451
+ available: !!process.env.ANTHROPIC_API_KEY ||
452
+ settingsStore.hasKeySync("anthropic"),
345
453
  models: PROVIDER_MODELS["anthropic"],
346
454
  });
347
- // OpenRouter — available if OPENROUTER_API_KEY is set
455
+ // OpenRouter — available if OPENROUTER_API_KEY is set OR a key is stored.
348
456
  providers.push({
349
457
  id: "openrouter",
350
- label: "OpenRouter (100+ models, requires key)",
351
- available: !!process.env.OPENROUTER_API_KEY,
458
+ label: "OpenRouter",
459
+ available: !!process.env.OPENROUTER_API_KEY ||
460
+ settingsStore.hasKeySync("openrouter"),
352
461
  models: PROVIDER_MODELS["openrouter"],
353
462
  });
354
- // Ollama — cached probe (500ms timeout, refreshes every 30s)
355
- const ollama = await probeOllama();
463
+ // Local providers (Ollama + LM Studio) — cached probes fired in parallel so
464
+ // total detection time stays ≤ 550ms even if both time out.
465
+ const [ollama, lmStudio] = await Promise.all([probeOllama(), probeLmStudio()]);
356
466
  providers.push({
357
467
  id: "ollama",
358
468
  label: "Ollama (local, free)",
359
469
  available: ollama.available,
360
470
  models: ollama.models,
361
471
  });
472
+ providers.push({
473
+ id: "lm-studio",
474
+ label: "LM Studio (local, free)",
475
+ available: lmStudio.available,
476
+ models: lmStudio.models,
477
+ });
362
478
  return providers;
363
479
  }
364
480
  export function registerRoutes(router, root, projectName) {
@@ -401,19 +517,41 @@ export function registerRoutes(router, root, projectName) {
401
517
  req.on("close", cleanup);
402
518
  req.on("aborted", cleanup);
403
519
  });
404
- // OpenRouter model search proxy
520
+ // OpenRouter model search proxy — 10-minute in-memory cache keyed by the
521
+ // last-8 chars of the API key so different keys don't collide while the
522
+ // key itself is never stored in the cache map. Stale cache served (with
523
+ // X-Vskill-Catalog-Age header) when upstream is down.
405
524
  router.get("/api/openrouter/models", async (_req, res) => {
406
- const apiKey = process.env.OPENROUTER_API_KEY;
525
+ const envKey = process.env.OPENROUTER_API_KEY;
526
+ const storedKey = settingsStore.readKeySync("openrouter");
527
+ const apiKey = envKey || storedKey;
407
528
  if (!apiKey) {
408
529
  sendJson(res, { error: "OPENROUTER_API_KEY not configured" }, 400);
409
530
  return;
410
531
  }
532
+ const cacheKey = apiKey.slice(-8);
533
+ const now = Date.now();
534
+ const cached = OPENROUTER_CACHE.get(cacheKey);
535
+ const CACHE_TTL_MS = 600_000; // 10 min
536
+ // Fresh cache hit — serve immediately without upstream.
537
+ if (cached && now - cached.fetchedAt < CACHE_TTL_MS) {
538
+ const ageSec = Math.floor((now - cached.fetchedAt) / 1000);
539
+ res.setHeader?.("X-Vskill-Catalog-Age", String(ageSec));
540
+ sendJson(res, { models: cached.value, ageSec });
541
+ return;
542
+ }
411
543
  try {
412
544
  const resp = await fetch("https://openrouter.ai/api/v1/models", {
413
545
  headers: { "Authorization": `Bearer ${apiKey}` },
414
546
  signal: AbortSignal.timeout(10_000),
415
547
  });
416
548
  if (!resp.ok) {
549
+ if (cached) {
550
+ const ageSec = Math.floor((now - cached.fetchedAt) / 1000);
551
+ res.setHeader?.("X-Vskill-Catalog-Age", String(ageSec));
552
+ sendJson(res, { models: cached.value, ageSec, stale: true });
553
+ return;
554
+ }
417
555
  sendJson(res, { error: `OpenRouter API returned ${resp.status}` }, 502);
418
556
  return;
419
557
  }
@@ -421,40 +559,122 @@ export function registerRoutes(router, root, projectName) {
421
559
  const models = (data.data || []).map((m) => ({
422
560
  id: m.id,
423
561
  name: m.name || m.id,
562
+ contextWindow: typeof m.context_length === "number" ? m.context_length : undefined,
424
563
  pricing: {
425
564
  prompt: parseFloat(m.pricing?.prompt || "0"),
426
565
  completion: parseFloat(m.pricing?.completion || "0"),
427
566
  },
428
567
  }));
429
- sendJson(res, { models });
568
+ OPENROUTER_CACHE.set(cacheKey, { value: models, fetchedAt: now });
569
+ res.setHeader?.("X-Vskill-Catalog-Age", "0");
570
+ sendJson(res, { models, ageSec: 0 });
430
571
  }
431
572
  catch (err) {
573
+ if (cached) {
574
+ const ageSec = Math.floor((now - cached.fetchedAt) / 1000);
575
+ res.setHeader?.("X-Vskill-Catalog-Age", String(ageSec));
576
+ sendJson(res, { models: cached.value, ageSec, stale: true });
577
+ return;
578
+ }
432
579
  sendJson(res, { error: err.message }, 500);
433
580
  }
434
581
  });
582
+ // Settings / API key endpoints (0682 — US-004).
583
+ // Keys live on-device only. Never logged, never synced, never returned
584
+ // through GET. Response includes only metadata (stored, updatedAt, tier).
585
+ router.get("/api/settings/keys", async (_req, res) => {
586
+ sendJson(res, settingsStore.listKeys());
587
+ });
588
+ router.post("/api/settings/keys", async (req, res) => {
589
+ // Reject any request that smuggles the key in a query-string — JSON body only.
590
+ const url = req.url || "";
591
+ if (/[?&]key=/.test(url)) {
592
+ sendJson(res, { error: "key must not appear in query string" }, 400);
593
+ return;
594
+ }
595
+ const body = (await readBody(req));
596
+ if (!body.key || typeof body.key !== "string" || body.key.trim().length === 0) {
597
+ sendJson(res, { error: "key must be non-empty string" }, 400);
598
+ return;
599
+ }
600
+ if (body.provider !== "anthropic" && body.provider !== "openrouter") {
601
+ sendJson(res, { error: `unknown provider: ${String(body.provider)}` }, 400);
602
+ return;
603
+ }
604
+ try {
605
+ const saved = await settingsStore.saveKey(body.provider, body.key.trim(), body.tier ?? "browser");
606
+ // Prefix hint — non-blocking, purely informational
607
+ let warning;
608
+ if (body.provider === "anthropic" && !body.key.startsWith("sk-ant-")) {
609
+ warning = "key doesn't match typical Anthropic prefix sk-ant-";
610
+ }
611
+ else if (body.provider === "openrouter" && !body.key.startsWith("sk-or-")) {
612
+ warning = "key doesn't match typical OpenRouter prefix sk-or-";
613
+ }
614
+ sendJson(res, {
615
+ ok: true,
616
+ updatedAt: saved.updatedAt,
617
+ tier: saved.tier,
618
+ available: true,
619
+ ...(warning ? { warning } : {}),
620
+ });
621
+ }
622
+ catch (err) {
623
+ sendJson(res, { error: err.message }, 500);
624
+ }
625
+ });
626
+ router.delete("/api/settings/keys/:provider", async (req, res) => {
627
+ const provider = req.params?.provider;
628
+ if (provider !== "anthropic" && provider !== "openrouter") {
629
+ sendJson(res, { error: `unknown provider: ${String(provider)}` }, 400);
630
+ return;
631
+ }
632
+ await settingsStore.removeKey(provider);
633
+ sendJson(res, { ok: true });
634
+ });
435
635
  // Config — expose current provider/model + available providers + project
436
636
  // IMPORTANT: Return raw model IDs (e.g. "sonnet"), NOT display models
437
637
  // (e.g. "claude-sonnet"). The frontend round-trips config.model back to
438
638
  // generate-evals and other endpoints, so it must be a valid CLI model ID.
439
639
  router.get("/api/config", async (_req, res) => {
640
+ // On first load (no currentOverrides), try to restore from .vskill/studio.json.
641
+ if (!currentOverrides.provider) {
642
+ const stored = loadStudioSelection(root);
643
+ if (stored) {
644
+ currentOverrides.provider = stored.activeAgent;
645
+ if (stored.activeModel)
646
+ currentOverrides.model = stored.activeModel;
647
+ }
648
+ }
440
649
  try {
441
650
  // Validate the client can be created (catches missing API keys etc.)
442
651
  getClient();
443
652
  const providers = await detectAvailableProviders();
653
+ const detection = detectProjectAgents(root);
444
654
  sendJson(res, {
445
655
  provider: currentOverrides.provider || null,
446
656
  model: getEffectiveRawModel(),
447
657
  providers,
658
+ detection,
448
659
  projectName: projectName || null,
449
660
  root,
450
661
  });
451
662
  }
452
663
  catch (err) {
453
664
  const providers = await detectAvailableProviders().catch(() => []);
454
- sendJson(res, { provider: null, model: "unknown", error: err.message, providers, projectName: projectName || null, root });
665
+ const detection = detectProjectAgents(root);
666
+ sendJson(res, {
667
+ provider: null,
668
+ model: "unknown",
669
+ error: err.message,
670
+ providers,
671
+ detection,
672
+ projectName: projectName || null,
673
+ root,
674
+ });
455
675
  }
456
676
  });
457
- // Update config — change provider/model at runtime
677
+ // Update config — change provider/model at runtime and persist atomically.
458
678
  router.post("/api/config", async (req, res) => {
459
679
  const body = (await readBody(req));
460
680
  if (body.provider)
@@ -468,6 +688,21 @@ export function registerRoutes(router, root, projectName) {
468
688
  // Validate the client can be created
469
689
  getClient();
470
690
  const providers = await detectAvailableProviders();
691
+ // Persist to .vskill/studio.json (atomic tmp-then-rename). Fire-and-forget
692
+ // from the handler's perspective — errors are logged but not surfaced,
693
+ // matching how currentOverrides already survives process lifetime.
694
+ if (currentOverrides.provider) {
695
+ try {
696
+ await saveStudioSelection(root, {
697
+ activeAgent: currentOverrides.provider,
698
+ activeModel: getEffectiveRawModel(),
699
+ updatedAt: new Date().toISOString(),
700
+ });
701
+ }
702
+ catch (e) {
703
+ console.warn(`[studio.json] atomic write failed: ${e.message}`);
704
+ }
705
+ }
471
706
  sendJson(res, { provider: currentOverrides.provider || null, model: getEffectiveRawModel(), providers });
472
707
  }
473
708
  catch (err) {