@minhpnq1807/contextos 0.5.50 → 0.5.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58):
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +114 -9
  3. package/bin/ctx.js +64 -8
  4. package/eval/skill-routing/cases.yaml +366 -0
  5. package/eval/skill-routing/fixtures/docker-node/Dockerfile +4 -0
  6. package/eval/skill-routing/fixtures/docker-node/docker-compose.yml +5 -0
  7. package/eval/skill-routing/fixtures/docker-node/package.json +6 -0
  8. package/eval/skill-routing/fixtures/expo-eas/.github/workflows/eas.yml +1 -0
  9. package/eval/skill-routing/fixtures/expo-eas/app.json +5 -0
  10. package/eval/skill-routing/fixtures/expo-eas/eas.json +6 -0
  11. package/eval/skill-routing/fixtures/expo-eas/package.json +11 -0
  12. package/eval/skill-routing/fixtures/expo-with-vercel-json/app.json +6 -0
  13. package/eval/skill-routing/fixtures/expo-with-vercel-json/eas.json +5 -0
  14. package/eval/skill-routing/fixtures/expo-with-vercel-json/package.json +8 -0
  15. package/eval/skill-routing/fixtures/expo-with-vercel-json/vercel.json +3 -0
  16. package/eval/skill-routing/fixtures/express-mongo-jwt/package.json +8 -0
  17. package/eval/skill-routing/fixtures/firebase-hosting/firebase.json +11 -0
  18. package/eval/skill-routing/fixtures/firebase-hosting/package.json +6 -0
  19. package/eval/skill-routing/fixtures/flutter-firebase/pubspec.yaml +5 -0
  20. package/eval/skill-routing/fixtures/frontend-only-next/package.json +8 -0
  21. package/eval/skill-routing/fixtures/integration-test/jest.config.js +3 -0
  22. package/eval/skill-routing/fixtures/integration-test/package.json +10 -0
  23. package/eval/skill-routing/fixtures/jest-project/jest.config.js +3 -0
  24. package/eval/skill-routing/fixtures/jest-project/package.json +7 -0
  25. package/eval/skill-routing/fixtures/nest-prisma/package.json +10 -0
  26. package/eval/skill-routing/fixtures/nest-prisma/prisma/schema.prisma +4 -0
  27. package/eval/skill-routing/fixtures/next-vercel/.github/workflows/deploy.yml +1 -0
  28. package/eval/skill-routing/fixtures/next-vercel/package.json +8 -0
  29. package/eval/skill-routing/fixtures/next-vercel/vercel.json +3 -0
  30. package/eval/skill-routing/fixtures/oauth-google/.env.example +3 -0
  31. package/eval/skill-routing/fixtures/oauth-google/package.json +9 -0
  32. package/eval/skill-routing/fixtures/password-reset/package.json +8 -0
  33. package/eval/skill-routing/fixtures/playwright-project/package.json +6 -0
  34. package/eval/skill-routing/fixtures/playwright-project/playwright.config.ts +5 -0
  35. package/eval/skill-routing/fixtures/railway-render/package.json +6 -0
  36. package/eval/skill-routing/fixtures/railway-render/railway.json +6 -0
  37. package/eval/skill-routing/fixtures/railway-render/render.yaml +5 -0
  38. package/eval/skill-routing/fixtures/rbac-api/package.json +8 -0
  39. package/eval/skill-routing/fixtures/redis-cache/package.json +7 -0
  40. package/eval/skill-routing/fixtures/static-docs/README.md +3 -0
  41. package/eval/skill-routing/run-eval.js +278 -0
  42. package/package.json +3 -1
  43. package/plugins/ctx/.codex-plugin/plugin.json +1 -1
  44. package/plugins/ctx/lib/analyzer.js +17 -2
  45. package/plugins/ctx/lib/auto-warm.js +1 -0
  46. package/plugins/ctx/lib/ctx-mcp-client.js +21 -0
  47. package/plugins/ctx/lib/embedding-scorer.js +34 -0
  48. package/plugins/ctx/lib/hook-io.js +11 -1
  49. package/plugins/ctx/lib/package-install.js +1 -1
  50. package/plugins/ctx/lib/project-profiler.js +5 -1
  51. package/plugins/ctx/lib/prompt-hook.js +17 -2
  52. package/plugins/ctx/lib/score-context.js +13 -2
  53. package/plugins/ctx/lib/setup-wizard.js +8 -3
  54. package/plugins/ctx/lib/skill-discoverer.js +480 -27
  55. package/plugins/ctx/lib/skillshare-sync.js +112 -0
  56. package/plugins/ctx/lib/workflow-discoverer.js +3 -1
  57. package/plugins/ctx/mcp/contextos-server.js +29 -1
  58. package/plugins/ctx/mcp/server.js +50 -4
package/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.5.52
4
+
5
+ - **Release candidate polish:** Updated README positioning around ContextOS as a runtime context router, added npm/CI/license badges, a same-prompt/different-repo demo section, a benchmark table, a 30-second install callout, and an AGENTS.md vs RAG vs ContextOS comparison table.
6
+ - **Non-interactive setup safety:** `ctx setup --yes` now defaults to Codex instead of failing with no selected agents, and skips the community skill installer when no TTY is available so release/install smoke tests can complete unattended.
7
+ - **Hot MCP scorer:** `ctx-mcp` now preloads the local embedding pipeline and exposes `ctx_health`/bridge health so prompt hooks only call semantic scoring when the long-running scorer is ready.
8
+ - **Skill Router v2:** Skill suggestions now combine semantic similarity with prompt triggers, dependency evidence, config-file evidence, negative triggers, and confidence explanations. Optional `skill.yaml` metadata beside `SKILL.md` can define positive/negative triggers and related skills.
9
+ - **Confidence calibration:** Skill Router confidence is now calibrated separately from ranking. Prompt-only or semantic-only matches are capped, prompt+project-evidence matches are promoted to medium confidence, dependency+file evidence promotes to high confidence, negative signals cap confidence, and `ctx skills doctor` shows `high`/`medium`/`low` bands.
10
+ - **Skill doctor:** Added `ctx skills doctor -- "task"` to explain selected skills with semantic score, prompt trigger score, project evidence, file evidence, negative signals, and final confidence.
11
+ - **Skill routing eval:** Added `eval/skill-routing` fixtures and `ctx benchmark --skills` to report top-1 accuracy, top-3 recall, false positive rate, confidence calibration, and negative gate accuracy for evidence-based skill routing.
12
+ - **Expanded Skill Router benchmark:** Expanded the eval from the initial 6-case smoke set to 52 cases across deployment, auth, database, testing, mobile, and adversarial negative gates. Current local benchmark: Top-1 Accuracy 92.3%, Top-3 Recall 94.2%, False Positive Rate 0.0%, Confidence Calibration 100.0%, Negative Gate Accuracy 100.0%.
13
+ - **Faster prompt fallback:** Direct prompt-hook fallback now skips embedding work and uses a shorter timeout, so context injection can still return deterministic rule, file, skill, and workflow candidates when MCP or semantic scoring is unavailable.
14
+ - **Shared skill index fallback:** Skill discovery now warms a shared global skill index and searches it when the workspace-specific skill index has no matches, improving reuse across projects.
15
+ - **Agent-visible skill dedupe:** Community skill installs and skill sync now remove duplicate skills visible through shared, Codex, and Antigravity roots while preserving unique agent-specific skills.
16
+ - **Workspace prompt path detection:** Explicit file paths in prompts now tolerate line and column suffixes and can resolve files from workspace packages, improving suggested-file accuracy in monorepos.
17
+
3
18
  ## 0.5.50
4
19
 
5
20
  - **Explicit skill activation:** Prompt skills named with `$skill-name` are now preserved and ranked before semantic suggestions, so user-requested skills such as `$threejs` or `$design-taste-frontend` appear in prompt context even when semantic ranking would not select them.
package/README.md CHANGED
@@ -1,8 +1,12 @@
1
1
  # ContextOS
2
2
 
3
- Codex ignores the middle of your `AGENTS.md`. ContextOS fixes that.
3
+ Runtime context router for coding agents.
4
4
 
5
- It ranks your project rules against the current prompt, injects the right ones at the moment the agent starts work, suggests relevant files/skills/workflows, and reports what the agent actually followed after the task.
5
+ Rules, files, skills, workflows, and evidence: injected before the agent writes code.
6
+
7
+ [![npm version](https://img.shields.io/npm/v/@minhpnq1807/contextos.svg)](https://www.npmjs.com/package/@minhpnq1807/contextos)
8
+ [![CI](https://github.com/khovan123/contextOS/actions/workflows/ci.yml/badge.svg)](https://github.com/khovan123/contextOS/actions/workflows/ci.yml)
9
+ [![license: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
6
10
 
7
11
  ```text
8
12
  WITHOUT ContextOS
@@ -17,11 +21,38 @@ WITH ContextOS
17
21
  -> report followed / ignored / unknown
18
22
  ```
19
23
 
24
+ ContextOS is not another `AGENTS.md` loader. It is a runtime context router for coding agents: it chooses the task-relevant rules, files, skills, workflows, and evidence before the agent starts editing.
25
+
20
26
  Published package: [`@minhpnq1807/contextos`](https://www.npmjs.com/package/@minhpnq1807/contextos)
21
27
 
22
28
  ## Demo
23
29
 
24
- ![ContextOS actual terminal demo](docs/demo/contextos-demo.gif)
30
+ ![ContextOS demo: same prompt, different repo, correct skills](docs/demo/contextos-demo.gif)
31
+
32
+ Same prompt. Different repo. Correct skills.
33
+
34
+ ```bash
35
+ ctx skills doctor -- "fix deployed"
36
+ ```
37
+
38
+ | Repo evidence | Expected route |
39
+ | --- | --- |
40
+ | `eas.json`, `expo`, `react-native` | `eas`, `mobile-deployment`, `github-actions-ci-cd` |
41
+ | `vercel.json`, `next`, GitHub workflow | `vercel-deployment`, `github-actions-ci-cd`, `env-secret-management` |
42
+ | ContextOS repo with no app deploy evidence | no deployment skill selected |
43
+
44
+ Skill Router internal fixture benchmark:
45
+
46
+ | Metric | Result |
47
+ | --- | ---: |
48
+ | Cases | 52 |
49
+ | Top-1 Accuracy | 92.3% |
50
+ | Top-3 Recall | 94.2% |
51
+ | False Positive Rate | 0.0% |
52
+ | Confidence Calibration | 100.0% |
53
+ | Negative Gate Accuracy | 100.0% |
54
+
55
+ This is an internal fixture benchmark, not an external real-world benchmark. It is designed to prove the router behavior across controlled Expo/EAS, Next/Vercel, Docker, Railway/Render, Firebase, auth, database, testing, mobile, and adversarial negative-gate cases.
25
56
 
26
57
  Example hook context injected before the agent works:
27
58
 
@@ -51,6 +82,8 @@ Runtime telemetry: code-review-graph, code-review-graph.query_graph_tool
51
82
 
52
83
  ## Quick Install
53
84
 
85
+ Install in 30 seconds:
86
+
54
87
  ```bash
55
88
  npm install -g @minhpnq1807/contextos
56
89
  ctx setup
@@ -104,6 +137,14 @@ The problem is not that agents cannot read `AGENTS.md`. The problem is that larg
104
137
  | Sync | Rules/MCP via Ruler, skills via skillshare, workflows via ContextOS. |
105
138
  | Evidence | Stop hooks persist `followed`, `ignored`, `unknown`, and runtime telemetry for explicit reports. |
106
139
 
140
+ ## Comparison
141
+
142
+ | Approach | What it gives the agent | Main gap |
143
+ | --- | --- | --- |
144
+ | Plain `AGENTS.md` | Static repo instructions. | Important rules get buried or ignored when the task changes. |
145
+ | Generic RAG | Semantically related files or snippets. | It usually does not route skills/workflows or prove rule compliance. |
146
+ | ContextOS | Task-routed rules, files, skills, workflows, and evidence. | Requires local setup and warm indexes for best results. |
147
+
107
148
  ## Quick Commands
108
149
 
109
150
  | Command | Use it for |
@@ -114,6 +155,7 @@ The problem is not that agents cannot read `AGENTS.md`. The problem is that larg
114
155
  | `ctx evidence` | Show why each rule was marked followed/ignored/unknown. |
115
156
  | `ctx stats` | Show workspace-level usage and effectiveness metrics. |
116
157
  | `ctx benchmark -- "task"` | Compare raw AGENTS.md ordering vs ContextOS scheduling. |
158
+ | `ctx benchmark --skills` | Run the Skill Router eval benchmark. |
117
159
  | `ctx sync --rules` | Sync AGENTS/Ruler/MCP config across agents. |
118
160
  | `ctx sync --skills` | Sync skills across agents through skillshare. |
119
161
  | `ctx sync --workflows` | Sync workflow markdown across Claude/Codex/Antigravity. |
@@ -225,6 +267,14 @@ Restart Antigravity or `agy` after installing.
225
267
 
226
268
  The embedding model is mandatory. `ctx install` checks `~/.ctx/contextos/models` first and downloads the MiniLM model only when the required local files are missing. It intentionally fails if the model cannot be prepared, because otherwise the first prompt hook would have to cold-load or download the model.
227
269
 
270
+ ContextOS keeps the embedding model hot inside `ctx-mcp`. Prompt hooks never cold-load transformers; if the MCP bridge is unavailable or the model is still warming, hooks fail open with lightweight scoring. Current local smoke metrics:
271
+
272
+ ```text
273
+ MCP warm p95: 15-58ms observed
274
+ Hook lightweight fallback: 0.69s
275
+ MCP embedding hot startup: 477ms
276
+ ```
277
+
228
278
  During install, ContextOS prints a 0-100 progress indicator. The longest stage is usually embedding warmup; if the model is already cached, install skips the download and only refreshes vectors.
229
279
 
230
280
  Verify the published package in any project:
@@ -418,7 +468,7 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
418
468
  | `ctx install --inject` | Installs ContextOS with explicit injection mode. | You want to be explicit in scripts or docs. | Same runtime behavior as the default install mode; if combined with `--quiet`, `--inject` wins. |
419
469
  | `ctx install --copy` | Copies only the plugin payload to `$CODEX_HOME/plugins/ctx`. | Legacy local development or manual plugin experiments. | Does not sync the active marketplace, rebuild indexes, register MCP, or install global hooks. Prefer `ctx refresh` for active local updates. |
420
470
  | `ctx setup` | Runs the first-run setup wizard. | You want the recommended onboarding flow after `npm install -g @minhpnq1807/contextos`. | Installs selected agents, optionally syncs Ruler rules/MCP and skillshare skills, asks which prompt sections to show, then prints next steps. |
421
- | `ctx setup --yes` | Runs setup with defaults non-interactively. | You want scriptable all-agent setup. | Uses `codex,claude,agy`, enables injection, syncs rules, syncs skills, and passes `--yes` to dependency setup prompts. |
471
+ | `ctx setup --yes` | Runs setup with defaults non-interactively. | You want scriptable Codex setup. | Uses `codex`, enables injection, syncs rules, syncs skills, skips interactive community-skill installation when no TTY is available, and passes `--yes` to dependency setup prompts. Use `--agents codex,claude,agy` for multi-agent setup. |
422
472
  | `ctx setup --agents <list>` | Runs setup for selected agents. | You want only part of the default set. | Accepts comma-separated `codex`, `claude`, `agy`, or `antigravity`. |
423
473
  | `ctx setup --no-rules` | Skips Ruler sync during setup. | You only want hooks/MCP install and maybe skill sync. | Does not run `ctx sync --rules`. |
424
474
  | `ctx setup --no-skills` | Skips skillshare sync during setup. | You do not want shared skills configured. | Does not run `ctx sync --skills`. |
@@ -428,6 +478,7 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
428
478
  | `ctx evidence` | Shows detailed evidence behind the last report for the current workspace. | You want to inspect why a rule was marked `followed`, `ignored`, `unknown`, or `unmeasurable`. | Prints a compact evidence table plus per-rule detail tables. |
429
479
  | `ctx stats` | Shows aggregate runtime metrics for the current workspace. | You want to know whether ContextOS is active and useful over time. | Prints sectioned tables for prompt/report counts, injection rate, efficiency, rule outcomes, hook events, last prompt, and last report. |
430
480
  | `ctx benchmark -- "task"` | Compares baseline AGENTS.md ordering with ContextOS task-aware scheduling. | You want a before/after signal for lost-in-the-middle risk. | Prints tables for parsed/actionable/filtered rules, baseline middle-risk, scheduled high/mid rules, recency reminder status, and top scored rules. |
481
+ | `ctx benchmark --skills` | Runs the Skill Router eval benchmark. | You want evidence for skill routing accuracy and negative gates. | Prints top-1 accuracy, top-3 recall, false positive rate, confidence calibration, and negative gate accuracy across `eval/skill-routing` fixtures. |
431
482
  | `ctx sync --rules` | Syncs project rules and MCP servers through Ruler. | You want Codex, Claude Code, and Antigravity to share one project rule/MCP source of truth. | Ensures `.ruler/ruler.toml`, injects `ctx-mcp`, imports existing MCP servers from Codex and project `.mcp.json`, runs `ruler apply --agents codex,claude,antigravity`, mirrors MCP servers to Antigravity MCP configs, and verifies generated config. |
432
483
  | `ctx sync --rules --agents <list>` | Syncs only selected agents through Ruler. | You want to update one or two agents without touching the others. | Accepts comma-separated values such as `codex`, `claude`, `agy`, `antigravity`, or `codex,claude,agy`; `agy` is normalized to Ruler's `antigravity`. |
433
484
  | `ctx sync --rules --dry-run` | Previews Ruler sync without writing files or running apply. | You want to inspect behavior before changing project config. | Prints the same flow with dry-run status. |
@@ -521,13 +572,67 @@ Injected prompt sections are intentionally compact: rules show only detected rul
521
572
 
522
573
  Codex may flatten newlines in its `UserPromptSubmit hook (completed)` preview. The injected `additionalContext` payload remains multiline; this is a Codex preview display limitation.
523
574
 
524
- Skill ranking is semantic-only. ContextOS builds a fused query from the user prompt plus a cached project profile, then compares that vector with cached skill vectors:
575
+ Skill ranking uses Skill Router v2. ContextOS still starts with semantic retrieval, but final confidence is evidence-based:
576
+
577
+ ```text
578
+ final_score =
579
+ semantic_score * 0.35
580
+ + prompt_trigger_score * 0.20
581
+ + project_evidence_score * 0.25
582
+ + file_config_score * 0.10
583
+ + graph_score * 0.05
584
+ - negative_penalty * 0.20
585
+ ```
586
+
587
+ Skill metadata can live beside `SKILL.md` as `skill.yaml`:
588
+
589
+ ```yaml
590
+ id: eas
591
+ name: Expo EAS Deployment
592
+ positive_triggers:
593
+ prompts: [eas, expo build, deployed, android, ios]
594
+ files: [eas.json, app.json, app.config.ts]
595
+ dependencies: [expo, eas-cli]
596
+ negative_triggers:
597
+ dependencies: [next, vite]
598
+ files: [vercel.json]
599
+ related_skills:
600
+ - mobile-deployment
601
+ - github-actions-ci-cd
602
+ - env-secret-management
603
+ ```
604
+
605
+ The project profile is built from bounded root/workspace `package.json` metadata, dependencies, scripts, detected languages, recent git files, and config files such as `eas.json`, `app.json`, `vercel.json`, and `.github/workflows/*`. ContextOS only gives high confidence to domain-specific skills when project evidence supports them. For example, `fix deployed` can rank `eas` highly in an Expo project with `eas.json` and `expo`, but a Next.js/Vercel project should route to Vercel and CI/CD deployment skills instead. Skill catalogs are deduplicated by normalized skill name before indexing and rendering.
606
+
607
+ Use `ctx skills doctor -- "task"` to inspect routing:
608
+
609
+ ```bash
610
+ ctx skills doctor -- "fix deployed"
611
+ ```
612
+
613
+ The doctor output shows semantic score, prompt triggers, dependency/file evidence, negative signals, and final confidence for each selected skill.
614
+ Confidence is calibrated separately from ranking and includes a band:
615
+
616
+ ```text
617
+ high: >= 0.85
618
+ medium: 0.65-0.84
619
+ low: < 0.65
620
+ ```
621
+
622
+ Use `ctx benchmark --skills` to run the local Skill Router benchmark. The eval lives in `eval/skill-routing` and currently covers 52 cases across deployment, auth, database, testing, mobile, and adversarial negative gates.
623
+
624
+ Current local benchmark:
525
625
 
526
626
  ```text
527
- embed(prompt + project profile) -> cosine -> embed(skill name + description)
627
+ Cases: 52
628
+ Top-1 Accuracy: 92.3%
629
+ Top-3 Recall: 94.2%
630
+ False Positive Rate: 0.0%
631
+ Confidence Calibration: 100.0%
632
+ Negative Gate Accuracy: 100.0%
528
633
  ```
529
634
 
530
- The project profile is an embeddable string built from bounded root/workspace `package.json` metadata, dependencies, scripts, detected languages, and recent git files. It is cached under the ContextOS workspace data directory and invalidated when package metadata or git `HEAD` changes. ContextOS does not maintain a skill taxonomy or domain gate list for ranking; if the skill index is cold for a large catalog, prompt hooks fail open instead of falling back to arbitrary keyword matches. Skill catalogs are deduplicated by normalized skill name before indexing and rendering.
635
+ The benchmark includes same-prompt/different-repo checks such as `fix deployed` in Expo/EAS, Next/Vercel, and ContextOS itself, plus adversarial cases like `expo-with-vercel-json` where `eas` is expected and `vercel-deployment` must be rejected.
531
636
 
532
637
  After `ctx refresh`, ContextOS invalidates the private hook bridge socket so prompts fall back to direct scoring until Codex restarts the long-running `ctx-mcp` process. Hook clients also discard a same-inode socket if an older bridge revision is detected.
533
638
 
@@ -541,10 +646,10 @@ CONTEXTOS_EMBEDDINGS=0 disable embedding rule scoring
541
646
  CONTEXTOS_MCP_CONNECT_TIMEOUT_MS=100 stale ctx-mcp socket connect timeout
542
647
  CONTEXTOS_MCP_BRIDGE_TIMEOUT_MS=2000 ctx-mcp hook bridge timeout
543
648
  CONTEXTOS_HOOK_DEADLINE_MS=8500 hard fail-open deadline for prompt hooks
544
- CONTEXTOS_DIRECT_FALLBACK_TIMEOUT_MS=6000 direct scoring timeout when the bridge is unavailable
649
+ CONTEXTOS_DIRECT_FALLBACK_TIMEOUT_MS=2500 direct scoring timeout when the bridge is unavailable
545
650
  CONTEXTOS_HOOK_EMBEDDING_TIMEOUT_MS=500 rule embedding timeout during hook direct fallback
546
651
  CONTEXTOS_EMBEDDING_TIMEOUT_MS=800 embedding scoring timeout inside ctx-mcp/debug
547
- CONTEXTOS_HOOK_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout during hook direct fallback
652
+ CONTEXTOS_HOOK_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout when embeddings are enabled
548
653
  CONTEXTOS_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout inside ctx-mcp/debug
549
654
  CONTEXTOS_FILE_EMBEDDINGS=0 disable file-path embedding retrieval
550
655
  CONTEXTOS_HOOK_FILE_EMBEDDING_TIMEOUT_MS=500 file retrieval timeout during hook direct fallback
package/bin/ctx.js CHANGED
@@ -19,6 +19,7 @@ import { scoreContext } from "../plugins/ctx/lib/score-context.js";
19
19
  import { defaultDataRoot, workspaceDataDir, workspaceMarkerPath } from "../plugins/ctx/lib/workspace-data.js";
20
20
  import { installMcpTelemetryProxies } from "../plugins/ctx/lib/mcp-proxy-install.js";
21
21
  import { benchmarkWorkspace, formatBenchmark } from "../plugins/ctx/lib/benchmark.js";
22
+ import { formatSkillRoutingBenchmark, runSkillRoutingEval } from "../eval/skill-routing/run-eval.js";
22
23
  import { copyDir, copyPackageRoot, syncPackageRoot } from "../plugins/ctx/lib/package-install.js";
23
24
  import { installClaudeHooks } from "../plugins/ctx/lib/claude-hooks.js";
24
25
  import { installClaudeMcp } from "../plugins/ctx/lib/claude-mcp.js";
@@ -29,8 +30,8 @@ import { installCopilotMcp } from "../plugins/ctx/lib/copilot-mcp.js";
29
30
  import { readCodexMcpServers, syncRules } from "../plugins/ctx/lib/ruler-sync.js";
30
31
  import { detectGraphStrategy, embedCodeReviewGraph, formatCodeReviewGraphEmbedding, formatGraphStrategy } from "../plugins/ctx/lib/graph-strategy.js";
31
32
  import { writeInnerGitignore, ensureRootGitignore } from "../plugins/ctx/lib/gitignore.js";
32
- import { repairSkillSymlinks, syncSkills, detectExistingSkills } from "../plugins/ctx/lib/skillshare-sync.js";
33
- import { scanSkills, warmSkillEmbeddings } from "../plugins/ctx/lib/skill-discoverer.js";
33
+ import { dedupeAgentVisibleSkills, repairSkillSymlinks, syncSkills, detectExistingSkills } from "../plugins/ctx/lib/skillshare-sync.js";
34
+ import { diagnoseSkills, scanSkills, warmSkillEmbeddings } from "../plugins/ctx/lib/skill-discoverer.js";
34
35
  import { parsePassthroughArgs, runPassthrough } from "../plugins/ctx/lib/passthrough.js";
35
36
  import { parseAgentList, parseSetupArgs, setupSummaryLines } from "../plugins/ctx/lib/setup-wizard.js";
36
37
  import { multiSelect } from "../plugins/ctx/lib/multi-select.js";
@@ -143,6 +144,10 @@ async function runCommunitySkillInstaller(agents = []) {
143
144
  if (afterRepair.repaired.length || afterRepair.removedBroken.length) {
144
145
  console.log(`${DIM}│${RESET} Repaired ${afterRepair.repaired.length} skill links after install.`);
145
146
  }
147
+ const deduped = dedupeAgentVisibleSkills({ cwd: process.cwd(), home: os.homedir(), agents });
148
+ if (deduped.removed.length) {
149
+ console.log(`${DIM}│${RESET} Removed ${deduped.removed.length} duplicate agent-visible skills.`);
150
+ }
146
151
  successCount++;
147
152
 
148
153
  if (installInfo.verify) {
@@ -189,6 +194,7 @@ Usage:
189
194
  ctx evidence Show evidence from last report
190
195
  ctx stats Show workspace statistics
191
196
  ctx benchmark -- "task" Benchmark workspace for a task
197
+ ctx benchmark --skills Run skill routing eval benchmark
192
198
  ctx sync --rules Sync AGENTS.md rules to all agents
193
199
  ctx sync --rules --agents <names> Sync rules to specific agents only
194
200
  ctx sync --rules --dry-run Preview rule sync without writing
@@ -203,6 +209,7 @@ Usage:
203
209
  ctx sync --workflows --agents <names> Sync workflows to specific agents
204
210
  ctx sync --workflows --dry-run Preview workflow sync without writing
205
211
  ctx skills Browse community skill libraries
212
+ ctx skills doctor -- "task" Explain skill routing for a task
206
213
  ctx skills --agents <names> Filter skills for specific agents
207
214
  ctx skills --refresh Force refresh skill library cache
208
215
  ctx --config Choose prompt context sections to show
@@ -646,6 +653,38 @@ async function debug(task) {
646
653
  console.log(scheduled.additionalContext || "(empty)");
647
654
  }
648
655
 
656
+ async function skillsDoctor(task) {
657
+ if (!String(task || "").trim()) throw new Error('Usage: ctx skills doctor -- "task"');
658
+ const result = await diagnoseSkills({
659
+ cwd: process.cwd(),
660
+ prompt: task,
661
+ dataDir: contextOSDataDir(),
662
+ skills: scanSkills({ cwd: process.cwd() }),
663
+ limit: outputConfigLimits(loadOutputConfig({ dataRoot: contextOSDataDir() })).skills,
664
+ timeoutMs: Number(process.env.CONTEXTOS_SKILL_DOCTOR_TIMEOUT_MS || 3000)
665
+ });
666
+
667
+ console.log("ContextOS skill doctor");
668
+ console.log(`cwd: ${result.cwd}`);
669
+ console.log(`prompt: ${result.prompt}`);
670
+ console.log("");
671
+ console.log("Project evidence:");
672
+ console.log(`dependencies: ${result.projectEvidence.dependencies.slice(0, 30).join(", ") || "(none)"}`);
673
+ console.log(`files: ${result.projectEvidence.files.slice(0, 30).join(", ") || "(none)"}`);
674
+ console.log("");
675
+ console.log("Skills:");
676
+ if (!result.skills.length) {
677
+ console.log("(none)");
678
+ return;
679
+ }
680
+ for (const skill of result.skills) {
681
+ console.log(`${Number(skill.confidence || skill.score || 0).toFixed(2)} ${skill.confidenceBand || "low"} ${skill.name}`);
682
+ console.log(` semantic:${Number(skill.semanticScore || 0).toFixed(2)} prompt:${Number(skill.promptTriggerScore || 0).toFixed(2)} project:${Number(skill.projectEvidenceScore || 0).toFixed(2)} files:${Number(skill.fileConfigScore || 0).toFixed(2)} negative:${Number(skill.negativePenalty || 0).toFixed(2)}`);
683
+ if (skill.evidence?.length) console.log(` evidence: ${skill.evidence.join(", ")}`);
684
+ if (skill.negativeEvidence?.length) console.log(` rejected signals: ${skill.negativeEvidence.join(", ")}`);
685
+ }
686
+ }
687
+
649
688
  async function warmEmbeddings(task, { syncMarketplace = true, quiet = false } = {}) {
650
689
  const warmResult = await warmWorkspaceIndexes({ task });
651
690
  const marketplaceSync = syncMarketplace ? syncActiveCodexMarketplace() : null;
@@ -870,15 +909,21 @@ async function setup({ args = [], cwd = process.cwd() } = {}) {
870
909
  const totalExisting = existing.reduce((sum, e) => sum + e.count, 0);
871
910
  if (totalExisting === 0) {
872
911
  console.log("");
873
- console.log(`${YELLOW}⚠${RESET} No skills found on this machine.`);
874
- console.log(`${DIM}│${RESET} Install community skills to get started.`);
912
+ console.log("⚠ No skills found on this machine.");
913
+ console.log("│ Install community skills to get started.");
875
914
  console.log("");
876
915
 
877
- const installed = await runCommunitySkillInstaller(options.agents);
878
- if (installed > 0) {
916
+ if (options.yes || !process.stdin.isTTY) {
917
+ console.log("│ Skipping community skill installer in non-interactive setup.");
918
+ console.log("│ Run: ctx skills");
879
919
  console.log("");
880
- console.log("◇ Re-syncing skills after install...");
881
- await doSyncSkills();
920
+ } else {
921
+ const installed = await runCommunitySkillInstaller(options.agents);
922
+ if (installed > 0) {
923
+ console.log("");
924
+ console.log("◇ Re-syncing skills after install...");
925
+ await doSyncSkills();
926
+ }
882
927
  }
883
928
  }
884
929
  }
@@ -977,11 +1022,21 @@ try {
977
1022
  } else if (command === "stats") {
978
1023
  console.log(formatStats(loadStats(contextOSWorkspaceDataDir())));
979
1024
  } else if (command === "benchmark") {
1025
+ if (args.includes("--skills")) {
1026
+ console.log(formatSkillRoutingBenchmark(await runSkillRoutingEval({ rootDir })));
1027
+ } else {
980
1028
  const marker = args.indexOf("--");
981
1029
  const task = marker >= 0 ? args.slice(marker + 1).join(" ") : args.slice(1).join(" ");
982
1030
  if (!task.trim()) throw new Error('Usage: ctx benchmark -- "task"');
983
1031
  console.log(formatBenchmark(benchmarkWorkspace({ cwd: process.cwd(), task })));
1032
+ }
984
1033
  } else if (command === "skills") {
1034
+ if (args[1] === "doctor") {
1035
+ const marker = args.indexOf("--");
1036
+ const task = marker >= 0 ? args.slice(marker + 1).join(" ") : args.slice(2).join(" ");
1037
+ await skillsDoctor(task);
1038
+ process.exitCode = 0;
1039
+ } else {
985
1040
  // Interactive community skill library selector + installer
986
1041
  const agentsFlag = args.indexOf("--agents");
987
1042
  const forceRefresh = args.includes("--refresh");
@@ -1016,6 +1071,7 @@ try {
1016
1071
  }));
1017
1072
  }
1018
1073
  console.log("");
1074
+ }
1019
1075
  } else if (command === "sync") {
1020
1076
  if (args.includes("--workflows")) {
1021
1077
  await syncWorkflows({