@minhpnq1807/contextos 0.5.51 → 0.5.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -2
- package/README.md +114 -9
- package/bin/ctx.js +59 -7
- package/eval/skill-routing/cases.yaml +366 -0
- package/eval/skill-routing/fixtures/docker-node/Dockerfile +4 -0
- package/eval/skill-routing/fixtures/docker-node/docker-compose.yml +5 -0
- package/eval/skill-routing/fixtures/docker-node/package.json +6 -0
- package/eval/skill-routing/fixtures/expo-eas/.github/workflows/eas.yml +1 -0
- package/eval/skill-routing/fixtures/expo-eas/app.json +5 -0
- package/eval/skill-routing/fixtures/expo-eas/eas.json +6 -0
- package/eval/skill-routing/fixtures/expo-eas/package.json +11 -0
- package/eval/skill-routing/fixtures/expo-with-vercel-json/app.json +6 -0
- package/eval/skill-routing/fixtures/expo-with-vercel-json/eas.json +5 -0
- package/eval/skill-routing/fixtures/expo-with-vercel-json/package.json +8 -0
- package/eval/skill-routing/fixtures/expo-with-vercel-json/vercel.json +3 -0
- package/eval/skill-routing/fixtures/express-mongo-jwt/package.json +8 -0
- package/eval/skill-routing/fixtures/firebase-hosting/firebase.json +11 -0
- package/eval/skill-routing/fixtures/firebase-hosting/package.json +6 -0
- package/eval/skill-routing/fixtures/flutter-firebase/pubspec.yaml +5 -0
- package/eval/skill-routing/fixtures/frontend-only-next/package.json +8 -0
- package/eval/skill-routing/fixtures/integration-test/jest.config.js +3 -0
- package/eval/skill-routing/fixtures/integration-test/package.json +10 -0
- package/eval/skill-routing/fixtures/jest-project/jest.config.js +3 -0
- package/eval/skill-routing/fixtures/jest-project/package.json +7 -0
- package/eval/skill-routing/fixtures/nest-prisma/package.json +10 -0
- package/eval/skill-routing/fixtures/nest-prisma/prisma/schema.prisma +4 -0
- package/eval/skill-routing/fixtures/next-vercel/.github/workflows/deploy.yml +1 -0
- package/eval/skill-routing/fixtures/next-vercel/package.json +8 -0
- package/eval/skill-routing/fixtures/next-vercel/vercel.json +3 -0
- package/eval/skill-routing/fixtures/oauth-google/.env.example +3 -0
- package/eval/skill-routing/fixtures/oauth-google/package.json +9 -0
- package/eval/skill-routing/fixtures/password-reset/package.json +8 -0
- package/eval/skill-routing/fixtures/playwright-project/package.json +6 -0
- package/eval/skill-routing/fixtures/playwright-project/playwright.config.ts +5 -0
- package/eval/skill-routing/fixtures/railway-render/package.json +6 -0
- package/eval/skill-routing/fixtures/railway-render/railway.json +6 -0
- package/eval/skill-routing/fixtures/railway-render/render.yaml +5 -0
- package/eval/skill-routing/fixtures/rbac-api/package.json +8 -0
- package/eval/skill-routing/fixtures/redis-cache/package.json +7 -0
- package/eval/skill-routing/fixtures/static-docs/README.md +3 -0
- package/eval/skill-routing/run-eval.js +278 -0
- package/package.json +3 -1
- package/plugins/ctx/.codex-plugin/plugin.json +1 -1
- package/plugins/ctx/lib/ctx-mcp-client.js +19 -0
- package/plugins/ctx/lib/embedding-scorer.js +34 -0
- package/plugins/ctx/lib/package-install.js +1 -1
- package/plugins/ctx/lib/prompt-hook.js +13 -2
- package/plugins/ctx/lib/setup-wizard.js +8 -3
- package/plugins/ctx/lib/skill-discoverer.js +432 -18
- package/plugins/ctx/mcp/contextos-server.js +29 -1
- package/plugins/ctx/mcp/server.js +50 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## 0.5.51
|
|
4
|
-
|
|
3
|
+
## 0.5.52
|
|
4
|
+
|
|
5
|
+
- **Release candidate polish:** Updated README positioning around ContextOS as a runtime context router, added npm/CI/license badges, a same-prompt/different-repo demo section, a benchmark table, a 30-second install callout, and an AGENTS.md vs RAG vs ContextOS comparison table.
|
|
6
|
+
- **Non-interactive setup safety:** `ctx setup --yes` now defaults to Codex instead of failing with no selected agents, and skips the community skill installer when no TTY is available so release/install smoke tests can complete unattended.
|
|
7
|
+
- **Hot MCP scorer:** `ctx-mcp` now preloads the local embedding pipeline and exposes `ctx_health`/bridge health so prompt hooks only call semantic scoring when the long-running scorer is ready.
|
|
8
|
+
- **Skill Router v2:** Skill suggestions now combine semantic similarity with prompt triggers, dependency evidence, config-file evidence, negative triggers, and confidence explanations. Optional `skill.yaml` metadata beside `SKILL.md` can define positive/negative triggers and related skills.
|
|
9
|
+
- **Confidence calibration:** Skill Router confidence is now calibrated separately from ranking. Prompt-only or semantic-only matches are capped, prompt+project-evidence matches are promoted to medium confidence, dependency+file evidence promotes to high confidence, negative signals cap confidence, and `ctx skills doctor` shows `high`/`medium`/`low` bands.
|
|
10
|
+
- **Skill doctor:** Added `ctx skills doctor -- "task"` to explain selected skills with semantic score, prompt trigger score, project evidence, file evidence, negative signals, and final confidence.
|
|
11
|
+
- **Skill routing eval:** Added `eval/skill-routing` fixtures and `ctx benchmark --skills` to report top-1 accuracy, top-3 recall, false positive rate, confidence calibration, and negative gate accuracy for evidence-based skill routing.
|
|
12
|
+
- **Expanded Skill Router benchmark:** Expanded the eval from the initial 6-case smoke set to 52 cases across deployment, auth, database, testing, mobile, and adversarial negative gates. Current local benchmark: Top-1 Accuracy 92.3%, Top-3 Recall 94.2%, False Positive Rate 0.0%, Confidence Calibration 100.0%, Negative Gate Accuracy 100.0%.
|
|
5
13
|
- **Faster prompt fallback:** Direct prompt-hook fallback now skips embedding work and uses a shorter timeout, so context injection can still return deterministic rule, file, skill, and workflow candidates when MCP or semantic scoring is unavailable.
|
|
6
14
|
- **Shared skill index fallback:** Skill discovery now warms a shared global skill index and searches it when the workspace-specific skill index has no matches, improving reuse across projects.
|
|
7
15
|
- **Agent-visible skill dedupe:** Community skill installs and skill sync now remove duplicate skills visible through shared, Codex, and Antigravity roots while preserving unique agent-specific skills.
|
package/README.md
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
# ContextOS
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Runtime context router for coding agents.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Rules, files, skills, workflows, and evidence: injected before the agent writes code.
|
|
6
|
+
|
|
7
|
+
[npm](https://www.npmjs.com/package/@minhpnq1807/contextos)
|
|
8
|
+
[CI](https://github.com/khovan123/contextOS/actions/workflows/ci.yml)
|
|
9
|
+
[License](LICENSE)
|
|
6
10
|
|
|
7
11
|
```text
|
|
8
12
|
WITHOUT ContextOS
|
|
@@ -17,11 +21,38 @@ WITH ContextOS
|
|
|
17
21
|
-> report followed / ignored / unknown
|
|
18
22
|
```
|
|
19
23
|
|
|
24
|
+
ContextOS is not another `AGENTS.md` loader. It is a runtime context router for coding agents: it chooses the task-relevant rules, files, skills, workflows, and evidence before the agent starts editing.
|
|
25
|
+
|
|
20
26
|
Published package: [`@minhpnq1807/contextos`](https://www.npmjs.com/package/@minhpnq1807/contextos)
|
|
21
27
|
|
|
22
28
|
## Demo
|
|
23
29
|
|
|
24
|
-

|
|
31
|
+
|
|
32
|
+
Same prompt. Different repo. Correct skills.
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
ctx skills doctor -- "fix deployed"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
| Repo evidence | Expected route |
|
|
39
|
+
| --- | --- |
|
|
40
|
+
| `eas.json`, `expo`, `react-native` | `eas`, `mobile-deployment`, `github-actions-ci-cd` |
|
|
41
|
+
| `vercel.json`, `next`, GitHub workflow | `vercel-deployment`, `github-actions-ci-cd`, `env-secret-management` |
|
|
42
|
+
| ContextOS repo with no app deploy evidence | no deployment skill selected |
|
|
43
|
+
|
|
44
|
+
Skill Router internal fixture benchmark:
|
|
45
|
+
|
|
46
|
+
| Metric | Result |
|
|
47
|
+
| --- | ---: |
|
|
48
|
+
| Cases | 52 |
|
|
49
|
+
| Top-1 Accuracy | 92.3% |
|
|
50
|
+
| Top-3 Recall | 94.2% |
|
|
51
|
+
| False Positive Rate | 0.0% |
|
|
52
|
+
| Confidence Calibration | 100.0% |
|
|
53
|
+
| Negative Gate Accuracy | 100.0% |
|
|
54
|
+
|
|
55
|
+
This is an internal fixture benchmark, not an external real-world benchmark. It is designed to prove the router behavior across controlled Expo/EAS, Next/Vercel, Docker, Railway/Render, Firebase, auth, database, testing, mobile, and adversarial negative-gate cases.
|
|
25
56
|
|
|
26
57
|
Example hook context injected before the agent works:
|
|
27
58
|
|
|
@@ -51,6 +82,8 @@ Runtime telemetry: code-review-graph, code-review-graph.query_graph_tool
|
|
|
51
82
|
|
|
52
83
|
## Quick Install
|
|
53
84
|
|
|
85
|
+
Install in 30 seconds:
|
|
86
|
+
|
|
54
87
|
```bash
|
|
55
88
|
npm install -g @minhpnq1807/contextos
|
|
56
89
|
ctx setup
|
|
@@ -104,6 +137,14 @@ The problem is not that agents cannot read `AGENTS.md`. The problem is that larg
|
|
|
104
137
|
| Sync | Rules/MCP via Ruler, skills via skillshare, workflows via ContextOS. |
|
|
105
138
|
| Evidence | Stop hooks persist `followed`, `ignored`, `unknown`, and runtime telemetry for explicit reports. |
|
|
106
139
|
|
|
140
|
+
## Comparison
|
|
141
|
+
|
|
142
|
+
| Approach | What it gives the agent | Main gap |
|
|
143
|
+
| --- | --- | --- |
|
|
144
|
+
| Plain `AGENTS.md` | Static repo instructions. | Important rules get buried or ignored when the task changes. |
|
|
145
|
+
| Generic RAG | Semantically related files or snippets. | It usually does not route skills/workflows or prove rule compliance. |
|
|
146
|
+
| ContextOS | Task-routed rules, files, skills, workflows, and evidence. | Requires local setup and warm indexes for best results. |
|
|
147
|
+
|
|
107
148
|
## Quick Commands
|
|
108
149
|
|
|
109
150
|
| Command | Use it for |
|
|
@@ -114,6 +155,7 @@ The problem is not that agents cannot read `AGENTS.md`. The problem is that larg
|
|
|
114
155
|
| `ctx evidence` | Show why each rule was marked followed/ignored/unknown. |
|
|
115
156
|
| `ctx stats` | Show workspace-level usage and effectiveness metrics. |
|
|
116
157
|
| `ctx benchmark -- "task"` | Compare raw AGENTS.md ordering vs ContextOS scheduling. |
|
|
158
|
+
| `ctx benchmark --skills` | Run the Skill Router eval benchmark. |
|
|
117
159
|
| `ctx sync --rules` | Sync AGENTS/Ruler/MCP config across agents. |
|
|
118
160
|
| `ctx sync --skills` | Sync skills across agents through skillshare. |
|
|
119
161
|
| `ctx sync --workflows` | Sync workflow markdown across Claude/Codex/Antigravity. |
|
|
@@ -225,6 +267,14 @@ Restart Antigravity or `agy` after installing.
|
|
|
225
267
|
|
|
226
268
|
The embedding model is mandatory. `ctx install` checks `~/.ctx/contextos/models` first and downloads the MiniLM model only when the required local files are missing. It intentionally fails if the model cannot be prepared, because otherwise the first prompt hook would have to cold-load or download the model.
|
|
227
269
|
|
|
270
|
+
ContextOS keeps the embedding model hot inside `ctx-mcp`. Prompt hooks never cold-load transformers; if the MCP bridge is unavailable or the model is still warming, hooks fail open with lightweight scoring. Current local smoke metrics:
|
|
271
|
+
|
|
272
|
+
```text
|
|
273
|
+
MCP warm p95: 15-58ms observed
|
|
274
|
+
Hook lightweight fallback: 0.69s
|
|
275
|
+
MCP embedding hot startup: 477ms
|
|
276
|
+
```
|
|
277
|
+
|
|
228
278
|
During install, ContextOS prints a 0-100 progress indicator. The longest stage is usually embedding warmup; if the model is already cached, install skips the download and only refreshes vectors.
|
|
229
279
|
|
|
230
280
|
Verify the published package in any project:
|
|
@@ -418,7 +468,7 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
|
|
|
418
468
|
| `ctx install --inject` | Installs ContextOS with explicit injection mode. | You want to be explicit in scripts or docs. | Same runtime behavior as the default install mode; if combined with `--quiet`, `--inject` wins. |
|
|
419
469
|
| `ctx install --copy` | Copies only the plugin payload to `$CODEX_HOME/plugins/ctx`. | Legacy local development or manual plugin experiments. | Does not sync the active marketplace, rebuild indexes, register MCP, or install global hooks. Prefer `ctx refresh` for active local updates. |
|
|
420
470
|
| `ctx setup` | Runs the first-run setup wizard. | You want the recommended onboarding flow after `npm install -g @minhpnq1807/contextos`. | Installs selected agents, optionally syncs Ruler rules/MCP and skillshare skills, asks which prompt sections to show, then prints next steps. |
|
|
421
|
-
| `ctx setup --yes` | Runs setup with defaults non-interactively. | You want scriptable
|
|
471
|
+
| `ctx setup --yes` | Runs setup with defaults non-interactively. | You want scriptable Codex setup. | Uses `codex`, enables injection, syncs rules, syncs skills, skips interactive community-skill installation when no TTY is available, and passes `--yes` to dependency setup prompts. Use `--agents codex,claude,agy` for multi-agent setup. |
|
|
422
472
|
| `ctx setup --agents <list>` | Runs setup for selected agents. | You want only part of the default set. | Accepts comma-separated `codex`, `claude`, `agy`, or `antigravity`. |
|
|
423
473
|
| `ctx setup --no-rules` | Skips Ruler sync during setup. | You only want hooks/MCP install and maybe skill sync. | Does not run `ctx sync --rules`. |
|
|
424
474
|
| `ctx setup --no-skills` | Skips skillshare sync during setup. | You do not want shared skills configured. | Does not run `ctx sync --skills`. |
|
|
@@ -428,6 +478,7 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
|
|
|
428
478
|
| `ctx evidence` | Shows detailed evidence behind the last report for the current workspace. | You want to inspect why a rule was marked `followed`, `ignored`, `unknown`, or `unmeasurable`. | Prints a compact evidence table plus per-rule detail tables. |
|
|
429
479
|
| `ctx stats` | Shows aggregate runtime metrics for the current workspace. | You want to know whether ContextOS is active and useful over time. | Prints sectioned tables for prompt/report counts, injection rate, efficiency, rule outcomes, hook events, last prompt, and last report. |
|
|
430
480
|
| `ctx benchmark -- "task"` | Compares baseline AGENTS.md ordering with ContextOS task-aware scheduling. | You want a before/after signal for lost-in-the-middle risk. | Prints tables for parsed/actionable/filtered rules, baseline middle-risk, scheduled high/mid rules, recency reminder status, and top scored rules. |
|
|
481
|
+
| `ctx benchmark --skills` | Runs the Skill Router eval benchmark. | You want evidence for skill routing accuracy and negative gates. | Prints top-1 accuracy, top-3 recall, false positive rate, confidence calibration, and negative gate accuracy across `eval/skill-routing` fixtures. |
|
|
431
482
|
| `ctx sync --rules` | Syncs project rules and MCP servers through Ruler. | You want Codex, Claude Code, and Antigravity to share one project rule/MCP source of truth. | Ensures `.ruler/ruler.toml`, injects `ctx-mcp`, imports existing MCP servers from Codex and project `.mcp.json`, runs `ruler apply --agents codex,claude,antigravity`, mirrors MCP servers to Antigravity MCP configs, and verifies generated config. |
|
|
432
483
|
| `ctx sync --rules --agents <list>` | Syncs only selected agents through Ruler. | You want to update one or two agents without touching the others. | Accepts comma-separated values such as `codex`, `claude`, `agy`, `antigravity`, or `codex,claude,agy`; `agy` is normalized to Ruler's `antigravity`. |
|
|
433
484
|
| `ctx sync --rules --dry-run` | Previews Ruler sync without writing files or running apply. | You want to inspect behavior before changing project config. | Prints the same flow with dry-run status. |
|
|
@@ -521,13 +572,67 @@ Injected prompt sections are intentionally compact: rules show only detected rul
|
|
|
521
572
|
|
|
522
573
|
Codex may flatten newlines in its `UserPromptSubmit hook (completed)` preview. The injected `additionalContext` payload remains multiline; this is a Codex preview display limitation.
|
|
523
574
|
|
|
524
|
-
Skill ranking
|
|
575
|
+
Skill ranking uses Skill Router v2. ContextOS still starts with semantic retrieval, but final confidence is evidence-based:
|
|
576
|
+
|
|
577
|
+
```text
|
|
578
|
+
final_score =
|
|
579
|
+
semantic_score * 0.35
|
|
580
|
+
+ prompt_trigger_score * 0.20
|
|
581
|
+
+ project_evidence_score * 0.25
|
|
582
|
+
+ file_config_score * 0.10
|
|
583
|
+
+ graph_score * 0.05
|
|
584
|
+
- negative_penalty * 0.20
|
|
585
|
+
```
|
|
586
|
+
|
|
587
|
+
Skill metadata can live beside `SKILL.md` as `skill.yaml`:
|
|
588
|
+
|
|
589
|
+
```yaml
|
|
590
|
+
id: eas
|
|
591
|
+
name: Expo EAS Deployment
|
|
592
|
+
positive_triggers:
|
|
593
|
+
prompts: [eas, expo build, deployed, android, ios]
|
|
594
|
+
files: [eas.json, app.json, app.config.ts]
|
|
595
|
+
dependencies: [expo, eas-cli]
|
|
596
|
+
negative_triggers:
|
|
597
|
+
dependencies: [next, vite]
|
|
598
|
+
files: [vercel.json]
|
|
599
|
+
related_skills:
|
|
600
|
+
- mobile-deployment
|
|
601
|
+
- github-actions-ci-cd
|
|
602
|
+
- env-secret-management
|
|
603
|
+
```
|
|
604
|
+
|
|
605
|
+
The project profile is built from bounded root/workspace `package.json` metadata, dependencies, scripts, detected languages, recent git files, and config files such as `eas.json`, `app.json`, `vercel.json`, and `.github/workflows/*`. ContextOS only gives high confidence to domain-specific skills when project evidence supports them. For example, `fix deployed` can rank `eas` highly in an Expo project with `eas.json` and `expo`, but a Next.js/Vercel project should route to Vercel and CI/CD deployment skills instead. Skill catalogs are deduplicated by normalized skill name before indexing and rendering.
|
|
606
|
+
|
|
607
|
+
Use `ctx skills doctor -- "task"` to inspect routing:
|
|
608
|
+
|
|
609
|
+
```bash
|
|
610
|
+
ctx skills doctor -- "fix deployed"
|
|
611
|
+
```
|
|
612
|
+
|
|
613
|
+
The doctor output shows semantic score, prompt triggers, dependency/file evidence, negative signals, and final confidence for each selected skill.
|
|
614
|
+
Confidence is calibrated separately from ranking and includes a band:
|
|
615
|
+
|
|
616
|
+
```text
|
|
617
|
+
high: >= 0.85
|
|
618
|
+
medium: 0.65-0.84
|
|
619
|
+
low: < 0.65
|
|
620
|
+
```
|
|
621
|
+
|
|
622
|
+
Use `ctx benchmark --skills` to run the local Skill Router benchmark. The eval lives in `eval/skill-routing` and currently covers 52 cases across deployment, auth, database, testing, mobile, and adversarial negative gates.
|
|
623
|
+
|
|
624
|
+
Current local benchmark:
|
|
525
625
|
|
|
526
626
|
```text
|
|
527
|
-
|
|
627
|
+
Cases: 52
|
|
628
|
+
Top-1 Accuracy: 92.3%
|
|
629
|
+
Top-3 Recall: 94.2%
|
|
630
|
+
False Positive Rate: 0.0%
|
|
631
|
+
Confidence Calibration: 100.0%
|
|
632
|
+
Negative Gate Accuracy: 100.0%
|
|
528
633
|
```
|
|
529
634
|
|
|
530
|
-
The
|
|
635
|
+
The benchmark includes same-prompt/different-repo checks such as `fix deployed` in Expo/EAS, Next/Vercel, and ContextOS itself, plus adversarial cases like `expo-with-vercel-json` where `eas` is expected and `vercel-deployment` must be rejected.
|
|
531
636
|
|
|
532
637
|
After `ctx refresh`, ContextOS invalidates the private hook bridge socket so prompts fall back to direct scoring until Codex restarts the long-running `ctx-mcp` process. Hook clients also discard a same-inode socket if an older bridge revision is detected.
|
|
533
638
|
|
|
@@ -541,10 +646,10 @@ CONTEXTOS_EMBEDDINGS=0 disable embedding rule scoring
|
|
|
541
646
|
CONTEXTOS_MCP_CONNECT_TIMEOUT_MS=100 stale ctx-mcp socket connect timeout
|
|
542
647
|
CONTEXTOS_MCP_BRIDGE_TIMEOUT_MS=2000 ctx-mcp hook bridge timeout
|
|
543
648
|
CONTEXTOS_HOOK_DEADLINE_MS=8500 hard fail-open deadline for prompt hooks
|
|
544
|
-
CONTEXTOS_DIRECT_FALLBACK_TIMEOUT_MS=
|
|
649
|
+
CONTEXTOS_DIRECT_FALLBACK_TIMEOUT_MS=2500 direct scoring timeout when the bridge is unavailable
|
|
545
650
|
CONTEXTOS_HOOK_EMBEDDING_TIMEOUT_MS=500 rule embedding timeout during hook direct fallback
|
|
546
651
|
CONTEXTOS_EMBEDDING_TIMEOUT_MS=800 embedding scoring timeout inside ctx-mcp/debug
|
|
547
|
-
CONTEXTOS_HOOK_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout
|
|
652
|
+
CONTEXTOS_HOOK_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout when embeddings are enabled
|
|
548
653
|
CONTEXTOS_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout inside ctx-mcp/debug
|
|
549
654
|
CONTEXTOS_FILE_EMBEDDINGS=0 disable file-path embedding retrieval
|
|
550
655
|
CONTEXTOS_HOOK_FILE_EMBEDDING_TIMEOUT_MS=500 file retrieval timeout during hook direct fallback
|
package/bin/ctx.js
CHANGED
|
@@ -19,6 +19,7 @@ import { scoreContext } from "../plugins/ctx/lib/score-context.js";
|
|
|
19
19
|
import { defaultDataRoot, workspaceDataDir, workspaceMarkerPath } from "../plugins/ctx/lib/workspace-data.js";
|
|
20
20
|
import { installMcpTelemetryProxies } from "../plugins/ctx/lib/mcp-proxy-install.js";
|
|
21
21
|
import { benchmarkWorkspace, formatBenchmark } from "../plugins/ctx/lib/benchmark.js";
|
|
22
|
+
import { formatSkillRoutingBenchmark, runSkillRoutingEval } from "../eval/skill-routing/run-eval.js";
|
|
22
23
|
import { copyDir, copyPackageRoot, syncPackageRoot } from "../plugins/ctx/lib/package-install.js";
|
|
23
24
|
import { installClaudeHooks } from "../plugins/ctx/lib/claude-hooks.js";
|
|
24
25
|
import { installClaudeMcp } from "../plugins/ctx/lib/claude-mcp.js";
|
|
@@ -30,7 +31,7 @@ import { readCodexMcpServers, syncRules } from "../plugins/ctx/lib/ruler-sync.js
|
|
|
30
31
|
import { detectGraphStrategy, embedCodeReviewGraph, formatCodeReviewGraphEmbedding, formatGraphStrategy } from "../plugins/ctx/lib/graph-strategy.js";
|
|
31
32
|
import { writeInnerGitignore, ensureRootGitignore } from "../plugins/ctx/lib/gitignore.js";
|
|
32
33
|
import { dedupeAgentVisibleSkills, repairSkillSymlinks, syncSkills, detectExistingSkills } from "../plugins/ctx/lib/skillshare-sync.js";
|
|
33
|
-
import { scanSkills, warmSkillEmbeddings } from "../plugins/ctx/lib/skill-discoverer.js";
|
|
34
|
+
import { diagnoseSkills, scanSkills, warmSkillEmbeddings } from "../plugins/ctx/lib/skill-discoverer.js";
|
|
34
35
|
import { parsePassthroughArgs, runPassthrough } from "../plugins/ctx/lib/passthrough.js";
|
|
35
36
|
import { parseAgentList, parseSetupArgs, setupSummaryLines } from "../plugins/ctx/lib/setup-wizard.js";
|
|
36
37
|
import { multiSelect } from "../plugins/ctx/lib/multi-select.js";
|
|
@@ -193,6 +194,7 @@ Usage:
|
|
|
193
194
|
ctx evidence Show evidence from last report
|
|
194
195
|
ctx stats Show workspace statistics
|
|
195
196
|
ctx benchmark -- "task" Benchmark workspace for a task
|
|
197
|
+
ctx benchmark --skills Run skill routing eval benchmark
|
|
196
198
|
ctx sync --rules Sync AGENTS.md rules to all agents
|
|
197
199
|
ctx sync --rules --agents <names> Sync rules to specific agents only
|
|
198
200
|
ctx sync --rules --dry-run Preview rule sync without writing
|
|
@@ -207,6 +209,7 @@ Usage:
|
|
|
207
209
|
ctx sync --workflows --agents <names> Sync workflows to specific agents
|
|
208
210
|
ctx sync --workflows --dry-run Preview workflow sync without writing
|
|
209
211
|
ctx skills Browse community skill libraries
|
|
212
|
+
ctx skills doctor -- "task" Explain skill routing for a task
|
|
210
213
|
ctx skills --agents <names> Filter skills for specific agents
|
|
211
214
|
ctx skills --refresh Force refresh skill library cache
|
|
212
215
|
ctx --config Choose prompt context sections to show
|
|
@@ -650,6 +653,38 @@ async function debug(task) {
|
|
|
650
653
|
console.log(scheduled.additionalContext || "(empty)");
|
|
651
654
|
}
|
|
652
655
|
|
|
656
|
+
async function skillsDoctor(task) {
|
|
657
|
+
if (!String(task || "").trim()) throw new Error('Usage: ctx skills doctor -- "task"');
|
|
658
|
+
const result = await diagnoseSkills({
|
|
659
|
+
cwd: process.cwd(),
|
|
660
|
+
prompt: task,
|
|
661
|
+
dataDir: contextOSDataDir(),
|
|
662
|
+
skills: scanSkills({ cwd: process.cwd() }),
|
|
663
|
+
limit: outputConfigLimits(loadOutputConfig({ dataRoot: contextOSDataDir() })).skills,
|
|
664
|
+
timeoutMs: Number(process.env.CONTEXTOS_SKILL_DOCTOR_TIMEOUT_MS || 3000)
|
|
665
|
+
});
|
|
666
|
+
|
|
667
|
+
console.log("ContextOS skill doctor");
|
|
668
|
+
console.log(`cwd: ${result.cwd}`);
|
|
669
|
+
console.log(`prompt: ${result.prompt}`);
|
|
670
|
+
console.log("");
|
|
671
|
+
console.log("Project evidence:");
|
|
672
|
+
console.log(`dependencies: ${result.projectEvidence.dependencies.slice(0, 30).join(", ") || "(none)"}`);
|
|
673
|
+
console.log(`files: ${result.projectEvidence.files.slice(0, 30).join(", ") || "(none)"}`);
|
|
674
|
+
console.log("");
|
|
675
|
+
console.log("Skills:");
|
|
676
|
+
if (!result.skills.length) {
|
|
677
|
+
console.log("(none)");
|
|
678
|
+
return;
|
|
679
|
+
}
|
|
680
|
+
for (const skill of result.skills) {
|
|
681
|
+
console.log(`${Number(skill.confidence || skill.score || 0).toFixed(2)} ${skill.confidenceBand || "low"} ${skill.name}`);
|
|
682
|
+
console.log(` semantic:${Number(skill.semanticScore || 0).toFixed(2)} prompt:${Number(skill.promptTriggerScore || 0).toFixed(2)} project:${Number(skill.projectEvidenceScore || 0).toFixed(2)} files:${Number(skill.fileConfigScore || 0).toFixed(2)} negative:${Number(skill.negativePenalty || 0).toFixed(2)}`);
|
|
683
|
+
if (skill.evidence?.length) console.log(` evidence: ${skill.evidence.join(", ")}`);
|
|
684
|
+
if (skill.negativeEvidence?.length) console.log(` rejected signals: ${skill.negativeEvidence.join(", ")}`);
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
|
|
653
688
|
async function warmEmbeddings(task, { syncMarketplace = true, quiet = false } = {}) {
|
|
654
689
|
const warmResult = await warmWorkspaceIndexes({ task });
|
|
655
690
|
const marketplaceSync = syncMarketplace ? syncActiveCodexMarketplace() : null;
|
|
@@ -874,15 +909,21 @@ async function setup({ args = [], cwd = process.cwd() } = {}) {
|
|
|
874
909
|
const totalExisting = existing.reduce((sum, e) => sum + e.count, 0);
|
|
875
910
|
if (totalExisting === 0) {
|
|
876
911
|
console.log("");
|
|
877
|
-
console.log(
|
|
878
|
-
console.log(
|
|
912
|
+
console.log("⚠ No skills found on this machine.");
|
|
913
|
+
console.log("│ Install community skills to get started.");
|
|
879
914
|
console.log("");
|
|
880
915
|
|
|
881
|
-
|
|
882
|
-
|
|
916
|
+
if (options.yes || !process.stdin.isTTY) {
|
|
917
|
+
console.log("│ Skipping community skill installer in non-interactive setup.");
|
|
918
|
+
console.log("│ Run: ctx skills");
|
|
883
919
|
console.log("");
|
|
884
|
-
|
|
885
|
-
await
|
|
920
|
+
} else {
|
|
921
|
+
const installed = await runCommunitySkillInstaller(options.agents);
|
|
922
|
+
if (installed > 0) {
|
|
923
|
+
console.log("");
|
|
924
|
+
console.log("◇ Re-syncing skills after install...");
|
|
925
|
+
await doSyncSkills();
|
|
926
|
+
}
|
|
886
927
|
}
|
|
887
928
|
}
|
|
888
929
|
}
|
|
@@ -981,11 +1022,21 @@ try {
|
|
|
981
1022
|
} else if (command === "stats") {
|
|
982
1023
|
console.log(formatStats(loadStats(contextOSWorkspaceDataDir())));
|
|
983
1024
|
} else if (command === "benchmark") {
|
|
1025
|
+
if (args.includes("--skills")) {
|
|
1026
|
+
console.log(formatSkillRoutingBenchmark(await runSkillRoutingEval({ rootDir })));
|
|
1027
|
+
} else {
|
|
984
1028
|
const marker = args.indexOf("--");
|
|
985
1029
|
const task = marker >= 0 ? args.slice(marker + 1).join(" ") : args.slice(1).join(" ");
|
|
986
1030
|
if (!task.trim()) throw new Error('Usage: ctx benchmark -- "task"');
|
|
987
1031
|
console.log(formatBenchmark(benchmarkWorkspace({ cwd: process.cwd(), task })));
|
|
1032
|
+
}
|
|
988
1033
|
} else if (command === "skills") {
|
|
1034
|
+
if (args[1] === "doctor") {
|
|
1035
|
+
const marker = args.indexOf("--");
|
|
1036
|
+
const task = marker >= 0 ? args.slice(marker + 1).join(" ") : args.slice(2).join(" ");
|
|
1037
|
+
await skillsDoctor(task);
|
|
1038
|
+
process.exitCode = 0;
|
|
1039
|
+
} else {
|
|
989
1040
|
// Interactive community skill library selector + installer
|
|
990
1041
|
const agentsFlag = args.indexOf("--agents");
|
|
991
1042
|
const forceRefresh = args.includes("--refresh");
|
|
@@ -1020,6 +1071,7 @@ try {
|
|
|
1020
1071
|
}));
|
|
1021
1072
|
}
|
|
1022
1073
|
console.log("");
|
|
1074
|
+
}
|
|
1023
1075
|
} else if (command === "sync") {
|
|
1024
1076
|
if (args.includes("--workflows")) {
|
|
1025
1077
|
await syncWorkflows({
|