@minhpnq1807/contextos 0.5.51 → 0.5.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -2
- package/README.md +147 -13
- package/bin/ctx.js +59 -7
- package/eval/skill-routing/cases.yaml +366 -0
- package/eval/skill-routing/fixtures/docker-node/Dockerfile +4 -0
- package/eval/skill-routing/fixtures/docker-node/docker-compose.yml +5 -0
- package/eval/skill-routing/fixtures/docker-node/package.json +6 -0
- package/eval/skill-routing/fixtures/expo-eas/.github/workflows/eas.yml +1 -0
- package/eval/skill-routing/fixtures/expo-eas/app.json +5 -0
- package/eval/skill-routing/fixtures/expo-eas/eas.json +6 -0
- package/eval/skill-routing/fixtures/expo-eas/package.json +11 -0
- package/eval/skill-routing/fixtures/expo-with-vercel-json/app.json +6 -0
- package/eval/skill-routing/fixtures/expo-with-vercel-json/eas.json +5 -0
- package/eval/skill-routing/fixtures/expo-with-vercel-json/package.json +8 -0
- package/eval/skill-routing/fixtures/expo-with-vercel-json/vercel.json +3 -0
- package/eval/skill-routing/fixtures/express-mongo-jwt/package.json +8 -0
- package/eval/skill-routing/fixtures/firebase-hosting/firebase.json +11 -0
- package/eval/skill-routing/fixtures/firebase-hosting/package.json +6 -0
- package/eval/skill-routing/fixtures/flutter-firebase/pubspec.yaml +5 -0
- package/eval/skill-routing/fixtures/frontend-only-next/package.json +8 -0
- package/eval/skill-routing/fixtures/integration-test/jest.config.js +3 -0
- package/eval/skill-routing/fixtures/integration-test/package.json +10 -0
- package/eval/skill-routing/fixtures/jest-project/jest.config.js +3 -0
- package/eval/skill-routing/fixtures/jest-project/package.json +7 -0
- package/eval/skill-routing/fixtures/nest-prisma/package.json +10 -0
- package/eval/skill-routing/fixtures/nest-prisma/prisma/schema.prisma +4 -0
- package/eval/skill-routing/fixtures/next-vercel/.github/workflows/deploy.yml +1 -0
- package/eval/skill-routing/fixtures/next-vercel/package.json +8 -0
- package/eval/skill-routing/fixtures/next-vercel/vercel.json +3 -0
- package/eval/skill-routing/fixtures/oauth-google/.env.example +3 -0
- package/eval/skill-routing/fixtures/oauth-google/package.json +9 -0
- package/eval/skill-routing/fixtures/password-reset/package.json +8 -0
- package/eval/skill-routing/fixtures/playwright-project/package.json +6 -0
- package/eval/skill-routing/fixtures/playwright-project/playwright.config.ts +5 -0
- package/eval/skill-routing/fixtures/railway-render/package.json +6 -0
- package/eval/skill-routing/fixtures/railway-render/railway.json +6 -0
- package/eval/skill-routing/fixtures/railway-render/render.yaml +5 -0
- package/eval/skill-routing/fixtures/rbac-api/package.json +8 -0
- package/eval/skill-routing/fixtures/redis-cache/package.json +7 -0
- package/eval/skill-routing/fixtures/static-docs/README.md +3 -0
- package/eval/skill-routing/run-eval.js +278 -0
- package/package.json +3 -1
- package/plugins/ctx/.codex-plugin/plugin.json +1 -1
- package/plugins/ctx/lib/ctx-mcp-client.js +19 -0
- package/plugins/ctx/lib/embedding-scorer.js +34 -0
- package/plugins/ctx/lib/package-install.js +1 -1
- package/plugins/ctx/lib/prompt-hook.js +13 -2
- package/plugins/ctx/lib/setup-wizard.js +8 -3
- package/plugins/ctx/lib/skill-discoverer.js +439 -18
- package/plugins/ctx/mcp/contextos-server.js +29 -1
- package/plugins/ctx/mcp/server.js +50 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,7 +1,22 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## 0.5.
|
|
4
|
-
|
|
3
|
+
## 0.5.53
|
|
4
|
+
|
|
5
|
+
- **Optional adapter positioning:** Clarified that ContextOS core works standalone and that `code-review-graph`, `codegraph`, and `agent-memory` are optional adapters. Skill Router scoring now exposes separate `importGraphScore`, `externalGraphScore`, and `memoryScore` fields so missing adapters degrade to zero score instead of becoming install/runtime requirements.
|
|
6
|
+
- **Adapter-aware benchmark update:** Updated the Skill Router formula to reserve explicit weights for local import graph, optional external graph, and optional memory adapters. The 52-case internal benchmark now reports Top-1 Accuracy 94.2%, Top-3 Recall 94.2%, False Positive Rate 0.0%, Confidence Calibration 100.0%, and Negative Gate Accuracy 100.0%.
|
|
7
|
+
- **Release safety docs:** Added a README safety model covering standalone install, optional adapters, fail-open hooks, local-only telemetry, no hook network calls, and no postinstall behavior.
|
|
8
|
+
- **Launch roadmap template:** Added a GitHub issue template for release hardening, README polish, benchmarks, optional adapters, setup, and telemetry roadmap work.
|
|
9
|
+
|
|
10
|
+
## 0.5.52
|
|
11
|
+
|
|
12
|
+
- **Release candidate polish:** Updated README positioning around ContextOS as a runtime context router, added npm/CI/license badges, a same-prompt/different-repo demo section, a benchmark table, a 30-second install callout, and an AGENTS.md vs RAG vs ContextOS comparison table.
|
|
13
|
+
- **Non-interactive setup safety:** `ctx setup --yes` now defaults to Codex instead of failing with no selected agents, and skips the community skill installer when no TTY is available so release/install smoke tests can complete unattended.
|
|
14
|
+
- **Hot MCP scorer:** `ctx-mcp` now preloads the local embedding pipeline and exposes `ctx_health`/bridge health so prompt hooks only call semantic scoring when the long-running scorer is ready.
|
|
15
|
+
- **Skill Router v2:** Skill suggestions now combine semantic similarity with prompt triggers, dependency evidence, config-file evidence, negative triggers, and confidence explanations. Optional `skill.yaml` metadata beside `SKILL.md` can define positive/negative triggers and related skills.
|
|
16
|
+
- **Confidence calibration:** Skill Router confidence is now calibrated separately from ranking. Prompt-only or semantic-only matches are capped, prompt+project-evidence matches are promoted to medium confidence, dependency+file evidence promotes to high confidence, negative signals cap confidence, and `ctx skills doctor` shows `high`/`medium`/`low` bands.
|
|
17
|
+
- **Skill doctor:** Added `ctx skills doctor -- "task"` to explain selected skills with semantic score, prompt trigger score, project evidence, file evidence, negative signals, and final confidence.
|
|
18
|
+
- **Skill routing eval:** Added `eval/skill-routing` fixtures and `ctx benchmark --skills` to report top-1 accuracy, top-3 recall, false positive rate, confidence calibration, and negative gate accuracy for evidence-based skill routing.
|
|
19
|
+
- **Expanded Skill Router benchmark:** Expanded the eval from the initial 6-case smoke set to 52 cases across deployment, auth, database, testing, mobile, and adversarial negative gates. Current local benchmark: Top-1 Accuracy 92.3%, Top-3 Recall 94.2%, False Positive Rate 0.0%, Confidence Calibration 100.0%, Negative Gate Accuracy 100.0%.
|
|
5
20
|
- **Faster prompt fallback:** Direct prompt-hook fallback now skips embedding work and uses a shorter timeout, so context injection can still return deterministic rule, file, skill, and workflow candidates when MCP or semantic scoring is unavailable.
|
|
6
21
|
- **Shared skill index fallback:** Skill discovery now warms a shared global skill index and searches it when the workspace-specific skill index has no matches, improving reuse across projects.
|
|
7
22
|
- **Agent-visible skill dedupe:** Community skill installs and skill sync now remove duplicate skills visible through shared, Codex, and Antigravity roots while preserving unique agent-specific skills.
|
package/README.md
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
# ContextOS
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Runtime context router for coding agents.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Rules, files, skills, workflows, and evidence: injected before the agent writes code.
|
|
6
|
+
|
|
7
|
+
[npm version](https://www.npmjs.com/package/@minhpnq1807/contextos)
|
|
8
|
+
[CI](https://github.com/khovan123/contextOS/actions/workflows/ci.yml)
|
|
9
|
+
[License](LICENSE)
|
|
6
10
|
|
|
7
11
|
```text
|
|
8
12
|
WITHOUT ContextOS
|
|
@@ -17,11 +21,38 @@ WITH ContextOS
|
|
|
17
21
|
-> report followed / ignored / unknown
|
|
18
22
|
```
|
|
19
23
|
|
|
24
|
+
ContextOS is not another `AGENTS.md` loader. It is a runtime context router for coding agents: it chooses the task-relevant rules, files, skills, workflows, and evidence before the agent starts editing.
|
|
25
|
+
|
|
20
26
|
Published package: [`@minhpnq1807/contextos`](https://www.npmjs.com/package/@minhpnq1807/contextos)
|
|
21
27
|
|
|
22
28
|
## Demo
|
|
23
29
|
|
|
24
|
-

|
|
31
|
+
|
|
32
|
+
Same prompt. Different repo. Correct skills.
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
ctx skills doctor -- "fix deployed"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
| Repo evidence | Expected route |
|
|
39
|
+
| --- | --- |
|
|
40
|
+
| `eas.json`, `expo`, `react-native` | `eas`, `mobile-deployment`, `github-actions-ci-cd` |
|
|
41
|
+
| `vercel.json`, `next`, GitHub workflow | `vercel-deployment`, `github-actions-ci-cd`, `env-secret-management` |
|
|
42
|
+
| ContextOS repo with no app deploy evidence | no deployment skill selected |
|
|
43
|
+
|
|
44
|
+
Skill Router internal fixture benchmark:
|
|
45
|
+
|
|
46
|
+
| Metric | Result |
|
|
47
|
+
| --- | ---: |
|
|
48
|
+
| Cases | 52 |
|
|
49
|
+
| Top-1 Accuracy | 94.2% |
|
|
50
|
+
| Top-3 Recall | 94.2% |
|
|
51
|
+
| False Positive Rate | 0.0% |
|
|
52
|
+
| Confidence Calibration | 100.0% |
|
|
53
|
+
| Negative Gate Accuracy | 100.0% |
|
|
54
|
+
|
|
55
|
+
This is an internal fixture benchmark, not an external real-world benchmark. It is designed to prove the router behavior across controlled Expo/EAS, Next/Vercel, Docker, Railway/Render, Firebase, auth, database, testing, mobile, and adversarial negative-gate cases.
|
|
25
56
|
|
|
26
57
|
Example hook context injected before the agent works:
|
|
27
58
|
|
|
@@ -51,6 +82,8 @@ Runtime telemetry: code-review-graph, code-review-graph.query_graph_tool
|
|
|
51
82
|
|
|
52
83
|
## Quick Install
|
|
53
84
|
|
|
85
|
+
Install in 30 seconds:
|
|
86
|
+
|
|
54
87
|
```bash
|
|
55
88
|
npm install -g @minhpnq1807/contextos
|
|
56
89
|
ctx setup
|
|
@@ -104,6 +137,29 @@ The problem is not that agents cannot read `AGENTS.md`. The problem is that larg
|
|
|
104
137
|
| Sync | Rules/MCP via Ruler, skills via skillshare, workflows via ContextOS. |
|
|
105
138
|
| Evidence | Stop hooks persist `followed`, `ignored`, `unknown`, and runtime telemetry for explicit reports. |
|
|
106
139
|
|
|
140
|
+
## Comparison
|
|
141
|
+
|
|
142
|
+
| Approach | What it gives the agent | Main gap |
|
|
143
|
+
| --- | --- | --- |
|
|
144
|
+
| Plain `AGENTS.md` | Static repo instructions. | Important rules get buried or ignored when the task changes. |
|
|
145
|
+
| Generic RAG | Semantically related files or snippets. | It usually does not route skills/workflows or prove rule compliance. |
|
|
146
|
+
| ContextOS | Task-routed rules, files, skills, workflows, and evidence. | Requires local setup and warm indexes for best results. |
|
|
147
|
+
|
|
148
|
+
## Safety Model
|
|
149
|
+
|
|
150
|
+
ContextOS is designed to be OSS-friendly and low-friction:
|
|
151
|
+
|
|
152
|
+
| Guarantee | Behavior |
|
|
153
|
+
| --- | --- |
|
|
154
|
+
| Standalone by default | `ctx setup` works without `code-review-graph`, `codegraph`, or `agent-memory`. |
|
|
155
|
+
| Optional adapters | Graph and memory backends add signal when available; missing adapters contribute score `0`. |
|
|
156
|
+
| Fail-open hooks | Prompt hooks return local context or nothing instead of blocking the agent when MCP, embeddings, graph, or memory is unavailable. |
|
|
157
|
+
| Local-only telemetry | Reports, prompt history, evidence, and telemetry stay under `~/.ctx/contextos/`. |
|
|
158
|
+
| No hook network calls | Prompt and stop hooks do not call external services. Install/warm commands may download the local embedding model when explicitly run. |
|
|
159
|
+
| No postinstall surprise | `npm install` only installs the CLI. Setup runs only when you call `ctx setup`. |
|
|
160
|
+
|
|
161
|
+
Positioning: ContextOS works standalone and gets smarter when graph or memory adapters are available.
|
|
162
|
+
|
|
107
163
|
## Quick Commands
|
|
108
164
|
|
|
109
165
|
| Command | Use it for |
|
|
@@ -114,6 +170,7 @@ The problem is not that agents cannot read `AGENTS.md`. The problem is that larg
|
|
|
114
170
|
| `ctx evidence` | Show why each rule was marked followed/ignored/unknown. |
|
|
115
171
|
| `ctx stats` | Show workspace-level usage and effectiveness metrics. |
|
|
116
172
|
| `ctx benchmark -- "task"` | Compare raw AGENTS.md ordering vs ContextOS scheduling. |
|
|
173
|
+
| `ctx benchmark --skills` | Run the Skill Router eval benchmark. |
|
|
117
174
|
| `ctx sync --rules` | Sync AGENTS/Ruler/MCP config across agents. |
|
|
118
175
|
| `ctx sync --skills` | Sync skills across agents through skillshare. |
|
|
119
176
|
| `ctx sync --workflows` | Sync workflow markdown across Claude/Codex/Antigravity. |
|
|
@@ -225,6 +282,14 @@ Restart Antigravity or `agy` after installing.
|
|
|
225
282
|
|
|
226
283
|
The embedding model is mandatory. `ctx install` checks `~/.ctx/contextos/models` first and downloads the MiniLM model only when the required local files are missing. It intentionally fails if the model cannot be prepared, because otherwise the first prompt hook would have to cold-load or download the model.
|
|
227
284
|
|
|
285
|
+
ContextOS keeps the embedding model hot inside `ctx-mcp`. Prompt hooks never cold-load transformers; if the MCP bridge is unavailable or the model is still warming, hooks fail open with lightweight scoring. Current local smoke metrics:
|
|
286
|
+
|
|
287
|
+
```text
|
|
288
|
+
MCP warm p95: 15-58ms observed
|
|
289
|
+
Hook lightweight fallback: 0.69s
|
|
290
|
+
MCP embedding hot startup: 477ms
|
|
291
|
+
```
|
|
292
|
+
|
|
228
293
|
During install, ContextOS prints a 0-100 progress indicator. The longest stage is usually embedding warmup; if the model is already cached, install skips the download and only refreshes vectors.
|
|
229
294
|
|
|
230
295
|
Verify the published package in any project:
|
|
@@ -418,7 +483,7 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
|
|
|
418
483
|
| `ctx install --inject` | Installs ContextOS with explicit injection mode. | You want to be explicit in scripts or docs. | Same runtime behavior as the default install mode; if combined with `--quiet`, `--inject` wins. |
|
|
419
484
|
| `ctx install --copy` | Copies only the plugin payload to `$CODEX_HOME/plugins/ctx`. | Legacy local development or manual plugin experiments. | Does not sync the active marketplace, rebuild indexes, register MCP, or install global hooks. Prefer `ctx refresh` for active local updates. |
|
|
420
485
|
| `ctx setup` | Runs the first-run setup wizard. | You want the recommended onboarding flow after `npm install -g @minhpnq1807/contextos`. | Installs selected agents, optionally syncs Ruler rules/MCP and skillshare skills, asks which prompt sections to show, then prints next steps. |
|
|
421
|
-
| `ctx setup --yes` | Runs setup with defaults non-interactively. | You want scriptable
|
|
486
|
+
| `ctx setup --yes` | Runs setup with defaults non-interactively. | You want scriptable Codex setup. | Uses `codex`, enables injection, syncs rules, syncs skills, skips interactive community-skill installation when no TTY is available, and passes `--yes` to dependency setup prompts. Use `--agents codex,claude,agy` for multi-agent setup. |
|
|
422
487
|
| `ctx setup --agents <list>` | Runs setup for selected agents. | You want only part of the default set. | Accepts comma-separated `codex`, `claude`, `agy`, or `antigravity`. |
|
|
423
488
|
| `ctx setup --no-rules` | Skips Ruler sync during setup. | You only want hooks/MCP install and maybe skill sync. | Does not run `ctx sync --rules`. |
|
|
424
489
|
| `ctx setup --no-skills` | Skips skillshare sync during setup. | You do not want shared skills configured. | Does not run `ctx sync --skills`. |
|
|
@@ -428,6 +493,7 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
|
|
|
428
493
|
| `ctx evidence` | Shows detailed evidence behind the last report for the current workspace. | You want to inspect why a rule was marked `followed`, `ignored`, `unknown`, or `unmeasurable`. | Prints a compact evidence table plus per-rule detail tables. |
|
|
429
494
|
| `ctx stats` | Shows aggregate runtime metrics for the current workspace. | You want to know whether ContextOS is active and useful over time. | Prints sectioned tables for prompt/report counts, injection rate, efficiency, rule outcomes, hook events, last prompt, and last report. |
|
|
430
495
|
| `ctx benchmark -- "task"` | Compares baseline AGENTS.md ordering with ContextOS task-aware scheduling. | You want a before/after signal for lost-in-the-middle risk. | Prints tables for parsed/actionable/filtered rules, baseline middle-risk, scheduled high/mid rules, recency reminder status, and top scored rules. |
|
|
496
|
+
| `ctx benchmark --skills` | Runs the Skill Router eval benchmark. | You want evidence for skill routing accuracy and negative gates. | Prints top-1 accuracy, top-3 recall, false positive rate, confidence calibration, and negative gate accuracy across `eval/skill-routing` fixtures. |
|
|
431
497
|
| `ctx sync --rules` | Syncs project rules and MCP servers through Ruler. | You want Codex, Claude Code, and Antigravity to share one project rule/MCP source of truth. | Ensures `.ruler/ruler.toml`, injects `ctx-mcp`, imports existing MCP servers from Codex and project `.mcp.json`, runs `ruler apply --agents codex,claude,antigravity`, mirrors MCP servers to Antigravity MCP configs, and verifies generated config. |
|
|
432
498
|
| `ctx sync --rules --agents <list>` | Syncs only selected agents through Ruler. | You want to update one or two agents without touching the others. | Accepts comma-separated values such as `codex`, `claude`, `agy`, `antigravity`, or `codex,claude,agy`; `agy` is normalized to Ruler's `antigravity`. |
|
|
433
499
|
| `ctx sync --rules --dry-run` | Previews Ruler sync without writing files or running apply. | You want to inspect behavior before changing project config. | Prints the same flow with dry-run status. |
|
|
@@ -492,7 +558,17 @@ These files are local telemetry only. Hooks do not make network calls.
|
|
|
492
558
|
|
|
493
559
|
## Project Understanding
|
|
494
560
|
|
|
495
|
-
ContextOS
|
|
561
|
+
ContextOS works standalone. The core path is local rules, file embeddings, import graph expansion, skill routing, workflow routing, and evidence capture.
|
|
562
|
+
|
|
563
|
+
Project graph and memory backends are optional adapters:
|
|
564
|
+
|
|
565
|
+
| Adapter | What it adds | Required? |
|
|
566
|
+
| --- | --- | --- |
|
|
567
|
+
| `code-review-graph` | Blast radius, semantic node search, and test relationships. | No |
|
|
568
|
+
| `codegraph` | Symbol/call graph context once its MCP schema is stable. | No |
|
|
569
|
+
| `agent-memory` / `agentmemory` | Prior task history, decisions, and recurring bug-fix context. | No |
|
|
570
|
+
|
|
571
|
+
ContextOS does not require `code-review-graph`, `codegraph`, or `agent-memory` to install or run. It gets smarter when those backends are available; when they are missing, the adapter scores stay at zero and the hook continues with local context.
|
|
496
572
|
|
|
497
573
|
For file suggestions, ContextOS now runs a local RAG-style retrieval pass:
|
|
498
574
|
|
|
@@ -502,12 +578,12 @@ prompt
|
|
|
502
578
|
-> ctx-mcp reads AGENTS.md and scores rules with local MiniLM
|
|
503
579
|
-> query the persisted file-vector index in embeddings.db for semantic file candidates
|
|
504
580
|
-> expand candidates through relative import graph links
|
|
505
|
-
-> query code-review-graph semantic_search_nodes with seed entity names
|
|
506
|
-
-> merge and deduplicate semantic, import-graph, and
|
|
581
|
+
-> optionally query code-review-graph semantic_search_nodes with seed entity names
|
|
582
|
+
-> merge and deduplicate semantic, import-graph, and optional graph matches
|
|
507
583
|
-> inject top suggested files with graph evidence reasons
|
|
508
584
|
```
|
|
509
585
|
|
|
510
|
-
This keeps the hook fast and local while still using graph semantics when available. The graph search path is visible in runtime data through file reasons such as `graph:content-moderation.service`.
|
|
586
|
+
This keeps the hook fast and local while still using graph semantics when available. The graph search path is visible in runtime data through file reasons such as `graph:content-moderation.service`. When no graph adapter is available, file suggestions still use local file vectors and import graph expansion.
|
|
511
587
|
|
|
512
588
|
Prompt scoring does not walk the repository for file candidates or import expansion. `ctx install` and `ctx embeddings warm` rebuild the persisted file-vector index and one-hop import adjacency index by walking source paths once; prompt hooks query those indexes directly. Rules, files, skills, and workflows are scored concurrently with `Promise.all()`.
|
|
513
589
|
|
|
@@ -521,13 +597,71 @@ Injected prompt sections are intentionally compact: rules show only detected rul
|
|
|
521
597
|
|
|
522
598
|
Codex may flatten newlines in its `UserPromptSubmit hook (completed)` preview. The injected `additionalContext` payload remains multiline; this is a Codex preview display limitation.
|
|
523
599
|
|
|
524
|
-
Skill ranking
|
|
600
|
+
Skill ranking uses Skill Router v2. ContextOS still starts with semantic retrieval, but final confidence is evidence-based:
|
|
601
|
+
|
|
602
|
+
```text
|
|
603
|
+
final_score =
|
|
604
|
+
semantic_score * 0.30
|
|
605
|
+
+ prompt_trigger_score * 0.20
|
|
606
|
+
+ project_evidence_score * 0.20
|
|
607
|
+
+ file_config_score * 0.10
|
|
608
|
+
+ import_graph_score * 0.10
|
|
609
|
+
+ external_graph_score * 0.05
|
|
610
|
+
+ memory_score * 0.05
|
|
611
|
+
- negative_penalty * 0.20
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
`external_graph_score` is supplied by optional project graph adapters such as `code-review-graph` or `codegraph`. `memory_score` is reserved for optional memory adapters such as `agent-memory`. Without those adapters, both scores are `0`.
|
|
615
|
+
|
|
616
|
+
Skill metadata can live beside `SKILL.md` as `skill.yaml`:
|
|
617
|
+
|
|
618
|
+
```yaml
|
|
619
|
+
id: eas
|
|
620
|
+
name: Expo EAS Deployment
|
|
621
|
+
positive_triggers:
|
|
622
|
+
prompts: [eas, expo build, deployed, android, ios]
|
|
623
|
+
files: [eas.json, app.json, app.config.ts]
|
|
624
|
+
dependencies: [expo, eas-cli]
|
|
625
|
+
negative_triggers:
|
|
626
|
+
dependencies: [next, vite]
|
|
627
|
+
files: [vercel.json]
|
|
628
|
+
related_skills:
|
|
629
|
+
- mobile-deployment
|
|
630
|
+
- github-actions-ci-cd
|
|
631
|
+
- env-secret-management
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
The project profile is built from bounded root/workspace `package.json` metadata, dependencies, scripts, detected languages, recent git files, and config files such as `eas.json`, `app.json`, `vercel.json`, and `.github/workflows/*`. ContextOS only gives high confidence to domain-specific skills when project evidence supports them. For example, `fix deployed` can rank `eas` highly in an Expo project with `eas.json` and `expo`, but a Next.js/Vercel project should route to Vercel and CI/CD deployment skills instead. Skill catalogs are deduplicated by normalized skill name before indexing and rendering.
|
|
635
|
+
|
|
636
|
+
Use `ctx skills doctor -- "task"` to inspect routing:
|
|
637
|
+
|
|
638
|
+
```bash
|
|
639
|
+
ctx skills doctor -- "fix deployed"
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
The doctor output shows semantic score, prompt triggers, dependency/file evidence, negative signals, and final confidence for each selected skill.
|
|
643
|
+
Confidence is calibrated separately from ranking and includes a band:
|
|
644
|
+
|
|
645
|
+
```text
|
|
646
|
+
high: >= 0.85
|
|
647
|
+
medium: 0.65-0.84
|
|
648
|
+
low: < 0.65
|
|
649
|
+
```
|
|
650
|
+
|
|
651
|
+
Use `ctx benchmark --skills` to run the local Skill Router benchmark. The eval lives in `eval/skill-routing` and currently covers 52 cases across deployment, auth, database, testing, mobile, and adversarial negative gates.
|
|
652
|
+
|
|
653
|
+
Current local benchmark:
|
|
525
654
|
|
|
526
655
|
```text
|
|
527
|
-
|
|
656
|
+
Cases: 52
|
|
657
|
+
Top-1 Accuracy: 94.2%
|
|
658
|
+
Top-3 Recall: 94.2%
|
|
659
|
+
False Positive Rate: 0.0%
|
|
660
|
+
Confidence Calibration: 100.0%
|
|
661
|
+
Negative Gate Accuracy: 100.0%
|
|
528
662
|
```
|
|
529
663
|
|
|
530
|
-
The
|
|
664
|
+
The benchmark includes same-prompt/different-repo checks such as `fix deployed` in Expo/EAS, Next/Vercel, and ContextOS itself, plus adversarial cases like `expo-with-vercel-json` where `eas` is expected and `vercel-deployment` must be rejected.
|
|
531
665
|
|
|
532
666
|
After `ctx refresh`, ContextOS invalidates the private hook bridge socket so prompts fall back to direct scoring until Codex restarts the long-running `ctx-mcp` process. Hook clients also discard a same-inode socket if an older bridge revision is detected.
|
|
533
667
|
|
|
@@ -541,10 +675,10 @@ CONTEXTOS_EMBEDDINGS=0 disable embedding rule scoring
|
|
|
541
675
|
CONTEXTOS_MCP_CONNECT_TIMEOUT_MS=100 stale ctx-mcp socket connect timeout
|
|
542
676
|
CONTEXTOS_MCP_BRIDGE_TIMEOUT_MS=2000 ctx-mcp hook bridge timeout
|
|
543
677
|
CONTEXTOS_HOOK_DEADLINE_MS=8500 hard fail-open deadline for prompt hooks
|
|
544
|
-
CONTEXTOS_DIRECT_FALLBACK_TIMEOUT_MS=
|
|
678
|
+
CONTEXTOS_DIRECT_FALLBACK_TIMEOUT_MS=2500 direct scoring timeout when the bridge is unavailable
|
|
545
679
|
CONTEXTOS_HOOK_EMBEDDING_TIMEOUT_MS=500 rule embedding timeout during hook direct fallback
|
|
546
680
|
CONTEXTOS_EMBEDDING_TIMEOUT_MS=800 embedding scoring timeout inside ctx-mcp/debug
|
|
547
|
-
CONTEXTOS_HOOK_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout
|
|
681
|
+
CONTEXTOS_HOOK_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout when embeddings are enabled
|
|
548
682
|
CONTEXTOS_SKILL_EMBEDDING_TIMEOUT_MS=2000 skill retrieval timeout inside ctx-mcp/debug
|
|
549
683
|
CONTEXTOS_FILE_EMBEDDINGS=0 disable file-path embedding retrieval
|
|
550
684
|
CONTEXTOS_HOOK_FILE_EMBEDDING_TIMEOUT_MS=500 file retrieval timeout during hook direct fallback
|
package/bin/ctx.js
CHANGED
|
@@ -19,6 +19,7 @@ import { scoreContext } from "../plugins/ctx/lib/score-context.js";
|
|
|
19
19
|
import { defaultDataRoot, workspaceDataDir, workspaceMarkerPath } from "../plugins/ctx/lib/workspace-data.js";
|
|
20
20
|
import { installMcpTelemetryProxies } from "../plugins/ctx/lib/mcp-proxy-install.js";
|
|
21
21
|
import { benchmarkWorkspace, formatBenchmark } from "../plugins/ctx/lib/benchmark.js";
|
|
22
|
+
import { formatSkillRoutingBenchmark, runSkillRoutingEval } from "../eval/skill-routing/run-eval.js";
|
|
22
23
|
import { copyDir, copyPackageRoot, syncPackageRoot } from "../plugins/ctx/lib/package-install.js";
|
|
23
24
|
import { installClaudeHooks } from "../plugins/ctx/lib/claude-hooks.js";
|
|
24
25
|
import { installClaudeMcp } from "../plugins/ctx/lib/claude-mcp.js";
|
|
@@ -30,7 +31,7 @@ import { readCodexMcpServers, syncRules } from "../plugins/ctx/lib/ruler-sync.js
|
|
|
30
31
|
import { detectGraphStrategy, embedCodeReviewGraph, formatCodeReviewGraphEmbedding, formatGraphStrategy } from "../plugins/ctx/lib/graph-strategy.js";
|
|
31
32
|
import { writeInnerGitignore, ensureRootGitignore } from "../plugins/ctx/lib/gitignore.js";
|
|
32
33
|
import { dedupeAgentVisibleSkills, repairSkillSymlinks, syncSkills, detectExistingSkills } from "../plugins/ctx/lib/skillshare-sync.js";
|
|
33
|
-
import { scanSkills, warmSkillEmbeddings } from "../plugins/ctx/lib/skill-discoverer.js";
|
|
34
|
+
import { diagnoseSkills, scanSkills, warmSkillEmbeddings } from "../plugins/ctx/lib/skill-discoverer.js";
|
|
34
35
|
import { parsePassthroughArgs, runPassthrough } from "../plugins/ctx/lib/passthrough.js";
|
|
35
36
|
import { parseAgentList, parseSetupArgs, setupSummaryLines } from "../plugins/ctx/lib/setup-wizard.js";
|
|
36
37
|
import { multiSelect } from "../plugins/ctx/lib/multi-select.js";
|
|
@@ -193,6 +194,7 @@ Usage:
|
|
|
193
194
|
ctx evidence Show evidence from last report
|
|
194
195
|
ctx stats Show workspace statistics
|
|
195
196
|
ctx benchmark -- "task" Benchmark workspace for a task
|
|
197
|
+
ctx benchmark --skills Run skill routing eval benchmark
|
|
196
198
|
ctx sync --rules Sync AGENTS.md rules to all agents
|
|
197
199
|
ctx sync --rules --agents <names> Sync rules to specific agents only
|
|
198
200
|
ctx sync --rules --dry-run Preview rule sync without writing
|
|
@@ -207,6 +209,7 @@ Usage:
|
|
|
207
209
|
ctx sync --workflows --agents <names> Sync workflows to specific agents
|
|
208
210
|
ctx sync --workflows --dry-run Preview workflow sync without writing
|
|
209
211
|
ctx skills Browse community skill libraries
|
|
212
|
+
ctx skills doctor -- "task" Explain skill routing for a task
|
|
210
213
|
ctx skills --agents <names> Filter skills for specific agents
|
|
211
214
|
ctx skills --refresh Force refresh skill library cache
|
|
212
215
|
ctx --config Choose prompt context sections to show
|
|
@@ -650,6 +653,38 @@ async function debug(task) {
|
|
|
650
653
|
console.log(scheduled.additionalContext || "(empty)");
|
|
651
654
|
}
|
|
652
655
|
|
|
656
|
+
/**
 * Prints a skill-routing diagnosis for `task` to stdout.
 *
 * Calls `diagnoseSkills` for the current working directory and logs the
 * reported project evidence (at most the first 30 dependencies and files),
 * followed by one entry per returned skill: confidence, confidence band, name,
 * the individual score components, and any positive / negative evidence.
 *
 * The diagnosis timeout defaults to 3000 ms and can be overridden through the
 * `CONTEXTOS_SKILL_DOCTOR_TIMEOUT_MS` environment variable. Values that are not
 * a non-negative finite number are ignored in favour of the default.
 *
 * @param {string} task - Task prompt to diagnose; must contain non-blank text.
 * @returns {Promise<void>} Resolves once the report has been printed.
 * @throws {Error} With the usage message when `task` is empty or blank.
 */
async function skillsDoctor(task) {
  if (!String(task || "").trim()) throw new Error('Usage: ctx skills doctor -- "task"');

  // First candidate that is present and coerces to a finite number, else 0.
  // Unlike `a || b`, a legitimate 0 is kept instead of falling through to the
  // next candidate (e.g. a calibrated confidence of 0 must not be replaced by
  // the raw ranking score, and an absent adapter's 0 must stay 0).
  const pickScore = (...candidates) => {
    for (const candidate of candidates) {
      if (candidate == null) continue;
      const value = Number(candidate);
      if (Number.isFinite(value)) return value;
    }
    return 0;
  };
  // Renders the chosen score with two decimals, matching the report layout.
  const formatScore = (...candidates) => pickScore(...candidates).toFixed(2);

  // Unset/empty keeps the default; malformed or negative overrides are ignored
  // so diagnoseSkills never receives NaN or a negative timeout.
  const defaultTimeoutMs = 3000;
  const rawTimeout = process.env.CONTEXTOS_SKILL_DOCTOR_TIMEOUT_MS;
  const parsedTimeout = rawTimeout ? Number(rawTimeout) : Number.NaN;
  const timeoutMs = Number.isFinite(parsedTimeout) && parsedTimeout >= 0 ? parsedTimeout : defaultTimeoutMs;

  const cwd = process.cwd();
  const dataRoot = contextOSDataDir();

  const result = await diagnoseSkills({
    cwd,
    prompt: task,
    dataDir: dataRoot,
    skills: scanSkills({ cwd }),
    limit: outputConfigLimits(loadOutputConfig({ dataRoot })).skills,
    timeoutMs
  });

  console.log("ContextOS skill doctor");
  console.log(`cwd: ${result.cwd}`);
  console.log(`prompt: ${result.prompt}`);
  console.log("");
  console.log("Project evidence:");
  console.log(`dependencies: ${result.projectEvidence.dependencies.slice(0, 30).join(", ") || "(none)"}`);
  console.log(`files: ${result.projectEvidence.files.slice(0, 30).join(", ") || "(none)"}`);
  console.log("");
  console.log("Skills:");
  if (!result.skills.length) {
    console.log("(none)");
    return;
  }

  for (const skill of result.skills) {
    // Falls back to the ranking score only when no confidence was reported.
    console.log(`${formatScore(skill.confidence, skill.score)} ${skill.confidenceBand || "low"} ${skill.name}`);
    // `graphScore` is only consulted when `externalGraphScore` is absent.
    console.log(` semantic:${formatScore(skill.semanticScore)} prompt:${formatScore(skill.promptTriggerScore)} project:${formatScore(skill.projectEvidenceScore)} files:${formatScore(skill.fileConfigScore)} import:${formatScore(skill.importGraphScore)} graph:${formatScore(skill.externalGraphScore, skill.graphScore)} memory:${formatScore(skill.memoryScore)} negative:${formatScore(skill.negativePenalty)}`);
    if (skill.evidence?.length) console.log(` evidence: ${skill.evidence.join(", ")}`);
    if (skill.negativeEvidence?.length) console.log(` rejected signals: ${skill.negativeEvidence.join(", ")}`);
  }
}
|
|
687
|
+
|
|
653
688
|
async function warmEmbeddings(task, { syncMarketplace = true, quiet = false } = {}) {
|
|
654
689
|
const warmResult = await warmWorkspaceIndexes({ task });
|
|
655
690
|
const marketplaceSync = syncMarketplace ? syncActiveCodexMarketplace() : null;
|
|
@@ -874,15 +909,21 @@ async function setup({ args = [], cwd = process.cwd() } = {}) {
|
|
|
874
909
|
const totalExisting = existing.reduce((sum, e) => sum + e.count, 0);
|
|
875
910
|
if (totalExisting === 0) {
|
|
876
911
|
console.log("");
|
|
877
|
-
console.log(
|
|
878
|
-
console.log(
|
|
912
|
+
console.log("⚠ No skills found on this machine.");
|
|
913
|
+
console.log("│ Install community skills to get started.");
|
|
879
914
|
console.log("");
|
|
880
915
|
|
|
881
|
-
|
|
882
|
-
|
|
916
|
+
if (options.yes || !process.stdin.isTTY) {
|
|
917
|
+
console.log("│ Skipping community skill installer in non-interactive setup.");
|
|
918
|
+
console.log("│ Run: ctx skills");
|
|
883
919
|
console.log("");
|
|
884
|
-
|
|
885
|
-
await
|
|
920
|
+
} else {
|
|
921
|
+
const installed = await runCommunitySkillInstaller(options.agents);
|
|
922
|
+
if (installed > 0) {
|
|
923
|
+
console.log("");
|
|
924
|
+
console.log("◇ Re-syncing skills after install...");
|
|
925
|
+
await doSyncSkills();
|
|
926
|
+
}
|
|
886
927
|
}
|
|
887
928
|
}
|
|
888
929
|
}
|
|
@@ -981,11 +1022,21 @@ try {
|
|
|
981
1022
|
} else if (command === "stats") {
|
|
982
1023
|
console.log(formatStats(loadStats(contextOSWorkspaceDataDir())));
|
|
983
1024
|
} else if (command === "benchmark") {
|
|
1025
|
+
if (args.includes("--skills")) {
|
|
1026
|
+
console.log(formatSkillRoutingBenchmark(await runSkillRoutingEval({ rootDir })));
|
|
1027
|
+
} else {
|
|
984
1028
|
const marker = args.indexOf("--");
|
|
985
1029
|
const task = marker >= 0 ? args.slice(marker + 1).join(" ") : args.slice(1).join(" ");
|
|
986
1030
|
if (!task.trim()) throw new Error('Usage: ctx benchmark -- "task"');
|
|
987
1031
|
console.log(formatBenchmark(benchmarkWorkspace({ cwd: process.cwd(), task })));
|
|
1032
|
+
}
|
|
988
1033
|
} else if (command === "skills") {
|
|
1034
|
+
if (args[1] === "doctor") {
|
|
1035
|
+
const marker = args.indexOf("--");
|
|
1036
|
+
const task = marker >= 0 ? args.slice(marker + 1).join(" ") : args.slice(2).join(" ");
|
|
1037
|
+
await skillsDoctor(task);
|
|
1038
|
+
process.exitCode = 0;
|
|
1039
|
+
} else {
|
|
989
1040
|
// Interactive community skill library selector + installer
|
|
990
1041
|
const agentsFlag = args.indexOf("--agents");
|
|
991
1042
|
const forceRefresh = args.includes("--refresh");
|
|
@@ -1020,6 +1071,7 @@ try {
|
|
|
1020
1071
|
}));
|
|
1021
1072
|
}
|
|
1022
1073
|
console.log("");
|
|
1074
|
+
}
|
|
1023
1075
|
} else if (command === "sync") {
|
|
1024
1076
|
if (args.includes("--workflows")) {
|
|
1025
1077
|
await syncWorkflows({
|