@minhpnq1807/contextos 0.6.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +119 -121
- package/bin/ctx.js +62 -4
- package/eval/hallucination/run-agent-leaderboard.js +15 -2
- package/eval/hallucination/run-leaderboard.js +5 -5
- package/package.json +1 -1
- package/plugins/ctx/.codex-plugin/plugin.json +1 -1
- package/plugins/ctx/lib/certification.js +3 -0
- package/plugins/ctx/lib/project-context-generator.js +389 -0
- package/plugins/ctx/lib/setup-wizard.js +5 -2
- package/plugins/ctx/lib/skill-discoverer.js +4 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.6.4
|
|
4
|
+
|
|
5
|
+
- **Project context generator:** Added `ctx doctor --fix` and `ctx setup --generate-project-context` to explicitly scaffold starter project skills and a primary workflow when a repository has rules but is missing ContextOS-ready skills/workflows. Generation is repo-scoped, does not run from `npm install`, and does not overwrite existing files unless `--force` is passed to `ctx doctor --fix`.
|
|
6
|
+
|
|
7
|
+
## 0.6.3
|
|
8
|
+
|
|
9
|
+
- **Launch benchmark wording:** Clarified that `ctx leaderboard --hallucination` is an offline deterministic benchmark comparing a raw heuristic baseline with ContextOS evidence-based context selection, while live agent results remain pending external CLI environments.
|
|
10
|
+
- **Offline leaderboard labels:** Renamed offline leaderboard output from agent-like labels to `Raw heuristic baseline` and `ContextOS evidence benchmark` so the 10% to 80% result is not confused with a live Codex/Gemini comparison.
|
|
11
|
+
- **Live leaderboard alias:** Added `ctx leaderboard --hallucination --live --agent <name>` as a launch-friendly alias for running the hallucination benchmark through one installed agent CLI. Live benchmark output now reports `OK`/`SKIPPED`/`ERROR`-style statuses and supports `CONTEXTOS_<AGENT>_CMD` command templates for external wrappers.
|
|
12
|
+
|
|
3
13
|
## 0.6.2
|
|
4
14
|
|
|
5
15
|
- **Live agent leaderboard:** Added `ctx leaderboard --agents codex,gemini` and `npm run leaderboard:agents` to run the hallucination benchmark through installed Codex/Gemini CLIs with timeouts and skip/error reporting for missing or unauthenticated agents.
|
package/README.md
CHANGED
|
@@ -1,116 +1,97 @@
|
|
|
1
1
|
# ContextOS
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Same prompt. Same model. Different context.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
ContextOS stops coding agents from ignoring repo rules, guessing the wrong path, and reading random files.
|
|
6
6
|
|
|
7
7
|
[npm package](https://www.npmjs.com/package/@minhpnq1807/contextos)
|
|
8
8
|
[CI status](https://github.com/khovan123/contextOS/actions/workflows/ci.yml)
|
|
9
|
-
[](#contextos-ready)
|
|
10
9
|
[License](LICENSE)
|
|
11
10
|
|
|
12
11
|
```text
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
Prompt:
|
|
13
|
+
Fix deployment
|
|
14
|
+
|
|
15
|
+
Raw Agent:
|
|
16
|
+
❌ Vercel
|
|
17
|
+
❌ Docker
|
|
18
|
+
❌ Railway
|
|
17
19
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
-> report followed / ignored / unknown
|
|
20
|
+
ContextOS:
|
|
21
|
+
✅ EAS
|
|
22
|
+
✅ Mobile Deployment
|
|
23
|
+
✅ GitHub Actions
|
|
23
24
|
```
|
|
24
25
|
|
|
25
|
-
ContextOS
|
|
26
|
+
ContextOS reads the repo before the agent starts: project rules, repo evidence, suggested files, skills, workflows, and post-task proof.
|
|
26
27
|
|
|
27
28
|
Published package: [`@minhpnq1807/contextos`](https://www.npmjs.com/package/@minhpnq1807/contextos)
|
|
28
29
|
|
|
29
|
-
##
|
|
30
|
+
## Hallucination Leaderboard
|
|
30
31
|
|
|
31
|
-
|
|
32
|
+
Offline deterministic benchmark:
|
|
32
33
|
|
|
33
|
-
|
|
34
|
+
| System | Correct context choice |
|
|
35
|
+
| --- | ---: |
|
|
36
|
+
| Raw heuristic baseline | 10.0% |
|
|
37
|
+
| ContextOS evidence benchmark | 80.0% |
|
|
34
38
|
|
|
35
39
|
```bash
|
|
36
|
-
ctx
|
|
40
|
+
ctx leaderboard --hallucination
|
|
37
41
|
```
|
|
38
42
|
|
|
39
|
-
|
|
40
|
-
| --- | --- |
|
|
41
|
-
| `eas.json`, `expo`, `react-native` | `eas`, `mobile-deployment`, `github-actions-ci-cd` |
|
|
42
|
-
| `vercel.json`, `next`, GitHub workflow | `vercel-deployment`, `github-actions-ci-cd`, `env-secret-management` |
|
|
43
|
-
| ContextOS repo with no app deploy evidence | no deployment skill selected |
|
|
44
|
-
|
|
45
|
-
More 10-second demos:
|
|
46
|
-
|
|
47
|
-
| Demo | GIF |
|
|
48
|
-
| --- | --- |
|
|
49
|
-
| AGENTS.md Lost In The Middle | [docs/demo/agents-lost-middle.gif](docs/demo/agents-lost-middle.gif) |
|
|
50
|
-
| ContextOS Ready Gold | [docs/demo/contextos-ready.gif](docs/demo/contextos-ready.gif) |
|
|
43
|
+
This means ContextOS improves deterministic context routing from 10% to 80% on the offline hallucination task set. It does not claim ContextOS beats Codex, Gemini, Claude Code, or Cursor in live runs.
|
|
51
44
|
|
|
52
|
-
|
|
45
|
+
Live agent benchmark support exists, but results are pending an external environment with working CLI auth/session access:
|
|
53
46
|
|
|
54
47
|
```bash
|
|
55
|
-
|
|
48
|
+
ctx leaderboard --hallucination --live --agent codex
|
|
49
|
+
ctx leaderboard --hallucination --live --agent gemini
|
|
56
50
|
```
|
|
57
51
|
|
|
58
|
-
|
|
52
|
+
If a CLI cannot run in the current environment, the command reports `SKIPPED` or an agent error instead of blocking launch.
|
|
59
53
|
|
|
60
|
-
|
|
54
|
+
Live benchmark tracking:
|
|
61
55
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
ContextOS routes from project evidence instead:
|
|
68
|
-
|
|
69
|
-
```text
|
|
70
|
-
Detected evidence:
|
|
71
|
-
- eas.json
|
|
72
|
-
- expo dependency
|
|
73
|
-
- GitHub workflow
|
|
74
|
-
|
|
75
|
-
Selected skills:
|
|
76
|
-
- eas
|
|
77
|
-
- mobile-deployment
|
|
78
|
-
- github-actions-ci-cd
|
|
79
|
-
```
|
|
56
|
+
- [Run Codex live benchmark](https://github.com/khovan123/contextOS/issues/1)
|
|
57
|
+
- [Run Claude Code live benchmark](https://github.com/khovan123/contextOS/issues/3)
|
|
58
|
+
- [Run Gemini CLI live benchmark](https://github.com/khovan123/contextOS/issues/4)
|
|
59
|
+
- [Run Cursor live benchmark](https://github.com/khovan123/contextOS/issues/2)
|
|
80
60
|
|
|
81
|
-
|
|
61
|
+
## Why People Star ContextOS
|
|
82
62
|
|
|
83
|
-
|
|
63
|
+
- Agents ignore `AGENTS.md`.
|
|
64
|
+
- Agents choose the wrong deployment path.
|
|
65
|
+
- Agents grep random files before understanding the repo.
|
|
66
|
+
- ContextOS fixes all three before coding starts.
|
|
84
67
|
|
|
85
|
-
|
|
86
|
-
| --- | ---: |
|
|
87
|
-
| Cases | 52 |
|
|
88
|
-
| Top-1 Accuracy | 94.2% |
|
|
89
|
-
| Top-3 Recall | 94.2% |
|
|
90
|
-
| False Positive Rate | 0.0% |
|
|
91
|
-
| Confidence Calibration | 100.0% |
|
|
92
|
-
| Negative Gate Accuracy | 100.0% |
|
|
93
|
-
|
|
94
|
-
This is an internal fixture benchmark, not an external real-world benchmark. It is designed to prove the router behavior across controlled Expo/EAS, Next/Vercel, Docker, Railway/Render, Firebase, auth, database, testing, mobile, and adversarial negative-gate cases.
|
|
68
|
+
## Demo
|
|
95
69
|
|
|
96
|
-
|
|
70
|
+

|
|
97
71
|
|
|
98
72
|
```bash
|
|
99
|
-
ctx
|
|
73
|
+
ctx skills doctor -- "fix deployed"
|
|
100
74
|
```
|
|
101
75
|
|
|
102
|
-
|
|
76
|
+
| Repo evidence | What ContextOS tells the agent |
|
|
77
|
+
| --- | --- |
|
|
78
|
+
| `eas.json`, `expo`, `react-native` | `eas`, `mobile-deployment`, `github-actions-ci-cd` |
|
|
79
|
+
| `vercel.json`, `next`, GitHub workflow | `vercel-deployment`, `github-actions-ci-cd`, `env-secret-management` |
|
|
80
|
+
| ContextOS repo with no app deploy evidence | no deployment skill selected |
|
|
103
81
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
|
107
|
-
|
|
|
82
|
+
Another 10-second demo:
|
|
83
|
+
|
|
84
|
+
| Demo | GIF |
|
|
85
|
+
| --- | --- |
|
|
86
|
+
| AGENTS.md Lost In The Middle | [docs/demo/agents-lost-middle.gif](docs/demo/agents-lost-middle.gif) |
|
|
87
|
+
|
|
88
|
+
## What The Agent Sees
|
|
108
89
|
|
|
109
|
-
|
|
90
|
+
ContextOS injects a compact brief before the agent works:
|
|
110
91
|
|
|
111
92
|
```text
|
|
112
93
|
## Critical ContextOS rules
|
|
113
|
-
- IMPORTANT: This project has a knowledge graph.
|
|
94
|
+
- IMPORTANT: This project has a knowledge graph. Use it before broad file search.
|
|
114
95
|
- Use `query_graph` pattern="tests_for" to check coverage.
|
|
115
96
|
|
|
116
97
|
## Suggested files to check
|
|
@@ -129,9 +110,30 @@ ContextOS report
|
|
|
129
110
|
Efficiency: 100%
|
|
130
111
|
Injected rules: 8
|
|
131
112
|
Rule outcomes: 8 followed, 0 ignored, 0 unknown
|
|
132
|
-
Runtime
|
|
113
|
+
Runtime evidence: project graph was used before file search
|
|
133
114
|
```
|
|
134
115
|
|
|
116
|
+
Regenerate the GIFs from real local `ctx` command output:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
npm run demo:capture
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Internal Benchmark
|
|
123
|
+
|
|
124
|
+
Skill selection fixture benchmark:
|
|
125
|
+
|
|
126
|
+
| Metric | Result |
|
|
127
|
+
| --- | ---: |
|
|
128
|
+
| Cases | 52 |
|
|
129
|
+
| Top-1 Accuracy | 94.2% |
|
|
130
|
+
| Top-3 Recall | 94.2% |
|
|
131
|
+
| False Positive Rate | 0.0% |
|
|
132
|
+
| Confidence Calibration | 100.0% |
|
|
133
|
+
| Negative Gate Accuracy | 100.0% |
|
|
134
|
+
|
|
135
|
+
This is an internal fixture benchmark, not an external real-world benchmark. It is designed to prove that ContextOS changes its suggestions from repo evidence across controlled Expo/EAS, Next/Vercel, Docker, Railway/Render, Firebase, auth, database, testing, mobile, and adversarial negative cases.
|
|
136
|
+
|
|
135
137
|
## Quick Install
|
|
136
138
|
|
|
137
139
|
Install in 30 seconds:
|
|
@@ -148,6 +150,13 @@ Scriptable setup:
|
|
|
148
150
|
```bash
|
|
149
151
|
ctx setup --yes
|
|
150
152
|
ctx setup --yes --agents codex,claude,agy
|
|
153
|
+
ctx setup --yes --generate-project-context
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
If `ctx doctor` reports missing project skills/workflows, generate starter project context explicitly:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
ctx doctor --fix
|
|
151
160
|
```
|
|
152
161
|
|
|
153
162
|
No global install:
|
|
@@ -172,37 +181,23 @@ ctx install agy
|
|
|
172
181
|
|
|
173
182
|
Restart the agent after setup. Then use the agent normally.
|
|
174
183
|
|
|
175
|
-
## Why
|
|
176
|
-
|
|
177
|
-
Developers put real operating instructions in `AGENTS.md`: use this graph tool before reading files, run these tests, follow this architecture boundary, avoid this migration path.
|
|
178
|
-
|
|
179
|
-
The problem is not that agents cannot read `AGENTS.md`. The problem is that large context windows bury the important rule in the middle, where attention is weak. ContextOS turns a static rules file into task-aware runtime context.
|
|
180
|
-
|
|
181
|
-
The next visible demo is not another feature. It is showing the pain in a few seconds:
|
|
182
|
-
|
|
183
|
-
```text
|
|
184
|
-
Raw agent: guesses from the prompt.
|
|
185
|
-
ContextOS: routes from repo evidence.
|
|
186
|
-
```
|
|
187
|
-
|
|
188
184
|
## What ContextOS Does
|
|
189
185
|
|
|
190
|
-
|
|
|
186
|
+
| Agent failure | ContextOS behavior |
|
|
191
187
|
| --- | --- |
|
|
192
|
-
|
|
|
193
|
-
|
|
|
194
|
-
|
|
|
195
|
-
|
|
|
196
|
-
|
|
|
197
|
-
| Evidence | Stop hooks persist `followed`, `ignored`, `unknown`, and runtime telemetry for explicit reports. |
|
|
188
|
+
| Ignores project rules | Shows the relevant rules at the start of the task. |
|
|
189
|
+
| Picks the wrong tool or deployment path | Suggests skills only when the repo has supporting evidence. |
|
|
190
|
+
| Reads random files first | Suggests the likely files and workflows before exploration starts. |
|
|
191
|
+
| Claims compliance without proof | Reports which rules were followed, ignored, or unknown after the task. |
|
|
192
|
+
| Needs to work across agents | Supports Codex, Claude Code, and Antigravity with the same project context. |
|
|
198
193
|
|
|
199
194
|
## Comparison
|
|
200
195
|
|
|
201
196
|
| Approach | What it gives the agent | Main gap |
|
|
202
197
|
| --- | --- | --- |
|
|
203
198
|
| Plain `AGENTS.md` | Static repo instructions. | Important rules get buried or ignored when the task changes. |
|
|
204
|
-
| Generic RAG |
|
|
205
|
-
| ContextOS | Task-
|
|
199
|
+
| Generic RAG | Related files or snippets. | It usually does not choose skills/workflows or prove rule compliance. |
|
|
200
|
+
| ContextOS | Task-specific rules, files, skills, workflows, and evidence. | Requires local setup and prepared indexes for best results. |
|
|
206
201
|
|
|
207
202
|
## Safety Model
|
|
208
203
|
|
|
@@ -212,20 +207,20 @@ ContextOS is designed to be OSS-friendly and low-friction:
|
|
|
212
207
|
| --- | --- |
|
|
213
208
|
| Standalone by default | `ctx setup` works without `code-review-graph`, `codegraph`, or `agent-memory`. |
|
|
214
209
|
| Optional adapters | Graph and memory backends add signal when available; missing adapters contribute score `0`. |
|
|
215
|
-
| Fail-open hooks | Prompt hooks return local context or nothing instead of blocking the agent when
|
|
210
|
+
| Fail-open hooks | Prompt hooks return local context or nothing instead of blocking the agent when optional runtime pieces are unavailable. |
|
|
216
211
|
| Local-only telemetry | Reports, prompt history, evidence, and telemetry stay under `~/.ctx/contextos/`. |
|
|
217
|
-
| No hook network calls | Prompt and stop hooks do not call external services. Install/warm commands may
|
|
212
|
+
| No hook network calls | Prompt and stop hooks do not call external services. Install/warm commands may prepare local indexes when explicitly run. |
|
|
218
213
|
| No postinstall surprise | `npm install` only installs the CLI. Setup runs only when you call `ctx setup`. |
|
|
219
214
|
|
|
220
|
-
Positioning: ContextOS works standalone and gets smarter when graph or memory adapters are available.
|
|
215
|
+
Positioning: ContextOS works standalone and gets smarter when project graph or memory adapters are available.
|
|
221
216
|
|
|
222
217
|
## Roadmap
|
|
223
218
|
|
|
224
|
-
ContextOS is not heading toward a dashboard-first product. The next work is focused on making the existing local
|
|
219
|
+
ContextOS is not heading toward a dashboard-first product. The next work is focused on making the existing local behavior more visible and reusable:
|
|
225
220
|
|
|
226
221
|
| Next | Why |
|
|
227
222
|
| --- | --- |
|
|
228
|
-
| Hallucination Leaderboard | Compare raw agent guesses vs ContextOS evidence-
|
|
223
|
+
| Hallucination Leaderboard | Compare raw agent guesses vs ContextOS evidence-based recommendations across the same repos and tasks. |
|
|
229
224
|
| Agent Replay | Turn telemetry into a readable post-task narrative: prompt, selected skills, followed rules, suggested files, touched files, efficiency. |
|
|
230
225
|
| Community Skill Packs | Let contributors PR ContextOS-ready skills with triggers, evidence, negative gates, and workflows before building a larger hub. |
|
|
231
226
|
| ContextOS Ready | Define a repository readiness badge for AGENTS.md, skills, workflows, and evidence quality. |
|
|
@@ -237,11 +232,11 @@ See [docs/roadmap.md](docs/roadmap.md) for the current roadmap notes.
|
|
|
237
232
|
|
|
238
233
|
ContextOS starts the community loop with [`community-skills/`](community-skills/) instead of a hosted marketplace. The seed packs are `eas`, `vercel`, `prisma`, `redis`, `oauth-google`, and `jwt-auth`.
|
|
239
234
|
|
|
240
|
-
Each pack contains a model-visible `SKILL.md` plus `skill.yaml`
|
|
235
|
+
Each pack contains a model-visible `SKILL.md` plus `skill.yaml` metadata with prompt triggers, project evidence, negative triggers, and a short workflow. Contributors can PR new packs by copying [`community-skills/_template/`](community-skills/_template/).
|
|
241
236
|
|
|
242
237
|
## ContextOS Ready
|
|
243
238
|
|
|
244
|
-
`ctx doctor` scores whether a repository is ready for ContextOS-style agent
|
|
239
|
+
`ctx doctor` scores whether a repository is ready for ContextOS-style agent guidance:
|
|
245
240
|
|
|
246
241
|
```bash
|
|
247
242
|
ctx doctor
|
|
@@ -267,14 +262,16 @@ The score checks project `AGENTS.md` rules, project skill packs under `.codex/sk
|
|
|
267
262
|
| `ctx setup` | Recommended first-run install flow. |
|
|
268
263
|
| `ctx debug -- "Recheck authen flow"` | Preview what ContextOS would inject. |
|
|
269
264
|
| `ctx doctor` | Score repository readiness for the `ContextOS Ready` badge. |
|
|
265
|
+
| `ctx doctor --fix` | Generate starter project skills and workflow when the repo is missing them. |
|
|
270
266
|
| `ctx report` | Show the last task's compliance summary. |
|
|
271
267
|
| `ctx evidence` | Show why each rule was marked followed/ignored/unknown. |
|
|
272
268
|
| `ctx stats` | Show workspace-level usage and effectiveness metrics. |
|
|
273
269
|
| `ctx benchmark -- "task"` | Compare raw AGENTS.md ordering vs ContextOS scheduling. |
|
|
274
|
-
| `ctx benchmark --skills` | Run the
|
|
275
|
-
| `ctx leaderboard --hallucination` |
|
|
276
|
-
| `ctx leaderboard --
|
|
277
|
-
| `ctx
|
|
270
|
+
| `ctx benchmark --skills` | Run the skill selection eval benchmark. |
|
|
271
|
+
| `ctx leaderboard --hallucination` | Run the offline deterministic hallucination benchmark. |
|
|
272
|
+
| `ctx leaderboard --hallucination --live --agent codex` | Run the live CLI benchmark when agent auth/session is available. |
|
|
273
|
+
| `ctx leaderboard --agents codex,gemini` | Legacy live CLI leaderboard form. |
|
|
274
|
+
| `ctx sync --rules` | Sync project rules across agents. |
|
|
278
275
|
| `ctx sync --skills` | Sync skills across agents through skillshare. |
|
|
279
276
|
| `ctx sync --workflows` | Sync workflow markdown across Claude/Codex/Antigravity. |
|
|
280
277
|
|
|
@@ -283,7 +280,7 @@ The score checks project `AGENTS.md` rules, project skill packs under `.codex/sk
|
|
|
283
280
|
1. Start in a repo with an `AGENTS.md` that contains a rule like:
|
|
284
281
|
|
|
285
282
|
```text
|
|
286
|
-
Always use
|
|
283
|
+
Always use the project graph before reading files.
|
|
287
284
|
```
|
|
288
285
|
|
|
289
286
|
2. Install:
|
|
@@ -587,19 +584,22 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
|
|
|
587
584
|
| `ctx install --copy` | Copies only the plugin payload to `$CODEX_HOME/plugins/ctx`. | Legacy local development or manual plugin experiments. | Does not sync the active marketplace, rebuild indexes, register MCP, or install global hooks. Prefer `ctx refresh` for active local updates. |
|
|
588
585
|
| `ctx setup` | Runs the first-run setup wizard. | You want the recommended onboarding flow after `npm install -g @minhpnq1807/contextos`. | Installs selected agents, optionally syncs Ruler rules/MCP and skillshare skills, asks which prompt sections to show, then prints next steps. |
|
|
589
586
|
| `ctx setup --yes` | Runs setup with defaults non-interactively. | You want scriptable Codex setup. | Uses `codex`, enables injection, syncs rules, syncs skills, skips interactive community-skill installation when no TTY is available, and passes `--yes` to dependency setup prompts. Use `--agents codex,claude,agy` for multi-agent setup. |
|
|
587
|
+
| `ctx setup --generate-project-context` | Generates starter project skills and workflow during setup. | Your repo has rules but `ctx doctor` reports missing skills/workflows. | Creates `.codex/skills/<detected-skill>/SKILL.md`, matching `skill.yaml`, and `.codex/workflows/primary.md` without overwriting existing files. |
|
|
590
588
|
| `ctx setup --agents <list>` | Runs setup for selected agents. | You want only part of the default set. | Accepts comma-separated `codex`, `claude`, `agy`, or `antigravity`. |
|
|
591
589
|
| `ctx setup --no-rules` | Skips Ruler sync during setup. | You only want hooks/MCP install and maybe skill sync. | Does not run `ctx sync --rules`. |
|
|
592
590
|
| `ctx setup --no-skills` | Skips skillshare sync during setup. | You do not want shared skills configured. | Does not run `ctx sync --skills`. |
|
|
593
591
|
| `ctx setup --quiet` | Runs setup in measurement-only mode. | You want reports/stats without visible injected prompt context. | Installs hooks with prompt context injection disabled. |
|
|
594
592
|
| `ctx debug -- "task"` | Runs the scheduler locally for a fake prompt. | You want to see which AGENTS.md rules and files ContextOS would inject before using Codex. | Prints rule scores, scoring reasons, suggested files, and final `additionalContext`. |
|
|
595
593
|
| `ctx doctor` | Scores repository ContextOS readiness. | You want to add or verify a `ContextOS Ready` badge. | Prints Rules, Skills, Workflows, Overall tier, evidence, and next recommendations. |
|
|
594
|
+
| `ctx doctor --fix` | Generates starter ContextOS project context. | `ctx doctor` says skills/workflows are missing and you want explicit local scaffolding. | Detects package/config evidence, creates up to three starter project skills plus `.codex/workflows/primary.md`, then prints the updated readiness score. |
|
|
596
595
|
| `ctx report` | Shows the last Stop-hook compliance report for the current workspace. | An agent task has finished and you want the summary again. | Prints sectioned tables for summary, rule outcomes, suggested files, and runtime telemetry from `~/.ctx/contextos/workspaces/<workspace-id>/last-report.json`. |
|
|
597
596
|
| `ctx evidence` | Shows detailed evidence behind the last report for the current workspace. | You want to inspect why a rule was marked `followed`, `ignored`, `unknown`, or `unmeasurable`. | Prints a compact evidence table plus per-rule detail tables. |
|
|
598
597
|
| `ctx stats` | Shows aggregate runtime metrics for the current workspace. | You want to know whether ContextOS is active and useful over time. | Prints sectioned tables for prompt/report counts, injection rate, efficiency, rule outcomes, hook events, last prompt, and last report. |
|
|
599
598
|
| `ctx benchmark -- "task"` | Compares baseline AGENTS.md ordering with ContextOS task-aware scheduling. | You want a before/after signal for lost-in-the-middle risk. | Prints tables for parsed/actionable/filtered rules, baseline middle-risk, scheduled high/mid rules, recency reminder status, and top scored rules. |
|
|
600
599
|
| `ctx benchmark --skills` | Runs the Skill Router eval benchmark. | You want evidence for skill routing accuracy and negative gates. | Prints top-1 accuracy, top-3 recall, false positive rate, confidence calibration, and negative gate accuracy across `eval/skill-routing` fixtures. |
|
|
601
|
-
| `ctx leaderboard --hallucination` |
|
|
602
|
-
| `ctx leaderboard --
|
|
600
|
+
| `ctx leaderboard --hallucination` | Runs the offline deterministic hallucination benchmark. | You want launch evidence for the wrong-context problem without depending on external agent auth. | Runs 20 fixture tasks across 10+ repo contexts and prints Raw heuristic baseline vs ContextOS evidence benchmark plus sample failures. |
|
|
601
|
+
| `ctx leaderboard --hallucination --live --agent codex` | Runs the hallucination benchmark through one installed agent CLI. | You want real agent output and have CLI auth/session available. | Calls the selected CLI with timeouts; missing, blocked, or unauthenticated CLIs are reported as skipped/errors instead of blocking. |
|
|
602
|
+
| `ctx leaderboard --agents codex,gemini` | Legacy live CLI leaderboard form. | You want to run multiple live agents at once. | Runs the same live-agent hallucination benchmark for each CLI in the comma-separated list. |
|
|
603
603
|
| `ctx sync --rules` | Syncs project rules and MCP servers through Ruler. | You want Codex, Claude Code, and Antigravity to share one project rule/MCP source of truth. | Ensures `.ruler/ruler.toml`, injects `ctx-mcp`, imports existing MCP servers from Codex and project `.mcp.json`, runs `ruler apply --agents codex,claude,antigravity`, mirrors MCP servers to Antigravity MCP configs, and verifies generated config. |
|
|
604
604
|
| `ctx sync --rules --agents <list>` | Syncs only selected agents through Ruler. | You want to update one or two agents without touching the others. | Accepts comma-separated values such as `codex`, `claude`, `agy`, `antigravity`, or `codex,claude,agy`; `agy` is normalized to Ruler's `antigravity`. |
|
|
605
605
|
| `ctx sync --rules --dry-run` | Previews Ruler sync without writing files or running apply. | You want to inspect behavior before changing project config. | Prints the same flow with dry-run status. |
|
|
@@ -664,7 +664,7 @@ These files are local telemetry only. Hooks do not make network calls.
|
|
|
664
664
|
|
|
665
665
|
## Project Understanding
|
|
666
666
|
|
|
667
|
-
ContextOS works standalone. The
|
|
667
|
+
ContextOS works standalone. The default path is local project rules, prepared file indexes, project skills, workflows, and evidence capture.
|
|
668
668
|
|
|
669
669
|
Project graph and memory backends are optional adapters:
|
|
670
670
|
|
|
@@ -676,26 +676,24 @@ Project graph and memory backends are optional adapters:
|
|
|
676
676
|
|
|
677
677
|
ContextOS does not require `code-review-graph`, `codegraph`, or `agent-memory` to install or run. It gets smarter when those backends are available; when they are missing, the adapter scores stay at zero and the hook continues with local context.
|
|
678
678
|
|
|
679
|
-
For file suggestions, ContextOS
|
|
679
|
+
For file suggestions, ContextOS uses prepared local indexes:
|
|
680
680
|
|
|
681
681
|
```text
|
|
682
682
|
prompt
|
|
683
|
-
->
|
|
684
|
-
->
|
|
685
|
-
->
|
|
686
|
-
->
|
|
687
|
-
->
|
|
688
|
-
-> merge and deduplicate semantic, import-graph, and optional graph matches
|
|
689
|
-
-> inject top suggested files with graph evidence reasons
|
|
683
|
+
-> read task-relevant AGENTS.md rules
|
|
684
|
+
-> suggest prepared file candidates
|
|
685
|
+
-> expand nearby imports
|
|
686
|
+
-> add optional project-graph matches when available
|
|
687
|
+
-> inject a compact list of files to check
|
|
690
688
|
```
|
|
691
689
|
|
|
692
|
-
This keeps the hook fast and local while still using graph
|
|
690
|
+
This keeps the hook fast and local while still using project graph signal when available. When no graph adapter is available, file suggestions still use local file indexes and import expansion.
|
|
693
691
|
|
|
694
|
-
Prompt
|
|
692
|
+
Prompt-time file suggestions do not walk the repository. `ctx install` and `ctx embeddings warm` rebuild the file index and one-hop import adjacency by walking source paths once; prompt hooks query those prepared indexes directly. Rules, files, skills, and workflows are resolved concurrently.
|
|
695
693
|
|
|
696
694
|
`ctx embeddings warm` automatically refreshes the active Codex marketplace payload before rebuilding indexes. Use `ctx refresh` when you want the same marketplace sync plus install-style file, skill, import, and code-review-graph embedding refresh in one command.
|
|
697
695
|
|
|
698
|
-
If a prompt has no usable context candidates, the hook fails open without emitting an empty `hook context` block, records `emptyContextReason` in the workspace runtime file, and starts a detached `autowarm` rebuild with a cooldown. That background rebuild refreshes
|
|
696
|
+
If a prompt has no usable context candidates, the hook fails open without emitting an empty `hook context` block, records `emptyContextReason` in the workspace runtime file, and starts a detached `autowarm` rebuild with a cooldown. That background rebuild refreshes prepared indexes for the next prompt while keeping repository walking out of the current prompt path.
|
|
699
697
|
|
|
700
698
|
Use `ctx --config` to choose which prompt sections ContextOS injects and how many suggestions each section may show. Interactive `ctx setup` includes the same section picker and limit prompts, while `ctx setup --yes` keeps the current saved config for automation. The panel supports multiple selection with `Space` and persists the global choice in `~/.ctx/contextos/output-config.json`. Defaults are five suggested files, five skills, and five workflows; caps are 20 files, 10 skills, and 5 workflows. Disabling rules hides both critical and additional relevant rule sections; compliance metadata remains available for reports.
|
|
701
699
|
|
package/bin/ctx.js
CHANGED
|
@@ -44,6 +44,7 @@ import { fetchSkillsForAgents, printSkillRecommendations, getAllLibraries, getIn
|
|
|
44
44
|
import { invalidateCtxMcpSocket } from "../plugins/ctx/lib/ctx-mcp-client.js";
|
|
45
45
|
import { runPrefixedCommand } from "../plugins/ctx/lib/shell-runner.js";
|
|
46
46
|
import { formatContextOSReady, inspectContextOSReady } from "../plugins/ctx/lib/certification.js";
|
|
47
|
+
import { formatProjectContextGeneration, generateProjectContext } from "../plugins/ctx/lib/project-context-generator.js";
|
|
47
48
|
|
|
48
49
|
/**
|
|
49
50
|
* Run a shell command with all output lines prefixed by │
|
|
@@ -189,17 +190,22 @@ Usage:
|
|
|
189
190
|
ctx setup Interactive full setup wizard
|
|
190
191
|
ctx setup --yes Auto-confirm all setup prompts
|
|
191
192
|
ctx setup --agents <names> Pre-select agents to install
|
|
193
|
+
ctx setup --generate-project-context Generate starter project skills/workflow
|
|
192
194
|
ctx setup --no-rules Skip AGENTS.md rule sync
|
|
193
195
|
ctx setup --no-skills Skip skill sync
|
|
194
196
|
ctx setup --quiet Quiet mode (minimal output)
|
|
195
197
|
ctx debug -- "task" Debug a task with ContextOS tracing
|
|
196
198
|
ctx doctor Score repository ContextOS readiness
|
|
199
|
+
ctx doctor --fix Generate starter project skills/workflow
|
|
200
|
+
ctx doctor --fix --force Regenerate starter project context files
|
|
197
201
|
ctx report Show last ContextOS compliance report
|
|
198
202
|
ctx evidence Show evidence from last report
|
|
199
203
|
ctx stats Show workspace statistics
|
|
200
204
|
ctx benchmark -- "task" Benchmark workspace for a task
|
|
201
205
|
ctx benchmark --skills Run skill routing eval benchmark
|
|
202
|
-
ctx leaderboard --hallucination
|
|
206
|
+
ctx leaderboard --hallucination Run offline deterministic hallucination benchmark
|
|
207
|
+
ctx leaderboard --hallucination --live --agent codex
|
|
208
|
+
Run hallucination benchmark through one live CLI
|
|
203
209
|
ctx leaderboard --agents codex,gemini Run live CLI leaderboard for installed agents
|
|
204
210
|
ctx sync --rules Sync AGENTS.md rules to all agents
|
|
205
211
|
ctx sync --rules --agents <names> Sync rules to specific agents only
|
|
@@ -252,6 +258,18 @@ function normalizeInstallAgent(agent) {
|
|
|
252
258
|
if (normalized === "antigravity") return "agy";
|
|
253
259
|
return normalized;
|
|
254
260
|
}
|
|
261
|
+
|
|
262
|
+
/**
 * Extract the agent names requested for a leaderboard run.
 *
 * Accepts the single-agent form (`--agent codex`) and the list form
 * (`--agents codex,gemini`). `--agent` is checked first; if it is absent or
 * has no usable value, `--agents` is used instead.
 *
 * @param {string[]} [args=[]] - CLI arguments following the command name.
 * @returns {string[]} Trimmed, non-empty agent names in the order given, or an
 *   empty array when neither flag carries a value.
 */
function leaderboardAgentsFromArgs(args = []) {
  for (const flag of ["--agent", "--agents"]) {
    const flagIndex = args.indexOf(flag);
    if (flagIndex < 0) continue;

    const rawValue = args[flagIndex + 1];
    // A missing token, or one that is itself an option (e.g. `--agent --live`),
    // means the flag was given without a value; do not treat it as an agent name.
    if (typeof rawValue !== "string" || rawValue.startsWith("-")) continue;

    const agents = rawValue
      .split(",")
      .map((agent) => agent.trim())
      .filter(Boolean);
    if (agents.length > 0) return agents;
  }
  return [];
}
|
|
272
|
+
|
|
255
273
|
/**
|
|
256
274
|
* Intercept console.log from an async fn,
|
|
257
275
|
* printing each line immediately with "│ " prefix for real-time feedback.
|
|
@@ -866,6 +884,22 @@ async function setup({ args = [], cwd = process.cwd() } = {}) {
|
|
|
866
884
|
});
|
|
867
885
|
}
|
|
868
886
|
|
|
887
|
+
if (interactive && !options.generateProjectContext) {
|
|
888
|
+
const readiness = inspectContextOSReady({ cwd });
|
|
889
|
+
if (readiness.skills.score < 50 || readiness.workflows.score < 50) {
|
|
890
|
+
const rl = readline.createInterface({ input, output });
|
|
891
|
+
try {
|
|
892
|
+
options.generateProjectContext = await askSetupYesNo(
|
|
893
|
+
rl,
|
|
894
|
+
"Generate starter project skills and workflow?",
|
|
895
|
+
true
|
|
896
|
+
);
|
|
897
|
+
} finally {
|
|
898
|
+
rl.close();
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
}
|
|
902
|
+
|
|
869
903
|
console.log("");
|
|
870
904
|
console.log("◇ Ready to setup:");
|
|
871
905
|
for (const line of setupSummaryLines({
|
|
@@ -878,6 +912,13 @@ async function setup({ args = [], cwd = process.cwd() } = {}) {
|
|
|
878
912
|
|
|
879
913
|
if (!options.agents.length) throw new Error("No agents selected. Use --agents codex,claude,antigravity,copilot.");
|
|
880
914
|
|
|
915
|
+
if (options.generateProjectContext) {
|
|
916
|
+
console.log("◇ Generating starter project context...");
|
|
917
|
+
const generated = generateProjectContext({ cwd });
|
|
918
|
+
for (const line of formatProjectContextGeneration(generated).split("\n")) console.log(`│ ${line}`);
|
|
919
|
+
console.log("");
|
|
920
|
+
}
|
|
921
|
+
|
|
881
922
|
for (const agent of options.agents) {
|
|
882
923
|
console.log(`◇ Setting up ${agent}...`);
|
|
883
924
|
await streamSetupOutput(() => install({ agent, copy: false }));
|
|
@@ -1008,7 +1049,14 @@ try {
|
|
|
1008
1049
|
if (!task.trim()) throw new Error('Usage: ctx debug -- "task"');
|
|
1009
1050
|
await debug(task);
|
|
1010
1051
|
} else if (command === "doctor") {
|
|
1011
|
-
|
|
1052
|
+
if (args.includes("--fix")) {
|
|
1053
|
+
const generated = generateProjectContext({ cwd: process.cwd(), force: args.includes("--force") });
|
|
1054
|
+
console.log(formatProjectContextGeneration(generated));
|
|
1055
|
+
console.log("");
|
|
1056
|
+
console.log(formatContextOSReady(inspectContextOSReady({ cwd: process.cwd() })));
|
|
1057
|
+
} else {
|
|
1058
|
+
console.log(formatContextOSReady(inspectContextOSReady({ cwd: process.cwd() })));
|
|
1059
|
+
}
|
|
1012
1060
|
} else if (command === "refresh") {
|
|
1013
1061
|
await refresh();
|
|
1014
1062
|
} else if (command === "autowarm") {
|
|
@@ -1039,7 +1087,17 @@ try {
|
|
|
1039
1087
|
console.log(formatBenchmark(benchmarkWorkspace({ cwd: process.cwd(), task })));
|
|
1040
1088
|
}
|
|
1041
1089
|
} else if (command === "leaderboard") {
|
|
1042
|
-
if (args.includes("--hallucination")) {
|
|
1090
|
+
if (args.includes("--hallucination") && args.includes("--live")) {
|
|
1091
|
+
const agents = leaderboardAgentsFromArgs(args);
|
|
1092
|
+
const limitIndex = args.indexOf("--limit");
|
|
1093
|
+
const timeoutIndex = args.indexOf("--timeout-ms");
|
|
1094
|
+
console.log(formatAgentLeaderboard(runAgentLeaderboard({
|
|
1095
|
+
rootDir,
|
|
1096
|
+
agents: agents.length ? agents : undefined,
|
|
1097
|
+
caseLimit: limitIndex >= 0 ? Number(args[limitIndex + 1]) : undefined,
|
|
1098
|
+
timeoutMs: timeoutIndex >= 0 ? Number(args[timeoutIndex + 1]) : undefined
|
|
1099
|
+
})));
|
|
1100
|
+
} else if (args.includes("--hallucination")) {
|
|
1043
1101
|
console.log(formatHallucinationLeaderboard(await runHallucinationLeaderboard({ rootDir })));
|
|
1044
1102
|
} else if (args.includes("--agents")) {
|
|
1045
1103
|
const index = args.indexOf("--agents");
|
|
@@ -1053,7 +1111,7 @@ try {
|
|
|
1053
1111
|
timeoutMs: timeoutIndex >= 0 ? Number(args[timeoutIndex + 1]) : undefined
|
|
1054
1112
|
})));
|
|
1055
1113
|
} else {
|
|
1056
|
-
throw new Error("Usage: ctx leaderboard --hallucination OR ctx leaderboard --agents codex,gemini");
|
|
1114
|
+
throw new Error("Usage: ctx leaderboard --hallucination OR ctx leaderboard --hallucination --live --agent codex OR ctx leaderboard --agents codex,gemini");
|
|
1057
1115
|
}
|
|
1058
1116
|
} else if (command === "skills") {
|
|
1059
1117
|
if (args[1] === "doctor") {
|
|
@@ -29,7 +29,8 @@ export function runAgentLeaderboard({
|
|
|
29
29
|
const systems = [];
|
|
30
30
|
|
|
31
31
|
for (const agent of agents) {
|
|
32
|
-
const
|
|
32
|
+
const template = agentCommandTemplate(agent);
|
|
33
|
+
const binary = template ? template.split(/\s+/).filter(Boolean)[0] : findBinary(agent);
|
|
33
34
|
if (!binary) {
|
|
34
35
|
systems.push({ name: agent, status: "skipped", reason: "binary not found", rows: [], correctRate: 0 });
|
|
35
36
|
continue;
|
|
@@ -71,7 +72,7 @@ export function formatAgentLeaderboard(result) {
|
|
|
71
72
|
];
|
|
72
73
|
for (const system of result.systems) {
|
|
73
74
|
const score = system.status === "ok" ? percent(system.correctRate) : system.reason;
|
|
74
|
-
lines.push(`${system.name.padEnd(8)} ${system.status.padEnd(8)} ${score}`);
|
|
75
|
+
lines.push(`${system.name.padEnd(8)} ${system.status.toUpperCase().padEnd(8)} ${score}`);
|
|
75
76
|
}
|
|
76
77
|
lines.push("", "Cases:");
|
|
77
78
|
for (const system of result.systems) {
|
|
@@ -123,6 +124,8 @@ function runAgentCase({ agent, binary, testCase, skillIds, timeoutMs, rootDir })
|
|
|
123
124
|
}
|
|
124
125
|
|
|
125
126
|
function agentArgs({ agent, cwd, prompt }) {
|
|
127
|
+
const genericTemplate = agentCommandTemplate(agent);
|
|
128
|
+
if (genericTemplate) return expandTemplate(genericTemplate, { cwd, prompt }).slice(1);
|
|
126
129
|
if (agent === "codex") {
|
|
127
130
|
return [
|
|
128
131
|
"exec",
|
|
@@ -140,6 +143,11 @@ function agentArgs({ agent, cwd, prompt }) {
|
|
|
140
143
|
return [prompt];
|
|
141
144
|
}
|
|
142
145
|
|
|
146
|
+
/**
 * Look up an optional command template for an agent in the environment.
 *
 * The variable name is `CONTEXTOS_<AGENT>_CMD`, where `<AGENT>` is the agent
 * name upper-cased with every run of characters outside `A-Z0-9` replaced by
 * a single underscore.
 *
 * @param {string} agent - Agent name (nullish values are treated as "").
 * @returns {string} The configured template, or "" when unset or empty.
 */
function agentCommandTemplate(agent) {
  const slug = String(agent || "")
    .toUpperCase()
    .replace(/[^A-Z0-9]+/g, "_");
  const configured = process.env[`CONTEXTOS_${slug}_CMD`];
  return configured ? configured : "";
}
|
|
150
|
+
|
|
143
151
|
function buildPrompt({ task, skillIds }) {
|
|
144
152
|
return [
|
|
145
153
|
"You are evaluating a repository for a coding-agent skill router benchmark.",
|
|
@@ -180,6 +188,11 @@ function findBinary(name) {
|
|
|
180
188
|
for (const candidate of candidates) {
|
|
181
189
|
if (fs.existsSync(candidate)) return candidate;
|
|
182
190
|
}
|
|
191
|
+
for (const dir of String(process.env.PATH || "").split(path.delimiter)) {
|
|
192
|
+
if (!dir) continue;
|
|
193
|
+
const candidate = path.join(dir, safeName);
|
|
194
|
+
if (fs.existsSync(candidate)) return candidate;
|
|
195
|
+
}
|
|
183
196
|
for (const command of [
|
|
184
197
|
`command -v ${safeName}`,
|
|
185
198
|
`source ~/.profile >/dev/null 2>&1 || true; source ~/.bashrc >/dev/null 2>&1 || true; command -v ${safeName}`
|
|
@@ -41,8 +41,8 @@ export async function runHallucinationLeaderboard({
|
|
|
41
41
|
caseCount: selectedCases.length,
|
|
42
42
|
repoCount: new Set(selectedCases.map((row) => row.fixture)).size,
|
|
43
43
|
systems: [
|
|
44
|
-
summarizeSystem("Raw
|
|
45
|
-
summarizeSystem("ContextOS
|
|
44
|
+
summarizeSystem("Raw heuristic baseline", rawRows),
|
|
45
|
+
summarizeSystem("ContextOS evidence benchmark", contextRows)
|
|
46
46
|
],
|
|
47
47
|
rows: selectedCases.map((testCase) => ({
|
|
48
48
|
prompt: testCase.prompt,
|
|
@@ -60,11 +60,11 @@ export function formatHallucinationLeaderboard(result) {
|
|
|
60
60
|
`Repos: ${result.repoCount}`,
|
|
61
61
|
`Tasks: ${result.caseCount}`,
|
|
62
62
|
"",
|
|
63
|
-
"System
|
|
64
|
-
"
|
|
63
|
+
"System Correct Context",
|
|
64
|
+
"---------------------------- ---------------"
|
|
65
65
|
];
|
|
66
66
|
for (const system of result.systems) {
|
|
67
|
-
lines.push(`${system.name.padEnd(
|
|
67
|
+
lines.push(`${system.name.padEnd(28)} ${percent(system.correctRate)}`);
|
|
68
68
|
}
|
|
69
69
|
lines.push("", "Sample failures:");
|
|
70
70
|
const failures = result.rows
|
package/package.json
CHANGED
|
@@ -68,6 +68,9 @@ export function formatContextOSReady(result) {
|
|
|
68
68
|
if (next.length) {
|
|
69
69
|
lines.push("", "Next:");
|
|
70
70
|
for (const item of [...new Set(next)].slice(0, 5)) lines.push(`- ${item}`);
|
|
71
|
+
if (result.skills.score < 50 || result.workflows.score < 50) {
|
|
72
|
+
lines.push("- Run `ctx doctor --fix` to generate starter project skills and workflow.");
|
|
73
|
+
}
|
|
71
74
|
}
|
|
72
75
|
|
|
73
76
|
return lines.join("\n");
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
import { clearSkillScanCache } from "./skill-discoverer.js";
|
|
5
|
+
|
|
6
|
+
const STARTER_SKILL_LIMIT = 3;
|
|
7
|
+
|
|
8
|
+
/**
 * Scaffold starter skills and a primary workflow under `<root>/.codex`.
 *
 * For each selected starter skill a `SKILL.md` and `skill.yaml` are written to
 * `.codex/skills/<id>/`, followed by `.codex/workflows/primary.md`. Files that
 * already exist are left untouched unless `force` is set. The skill scan cache
 * is cleared once writing is done.
 *
 * @param {object} [options]
 * @param {string} [options.cwd=process.cwd()] - Directory to start the project-root search from.
 * @param {boolean} [options.force=false] - Overwrite files that already exist.
 * @returns {{root: string, profile: object, skills: string[], created: string[], skipped: string[]}}
 *   The resolved root, the detected profile, the selected skill ids, and the
 *   paths that were written or skipped.
 */
export function generateProjectContext({ cwd = process.cwd(), force = false } = {}) {
  const root = findProjectRoot(cwd);
  const profile = detectProjectProfile(root);
  const skills = selectStarterSkills(profile).slice(0, STARTER_SKILL_LIMIT);
  const created = [];
  const skipped = [];

  // Ensure the parent directory exists, then write (or skip) one file.
  const emit = (filePath, content) => {
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    writeFile({ filePath, content, force, created, skipped });
  };

  for (const starter of skills) {
    const skillDir = path.join(root, ".codex", "skills", starter.id);
    emit(path.join(skillDir, "SKILL.md"), renderSkillMarkdown(starter));
    emit(path.join(skillDir, "skill.yaml"), renderSkillYaml(starter));
  }

  emit(path.join(root, ".codex", "workflows", "primary.md"), renderPrimaryWorkflow(profile, skills));
  clearSkillScanCache();

  const skillIds = skills.map((starter) => starter.id);
  return { root, profile, skills: skillIds, created, skipped };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Render the result of `generateProjectContext` as a plain-text report.
 *
 * @param {{root: string, profile: {summary: string}, skills: string[], created: string[], skipped: string[]}} result
 * @returns {string} Newline-joined report listing root, detected summary,
 *   skill ids, created files, skipped files (only when any), and next steps.
 *   File paths are shown relative to `result.root`.
 */
export function formatProjectContextGeneration(result) {
  const bullet = (filePath) => `- ${path.relative(result.root, filePath)}`;

  const createdSection = result.created.length ? result.created.map(bullet) : ["- none"];
  const skippedSection = result.skipped.length
    ? ["", "Skipped existing files:", ...result.skipped.map(bullet)]
    : [];

  return [
    "Project context generated",
    "",
    `Root: ${result.root}`,
    `Detected: ${result.profile.summary}`,
    `Skills: ${result.skills.join(", ") || "(none)"}`,
    "",
    "Created:",
    ...createdSection,
    ...skippedSection,
    "",
    "Next:",
    "- Review generated skills/workflow and edit project-specific wording.",
    "- Run: ctx doctor",
    "- Run: ctx debug -- \"your task\""
  ].join("\n");
}
|
|
69
|
+
|
|
70
|
+
/**
 * Inspect a repository and derive a coarse technology profile.
 *
 * Dependency and script names are collected from every readable
 * `package.json` returned by `findPackageJsonFiles`, a fixed list of marker
 * files is probed relative to `root`, and both are mapped to platform tags.
 *
 * @param {string} root - Project root directory.
 * @returns {{root: string, packageFiles: string[], dependencies: Set<string>, scripts: Set<string>, files: Set<string>, platforms: string[], summary: string}}
 *   `summary` is the comma-joined platform list, else a package.json count,
 *   else "generic repository".
 */
function detectProjectProfile(root) {
  const packageFiles = findPackageJsonFiles(root);
  const packages = packageFiles
    .map((filePath) => ({ filePath, json: safeJson(filePath) }))
    // Unreadable or unparsable manifests come back as null and are ignored.
    .filter((item) => item.json);

  const dependencies = new Set();
  const scripts = new Set();
  for (const item of packages) {
    for (const section of ["dependencies", "devDependencies", "peerDependencies", "optionalDependencies"]) {
      for (const name of Object.keys(item.json[section] || {})) dependencies.add(name);
    }
    for (const name of Object.keys(item.json.scripts || {})) scripts.add(name);
  }

  const files = new Set(findExisting(root, [
    "eas.json",
    "app.json",
    "app.config.js",
    "app.config.ts",
    "vercel.json",
    "Dockerfile",
    "docker-compose.yml",
    "compose.yml",
    "railway.json",
    "render.yaml",
    "firebase.json",
    "prisma/schema.prisma",
    "jest.config.js",
    "jest.config.ts",
    "playwright.config.ts",
    ".github/workflows"
  ]));

  const has = (name) => dependencies.has(name);
  const hasFile = (name) => files.has(name);
  const platforms = [];
  if (has("expo") || has("react-native") || hasFile("eas.json")) platforms.push("expo-mobile");
  if (has("next") || hasFile("vercel.json")) platforms.push("next-web");
  if (has("@nestjs/core")) platforms.push("nestjs-backend");
  if (has("express")) platforms.push("express-backend");
  if (has("prisma") || has("@prisma/client") || hasFile("prisma/schema.prisma")) platforms.push("prisma");
  if (has("redis") || has("ioredis")) platforms.push("redis");
  // `compose.yml` is probed above; count it as Docker evidence as well.
  if (hasFile("Dockerfile") || hasFile("docker-compose.yml") || hasFile("compose.yml")) platforms.push("docker");
  if (hasFile(".github/workflows")) platforms.push("github-actions");
  // `jest.config.ts` is probed above; count it as testing evidence as well.
  if (
    has("jest") ||
    has("vitest") ||
    hasFile("jest.config.js") ||
    hasFile("jest.config.ts") ||
    hasFile("playwright.config.ts")
  ) {
    platforms.push("testing");
  }

  const summary = platforms.length
    ? platforms.join(", ")
    : packages.length
      ? `${packages.length} package.json file(s)`
      : "generic repository";

  return {
    root,
    packageFiles,
    dependencies,
    scripts,
    files,
    platforms,
    summary
  };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Choose starter skills for a detected project profile.
 *
 * Platform-specific skills are added in a fixed order for every platform tag
 * present in `profile.platforms`. If fewer than `STARTER_SKILL_LIMIT` skills
 * result, generic fallback skills are appended in order until the limit is
 * reached. The list is de-duplicated by id before it is returned.
 *
 * @param {{platforms: string[]}} profile - Profile from `detectProjectProfile`.
 * @returns {object[]} Skill descriptors (may exceed the limit; callers trim).
 */
function selectStarterSkills(profile) {
  const detected = new Set(profile.platforms);

  // [platform tags that trigger the skill, skill definition], in priority order.
  const catalog = [
    [["expo-mobile"], {
      id: "mobile-deployment",
      name: "Mobile Deployment",
      description: "Use for Expo, React Native, EAS build, QR/dev-client, Android/iOS preview, and mobile release tasks.",
      prompts: ["expo", "react native", "eas", "mobile", "qr", "android", "ios", "preview", "production", "deploy"],
      files: ["eas.json", "app.json", "app.config.ts", ".github/workflows/*"],
      dependencies: ["expo", "react-native", "eas-cli"],
      negatives: ["vercel", "serverless web deployment"]
    }],
    [["next-web"], {
      id: "nextjs-web",
      name: "Next.js Web",
      description: "Use for Next.js routes, App Router, server/client component boundaries, Vercel deploys, and web UI tasks.",
      prompts: ["next", "nextjs", "app router", "route", "page", "component", "vercel", "webapp"],
      files: ["app/**", "pages/**", "next.config.*", "vercel.json"],
      dependencies: ["next", "react"],
      negatives: ["expo", "eas", "android", "ios"]
    }],
    [["nestjs-backend", "express-backend"], {
      id: "backend-api",
      name: "Backend API",
      description: "Use for backend services, API endpoints, validation, auth, controllers/routes, and service-layer changes.",
      prompts: ["api", "backend", "service", "controller", "route", "auth", "validation", "fastify", "express", "nestjs"],
      files: ["services/**", "src/**", "apps/**", "libs/**"],
      dependencies: ["@nestjs/core", "express", "fastify", "zod", "class-validator"],
      negatives: ["pure css", "static copy"]
    }],
    [["prisma"], {
      id: "database-prisma",
      name: "Database Prisma",
      description: "Use for Prisma schema, migrations, query performance, repositories, and database-backed tests.",
      prompts: ["prisma", "database", "migration", "query", "schema", "seed", "transaction"],
      files: ["prisma/schema.prisma", "prisma/**", "src/**/repository*", "services/**/repository*"],
      dependencies: ["prisma", "@prisma/client"],
      negatives: ["frontend-only", "css-only"]
    }],
    [["redis"], {
      id: "redis-cache",
      name: "Redis Cache",
      description: "Use for cache, queues, sessions, rate limits, Redis clients, and invalidation behavior.",
      prompts: ["redis", "cache", "queue", "session", "rate limit", "invalidation"],
      files: ["src/**/cache*", "services/**/cache*", "libs/**/cache*"],
      dependencies: ["redis", "ioredis", "bullmq"],
      negatives: ["static page"]
    }],
    [["docker", "github-actions"], {
      id: "ci-deployment",
      name: "CI Deployment",
      description: "Use for Docker, GitHub Actions, build logs, deploy failures, environment variables, and release pipelines.",
      prompts: ["ci", "github actions", "docker", "deploy", "build failed", "pipeline", "environment", "secret"],
      files: [".github/workflows/*", "Dockerfile", "docker-compose.yml", "railway.json", "render.yaml"],
      dependencies: [],
      negatives: ["local ui styling only"]
    }],
    [["testing"], {
      id: "project-testing",
      name: "Project Testing",
      description: "Use for unit, integration, e2e, Jest, Vitest, Playwright, and focused verification tasks.",
      prompts: ["test", "jest", "vitest", "playwright", "e2e", "coverage", "failing test"],
      files: ["test/**", "__tests__/**", "*.spec.*", "*.test.*", "playwright.config.ts"],
      dependencies: ["jest", "vitest", "@playwright/test"],
      negatives: ["docs-only"]
    }]
  ];

  // Generic skills used to pad the list up to STARTER_SKILL_LIMIT.
  const fallbacks = [
    {
      id: "project-implementation",
      name: "Project Implementation",
      description: "Use for normal feature work, bug fixes, and scoped implementation tasks in this repository.",
      prompts: ["implement", "fix", "add", "update", "refactor", "bug"],
      files: ["src/**", "apps/**", "services/**", "libs/**"],
      dependencies: [],
      negatives: ["release notes only"]
    },
    {
      id: "project-debugging",
      name: "Project Debugging",
      description: "Use for runtime errors, failed commands, logs, CI failures, and root-cause analysis.",
      prompts: ["error", "failed", "timeout", "debug", "logs", "cannot", "fix"],
      files: ["package.json", ".github/workflows/*", "src/**", "services/**"],
      dependencies: [],
      negatives: ["new feature with no failure"]
    },
    {
      id: "project-documentation",
      name: "Project Documentation",
      description: "Use for README, changelog, architecture notes, specs, and project documentation updates.",
      prompts: ["readme", "docs", "documentation", "changelog", "spec", "guide"],
      files: ["README.md", "docs/**", "CHANGELOG.md", "AGENTS.md"],
      dependencies: [],
      negatives: ["runtime bug"]
    }
  ];

  const selected = catalog
    .filter(([triggers]) => triggers.some((tag) => detected.has(tag)))
    .map(([, definition]) => skill(definition));

  for (const definition of fallbacks) {
    if (selected.length >= STARTER_SKILL_LIMIT) break;
    if (selected.some((existing) => existing.id === definition.id)) continue;
    selected.push(skill(definition));
  }

  return dedupeById(selected);
}
|
|
250
|
+
|
|
251
|
+
/**
 * Build a skill descriptor containing exactly the seven known fields.
 *
 * @param {object} spec - Source object; properties other than the seven below are dropped.
 * @returns {{id: *, name: *, description: *, prompts: *, files: *, dependencies: *, negatives: *}}
 */
function skill(spec) {
  const { id, name, description, prompts, files, dependencies, negatives } = spec;
  return {
    id,
    name,
    description,
    prompts,
    files,
    dependencies,
    negatives
  };
}
|
|
254
|
+
|
|
255
|
+
/**
 * Render the `SKILL.md` body for a starter skill.
 *
 * @param {{name: string, description: string}} skill - Skill descriptor.
 * @returns {string} Markdown with the skill title, its description, and a
 *   fixed four-step "Before editing" checklist, ending with a newline.
 */
function renderSkillMarkdown(skill) {
  const heading = `# ${skill.name}`;
  const checklist = [
    "1. Read the relevant project rules.",
    "2. Inspect the suggested files and nearby tests.",
    "3. Keep the change scoped to the task.",
    "4. Run the focused verification command before final response."
  ];
  const sections = [
    heading,
    "",
    skill.description,
    "",
    "Use this skill when the prompt and project evidence match the metadata in `skill.yaml`.",
    "",
    "Before editing:",
    "",
    ...checklist,
    ""
  ];
  return sections.join("\n");
}
|
|
272
|
+
|
|
273
|
+
/**
 * Render the `skill.yaml` metadata for a starter skill.
 *
 * Emits id/name/description, positive triggers (prompts, files,
 * dependencies), evidence (at most the first four files, plus dependencies),
 * negative prompt triggers, and a fixed four-step workflow. When a skill has
 * no dependencies, a single `package.json` entry is written in their place.
 *
 * @param {{id: string, name: string, description: string, prompts: string[], files: string[], dependencies: string[], negatives: string[]}} skill
 * @returns {string} YAML text ending with a newline.
 */
function renderSkillYaml(skill) {
  const asItems = (values) => values.map((item) => ` - ${quoteYaml(item)}`);
  const dependencyItems = () => (skill.dependencies.length ? asItems(skill.dependencies) : [" - package.json"]);

  const header = [
    `id: ${skill.id}`,
    `name: ${skill.name}`,
    `description: ${skill.description}`
  ];
  const positive = [
    "positive_triggers:",
    " prompts:",
    ...asItems(skill.prompts),
    " files:",
    ...asItems(skill.files),
    " dependencies:",
    ...dependencyItems()
  ];
  const evidence = [
    "evidence:",
    " files:",
    ...asItems(skill.files.slice(0, 4)),
    " dependencies:",
    ...dependencyItems()
  ];
  const negative = [
    "negative_triggers:",
    " prompts:",
    ...asItems(skill.negatives)
  ];
  const workflow = [
    "workflow:",
    " - Inspect repo evidence before choosing an implementation path.",
    " - Read the suggested files and nearest tests.",
    " - Implement the smallest scoped change.",
    " - Run focused verification and summarize evidence."
  ];

  return [...header, ...positive, ...evidence, ...negative, ...workflow, ""].join("\n");
}
|
|
301
|
+
|
|
302
|
+
/**
 * Render the `primary.md` workflow document.
 *
 * @param {{summary: string}} profile - Detected project profile.
 * @param {{id: string, description: string}[]} skills - Starter skills to list.
 * @returns {string} Markdown containing the detected summary, a fixed role
 *   chain, six numbered steps, and one bullet per starter skill, ending with
 *   a newline.
 */
function renderPrimaryWorkflow(profile, skills) {
  const intro = `Use this workflow for common tasks in this repository. Detected project context: ${profile.summary}.`;
  const steps = [
    "1. Read the task and relevant AGENTS.md rules.",
    "2. Check ContextOS suggested files and skills.",
    "3. Inspect project config before choosing a deployment/framework path.",
    "4. Implement the smallest scoped change.",
    "5. Run focused tests or the closest available verification.",
    "6. Summarize files changed, verification, and any remaining risk."
  ];
  const skillBullets = skills.map((starter) => `- ${starter.id}: ${starter.description}`);

  return [
    "# Primary Workflow",
    "",
    intro,
    "",
    "planner -> tester -> code-reviewer -> docs-manager",
    "",
    ...steps,
    "",
    "Starter skills:",
    ...skillBullets,
    ""
  ].join("\n");
}
|
|
322
|
+
|
|
323
|
+
/**
 * Write `content` to `filePath`, recording the outcome in the caller's lists.
 *
 * An existing file is left alone and its path appended to `skipped` unless
 * `force` is truthy; otherwise the file is written as UTF-8 and its path is
 * appended to `created` (this includes forced overwrites).
 *
 * @param {object} options
 * @param {string} options.filePath - Destination path; its directory must already exist.
 * @param {string} options.content - Text to write.
 * @param {boolean} options.force - Overwrite an existing file.
 * @param {string[]} options.created - Mutated: receives paths that were written.
 * @param {string[]} options.skipped - Mutated: receives paths that were left untouched.
 * @returns {void}
 */
function writeFile({ filePath, content, force, created, skipped }) {
  const shouldWrite = force || !fs.existsSync(filePath);
  if (shouldWrite) {
    fs.writeFileSync(filePath, content, "utf8");
    created.push(filePath);
  } else {
    skipped.push(filePath);
  }
}
|
|
331
|
+
|
|
332
|
+
/**
 * Collect `package.json` paths beneath `root` using `walk`.
 *
 * @param {string} root - Directory to search.
 * @returns {string[]} At most the first 20 matches, in traversal order.
 */
function findPackageJsonFiles(root) {
  const matches = [];
  const isManifest = (filePath) => path.basename(filePath) === "package.json";
  walk(root, matches, isManifest, 0);
  return matches.slice(0, 20);
}
|
|
337
|
+
|
|
338
|
+
/**
 * Keep only the relative paths that exist beneath `root`.
 *
 * @param {string} root - Base directory.
 * @param {string[]} relativePaths - Candidate paths relative to `root`.
 * @returns {string[]} The candidates that exist on disk, in input order.
 */
function findExisting(root, relativePaths) {
  const present = [];
  for (const relativePath of relativePaths) {
    const absolute = path.join(root, relativePath);
    if (fs.existsSync(absolute)) present.push(relativePath);
  }
  return present;
}
|
|
341
|
+
|
|
342
|
+
/**
 * Recursively collect files under `directory` that satisfy `predicate`.
 *
 * Directories named `node_modules`, `.git`, or `.ctx` are not entered, and
 * recursion stops once `depth` exceeds 4. Directories that cannot be read are
 * silently skipped.
 *
 * @param {string} directory - Directory to scan.
 * @param {string[]} files - Mutated: matching file paths are appended here.
 * @param {(filePath: string) => boolean} predicate - Selects files to keep.
 * @param {number} depth - Depth of `directory` relative to the starting point.
 * @returns {void}
 */
function walk(directory, files, predicate, depth) {
  if (depth > 4) return;

  let children;
  try {
    children = fs.readdirSync(directory, { withFileTypes: true });
  } catch {
    // Best effort: an unreadable directory contributes nothing.
    return;
  }

  const ignored = ["node_modules", ".git", ".ctx"];
  for (const child of children) {
    if (ignored.includes(child.name)) continue;
    const childPath = path.join(directory, child.name);
    if (child.isDirectory()) {
      walk(childPath, files, predicate, depth + 1);
      continue;
    }
    if (child.isFile() && predicate(childPath)) files.push(childPath);
  }
}
|
|
357
|
+
|
|
358
|
+
/**
 * Read and parse a JSON file without throwing.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {*} The parsed value, or null when the file cannot be read or parsed.
 */
function safeJson(filePath) {
  let parsed = null;
  try {
    const text = fs.readFileSync(filePath, "utf8");
    parsed = JSON.parse(text);
  } catch {
    // Deliberate best effort: any read or parse failure yields null.
    parsed = null;
  }
  return parsed;
}
|
|
365
|
+
|
|
366
|
+
/**
 * Locate the project root by searching upward for a `.git` entry.
 *
 * @param {string} cwd - Directory to start from.
 * @returns {string} The nearest ancestor (including `cwd` itself) that
 *   contains `.git`; the resolved `cwd` when none is found before the
 *   filesystem root.
 */
function findProjectRoot(cwd) {
  const start = path.resolve(cwd);
  for (let candidate = start; ; ) {
    if (fs.existsSync(path.join(candidate, ".git"))) return candidate;
    const parent = path.dirname(candidate);
    // dirname() of the filesystem root is the root itself: nothing left to try.
    if (parent === candidate) return start;
    candidate = parent;
  }
}
|
|
375
|
+
|
|
376
|
+
/**
 * Remove entries whose `id` has already been seen, keeping first occurrences.
 *
 * @param {{id: *}[]} skills - Items to de-duplicate.
 * @returns {{id: *}[]} A new array in the original order.
 */
function dedupeById(skills) {
  const knownIds = new Set();
  return skills.filter((entry) => {
    if (knownIds.has(entry.id)) return false;
    knownIds.add(entry.id);
    return true;
  });
}
|
|
386
|
+
|
|
387
|
+
/**
 * Quote a value for the generated YAML by JSON-encoding its string form.
 *
 * @param {*} value - Value to quote; converted with `String()` first.
 * @returns {string} A double-quoted, JSON-escaped string.
 */
function quoteYaml(value) {
  const text = String(value);
  return JSON.stringify(text);
}
|
|
@@ -19,7 +19,8 @@ export function parseSetupArgs(args = []) {
|
|
|
19
19
|
yes,
|
|
20
20
|
quiet: args.includes("--quiet"),
|
|
21
21
|
syncRules: !args.includes("--no-rules"),
|
|
22
|
-
syncSkills: !args.includes("--no-skills")
|
|
22
|
+
syncSkills: !args.includes("--no-skills"),
|
|
23
|
+
generateProjectContext: args.includes("--generate-project-context")
|
|
23
24
|
};
|
|
24
25
|
}
|
|
25
26
|
|
|
@@ -42,6 +43,7 @@ export function setupSummaryLines({
|
|
|
42
43
|
agents = DEFAULT_AGENTS,
|
|
43
44
|
syncRules = true,
|
|
44
45
|
syncSkills = true,
|
|
46
|
+
generateProjectContext = false,
|
|
45
47
|
promptSections = null,
|
|
46
48
|
promptLimits = null
|
|
47
49
|
} = {}) {
|
|
@@ -50,7 +52,8 @@ export function setupSummaryLines({
|
|
|
50
52
|
`Agents: ${agents.join(", ") || "(none)"}`,
|
|
51
53
|
`Prompt context injection: always enabled`,
|
|
52
54
|
`Ruler rule/MCP sync: ${syncRules ? "enabled" : "skipped"}`,
|
|
53
|
-
`skillshare skill sync: ${syncSkills ? "enabled" : "skipped"}
|
|
55
|
+
`skillshare skill sync: ${syncSkills ? "enabled" : "skipped"}`,
|
|
56
|
+
`Project context generation: ${generateProjectContext ? "enabled" : "skipped"}`
|
|
54
57
|
];
|
|
55
58
|
if (promptSections !== null) lines.push(`Prompt sections shown: ${promptSections}`);
|
|
56
59
|
if (promptLimits !== null) lines.push(`Prompt suggest limits: ${promptLimits}`);
|
|
@@ -20,6 +20,10 @@ const DEFAULT_ROUTER_THRESHOLD = 0.35;
|
|
|
20
20
|
|
|
21
21
|
const scanCache = new Map();
|
|
22
22
|
|
|
23
|
+
/**
 * Empty the module-level `scanCache` map.
 *
 * NOTE(review): presumably this forces the next skill scan to re-read the
 * filesystem — confirm against the code that populates `scanCache`.
 *
 * @returns {void}
 */
export function clearSkillScanCache() {
  scanCache.clear();
}
|
|
26
|
+
|
|
23
27
|
export function skillSearchRoots({ cwd = process.cwd(), home = os.homedir() } = {}) {
|
|
24
28
|
return [
|
|
25
29
|
path.join(cwd, ".codex", "skills"),
|