supered 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "supered",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Small, evidence-first workflows for coding agents.",
5
5
  "author": {
6
6
  "name": "Farouk Hajjej",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "supered",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "A compact agent workflow kit for clarifying, building, verifying, and shipping software changes.",
5
5
  "author": {
6
6
  "name": "Farouk Hajjej",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "supered",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Small, evidence-first workflows for coding agents.",
5
5
  "homepage": "https://fhajjej-ship-it.github.io/Supered/",
6
6
  "license": "MIT",
package/README.md CHANGED
@@ -30,6 +30,12 @@ Install with npx:
30
30
  npx supered install --target codex
31
31
  ```
32
32
 
33
+ Check the install:
34
+
35
+ ```bash
36
+ npx supered doctor --target codex
37
+ ```
38
+
33
39
  Or install the default Codex skill set with curl:
34
40
 
35
41
  ```bash
@@ -50,6 +56,7 @@ cd Supered
50
56
  npm test
51
57
  npm run validate
52
58
  node ./bin/supered.mjs install --target codex
59
+ node ./bin/supered.mjs doctor --target codex
53
60
  ```
54
61
 
55
62
  For Claude or Gemini, replace `codex` with `claude` or `gemini`.
@@ -63,6 +70,25 @@ Host-specific notes:
63
70
 
64
71
  Marketplace readiness lives in [docs/marketplace-checklist.md](docs/marketplace-checklist.md).
65
72
 
73
+ ## Which Skill Should I Use?
74
+
75
+ Use [docs/which-skill.md](docs/which-skill.md) as the quick router:
76
+
77
+ - unclear task: `shape-the-task`
78
+ - clear but large task: `make-a-map`
79
+ - planned implementation: `build-in-slices`
80
+ - broken or flaky behavior: `trace-the-fault`
81
+ - completion claim: `prove-the-change`
82
+ - commit, publish, or handoff: `ship-the-work`
83
+
84
+ ## Eval Pack
85
+
86
+ Supered includes a public eval pack so the skills can be judged against realistic work instead of vibes.
87
+
88
+ - [Scenario catalog](docs/evals/scenarios.json): 10 realistic coding-agent scenarios.
89
+ - [Baseline results](docs/evals/baseline-results.json): scores across clarity, actionability, guardrails, evidence, and outcome.
90
+ - [Eval report](docs/evals/README.md): how to use the scenarios to improve the skills.
91
+
66
92
  ## CLI
67
93
 
68
94
  ```bash
@@ -72,13 +98,14 @@ npm run smoke-install
72
98
  npm run verify-site
73
99
  npm run verify-package
74
100
  node ./bin/supered.mjs skills --json
101
+ node ./bin/supered.mjs doctor --target codex --json
75
102
  ```
76
103
 
77
104
  The validator checks package metadata, plugin metadata, and skill frontmatter so the public repo does not drift into a half-installable state.
78
105
 
79
106
  `npm run verify-site` opens the landing page in Chromium at desktop and mobile sizes, checks the logo and workflow text, and writes screenshots to `artifacts/site/`.
80
107
 
81
- `npm run verify-package` builds the npm tarball and verifies `npx`-style installs for every supported host target.
108
+ `npm run verify-package` builds the npm tarball and verifies `npx`-style installs plus Doctor checks for every supported host target.
82
109
 
83
110
  ## Design Principles
84
111
 
package/bin/supered.mjs CHANGED
@@ -1,19 +1,14 @@
1
1
  #!/usr/bin/env node
2
- import { cp, mkdir } from "node:fs/promises";
3
2
  import { dirname, resolve } from "node:path";
4
3
  import { fileURLToPath } from "node:url";
5
4
 
6
- import { listSkills, validateProject } from "../lib/manifest.js";
5
+ import { installSuperedSkills } from "../lib/host-install.js";
6
+ import { inspectSuperedInstall } from "../lib/install-doctor.js";
7
+ import { listSkills } from "../lib/manifest.js";
8
+ import { validateReleaseBundle } from "../lib/release-bundle.js";
7
9
 
8
10
  const root = resolve(dirname(fileURLToPath(import.meta.url)), "..");
9
11
  const [command, ...args] = process.argv.slice(2);
10
- const installTargets = {
11
- codex: ".codex/skills",
12
- claude: ".claude/skills",
13
- cursor: ".cursor/skills",
14
- gemini: ".gemini/skills",
15
- opencode: ".opencode/skills"
16
- };
17
12
 
18
13
  function printHelp() {
19
14
  console.log(`Supered
@@ -22,10 +17,12 @@ Usage:
22
17
  supered skills [--json]
23
18
  supered validate
24
19
  supered install --target <codex|claude|cursor|gemini|opencode> [--dest <path>]
20
+ supered doctor --target <codex|claude|cursor|gemini|opencode> [--dest <path>] [--json]
25
21
 
26
22
  Examples:
27
23
  npx supered install --target codex
28
24
  npx supered install --target gemini --dest ~/.gemini/skills
25
+ supered doctor --target codex
29
26
  `);
30
27
  }
31
28
 
@@ -42,7 +39,7 @@ async function skillsCommand() {
42
39
  }
43
40
 
44
41
  async function validateCommand() {
45
- const result = await validateProject(root);
42
+ const result = await validateReleaseBundle(root);
46
43
  if (result.errors.length > 0) {
47
44
  for (const error of result.errors) {
48
45
  console.error(`- ${error}`);
@@ -54,29 +51,52 @@ async function validateCommand() {
54
51
  console.log(`Supered bundle is valid: ${result.skills.length} skills, ${result.checked.length} files checked.`);
55
52
  }
56
53
 
57
- function defaultInstallDest(target) {
58
- const home = process.env.HOME;
59
- if (!home) {
60
- throw new Error("HOME is not set; pass --dest explicitly.");
54
+ async function installCommand() {
55
+ const targetIndex = args.indexOf("--target");
56
+ const destIndex = args.indexOf("--dest");
57
+ const target = targetIndex === -1 ? "" : args[targetIndex + 1];
58
+ const dest = destIndex === -1 ? undefined : args[destIndex + 1];
59
+
60
+ if (!target || (destIndex !== -1 && !dest)) {
61
+ throw new Error("Install requires --target <codex|claude|cursor|gemini|opencode>.");
61
62
  }
62
63
 
63
- if (installTargets[target]) return `${home}/${installTargets[target]}`;
64
- throw new Error(`Unsupported target: ${target}`);
64
+ const result = await installSuperedSkills({ root, target, dest });
65
+ console.log(`Installed Supered skills for ${result.target} at ${result.dest}.`);
65
66
  }
66
67
 
67
- async function installCommand() {
68
+ async function doctorCommand() {
68
69
  const targetIndex = args.indexOf("--target");
69
70
  const destIndex = args.indexOf("--dest");
70
71
  const target = targetIndex === -1 ? "" : args[targetIndex + 1];
71
- const dest = destIndex === -1 ? defaultInstallDest(target) : args[destIndex + 1];
72
+ const dest = destIndex === -1 ? undefined : args[destIndex + 1];
73
+ const json = args.includes("--json");
72
74
 
73
- if (!target || !dest) {
74
- throw new Error("Install requires --target <codex|claude|cursor|gemini|opencode>.");
75
+ if (!target || (destIndex !== -1 && !dest)) {
76
+ throw new Error("Doctor requires --target <codex|claude|cursor|gemini|opencode>.");
75
77
  }
76
78
 
77
- await mkdir(dest, { recursive: true });
78
- await cp(resolve(root, "skills"), dest, { recursive: true });
79
- console.log(`Installed Supered skills for ${target} at ${dest}.`);
79
+ const result = await inspectSuperedInstall({ root, target, dest });
80
+ if (json) {
81
+ console.log(JSON.stringify(result, null, 2));
82
+ if (result.status !== "ok") {
83
+ process.exitCode = 1;
84
+ }
85
+ return;
86
+ }
87
+
88
+ if (result.status === "ok") {
89
+ console.log(`Supered doctor passed for ${result.target} at ${result.dest}.`);
90
+ console.log(`${result.installedSkills.length} skills installed and current.`);
91
+ return;
92
+ }
93
+
94
+ console.log(`Supered doctor found ${result.issues.length} issue${result.issues.length === 1 ? "" : "s"} for ${result.target} at ${result.dest}.`);
95
+ for (const installIssue of result.issues) {
96
+ console.log(`- [${installIssue.code}] ${installIssue.message}`);
97
+ }
98
+ console.log(`Fix: ${result.fixCommand}`);
99
+ process.exitCode = 1;
80
100
  }
81
101
 
82
102
  try {
@@ -88,6 +108,8 @@ try {
88
108
  await validateCommand();
89
109
  } else if (command === "install") {
90
110
  await installCommand();
111
+ } else if (command === "doctor") {
112
+ await doctorCommand();
91
113
  } else {
92
114
  throw new Error(`Unknown command: ${command}`);
93
115
  }
@@ -0,0 +1,34 @@
1
+ # Supered Eval Pack
2
+
3
+ This pack gives Supered a visible usefulness standard: 10 realistic coding-agent scenarios, a simple scoring rubric, and baseline results for the current skill set.
4
+
5
+ The goal is not to claim scientific benchmark status. The goal is to make the product falsifiable: a reader can see what the skills are supposed to help with, how outcomes are judged, and where the current library is strong or still dependent on project context.
6
+
7
+ ## Files
8
+
9
+ - [scenarios.json](scenarios.json): 10 realistic coding-agent scenarios with prompts, context, primary skills, success criteria, and expected evidence.
10
+ - [baseline-results.json](baseline-results.json): maintainer-scored baseline results for Supered v0.2.0.
11
+
12
+ ## Scoring
13
+
14
+ Each scenario is scored from 1 to 5 across five dimensions:
15
+
16
+ - `clarity`: the skill helps the agent understand what kind of work this is.
17
+ - `actionability`: the skill tells the agent what to do next, not just how to think.
18
+ - `guardrails`: the skill prevents common bad moves such as scope creep, guessing, or premature claims.
19
+ - `evidence`: the skill asks for proof before completion or handoff.
20
+ - `outcome`: the skill increases the chance of a useful user-facing result.
21
+
22
+ ## Baseline Summary
23
+
24
+ The current baseline average is `4.64 / 5` across the catalog. Strongest areas are release handoff, verification, and fault tracing. The scenarios with lower scores are intentionally product-heavy or environment-dependent, where a skill can guide the agent but cannot replace user priorities, browser access, database access, or security review.
25
+
26
+ ## How To Use It
27
+
28
+ 1. Pick a scenario close to your real task.
29
+ 2. Ask an agent to use the recommended Supered skill.
30
+ 3. Score the result against the five dimensions.
31
+ 4. Add notes when the skill was vague, too strict, or missing a useful recovery path.
32
+ 5. Propose a skill improvement or a new scenario when the score exposes a real gap.
33
+
34
+ This gives contributors a practical way to improve Supered without turning the project into a pile of opinions.
@@ -0,0 +1,132 @@
1
+ {
2
+ "product": "Supered",
3
+ "catalogVersion": "0.1",
4
+ "summary": {
5
+ "averageScore": 4.64,
6
+ "scenarioCount": 10,
7
+ "scoreScale": "1-5",
8
+ "method": "Maintainer review of the v0.2.0 Supered skill bodies against the scenario catalog. Scores are intentionally conservative until external user studies exist."
9
+ },
10
+ "results": [
11
+ {
12
+ "scenarioId": "S01",
13
+ "recommendedSkill": "shape-the-task",
14
+ "scores": {
15
+ "clarity": 5,
16
+ "actionability": 5,
17
+ "guardrails": 4,
18
+ "evidence": 4,
19
+ "outcome": 5
20
+ },
21
+ "notes": "The skill asks for users, constraints, assumptions, and acceptance signals before implementation, which directly prevents dashboard overreach. Evidence is strong but still depends on user confirmation."
22
+ },
23
+ {
24
+ "scenarioId": "S02",
25
+ "recommendedSkill": "make-a-map",
26
+ "scores": {
27
+ "clarity": 5,
28
+ "actionability": 5,
29
+ "guardrails": 5,
30
+ "evidence": 4,
31
+ "outcome": 5
32
+ },
33
+ "notes": "The mapping playbook fits multi-file auth work well because it forces affected areas, checkpoints, and verification commands before edits. External security review may still be needed."
34
+ },
35
+ {
36
+ "scenarioId": "S03",
37
+ "recommendedSkill": "build-in-slices",
38
+ "scores": {
39
+ "clarity": 4,
40
+ "actionability": 5,
41
+ "guardrails": 5,
42
+ "evidence": 4,
43
+ "outcome": 4
44
+ },
45
+ "notes": "The slice discipline prevents a settings redesign from swallowing the small save-confirmation task. The score is slightly lower because visual quality still needs domain judgment."
46
+ },
47
+ {
48
+ "scenarioId": "S04",
49
+ "recommendedSkill": "trace-the-fault",
50
+ "scores": {
51
+ "clarity": 5,
52
+ "actionability": 5,
53
+ "guardrails": 5,
54
+ "evidence": 5,
55
+ "outcome": 4
56
+ },
57
+ "notes": "The fault-tracing skill is strongest on this scenario: it demands symptom capture, hypotheses, probes, and proof before patches. The remaining risk is platform access to mobile Safari."
58
+ },
59
+ {
60
+ "scenarioId": "S05",
61
+ "recommendedSkill": "prove-the-change",
62
+ "scores": {
63
+ "clarity": 5,
64
+ "actionability": 4,
65
+ "guardrails": 5,
66
+ "evidence": 5,
67
+ "outcome": 5
68
+ },
69
+ "notes": "The verification skill explicitly blocks premature completion claims and requires fresh proof. Actionability is high, though the exact export check depends on the app's local tooling."
70
+ },
71
+ {
72
+ "scenarioId": "S06",
73
+ "recommendedSkill": "ship-the-work",
74
+ "scores": {
75
+ "clarity": 5,
76
+ "actionability": 5,
77
+ "guardrails": 5,
78
+ "evidence": 5,
79
+ "outcome": 5
80
+ },
81
+ "notes": "The shipping skill aligns directly with public release work: clean status, intended staging, verification, push, release, and public read-back. It is the best fit in the catalog."
82
+ },
83
+ {
84
+ "scenarioId": "S07",
85
+ "recommendedSkill": "using-supered",
86
+ "scores": {
87
+ "clarity": 4,
88
+ "actionability": 5,
89
+ "guardrails": 4,
90
+ "evidence": 4,
91
+ "outcome": 4
92
+ },
93
+ "notes": "The entrypoint helps route a messy request into a first useful workflow instead of trying everything. The score is lower because the request still requires product prioritization."
94
+ },
95
+ {
96
+ "scenarioId": "S08",
97
+ "recommendedSkill": "trace-the-fault",
98
+ "scores": {
99
+ "clarity": 5,
100
+ "actionability": 5,
101
+ "guardrails": 5,
102
+ "evidence": 5,
103
+ "outcome": 4
104
+ },
105
+ "notes": "CI recovery benefits from the same diagnosis discipline: start from logs, reproduce, isolate, then patch. Outcome depends on whether the CI failure can be reproduced locally."
106
+ },
107
+ {
108
+ "scenarioId": "S09",
109
+ "recommendedSkill": "make-a-map",
110
+ "scores": {
111
+ "clarity": 4,
112
+ "actionability": 5,
113
+ "guardrails": 4,
114
+ "evidence": 5,
115
+ "outcome": 4
116
+ },
117
+ "notes": "The map-and-slice combination handles migration risk well by separating schema, data, and route compatibility. Database-specific rollback details remain project dependent."
118
+ },
119
+ {
120
+ "scenarioId": "S10",
121
+ "recommendedSkill": "ship-the-work",
122
+ "scores": {
123
+ "clarity": 5,
124
+ "actionability": 4,
125
+ "guardrails": 5,
126
+ "evidence": 5,
127
+ "outcome": 4
128
+ },
129
+ "notes": "The handoff scenario is well covered because the skill values honest status over false completion. It needs local judgment about whether to commit partial work or leave notes only."
130
+ }
131
+ ]
132
+ }
@@ -0,0 +1,212 @@
1
+ {
2
+ "product": "Supered",
3
+ "version": "0.1",
4
+ "description": "A compact public catalog of realistic coding-agent scenarios for checking whether Supered skills are useful in practice.",
5
+ "scoring": {
6
+ "maxScore": 5,
7
+ "dimensions": [
8
+ "clarity",
9
+ "actionability",
10
+ "guardrails",
11
+ "evidence",
12
+ "outcome"
13
+ ],
14
+ "rubric": {
15
+ "1": "Absent or actively misleading.",
16
+ "2": "Present but too vague to guide reliable work.",
17
+ "3": "Usable with noticeable gaps or extra human correction.",
18
+ "4": "Strong enough for normal work with minor judgment required.",
19
+ "5": "Excellent: specific, safe, evidence-driven, and ready to apply."
20
+ }
21
+ },
22
+ "scenarios": [
23
+ {
24
+ "id": "S01",
25
+ "title": "Turn a vague feature request into a buildable slice",
26
+ "prompt": "Add dashboards for customers so they can understand account health, but keep it simple and do not overbuild it.",
27
+ "context": "The repo has a React admin app, no product requirements document, three possible user roles, and no obvious owner for metric definitions. The agent must avoid inventing a large analytics system.",
28
+ "primarySkills": [
29
+ "using-supered",
30
+ "shape-the-task"
31
+ ],
32
+ "successCriteria": [
33
+ "Clarifies the target user, first workflow, and non-goals.",
34
+ "Produces a short brief that can be accepted or corrected.",
35
+ "Identifies the smallest useful dashboard slice before implementation."
36
+ ],
37
+ "expectedEvidence": [
38
+ "A concise task brief with assumptions called out.",
39
+ "A list of decisions that need user confirmation before code changes."
40
+ ]
41
+ },
42
+ {
43
+ "id": "S02",
44
+ "title": "Map a multi-file authentication change",
45
+ "prompt": "Add magic-link login to the existing app and make sure the old password login still works.",
46
+ "context": "The app has API routes, an email sender, session middleware, and browser tests. The request touches security-sensitive paths and must be broken into reviewable changes.",
47
+ "primarySkills": [
48
+ "make-a-map",
49
+ "prove-the-change"
50
+ ],
51
+ "successCriteria": [
52
+ "Lists affected files and ownership boundaries before editing.",
53
+ "Sequences backend, email, session, and UI work into small checkpoints.",
54
+ "Names verification commands for old and new login paths."
55
+ ],
56
+ "expectedEvidence": [
57
+ "A short execution map with ordered slices.",
58
+ "Verification notes covering both magic-link and password login."
59
+ ]
60
+ },
61
+ {
62
+ "id": "S03",
63
+ "title": "Implement a narrow UI improvement without scope creep",
64
+ "prompt": "Make the settings page easier to scan and add a save confirmation when preferences change.",
65
+ "context": "The settings page is already used by customers, has mixed form components, and has a brittle screenshot test. The agent should improve the workflow without redesigning the app.",
66
+ "primarySkills": [
67
+ "build-in-slices"
68
+ ],
69
+ "successCriteria": [
70
+ "Starts with one visible behavior and one verification path.",
71
+ "Keeps layout and interaction edits separable.",
72
+ "Avoids unrelated visual redesign or component churn."
73
+ ],
74
+ "expectedEvidence": [
75
+ "A small diff grouped around the settings workflow.",
76
+ "Passing component or browser verification for the save confirmation."
77
+ ]
78
+ },
79
+ {
80
+ "id": "S04",
81
+ "title": "Diagnose an intermittent checkout failure",
82
+ "prompt": "Checkout sometimes fails with a generic payment error, mostly on mobile Safari. Fix it.",
83
+ "context": "Logs show several possible causes: token refresh, double-submit, gateway declines, and a recent UI refactor. The agent must investigate before changing code.",
84
+ "primarySkills": [
85
+ "trace-the-fault"
86
+ ],
87
+ "successCriteria": [
88
+ "Separates symptom, reproduction attempts, and hypotheses.",
89
+ "Finds evidence for one root cause before patching.",
90
+ "Adds or updates a regression check for the observed failure."
91
+ ],
92
+ "expectedEvidence": [
93
+ "A fault trace showing ruled-out hypotheses.",
94
+ "A failing-then-passing check or a documented reproduction result."
95
+ ]
96
+ },
97
+ {
98
+ "id": "S05",
99
+ "title": "Prove a production bug fix before claiming completion",
100
+ "prompt": "The export button is fixed now, right? Tell the customer we are done and include the exact proof that the filtered CSV download works.",
101
+ "context": "A prior agent changed CSV generation but did not run the browser workflow. The customer issue involves filters, permissions, and a downloaded file.",
102
+ "primarySkills": [
103
+ "prove-the-change"
104
+ ],
105
+ "successCriteria": [
106
+ "Refuses to claim completion from code changes alone.",
107
+ "Runs the relevant automated or manual export verification.",
108
+ "Reports the actual evidence and any untested risk."
109
+ ],
110
+ "expectedEvidence": [
111
+ "Fresh command output or browser verification notes.",
112
+ "A final status that distinguishes verified behavior from assumptions."
113
+ ]
114
+ },
115
+ {
116
+ "id": "S06",
117
+ "title": "Prepare a small public release",
118
+ "prompt": "Commit this library update, push it, tag a release, and make sure the public install instructions still work.",
119
+ "context": "The repo is public, has CI, a package manifest, generated screenshots, and possible uncommitted user files. The agent must not overwrite unrelated work.",
120
+ "primarySkills": [
121
+ "ship-the-work",
122
+ "prove-the-change"
123
+ ],
124
+ "successCriteria": [
125
+ "Checks the worktree and stages only intended files.",
126
+ "Runs release-relevant validation before tagging or publishing.",
127
+ "Reads back the public release or install path after shipping."
128
+ ],
129
+ "expectedEvidence": [
130
+ "Git status, commit, push, and release URLs.",
131
+ "Public install or package registry verification."
132
+ ]
133
+ },
134
+ {
135
+ "id": "S07",
136
+ "title": "Choose the right workflow for a mixed request",
137
+ "prompt": "The app is slow, the nav is confusing, and we should probably add AI summaries. Can you just improve it?",
138
+ "context": "The request mixes performance, design, and new AI features. The agent needs to route the session instead of starting a broad refactor.",
139
+ "primarySkills": [
140
+ "using-supered",
141
+ "shape-the-task"
142
+ ],
143
+ "successCriteria": [
144
+ "Identifies separate work types and risks.",
145
+ "Chooses a first skill based on the highest uncertainty.",
146
+ "Offers a narrow next action instead of starting all tasks at once."
147
+ ],
148
+ "expectedEvidence": [
149
+ "A routing decision with the chosen Supered skill.",
150
+ "A short set of questions or assumptions that unblock the first slice."
151
+ ]
152
+ },
153
+ {
154
+ "id": "S08",
155
+ "title": "Recover from a failing CI run",
156
+ "prompt": "CI is red after the last push. Please inspect the failed job, fix the real cause, and keep the intended feature intact.",
157
+ "context": "The failure could come from tests, packaging, linting, or browser setup. The agent must inspect the failed job and avoid local-only guesses.",
158
+ "primarySkills": [
159
+ "trace-the-fault",
160
+ "build-in-slices"
161
+ ],
162
+ "successCriteria": [
163
+ "Starts from the failing CI evidence.",
164
+ "Reproduces or narrows the failure locally before editing.",
165
+ "Makes the smallest fix that preserves the intended feature."
166
+ ],
167
+ "expectedEvidence": [
168
+ "CI log excerpt or command that identifies the failure point.",
169
+ "A passing local check and, when possible, a green rerun."
170
+ ]
171
+ },
172
+ {
173
+ "id": "S09",
174
+ "title": "Add tests around a risky data migration",
175
+ "prompt": "Add a migration that backfills missing organization slugs without breaking existing URLs.",
176
+ "context": "The database has legacy rows, duplicate display names, and routing code that assumes slugs are stable. The migration needs a reversible plan and proof.",
177
+ "primarySkills": [
178
+ "make-a-map",
179
+ "build-in-slices",
180
+ "prove-the-change"
181
+ ],
182
+ "successCriteria": [
183
+ "Maps schema, data, and route changes before implementation.",
184
+ "Builds the migration and app compatibility in separate slices.",
185
+ "Verifies duplicate and legacy-row cases explicitly."
186
+ ],
187
+ "expectedEvidence": [
188
+ "Migration test cases or dry-run output for edge cases.",
189
+ "Application route checks for old and new organization URLs."
190
+ ]
191
+ },
192
+ {
193
+ "id": "S10",
194
+ "title": "Hand off unfinished work honestly",
195
+ "prompt": "I need to stop here. Leave the branch in a state another engineer can continue from.",
196
+ "context": "The agent has partial implementation, one failing test, and two unverified assumptions. The handoff must be useful without pretending the work is done.",
197
+ "primarySkills": [
198
+ "ship-the-work",
199
+ "prove-the-change"
200
+ ],
201
+ "successCriteria": [
202
+ "Separates completed, partial, and unstarted work.",
203
+ "Lists failing or skipped checks clearly.",
204
+ "Leaves next steps and evidence paths for the next engineer."
205
+ ],
206
+ "expectedEvidence": [
207
+ "A handoff note with known status and blockers.",
208
+ "Command output showing the current failing or passing checks."
209
+ ]
210
+ }
211
+ ]
212
+ }
package/docs/index.html CHANGED
@@ -15,6 +15,7 @@
15
15
  </a>
16
16
  <nav>
17
17
  <a href="#skills">Skills</a>
18
+ <a href="#proof">Proof</a>
18
19
  <a href="#install">Install</a>
19
20
  <a href="https://github.com/fhajjej-ship-it/Supered">GitHub</a>
20
21
  </nav>
@@ -65,6 +66,28 @@
65
66
  </div>
66
67
  </section>
67
68
 
69
+ <section id="proof" class="proof" aria-labelledby="proof-title">
70
+ <div class="section-head">
71
+ <p class="eyebrow">Proof</p>
72
+ <h2 id="proof-title">Eval pack, not vibes.</h2>
73
+ <p>Supered ships with 10 realistic coding-agent scenarios and a baseline scoring pass across clarity, actionability, guardrails, evidence, and outcome.</p>
74
+ </div>
75
+ <div class="proof-grid">
76
+ <a href="https://github.com/fhajjej-ship-it/Supered/blob/main/docs/which-skill.md">
77
+ <span>Guide</span>
78
+ <strong>Which skill should I use?</strong>
79
+ </a>
80
+ <a href="https://github.com/fhajjej-ship-it/Supered/blob/main/docs/evals/scenarios.json">
81
+ <span>Catalog</span>
82
+ <strong>10 realistic coding-agent scenarios</strong>
83
+ </a>
84
+ <a href="https://github.com/fhajjej-ship-it/Supered/blob/main/docs/evals/README.md">
85
+ <span>Eval pack</span>
86
+ <strong>How usefulness is scored</strong>
87
+ </a>
88
+ </div>
89
+ </section>
90
+
68
91
  <section id="install" class="install" aria-labelledby="install-title">
69
92
  <div>
70
93
  <p class="eyebrow">Install</p>
package/docs/install.md CHANGED
@@ -6,12 +6,14 @@ Supered ships plain skill folders plus lightweight plugin manifests.
6
6
 
7
7
  ```bash
8
8
  npx supered install --target codex
9
+ npx supered doctor --target codex
9
10
  ```
10
11
 
11
12
  Choose another host:
12
13
 
13
14
  ```bash
14
15
  npx supered install --target opencode
16
+ npx supered doctor --target opencode
15
17
  ```
16
18
 
17
19
  ## One-line install
@@ -30,20 +32,34 @@ curl -fsSL https://raw.githubusercontent.com/fhajjej-ship-it/Supered/main/instal
30
32
 
31
33
  ```bash
32
34
  node ./bin/supered.mjs install --target codex
35
+ node ./bin/supered.mjs doctor --target codex
33
36
  ```
34
37
 
35
38
  ## Claude
36
39
 
37
40
  ```bash
38
41
  node ./bin/supered.mjs install --target claude
42
+ node ./bin/supered.mjs doctor --target claude
39
43
  ```
40
44
 
41
45
  ## Gemini
42
46
 
43
47
  ```bash
44
48
  node ./bin/supered.mjs install --target gemini
49
+ node ./bin/supered.mjs doctor --target gemini
45
50
  ```
46
51
 
52
+ ## Doctor
53
+
54
+ Doctor checks Install Health without changing files:
55
+
56
+ ```bash
57
+ supered doctor --target codex
58
+ supered doctor --target codex --json
59
+ ```
60
+
61
+ It reports missing skills, changed skill files, missing destinations, and unsafe symlinks. The fix it prints is a reinstall command for the same target and destination.
62
+
47
63
  ## Manual
48
64
 
49
65
  Copy each folder in `skills/` into the skill directory used by your agent host.