supered 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "supered",
3
- "version": "0.1.3",
3
+ "version": "0.2.1",
4
4
  "description": "Small, evidence-first workflows for coding agents.",
5
5
  "author": {
6
6
  "name": "Farouk Hajjej",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "supered",
3
- "version": "0.1.3",
3
+ "version": "0.2.1",
4
4
  "description": "A compact agent workflow kit for clarifying, building, verifying, and shipping software changes.",
5
5
  "author": {
6
6
  "name": "Farouk Hajjej",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "supered",
3
- "version": "0.1.3",
3
+ "version": "0.2.1",
4
4
  "description": "Small, evidence-first workflows for coding agents.",
5
5
  "homepage": "https://fhajjej-ship-it.github.io/Supered/",
6
6
  "license": "MIT",
package/README.md CHANGED
@@ -63,6 +63,25 @@ Host-specific notes:
63
63
 
64
64
  Marketplace readiness lives in [docs/marketplace-checklist.md](docs/marketplace-checklist.md).
65
65
 
66
+ ## Which Skill Should I Use?
67
+
68
+ Use [docs/which-skill.md](docs/which-skill.md) as the quick router:
69
+
70
+ - unclear task: `shape-the-task`
71
+ - clear but large task: `make-a-map`
72
+ - planned implementation: `build-in-slices`
73
+ - broken or flaky behavior: `trace-the-fault`
74
+ - completion claim: `prove-the-change`
75
+ - commit, publish, or handoff: `ship-the-work`
76
+
77
+ ## Eval Pack
78
+
79
+ Supered includes a public eval pack so the skills can be judged against realistic work instead of vibes.
80
+
81
+ - [Scenario catalog](docs/evals/scenarios.json): 10 realistic coding-agent scenarios.
82
+ - [Baseline results](docs/evals/baseline-results.json): scores across clarity, actionability, guardrails, evidence, and outcome.
83
+ - [Eval report](docs/evals/README.md): how to use the scenarios to improve the skills.
84
+
66
85
  ## CLI
67
86
 
68
87
  ```bash
package/bin/supered.mjs CHANGED
@@ -1,19 +1,13 @@
1
1
  #!/usr/bin/env node
2
- import { cp, mkdir } from "node:fs/promises";
3
2
  import { dirname, resolve } from "node:path";
4
3
  import { fileURLToPath } from "node:url";
5
4
 
6
- import { listSkills, validateProject } from "../lib/manifest.js";
5
+ import { installSuperedSkills } from "../lib/host-install.js";
6
+ import { listSkills } from "../lib/manifest.js";
7
+ import { validateReleaseBundle } from "../lib/release-bundle.js";
7
8
 
8
9
  const root = resolve(dirname(fileURLToPath(import.meta.url)), "..");
9
10
  const [command, ...args] = process.argv.slice(2);
10
- const installTargets = {
11
- codex: ".codex/skills",
12
- claude: ".claude/skills",
13
- cursor: ".cursor/skills",
14
- gemini: ".gemini/skills",
15
- opencode: ".opencode/skills"
16
- };
17
11
 
18
12
  function printHelp() {
19
13
  console.log(`Supered
@@ -42,7 +36,7 @@ async function skillsCommand() {
42
36
  }
43
37
 
44
38
  async function validateCommand() {
45
- const result = await validateProject(root);
39
+ const result = await validateReleaseBundle(root);
46
40
  if (result.errors.length > 0) {
47
41
  for (const error of result.errors) {
48
42
  console.error(`- ${error}`);
@@ -54,29 +48,18 @@ async function validateCommand() {
54
48
  console.log(`Supered bundle is valid: ${result.skills.length} skills, ${result.checked.length} files checked.`);
55
49
  }
56
50
 
57
- function defaultInstallDest(target) {
58
- const home = process.env.HOME;
59
- if (!home) {
60
- throw new Error("HOME is not set; pass --dest explicitly.");
61
- }
62
-
63
- if (installTargets[target]) return `${home}/${installTargets[target]}`;
64
- throw new Error(`Unsupported target: ${target}`);
65
- }
66
-
67
51
  async function installCommand() {
68
52
  const targetIndex = args.indexOf("--target");
69
53
  const destIndex = args.indexOf("--dest");
70
54
  const target = targetIndex === -1 ? "" : args[targetIndex + 1];
71
- const dest = destIndex === -1 ? defaultInstallDest(target) : args[destIndex + 1];
55
+ const dest = destIndex === -1 ? undefined : args[destIndex + 1];
72
56
 
73
- if (!target || !dest) {
57
+ if (!target || (destIndex !== -1 && !dest)) {
74
58
  throw new Error("Install requires --target <codex|claude|cursor|gemini|opencode>.");
75
59
  }
76
60
 
77
- await mkdir(dest, { recursive: true });
78
- await cp(resolve(root, "skills"), dest, { recursive: true });
79
- console.log(`Installed Supered skills for ${target} at ${dest}.`);
61
+ const result = await installSuperedSkills({ root, target, dest });
62
+ console.log(`Installed Supered skills for ${result.target} at ${result.dest}.`);
80
63
  }
81
64
 
82
65
  try {
@@ -0,0 +1,34 @@
1
+ # Supered Eval Pack
2
+
3
+ This pack gives Supered a visible usefulness standard: 10 realistic coding-agent scenarios, a simple scoring rubric, and baseline results for the current skill set.
4
+
5
+ The goal is not to claim scientific benchmark status. The goal is to make the product falsifiable: a reader can see what the skills are supposed to help with, how outcomes are judged, and where the current library is strong or still dependent on project context.
6
+
7
+ ## Files
8
+
9
+ - [scenarios.json](scenarios.json): 10 realistic coding-agent scenarios with prompts, context, primary skills, success criteria, and expected evidence.
10
+ - [baseline-results.json](baseline-results.json): maintainer-scored baseline results for Supered v0.2.0.
11
+
12
+ ## Scoring
13
+
14
+ Each scenario is scored from 1 to 5 across five dimensions:
15
+
16
+ - `clarity`: the skill helps the agent understand what kind of work this is.
17
+ - `actionability`: the skill tells the agent what to do next, not just how to think.
18
+ - `guardrails`: the skill prevents common bad moves such as scope creep, guessing, or premature claims.
19
+ - `evidence`: the skill asks for proof before completion or handoff.
20
+ - `outcome`: the skill increases the chance of a useful user-facing result.
21
+
22
+ ## Baseline Summary
23
+
24
+ The current baseline average is `4.64 / 5` across the catalog. Strongest areas are release handoff, verification, and fault tracing. The scenarios with lower scores are intentionally product-heavy or environment-dependent, where a skill can guide the agent but cannot replace user priorities, browser access, database access, or security review.
25
+
26
+ ## How To Use It
27
+
28
+ 1. Pick a scenario close to your real task.
29
+ 2. Ask an agent to use the recommended Supered skill.
30
+ 3. Score the result against the five dimensions.
31
+ 4. Add notes when the skill was vague, too strict, or missing a useful recovery path.
32
+ 5. Propose a skill improvement or a new scenario when the score exposes a real gap.
33
+
34
+ This gives contributors a practical way to improve Supered without turning the project into a pile of opinions.
@@ -0,0 +1,132 @@
1
+ {
2
+ "product": "Supered",
3
+ "catalogVersion": "0.1",
4
+ "summary": {
5
+ "averageScore": 4.64,
6
+ "scenarioCount": 10,
7
+ "scoreScale": "1-5",
8
+ "method": "Maintainer review of the v0.2.0 Supered skill bodies against the scenario catalog. Scores are intentionally conservative until external user studies exist."
9
+ },
10
+ "results": [
11
+ {
12
+ "scenarioId": "S01",
13
+ "recommendedSkill": "shape-the-task",
14
+ "scores": {
15
+ "clarity": 5,
16
+ "actionability": 5,
17
+ "guardrails": 4,
18
+ "evidence": 4,
19
+ "outcome": 5
20
+ },
21
+ "notes": "The skill asks for users, constraints, assumptions, and acceptance signals before implementation, which directly prevents dashboard overreach. Evidence is strong but still depends on user confirmation."
22
+ },
23
+ {
24
+ "scenarioId": "S02",
25
+ "recommendedSkill": "make-a-map",
26
+ "scores": {
27
+ "clarity": 5,
28
+ "actionability": 5,
29
+ "guardrails": 5,
30
+ "evidence": 4,
31
+ "outcome": 5
32
+ },
33
+ "notes": "The mapping playbook fits multi-file auth work well because it forces affected areas, checkpoints, and verification commands before edits. External security review may still be needed."
34
+ },
35
+ {
36
+ "scenarioId": "S03",
37
+ "recommendedSkill": "build-in-slices",
38
+ "scores": {
39
+ "clarity": 4,
40
+ "actionability": 5,
41
+ "guardrails": 5,
42
+ "evidence": 4,
43
+ "outcome": 4
44
+ },
45
+ "notes": "The slice discipline prevents a settings redesign from swallowing the small save-confirmation task. The score is slightly lower because visual quality still needs domain judgment."
46
+ },
47
+ {
48
+ "scenarioId": "S04",
49
+ "recommendedSkill": "trace-the-fault",
50
+ "scores": {
51
+ "clarity": 5,
52
+ "actionability": 5,
53
+ "guardrails": 5,
54
+ "evidence": 5,
55
+ "outcome": 4
56
+ },
57
+ "notes": "The fault-tracing skill is strongest on this scenario: it demands symptom capture, hypotheses, probes, and proof before patches. The remaining risk is platform access to mobile Safari."
58
+ },
59
+ {
60
+ "scenarioId": "S05",
61
+ "recommendedSkill": "prove-the-change",
62
+ "scores": {
63
+ "clarity": 5,
64
+ "actionability": 4,
65
+ "guardrails": 5,
66
+ "evidence": 5,
67
+ "outcome": 5
68
+ },
69
+ "notes": "The verification skill explicitly blocks premature completion claims and requires fresh proof. Actionability is high, though the exact export check depends on the app's local tooling."
70
+ },
71
+ {
72
+ "scenarioId": "S06",
73
+ "recommendedSkill": "ship-the-work",
74
+ "scores": {
75
+ "clarity": 5,
76
+ "actionability": 5,
77
+ "guardrails": 5,
78
+ "evidence": 5,
79
+ "outcome": 5
80
+ },
81
+ "notes": "The shipping skill aligns directly with public release work: clean status, intended staging, verification, push, release, and public read-back. It is the best fit in the catalog."
82
+ },
83
+ {
84
+ "scenarioId": "S07",
85
+ "recommendedSkill": "using-supered",
86
+ "scores": {
87
+ "clarity": 4,
88
+ "actionability": 5,
89
+ "guardrails": 4,
90
+ "evidence": 4,
91
+ "outcome": 4
92
+ },
93
+ "notes": "The entrypoint helps route a messy request into a first useful workflow instead of trying everything. The score is lower because the request still requires product prioritization."
94
+ },
95
+ {
96
+ "scenarioId": "S08",
97
+ "recommendedSkill": "trace-the-fault",
98
+ "scores": {
99
+ "clarity": 5,
100
+ "actionability": 5,
101
+ "guardrails": 5,
102
+ "evidence": 5,
103
+ "outcome": 4
104
+ },
105
+ "notes": "CI recovery benefits from the same diagnosis discipline: start from logs, reproduce, isolate, then patch. Outcome depends on whether the CI failure can be reproduced locally."
106
+ },
107
+ {
108
+ "scenarioId": "S09",
109
+ "recommendedSkill": "make-a-map",
110
+ "scores": {
111
+ "clarity": 4,
112
+ "actionability": 5,
113
+ "guardrails": 4,
114
+ "evidence": 5,
115
+ "outcome": 4
116
+ },
117
+ "notes": "The map-and-slice combination handles migration risk well by separating schema, data, and route compatibility. Database-specific rollback details remain project dependent."
118
+ },
119
+ {
120
+ "scenarioId": "S10",
121
+ "recommendedSkill": "ship-the-work",
122
+ "scores": {
123
+ "clarity": 5,
124
+ "actionability": 4,
125
+ "guardrails": 5,
126
+ "evidence": 5,
127
+ "outcome": 4
128
+ },
129
+ "notes": "The handoff scenario is well covered because the skill values honest status over false completion. It needs local judgment about whether to commit partial work or leave notes only."
130
+ }
131
+ ]
132
+ }
@@ -0,0 +1,212 @@
1
+ {
2
+ "product": "Supered",
3
+ "version": "0.1",
4
+ "description": "A compact public catalog of realistic coding-agent scenarios for checking whether Supered skills are useful in practice.",
5
+ "scoring": {
6
+ "maxScore": 5,
7
+ "dimensions": [
8
+ "clarity",
9
+ "actionability",
10
+ "guardrails",
11
+ "evidence",
12
+ "outcome"
13
+ ],
14
+ "rubric": {
15
+ "1": "Absent or actively misleading.",
16
+ "2": "Present but too vague to guide reliable work.",
17
+ "3": "Usable with noticeable gaps or extra human correction.",
18
+ "4": "Strong enough for normal work with minor judgment required.",
19
+ "5": "Excellent: specific, safe, evidence-driven, and ready to apply."
20
+ }
21
+ },
22
+ "scenarios": [
23
+ {
24
+ "id": "S01",
25
+ "title": "Turn a vague feature request into a buildable slice",
26
+ "prompt": "Add dashboards for customers so they can understand account health, but keep it simple and do not overbuild it.",
27
+ "context": "The repo has a React admin app, no product requirements document, three possible user roles, and no obvious owner for metric definitions. The agent must avoid inventing a large analytics system.",
28
+ "primarySkills": [
29
+ "using-supered",
30
+ "shape-the-task"
31
+ ],
32
+ "successCriteria": [
33
+ "Clarifies the target user, first workflow, and non-goals.",
34
+ "Produces a short brief that can be accepted or corrected.",
35
+ "Identifies the smallest useful dashboard slice before implementation."
36
+ ],
37
+ "expectedEvidence": [
38
+ "A concise task brief with assumptions called out.",
39
+ "A list of decisions that need user confirmation before code changes."
40
+ ]
41
+ },
42
+ {
43
+ "id": "S02",
44
+ "title": "Map a multi-file authentication change",
45
+ "prompt": "Add magic-link login to the existing app and make sure the old password login still works.",
46
+ "context": "The app has API routes, an email sender, session middleware, and browser tests. The request touches security-sensitive paths and must be broken into reviewable changes.",
47
+ "primarySkills": [
48
+ "make-a-map",
49
+ "prove-the-change"
50
+ ],
51
+ "successCriteria": [
52
+ "Lists affected files and ownership boundaries before editing.",
53
+ "Sequences backend, email, session, and UI work into small checkpoints.",
54
+ "Names verification commands for old and new login paths."
55
+ ],
56
+ "expectedEvidence": [
57
+ "A short execution map with ordered slices.",
58
+ "Verification notes covering both magic-link and password login."
59
+ ]
60
+ },
61
+ {
62
+ "id": "S03",
63
+ "title": "Implement a narrow UI improvement without scope creep",
64
+ "prompt": "Make the settings page easier to scan and add a save confirmation when preferences change.",
65
+ "context": "The settings page is already used by customers, has mixed form components, and has a brittle screenshot test. The agent should improve the workflow without redesigning the app.",
66
+ "primarySkills": [
67
+ "build-in-slices"
68
+ ],
69
+ "successCriteria": [
70
+ "Starts with one visible behavior and one verification path.",
71
+ "Keeps layout and interaction edits separable.",
72
+ "Avoids unrelated visual redesign or component churn."
73
+ ],
74
+ "expectedEvidence": [
75
+ "A small diff grouped around the settings workflow.",
76
+ "Passing component or browser verification for the save confirmation."
77
+ ]
78
+ },
79
+ {
80
+ "id": "S04",
81
+ "title": "Diagnose an intermittent checkout failure",
82
+ "prompt": "Checkout sometimes fails with a generic payment error, mostly on mobile Safari. Fix it.",
83
+ "context": "Logs show several possible causes: token refresh, double-submit, gateway declines, and a recent UI refactor. The agent must investigate before changing code.",
84
+ "primarySkills": [
85
+ "trace-the-fault"
86
+ ],
87
+ "successCriteria": [
88
+ "Separates symptom, reproduction attempts, and hypotheses.",
89
+ "Finds evidence for one root cause before patching.",
90
+ "Adds or updates a regression check for the observed failure."
91
+ ],
92
+ "expectedEvidence": [
93
+ "A fault trace showing ruled-out hypotheses.",
94
+ "A failing-then-passing check or a documented reproduction result."
95
+ ]
96
+ },
97
+ {
98
+ "id": "S05",
99
+ "title": "Prove a production bug fix before claiming completion",
100
+ "prompt": "The export button is fixed now, right? Tell the customer we are done and include the exact proof that the filtered CSV download works.",
101
+ "context": "A prior agent changed CSV generation but did not run the browser workflow. The customer issue involves filters, permissions, and a downloaded file.",
102
+ "primarySkills": [
103
+ "prove-the-change"
104
+ ],
105
+ "successCriteria": [
106
+ "Refuses to claim completion from code changes alone.",
107
+ "Runs the relevant automated or manual export verification.",
108
+ "Reports the actual evidence and any untested risk."
109
+ ],
110
+ "expectedEvidence": [
111
+ "Fresh command output or browser verification notes.",
112
+ "A final status that distinguishes verified behavior from assumptions."
113
+ ]
114
+ },
115
+ {
116
+ "id": "S06",
117
+ "title": "Prepare a small public release",
118
+ "prompt": "Commit this library update, push it, tag a release, and make sure the public install instructions still work.",
119
+ "context": "The repo is public, has CI, a package manifest, generated screenshots, and possible uncommitted user files. The agent must not overwrite unrelated work.",
120
+ "primarySkills": [
121
+ "ship-the-work",
122
+ "prove-the-change"
123
+ ],
124
+ "successCriteria": [
125
+ "Checks the worktree and stages only intended files.",
126
+ "Runs release-relevant validation before tagging or publishing.",
127
+ "Reads back the public release or install path after shipping."
128
+ ],
129
+ "expectedEvidence": [
130
+ "Git status, commit, push, and release URLs.",
131
+ "Public install or package registry verification."
132
+ ]
133
+ },
134
+ {
135
+ "id": "S07",
136
+ "title": "Choose the right workflow for a mixed request",
137
+ "prompt": "The app is slow, the nav is confusing, and we should probably add AI summaries. Can you just improve it?",
138
+ "context": "The request mixes performance, design, and new AI features. The agent needs to route the session instead of starting a broad refactor.",
139
+ "primarySkills": [
140
+ "using-supered",
141
+ "shape-the-task"
142
+ ],
143
+ "successCriteria": [
144
+ "Identifies separate work types and risks.",
145
+ "Chooses a first skill based on the highest uncertainty.",
146
+ "Offers a narrow next action instead of starting all tasks at once."
147
+ ],
148
+ "expectedEvidence": [
149
+ "A routing decision with the chosen Supered skill.",
150
+ "A short set of questions or assumptions that unblock the first slice."
151
+ ]
152
+ },
153
+ {
154
+ "id": "S08",
155
+ "title": "Recover from a failing CI run",
156
+ "prompt": "CI is red after the last push. Please inspect the failed job, fix the real cause, and keep the intended feature intact.",
157
+ "context": "The failure could come from tests, packaging, linting, or browser setup. The agent must inspect the failed job and avoid local-only guesses.",
158
+ "primarySkills": [
159
+ "trace-the-fault",
160
+ "build-in-slices"
161
+ ],
162
+ "successCriteria": [
163
+ "Starts from the failing CI evidence.",
164
+ "Reproduces or narrows the failure locally before editing.",
165
+ "Makes the smallest fix that preserves the intended feature."
166
+ ],
167
+ "expectedEvidence": [
168
+ "CI log excerpt or command that identifies the failure point.",
169
+ "A passing local check and, when possible, a green rerun."
170
+ ]
171
+ },
172
+ {
173
+ "id": "S09",
174
+ "title": "Add tests around a risky data migration",
175
+ "prompt": "Add a migration that backfills missing organization slugs without breaking existing URLs.",
176
+ "context": "The database has legacy rows, duplicate display names, and routing code that assumes slugs are stable. The migration needs a reversible plan and proof.",
177
+ "primarySkills": [
178
+ "make-a-map",
179
+ "build-in-slices",
180
+ "prove-the-change"
181
+ ],
182
+ "successCriteria": [
183
+ "Maps schema, data, and route changes before implementation.",
184
+ "Builds the migration and app compatibility in separate slices.",
185
+ "Verifies duplicate and legacy-row cases explicitly."
186
+ ],
187
+ "expectedEvidence": [
188
+ "Migration test cases or dry-run output for edge cases.",
189
+ "Application route checks for old and new organization URLs."
190
+ ]
191
+ },
192
+ {
193
+ "id": "S10",
194
+ "title": "Hand off unfinished work honestly",
195
+ "prompt": "I need to stop here. Leave the branch in a state another engineer can continue from.",
196
+ "context": "The agent has partial implementation, one failing test, and two unverified assumptions. The handoff must be useful without pretending the work is done.",
197
+ "primarySkills": [
198
+ "ship-the-work",
199
+ "prove-the-change"
200
+ ],
201
+ "successCriteria": [
202
+ "Separates completed, partial, and unstarted work.",
203
+ "Lists failing or skipped checks clearly.",
204
+ "Leaves next steps and evidence paths for the next engineer."
205
+ ],
206
+ "expectedEvidence": [
207
+ "A handoff note with known status and blockers.",
208
+ "Command output showing the current failing or passing checks."
209
+ ]
210
+ }
211
+ ]
212
+ }
package/docs/index.html CHANGED
@@ -15,6 +15,7 @@
15
15
  </a>
16
16
  <nav>
17
17
  <a href="#skills">Skills</a>
18
+ <a href="#proof">Proof</a>
18
19
  <a href="#install">Install</a>
19
20
  <a href="https://github.com/fhajjej-ship-it/Supered">GitHub</a>
20
21
  </nav>
@@ -65,6 +66,28 @@
65
66
  </div>
66
67
  </section>
67
68
 
69
+ <section id="proof" class="proof" aria-labelledby="proof-title">
70
+ <div class="section-head">
71
+ <p class="eyebrow">Proof</p>
72
+ <h2 id="proof-title">Eval pack, not vibes.</h2>
73
+ <p>Supered ships with 10 realistic coding-agent scenarios and a baseline scoring pass across clarity, actionability, guardrails, evidence, and outcome.</p>
74
+ </div>
75
+ <div class="proof-grid">
76
+ <a href="https://github.com/fhajjej-ship-it/Supered/blob/main/docs/which-skill.md">
77
+ <span>Guide</span>
78
+ <strong>Which skill should I use?</strong>
79
+ </a>
80
+ <a href="https://github.com/fhajjej-ship-it/Supered/blob/main/docs/evals/scenarios.json">
81
+ <span>Catalog</span>
82
+ <strong>10 realistic coding-agent scenarios</strong>
83
+ </a>
84
+ <a href="https://github.com/fhajjej-ship-it/Supered/blob/main/docs/evals/README.md">
85
+ <span>Eval pack</span>
86
+ <strong>How usefulness is scored</strong>
87
+ </a>
88
+ </div>
89
+ </section>
90
+
68
91
  <section id="install" class="install" aria-labelledby="install-title">
69
92
  <div>
70
93
  <p class="eyebrow">Install</p>
@@ -0,0 +1,25 @@
1
+ # Skill Design Principles
2
+
3
+ Supered skills are written as operating playbooks, not inspirational prompts. The description is only a trigger. The body carries the useful work: required inputs, a procedure, evidence expectations, guardrails, failure handling, and quality gates.
4
+
5
+ ## Source Guidance
6
+
7
+ - OpenAI's skill guidance emphasizes concise, reusable task instructions that can be invoked when the model needs a specific capability: https://openai.com/academy/skills/
8
+ - OpenAI's practical agent guide frames agents as systems that need clear task boundaries, reliable handoffs, tool use, and verification loops: https://openai.com/business/guides-and-resources/a-practical-guide-to-building-ai-agents/
9
+ - Anthropic's Agent Skills guidance describes skills as modular capability folders with instructions and optional resources, loaded only when relevant: https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills
10
+ - Anthropic's agent-building guidance favors simple, composable workflows, clear routing, explicit evaluator patterns, and measured autonomy: https://www.anthropic.com/engineering/building-effective-agents
11
+ - OpenAI's prompt engineering best practices reinforce direct instructions, concrete examples, explicit constraints, and clear desired outputs: https://help.openai.com/en/articles/6654000-playground-and-prompt-engineering
12
+
13
+ ## Supered Standards
14
+
15
+ Every bundled skill should meet these standards:
16
+
17
+ - Trigger-only metadata: the frontmatter description starts with `Use when`, stays short, and does not summarize the procedure.
18
+ - Progressive disclosure: the first page gives the operating path; deeper examples and resources are only included when they help the agent act.
19
+ - Evidence-first completion: each workflow names what proof must exist before the agent claims progress.
20
+ - Guardrails over vibes: each skill states hard stop conditions and behaviors that must not happen.
21
+ - Failure-mode literacy: common breakdowns are named with recovery moves.
22
+ - Concrete examples: scenarios show what good use looks like in real development work.
23
+ - Testable quality: repository tests enforce the structural minimum so the skills cannot drift back into thin paragraphs.
24
+
25
+ The goal is not more text for its own sake. The goal is a compact professional standard: when an agent opens a Supered skill, it should know when to use it, how to proceed, when to stop, what to show, and how to prove the result.
package/docs/styles.css CHANGED
@@ -176,6 +176,7 @@ h3 {
176
176
  }
177
177
 
178
178
  .workflow,
179
+ .proof,
179
180
  .install {
180
181
  border-top: 1px solid var(--line);
181
182
  padding: 68px clamp(24px, 6vw, 84px);
@@ -209,12 +210,47 @@ h3 {
209
210
  }
210
211
 
211
212
  .steps p,
213
+ .proof p,
212
214
  .install p {
213
215
  color: var(--muted);
214
216
  font-size: 18px;
215
217
  line-height: 1.45;
216
218
  }
217
219
 
220
+ .proof-grid {
221
+ display: grid;
222
+ gap: 16px;
223
+ grid-template-columns: repeat(3, minmax(0, 1fr));
224
+ margin-top: 30px;
225
+ }
226
+
227
+ .proof-grid a {
228
+ background: white;
229
+ border: 1px solid var(--line);
230
+ border-radius: 8px;
231
+ display: grid;
232
+ gap: 14px;
233
+ min-height: 150px;
234
+ padding: 22px;
235
+ }
236
+
237
+ .proof-grid a:hover {
238
+ border-color: var(--teal);
239
+ }
240
+
241
+ .proof-grid span {
242
+ color: var(--orange);
243
+ font-size: 13px;
244
+ font-weight: 850;
245
+ text-transform: uppercase;
246
+ }
247
+
248
+ .proof-grid strong {
249
+ color: var(--navy);
250
+ font-size: 22px;
251
+ line-height: 1.15;
252
+ }
253
+
218
254
  .install {
219
255
  align-items: start;
220
256
  display: grid;
@@ -269,6 +305,10 @@ code {
269
305
  grid-template-columns: 1fr;
270
306
  }
271
307
 
308
+ .proof-grid {
309
+ grid-template-columns: 1fr;
310
+ }
311
+
272
312
  .hero-media {
273
313
  justify-items: start;
274
314
  }
@@ -0,0 +1,33 @@
1
+ # Which Skill Should I Use?
2
+
3
+ Start with the shape of the work, not the name of the tool.
4
+
5
+ | Situation | Use this skill | Why |
6
+ | --- | --- | --- |
7
+ | The request is broad, mixed, or risky and you are not sure where to begin. | `using-supered` | Routes the session to the right next workflow. |
8
+ | The user asks for something vague, underspecified, or easy to overbuild. | `shape-the-task` | Produces a short brief, assumptions, non-goals, and acceptance signals. |
9
+ | The direction is approved but the files, order, or checks are unclear. | `make-a-map` | Turns intent into an execution map with checkpoints and verification. |
10
+ | You are implementing code, docs, packaging, or site changes. | `build-in-slices` | Keeps work small, reviewable, and easier to recover from. |
11
+ | Something is broken, flaky, confusing, or explained only by guesses. | `trace-the-fault` | Forces symptom capture, hypotheses, probes, and evidence before patches. |
12
+ | You are about to say work is done, fixed, published, or ready. | `prove-the-change` | Requires fresh proof before any completion claim. |
13
+ | The work needs commit, push, release, deploy, publish, or public handoff. | `ship-the-work` | Makes shipping part of the work and preserves evidence for readers. |
14
+
15
+ ## Fast Routing
16
+
17
+ - If the task is unclear: start with `shape-the-task`.
18
+ - If the task is clear but large: start with `make-a-map`.
19
+ - If the task is already planned: start with `build-in-slices`.
20
+ - If behavior is broken: start with `trace-the-fault`.
21
+ - If you are making a claim: start with `prove-the-change`.
22
+ - If the work leaves your machine: start with `ship-the-work`.
23
+
24
+ ## Combining Skills
25
+
26
+ Good sessions often chain two to four skills in order:
27
+
28
+ - New feature: `shape-the-task` -> `make-a-map` -> `build-in-slices` -> `prove-the-change`.
29
+ - Bug fix: `trace-the-fault` -> `build-in-slices` -> `prove-the-change`.
30
+ - Public release: `prove-the-change` -> `ship-the-work`.
31
+ - Messy request: `using-supered` -> whichever skill the routing decision selects.
32
+
33
+ Do not load every skill just because they exist. Use the smallest workflow that changes the outcome.