kushi-agents 5.0.2 → 5.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -0
- package/bin/cli.mjs +103 -0
- package/package.json +6 -2
- package/plugin/agents/kushi.agent.md +3 -1
- package/plugin/instructions/skill-authoring.instructions.md +147 -0
- package/plugin/instructions/skill-evals.instructions.md +130 -0
- package/plugin/skills/aggregate-project/evals/evals.json +33 -0
- package/plugin/skills/apply-ado-update/evals/evals.json +33 -0
- package/plugin/skills/ask-project/SKILL.md +10 -0
- package/plugin/skills/ask-project/evals/evals.json +34 -0
- package/plugin/skills/bootstrap-project/evals/evals.json +34 -0
- package/plugin/skills/build-state/evals/evals.json +31 -0
- package/plugin/skills/consolidate-evidence/evals/evals.json +33 -0
- package/plugin/skills/dashboard/evals/evals.json +33 -0
- package/plugin/skills/emit-vertex/evals/evals.json +33 -0
- package/plugin/skills/eval/SKILL.md +90 -0
- package/plugin/skills/eval/evals.schema.json +73 -0
- package/plugin/skills/eval/run-evals.ps1 +372 -0
- package/plugin/skills/fde-intake/evals/evals.json +33 -0
- package/plugin/skills/fde-report/evals/evals.json +33 -0
- package/plugin/skills/fde-triage/evals/evals.json +33 -0
- package/plugin/skills/intro/SKILL.md +160 -451
- package/plugin/skills/intro/evals/evals.json +33 -0
- package/plugin/skills/intro/references/walkthrough.md +310 -0
- package/plugin/skills/link-entities/evals/evals.json +31 -0
- package/plugin/skills/project-status/SKILL.md +10 -1
- package/plugin/skills/project-status/evals/evals.json +33 -0
- package/plugin/skills/propose-ado-update/evals/evals.json +33 -0
- package/plugin/skills/pull-ado/evals/evals.json +35 -0
- package/plugin/skills/pull-crm/evals/evals.json +35 -0
- package/plugin/skills/pull-email/evals/evals.json +35 -0
- package/plugin/skills/pull-loop/evals/evals.json +35 -0
- package/plugin/skills/pull-meetings/evals/evals.json +35 -0
- package/plugin/skills/pull-misc/evals/evals.json +35 -0
- package/plugin/skills/pull-onenote/evals/evals.json +35 -0
- package/plugin/skills/pull-sharepoint/evals/evals.json +35 -0
- package/plugin/skills/pull-teams/evals/evals.json +35 -0
- package/plugin/skills/refresh-project/evals/evals.json +31 -0
- package/plugin/skills/self-check/SKILL.md +2 -0
- package/plugin/skills/self-check/evals/evals.json +28 -0
- package/plugin/skills/self-check/run.ps1 +144 -0
- package/plugin/skills/setup/SKILL.md +10 -0
- package/plugin/skills/setup/evals/evals.json +33 -0
- package/plugin/skills/skill-checker/SKILL.md +136 -0
- package/plugin/skills/skill-checker/check-skill.ps1 +416 -0
- package/plugin/skills/skill-checker/evals/evals.json +41 -0
- package/plugin/skills/skill-creator/SKILL.md +134 -0
- package/plugin/skills/skill-creator/evals/evals.json +40 -0
- package/plugin/skills/skill-creator/generate-eval-review.ps1 +101 -0
- package/plugin/skills/skill-creator/optimize-description.ps1 +87 -0
- package/plugin/skills/skill-creator/scaffold.ps1 +180 -0
- package/plugin/skills/skill-creator/templates/evals-starter.template.json +27 -0
- package/plugin/skills/skill-creator/templates/gotchas-stub.template.md +9 -0
- package/plugin/skills/skill-creator/templates/skill-skeleton.template.md +28 -0
- package/plugin/skills/tour/evals/evals.json +33 -0
- package/plugin/skills/vertex-link/SKILL.md +10 -0
- package/plugin/skills/vertex-link/evals/evals.json +33 -0
- package/src/eval-aggregator.mjs +209 -0
- package/src/eval-aggregator.test.mjs +64 -0
- package/src/eval-runner.test.mjs +69 -0
- package/src/skill-checker.test.mjs +118 -0
- package/src/skill-creator.test.mjs +92 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
# Interactive Walkthrough
|
|
2
|
+
|
|
3
|
+
Loaded by the `intro` skill (`SKILL.md`) when the user accepts the guided tour.
|
|
4
|
+
|
|
5
|
+
## Mode 2: Interactive Walkthrough
|
|
6
|
+
|
|
7
|
+
### Pre-flight
|
|
8
|
+
|
|
9
|
+
Before opening, check whether any project is already bootstrapped:
|
|
10
|
+
|
|
11
|
+
1. Read `<workspace>/.kushi/config/user/project-evidence.yml` (personal config). If `active_projects` has entries, pick the most recent as the demo target — substitute that name into `{{active_project}}` placeholders below.
|
|
12
|
+
2. If there are no active projects, propose a fictional project named **Contoso Discovery** and tell the user that Try-it prompts will use that name; they can substitute their own at any time.
|
|
13
|
+
|
|
14
|
+
### Navigation Keywords
|
|
15
|
+
|
|
16
|
+
After every demo moment, list the navigation keywords:
|
|
17
|
+
|
|
18
|
+
- `next` (or `n`) — advance to the next moment.
|
|
19
|
+
- `skip` (or `s`) — jump to the next moment without trying this one.
|
|
20
|
+
- `done` (or `d`, `exit`, `quit`) — exit the walkthrough; show the cheat sheet.
|
|
21
|
+
- `try` (or `t`) — actually run the Try-it prompt now (kushi will execute it for the active project).
|
|
22
|
+
|
|
23
|
+
### Opening
|
|
24
|
+
|
|
25
|
+
> Great — let's tour Kushi end-to-end. Each step:
|
|
26
|
+
>
|
|
27
|
+
> - Explains **one verb** in plain language.
|
|
28
|
+
> - Gives you a **ready-to-send prompt** in a code block — copy-paste it, or just reply `try` and I'll run it for you.
|
|
29
|
+
> - Waits for `next` / `skip` / `done` before advancing.
|
|
30
|
+
>
|
|
31
|
+
> Demo target for this tour: **{{active_project}}**. (Substitute any project name in the Try-it prompts.)
|
|
32
|
+
>
|
|
33
|
+
> Reply `next` (or just say "let's go") to begin.
|
|
34
|
+
|
|
35
|
+
### Demo Moments
|
|
36
|
+
|
|
37
|
+
Present in order. After each moment, append the navigation block.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
**Moment 1 — Snapshot vs Stream (the foundation)**
|
|
42
|
+
|
|
43
|
+
> **The single most important rule in Kushi.** Every per-source skill writes two kinds of evidence:
|
|
44
|
+
>
|
|
45
|
+
> - **Snapshot** = current state of one entity. One file per entity. Replaced on every refresh. No date in filename. Example: `Evidence/{{alias}}/Teams-Chats/snapshot/roster.md` always shows the *current* member list of the project chat — not who was in it three weeks ago.
|
|
46
|
+
> - **Stream** = timestamped events. One file per ISO week. Append-only. Dated filename. Example: `Evidence/{{alias}}/Email/stream/2026-05-04_email-stream.md` shows every email received during the week of May 4.
|
|
47
|
+
>
|
|
48
|
+
> State files (e.g. `02_stakeholders.md`) cite snapshots for *current* facts and streams for *temporal* claims like "decision made on May 5".
|
|
49
|
+
|
|
50
|
+
Reply `next` to see how to bootstrap a project, or `done` to exit.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
**Moment 2 — Bootstrap a new project**
|
|
55
|
+
|
|
56
|
+
> **`bootstrap-project`** — first-time setup for an engagement. Lays configs side-by-side, scaffolds `Evidence/` and `State/` folders, asks you to confirm sources, then runs the initial 30-day pull across every enabled source.
|
|
57
|
+
|
|
58
|
+
**Try it** — copy and send (or reply `try`):
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
kushi bootstrap {{active_project}}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Reply `next`, `skip`, or `done`.
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
**Moment 3 — Refresh with a watermark**
|
|
69
|
+
|
|
70
|
+
> **`refresh-project`** — incremental pull driven by `Evidence/run-log.yml` watermarks. If the last successful pull was Monday 9 AM, refresh picks up everything since then. No window arg = "since last watermark", with a 7-day fallback if no watermark exists.
|
|
71
|
+
|
|
72
|
+
**Try it — default (since last watermark):**
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
kushi refresh {{active_project}}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Try it — explicit window:**
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
kushi refresh {{active_project}} last 14 days
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Reply `next`, `skip`, or `done`.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
**Moment 4 — Pull one source in isolation**
|
|
89
|
+
|
|
90
|
+
> **`pull-<source>`** — run a single per-source skill without touching the others. Useful when you only care about new ADO items, or you want to backfill OneNote pages without re-pulling email. Each `pull-*` skill is **WorkIQ-first** and writes to your alias subfolder under `Evidence/<alias>/<source>/`.
|
|
91
|
+
|
|
92
|
+
**Try it — pull just email:**
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
kushi pull email for {{active_project}} last 7 days
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**Try it — pull just ADO work items:**
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
kushi pull ado for {{active_project}} last 30 days
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Reply `next`, `skip`, or `done`.
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
**Moment 5 — Render State from existing Evidence**
|
|
109
|
+
|
|
110
|
+
> **`build-state`** — re-render every file under `State/` from existing `Evidence/`, **without any source pulls**. Use this when you want to see how the state view changes after manually editing evidence, or when you've consolidated streams from multiple contributors and want a fresh outcome view.
|
|
111
|
+
>
|
|
112
|
+
> State output is outcome-based — `00_overview.md`, `01_decisions.md`, `02_stakeholders.md`, `03_architecture.md`, `04_workshops.md`, `05_actions.md`, `06_risks.md`, `07_timeline.md`, `08_artifacts.md`, `09_open-questions.md`. Every assertion cites the evidence file + date it came from.
|
|
113
|
+
|
|
114
|
+
**Try it:**
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
kushi state {{active_project}}
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Reply `next`, `skip`, or `done`.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
**Moment 6 — Consolidate multi-user evidence**
|
|
125
|
+
|
|
126
|
+
> **`consolidate-evidence`** — merge per-user streams into `Evidence/_Consolidated/`. Only needed if multiple contributors are pulling for the same project. The consolidated file tags each entry with the contributor alias so provenance is preserved.
|
|
127
|
+
|
|
128
|
+
**Try it:**
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
kushi consolidate {{active_project}} last 7 days
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Reply `next`, `skip`, or `done`.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
**Moment 7 — Inspect the run-log**
|
|
139
|
+
|
|
140
|
+
> **`project-status`** — read-only inspector for `Evidence/run-log.yml`. Shows per-source watermarks, last successful run, and any failures. Use it before kicking off a refresh to see what kushi will pull.
|
|
141
|
+
|
|
142
|
+
**Try it:**
|
|
143
|
+
|
|
144
|
+
```
|
|
145
|
+
kushi status {{active_project}}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Reply `next`, `skip`, or `done`.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
**Moment 8 — Ask the project (cited Q&A)**
|
|
153
|
+
|
|
154
|
+
> **`ask-project`** — read-only natural-language Q&A over what's already been captured. Loads the cheapest set of files needed (Evidence/ first; State/ on `full`), cites every assertion `[source: <alias>/<folder>/<file> · YYYY-MM-DD]`, warns if the relevant source is older than 14 days, and ends with a Confidence verdict.
|
|
155
|
+
>
|
|
156
|
+
> **Auto-routes — no prefix needed.** Just name a project and ask a question. The slash form (`/ask`) and the explicit `kushi ask` are also accepted.
|
|
157
|
+
>
|
|
158
|
+
> **Never** triggers a `pull-*`. If evidence is stale or missing, it offers `kushi refresh <project>` and stops.
|
|
159
|
+
|
|
160
|
+
**Try it — explicit form:**
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
kushi ask {{active_project}} what's the status?
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Try it — auto-routed (no prefix):**
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
what's the MACC for {{active_project}} and is it confirmed?
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
```
|
|
173
|
+
who is the EM on {{active_project}}?
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
summarize {{active_project}}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Reply `next`, `skip`, or `done`.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
**Moment 9 — FDE Intake (first-artifact authoring)**
|
|
185
|
+
|
|
186
|
+
> **`fde-intake`** — author or update the FDE Intake document at `Reports/00-FDE-Intake-<project>.md`. It is the first artifact in any FDE engagement and is consumed by FDE Triage to decide which team is best suited and whether the engagement is billable.
|
|
187
|
+
>
|
|
188
|
+
> Grounded in **two sources**: the project's Evidence/ + the FDE reference pack (`intake-questions.md`, `report-doctrine.md`, `core-fde-reference.md`). Embeds inline `> ⚠️ VALIDATION WARNING — Rule X.Y` blockquotes wherever Evidence is silent.
|
|
189
|
+
>
|
|
190
|
+
> **Standard + Full profiles only.**
|
|
191
|
+
|
|
192
|
+
**Try it:**
|
|
193
|
+
|
|
194
|
+
```
|
|
195
|
+
kushi fde-intake {{active_project}}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Reply `next`, `skip`, or `done`.
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
**Moment 10 — FDE Report (5 shapes)**
|
|
203
|
+
|
|
204
|
+
> **`fde-report`** — generate an FDE-shaped engagement report. One skill, five shapes:
|
|
205
|
+
>
|
|
206
|
+
> - `weekly` (default) — internal status, 1–2 pages.
|
|
207
|
+
> - `short` — customer-facing, sanitized (no MACC / MS staffing / CRM IDs).
|
|
208
|
+
> - `long` — incoming-crew handoff, 3–5 pages.
|
|
209
|
+
> - `fitness` — 10-row FDE Fitness scorecard + verdict.
|
|
210
|
+
> - `stage-readiness` — "should we advance to the next FDE stage?" check.
|
|
211
|
+
>
|
|
212
|
+
> Read-only against Evidence; nothing is sent outbound. Applies all 9 doctrine rules.
|
|
213
|
+
|
|
214
|
+
**Try it — weekly default:**
|
|
215
|
+
|
|
216
|
+
```
|
|
217
|
+
kushi fde-report {{active_project}}
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
**Try it — fitness scorecard:**
|
|
221
|
+
|
|
222
|
+
```
|
|
223
|
+
kushi fde-report {{active_project}} fitness
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
**Try it — stage-readiness check:**
|
|
227
|
+
|
|
228
|
+
```
|
|
229
|
+
kushi fde-report {{active_project}} stage-readiness
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Reply `next`, `skip`, or `done`.
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
**Moment 11 — FDE Triage (full 7-file bundle)**
|
|
237
|
+
|
|
238
|
+
> **`fde-triage`** — produce the full FDE Triage bundle at `Reports/triage/<YYYY-MM-DD>/`: the entry-point analysis (`00`) plus 7 companion files (`01`–`07`):
|
|
239
|
+
>
|
|
240
|
+
> 1. `00-fde-analysis.md` — concise FDE analysis (entry point)
|
|
241
|
+
> 2. `01-fde-fitness.md` — 10-row fitness scorecard
|
|
242
|
+
> 3. `02-risk-analysis.md` — risks bucketed into 8 reference-pack categories
|
|
243
|
+
> 4. `03-6Q.md` — 6-question engagement framing
|
|
244
|
+
> 5. `04-readiness-checklist.md` — **mobilization readiness** (distinct from stage readiness)
|
|
245
|
+
> 6. `05-executive-consolidated-report.md` — leadership readout
|
|
246
|
+
> 7. `06-global-opportunity-and-reuse.md` — repeatability lens
|
|
247
|
+
> 8. `07-validation-warnings-checklist.md` — central tracker (open / resolved / NA)
|
|
248
|
+
>
|
|
249
|
+
> Re-runs preserve user-edited statuses in file 07.
|
|
250
|
+
|
|
251
|
+
**Try it:**
|
|
252
|
+
|
|
253
|
+
```
|
|
254
|
+
kushi fde-triage {{active_project}}
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
Reply `next`, `skip`, or `done`.
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
**Moment 12 — Self-check before committing**
|
|
262
|
+
|
|
263
|
+
> **`self-check`** — Kushi has its own consistency checker. Before you commit changes to the kushi repo (or after editing an instruction file), run self-check to verify every skill is in the agent inventory, every prompt routes to a real skill, every cross-link resolves, and the live `~/.copilot/m-skills/kushi/` install matches the repo. Run it as a markdown report (default), JSON for CI (`-Json`), or with deeper hash + schema checks (`-Deep`). Cross-OS: on macOS/Linux use `./run.sh` (requires `pwsh` 7+).
|
|
264
|
+
|
|
265
|
+
**Try it — quick:**
|
|
266
|
+
|
|
267
|
+
```
|
|
268
|
+
kushi self-check
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
**Try it — deep + strict for CI (Windows):**
|
|
272
|
+
|
|
273
|
+
```
|
|
274
|
+
pwsh plugin/skills/self-check/run.ps1 -Deep -StrictExit
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
**Try it — deep + strict for CI (macOS / Linux):**
|
|
278
|
+
|
|
279
|
+
```
|
|
280
|
+
./plugin/skills/self-check/run.sh -Deep -StrictExit
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
Reply `next` to see the closing cheat sheet, or `done` to exit.
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
### Closing
|
|
288
|
+
|
|
289
|
+
> That's the full tour. Cheat sheet:
|
|
290
|
+
>
|
|
291
|
+
> | What you want | What to say |
|
|
292
|
+
> |---|---|
|
|
293
|
+
> | First-time engagement setup | `kushi bootstrap <project>` |
|
|
294
|
+
> | Catch up since last pull | `kushi refresh <project>` |
|
|
295
|
+
> | Backfill a custom range | `kushi refresh <project> last N days` |
|
|
296
|
+
> | One source only | `kushi pull <source> for <project>` |
|
|
297
|
+
> | Re-render state files (full only) | `kushi state <project>` |
|
|
298
|
+
> | Merge contributors | `kushi consolidate <project>` |
|
|
299
|
+
> | Inspect run-log | `kushi status <project>` |
|
|
300
|
+
> | **Ask a question (cited Q&A)** | `kushi ask <project> <question>` — or just say it naturally |
|
|
301
|
+
> | First-artifact FDE Intake | `kushi fde-intake <project>` |
|
|
302
|
+
> | FDE report (5 shapes) | `kushi fde-report <project> [shape]` |
|
|
303
|
+
> | Full FDE Triage bundle (7 files) | `kushi fde-triage <project>` |
|
|
304
|
+
> | Pre-commit validation | `kushi self-check` (or `-Deep`) |
|
|
305
|
+
> | This tour again | `kushi intro` |
|
|
306
|
+
>
|
|
307
|
+
> You can also describe what you need in plain language — kushi will route to the right verb. Reply `restart` to retake the tour, or just ask me anything.
|
|
308
|
+
|
|
309
|
+
---
|
|
310
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "link-entities",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Deterministic entity-graph output — closed edge taxonomy + nodes/edges schema.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "le-graph-schema",
|
|
8
|
+
"name": "fixture project-graph.json has v1 schema + nodes + edges",
|
|
9
|
+
"input": "validate graph schema",
|
|
10
|
+
"fixture": "evals/fixtures/fixture-acme",
|
|
11
|
+
"canary": true,
|
|
12
|
+
"grader_type": "script",
|
|
13
|
+
"expected_assertions": [
|
|
14
|
+
{ "type": "file-exists", "path": "Evidence/_graph/project-graph.json" },
|
|
15
|
+
{ "type": "json-path-equals", "path": "Evidence/_graph/project-graph.json", "json_path": "$.schema", "equals": "kushi.project-graph/v1" }
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"id": "le-closed-taxonomy",
|
|
20
|
+
"name": "every edge uses a kind from the closed taxonomy",
|
|
21
|
+
"input": "validate edges",
|
|
22
|
+
"fixture": "evals/fixtures/fixture-acme",
|
|
23
|
+
"canary": false,
|
|
24
|
+
"grader_type": "script",
|
|
25
|
+
"args": { "read_fixture": "Evidence/_graph/project-graph.json" },
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{ "type": "regex-match", "pattern": "\"kind\":\\s*\"(references|decides|action-item-tracks|discusses|produced-by|follow-up-of|same-thread|participant-of)\"" }
|
|
28
|
+
]
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
@@ -58,4 +58,13 @@ Scan run-log `history` for entries where `status != ok` in the last 5 runs. Disp
|
|
|
58
58
|
- "status of `<X>`"
|
|
59
59
|
- "@Kushi status `<X>`"
|
|
60
60
|
- "what was the last refresh for `<X>`"
|
|
61
|
-
- "show run-log for `<X>`"
|
|
61
|
+
- "show run-log for `<X>`"
|
|
62
|
+
|
|
63
|
+
## Validation loop
|
|
64
|
+
|
|
65
|
+
<!-- TODO(retrofit): fill in — describe how to verify this skill ran correctly. Auto-added by skill-checker --retrofit --apply per skill-authoring.instructions.md. -->
|
|
66
|
+
|
|
67
|
+
1. Run `pwsh plugin/skills/self-check/run.ps1 -Targeted <area>`.
|
|
68
|
+
2. Fix any findings, then re-run the affected step.
|
|
69
|
+
3. Repeat until self-check exits 0.
|
|
70
|
+
4. Only then update
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "project-status",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for project-status. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "project-status-smoke-1",
|
|
8
|
+
"name": "project-status produces a non-empty response",
|
|
9
|
+
"input": "synthetic project-status probe — canary smoke",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": ".+"
|
|
16
|
+
}
|
|
17
|
+
]
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"id": "project-status-smoke-2",
|
|
21
|
+
"name": "project-status echoes case id",
|
|
22
|
+
"input": "case-id project-status-smoke-2",
|
|
23
|
+
"canary": false,
|
|
24
|
+
"grader_type": "script",
|
|
25
|
+
"expected_assertions": [
|
|
26
|
+
{
|
|
27
|
+
"type": "regex-match",
|
|
28
|
+
"pattern": "project-status-smoke-2"
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "propose-ado-update",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for propose-ado-update. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "propose-ado-update-smoke-1",
|
|
8
|
+
"name": "propose-ado-update produces a non-empty response",
|
|
9
|
+
"input": "synthetic propose-ado-update probe — canary smoke",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": ".+"
|
|
16
|
+
}
|
|
17
|
+
]
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"id": "propose-ado-update-smoke-2",
|
|
21
|
+
"name": "propose-ado-update echoes case id",
|
|
22
|
+
"input": "case-id propose-ado-update-smoke-2",
|
|
23
|
+
"canary": false,
|
|
24
|
+
"grader_type": "script",
|
|
25
|
+
"expected_assertions": [
|
|
26
|
+
{
|
|
27
|
+
"type": "regex-match",
|
|
28
|
+
"pattern": "propose-ado-update-smoke-2"
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-ado",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-ado. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-ado-cached-1",
|
|
8
|
+
"name": "pull-ado cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-ado-rubric-1",
|
|
22
|
+
"name": "pull-ado output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-ado pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-ado response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-crm",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-crm. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-crm-cached-1",
|
|
8
|
+
"name": "pull-crm cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-crm-rubric-1",
|
|
22
|
+
"name": "pull-crm output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-crm pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-crm response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-email",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-email. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-email-cached-1",
|
|
8
|
+
"name": "pull-email cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-email-rubric-1",
|
|
22
|
+
"name": "pull-email output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-email pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-email response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-loop",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-loop. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-loop-cached-1",
|
|
8
|
+
"name": "pull-loop cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-loop-rubric-1",
|
|
22
|
+
"name": "pull-loop output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-loop pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-loop response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-meetings",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-meetings. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-meetings-cached-1",
|
|
8
|
+
"name": "pull-meetings cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-meetings-rubric-1",
|
|
22
|
+
"name": "pull-meetings output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-meetings pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-meetings response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-misc",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-misc. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-misc-cached-1",
|
|
8
|
+
"name": "pull-misc cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-misc-rubric-1",
|
|
22
|
+
"name": "pull-misc output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-misc pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-misc response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill": "pull-onenote",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Auto-seeded evals for pull-onenote. Replace with real cases as the skill matures.",
|
|
5
|
+
"cases": [
|
|
6
|
+
{
|
|
7
|
+
"id": "pull-onenote-cached-1",
|
|
8
|
+
"name": "pull-onenote cached/dry-run produces output",
|
|
9
|
+
"input": "--cached --dry-run fixture-acme",
|
|
10
|
+
"canary": false,
|
|
11
|
+
"grader_type": "script",
|
|
12
|
+
"expected_assertions": [
|
|
13
|
+
{
|
|
14
|
+
"type": "regex-match",
|
|
15
|
+
"pattern": "fixture-acme",
|
|
16
|
+
"flags": "i"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "pull-onenote-rubric-1",
|
|
22
|
+
"name": "pull-onenote output quality (LLM-rubric, skipped in canary)",
|
|
23
|
+
"input": "summarize fixture-acme pull-onenote pulls",
|
|
24
|
+
"canary": false,
|
|
25
|
+
"grader_type": "llm",
|
|
26
|
+
"expected_assertions": [
|
|
27
|
+
{
|
|
28
|
+
"type": "llm-rubric",
|
|
29
|
+
"rubric": "Does the pull-onenote response cite a source file path and an ISO timestamp?",
|
|
30
|
+
"min_score": 4
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|