@hegemonart/get-design-done 1.59.4 → 1.59.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +4 -4
- package/.claude-plugin/plugin.json +2 -2
- package/CHANGELOG.md +62 -0
- package/README.md +223 -436
- package/SKILL.md +2 -2
- package/docs/i18n/README.de.md +2 -0
- package/docs/i18n/README.fr.md +2 -0
- package/docs/i18n/README.it.md +2 -0
- package/docs/i18n/README.ja.md +2 -0
- package/docs/i18n/README.ko.md +2 -0
- package/docs/i18n/README.zh-CN.md +2 -0
- package/hooks/budget-enforcer.ts +134 -7
- package/package.json +1 -1
- package/reference/runtime-models.md +15 -15
- package/reference/schemas/generated.d.ts +4 -0
- package/reference/schemas/runtime-models.schema.json +5 -0
- package/reference/skill-graph.md +4 -1
- package/scripts/lib/bandit-router/integration.cjs +38 -0
- package/scripts/lib/install/installer.cjs +133 -1
- package/scripts/lib/manifest/skills.json +13 -0
- package/scripts/skill-templates/handoff/SKILL.md +99 -0
- package/skills/handoff/SKILL.md +99 -0
package/SKILL.md
CHANGED
|
@@ -27,7 +27,7 @@ Each stage produces artifacts in `.design/` inside the current project.
|
|
|
27
27
|
| `plan` | `get-design-done:plan` | Stage 3 of 5 - decompose into tasks → DESIGN-PLAN.md |
|
|
28
28
|
| `design` | `get-design-done:design` | Stage 4 of 5 - execute tasks → DESIGN-SUMMARY.md |
|
|
29
29
|
| `verify` | `get-design-done:verify` | Stage 5 of 5 - score + audit → DESIGN-VERIFICATION.md |
|
|
30
|
-
| `handoff <path>` |
|
|
30
|
+
| `handoff <path>` | `get-design-done:gdd-handoff` | Skip scan/discover/plan; initialize from Claude Design bundle; route to verify |
|
|
31
31
|
| `map` | `get-design-done:gdd-map` | Parallel codebase mapping - spawns 5 mappers → `.design/map/*.md` + `.design/DESIGN-MAP.md` |
|
|
32
32
|
| `next` | `get-design-done:gdd-next` | Route to the next pipeline stage based on STATE.md |
|
|
33
33
|
| `help` | `get-design-done:gdd-help` | List all commands with one-line descriptions |
|
|
@@ -207,7 +207,7 @@ If `$ARGUMENTS` is a stage or command name - invoke it directly, no state check:
|
|
|
207
207
|
/gdd:plan → Skill("get-design-done:plan") # stage 3-of-5
|
|
208
208
|
/gdd:design → Skill("get-design-done:design") # stage 4-of-5
|
|
209
209
|
/gdd:verify → Skill("get-design-done:verify") # stage 5-of-5
|
|
210
|
-
/gdd:handoff →
|
|
210
|
+
/gdd:handoff → Skill("get-design-done:gdd-handoff")
|
|
211
211
|
/gdd:map → Skill("get-design-done:gdd-map") # parallel codebase mapping
|
|
212
212
|
/gdd:next → Skill("get-design-done:gdd-next")
|
|
213
213
|
/gdd:help → Skill("get-design-done:gdd-help")
|
package/docs/i18n/README.de.md
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[English](../../README.md) · [简体中文](README.zh-CN.md) · [日本語](README.ja.md) · [한국어](README.ko.md) · [Français](README.fr.md) · [Italiano](README.it.md) · **Deutsch**
|
|
6
6
|
|
|
7
|
+
> Hinweis: Diese Übersetzung kann hinter der englischen Version zurückliegen. Die maßgebliche Version ist [README.md](../../README.md) (translation may lag behind English; see README.md for the canonical version).
|
|
8
|
+
|
|
7
9
|
**Eine Design-Quality-Pipeline für AI-Coding-Agenten: Brief → Explore → Plan → Implementierung → Verifikation.**
|
|
8
10
|
|
|
9
11
|
**Get Design Done hält AI-generierte UI an deinen Brief, dein Design-System, deine Referenzen und deine Quality Gates gebunden. Funktioniert mit Claude Code, OpenCode, Gemini CLI, Kilo, Codex, Copilot, Cursor, Windsurf, Antigravity, Augment, Trae, Qwen Code, CodeBuddy und Cline.**
|
package/docs/i18n/README.fr.md
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[English](../../README.md) · [简体中文](README.zh-CN.md) · [日本語](README.ja.md) · [한국어](README.ko.md) · **Français** · [Italiano](README.it.md) · [Deutsch](README.de.md)
|
|
6
6
|
|
|
7
|
+
> Remarque : cette traduction peut être en retard sur la version anglaise. La version de référence est [README.md](../../README.md) (translation may lag behind English; see README.md for the canonical version).
|
|
8
|
+
|
|
7
9
|
**Un pipeline de qualité design pour agents de code IA : brief → exploration → plan → implémentation → vérification.**
|
|
8
10
|
|
|
9
11
|
**Get Design Done garde l'UI générée par IA liée à votre brief, votre design system, vos références et vos quality gates. Fonctionne avec Claude Code, OpenCode, Gemini CLI, Kilo, Codex, Copilot, Cursor, Windsurf, Antigravity, Augment, Trae, Qwen Code, CodeBuddy et Cline.**
|
package/docs/i18n/README.it.md
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[English](../../README.md) · [简体中文](README.zh-CN.md) · [日本語](README.ja.md) · [한국어](README.ko.md) · [Français](README.fr.md) · **Italiano** · [Deutsch](README.de.md)
|
|
6
6
|
|
|
7
|
+
> Nota: questa traduzione può essere in ritardo rispetto alla versione inglese. La versione di riferimento è [README.md](../../README.md) (translation may lag behind English; see README.md for the canonical version).
|
|
8
|
+
|
|
7
9
|
**Una pipeline di qualità del design per agenti di coding IA: brief → esplorazione → piano → implementazione → verifica.**
|
|
8
10
|
|
|
9
11
|
**Get Design Done mantiene l'UI generata dall'IA allineata al tuo brief, al tuo design system, ai tuoi riferimenti e ai tuoi quality gate. Funziona con Claude Code, OpenCode, Gemini CLI, Kilo, Codex, Copilot, Cursor, Windsurf, Antigravity, Augment, Trae, Qwen Code, CodeBuddy e Cline.**
|
package/docs/i18n/README.ja.md
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[English](../../README.md) · [简体中文](README.zh-CN.md) · **日本語** · [한국어](README.ko.md) · [Français](README.fr.md) · [Italiano](README.it.md) · [Deutsch](README.de.md)
|
|
6
6
|
|
|
7
|
+
> 注記: この翻訳は英語版より遅れている場合があります。正式な版は [README.md](../../README.md) です (translation may lag behind English; see README.md for the canonical version)。
|
|
8
|
+
|
|
7
9
|
**AI コーディングエージェントのためのデザイン品質パイプライン: ブリーフ → 探索 → 計画 → 実装 → 検証。**
|
|
8
10
|
|
|
9
11
|
**Get Design Done は、AI が生成した UI をあなたのブリーフ、デザインシステム、リファレンス、品質ゲートに結びつけたまま進めます。Claude Code、OpenCode、Gemini CLI、Kilo、Codex、Copilot、Cursor、Windsurf、Antigravity、Augment、Trae、Qwen Code、CodeBuddy、Cline で動作します。**
|
package/docs/i18n/README.ko.md
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[English](../../README.md) · [简体中文](README.zh-CN.md) · [日本語](README.ja.md) · **한국어** · [Français](README.fr.md) · [Italiano](README.it.md) · [Deutsch](README.de.md)
|
|
6
6
|
|
|
7
|
+
> 참고: 이 번역은 영어 버전보다 늦을 수 있습니다. 기준이 되는 버전은 [README.md](../../README.md) 입니다 (translation may lag behind English; see README.md for the canonical version).
|
|
8
|
+
|
|
7
9
|
**AI 코딩 에이전트를 위한 디자인 품질 파이프라인: 브리프 → 탐색 → 계획 → 구현 → 검증.**
|
|
8
10
|
|
|
9
11
|
**Get Design Done은 AI가 생성한 UI가 브리프, 디자인 시스템, 레퍼런스, 품질 게이트에 계속 묶여 있도록 합니다. Claude Code, OpenCode, Gemini CLI, Kilo, Codex, Copilot, Cursor, Windsurf, Antigravity, Augment, Trae, Qwen Code, CodeBuddy, Cline에서 동작합니다.**
|
package/docs/i18n/README.zh-CN.md
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[English](../../README.md) · **简体中文** · [日本語](README.ja.md) · [한국어](README.ko.md) · [Français](README.fr.md) · [Italiano](README.it.md) · [Deutsch](README.de.md)
|
|
6
6
|
|
|
7
|
+
> 说明:此翻译可能落后于英文版本。规范版本以 [README.md](../../README.md) 为准 (translation may lag behind English; see README.md for the canonical version)。
|
|
8
|
+
|
|
7
9
|
**面向 AI 编码智能体的设计质量流水线:简报 → 探索 → 规划 → 实现 → 验证。**
|
|
8
10
|
|
|
9
11
|
**Get Design Done 让 AI 生成的 UI 始终贴住你的简报、设计系统、参考资料与质量闸门。支持 Claude Code、OpenCode、Gemini CLI、Kilo、Codex、Copilot、Cursor、Windsurf、Antigravity、Augment、Trae、Qwen Code、CodeBuddy 与 Cline。**
|
package/hooks/budget-enforcer.ts
CHANGED
|
@@ -191,6 +191,21 @@ const tierResolver = nodeRequire(
|
|
|
191
191
|
'../scripts/lib/tier-resolver.cjs',
|
|
192
192
|
) as TierResolverModule;
|
|
193
193
|
|
|
194
|
+
// Phase 59.5 P1: runtime-models parser for the BYOK/unverified provenance
|
|
195
|
+
// guard. We read the parsed runtime rows to learn a runtime's `status`
|
|
196
|
+
// ("verified" | "byok" | "unverified"). The parser is pure + never invoked
|
|
197
|
+
// for its model-resolution side here; only to classify the runtime so an
|
|
198
|
+
// unverified row never drives a HARD budget cap. Soft-imported defensively:
|
|
199
|
+
// any parser failure degrades to the built-in verified allowlist below.
|
|
200
|
+
interface RuntimeModelsParserModule {
|
|
201
|
+
parseRuntimeModels(opts?: { cwd?: string }): {
|
|
202
|
+
runtimes: Array<{ id: string; status?: string }>;
|
|
203
|
+
};
|
|
204
|
+
}
|
|
205
|
+
const runtimeModelsParser = nodeRequire(
|
|
206
|
+
'../scripts/lib/install/parse-runtime-models.cjs',
|
|
207
|
+
) as RuntimeModelsParserModule;
|
|
208
|
+
|
|
194
209
|
// Plan 33.6-03 (SC#6, D-08, D-12): OpenRouter tier-resolver adapter. When the
|
|
195
210
|
// user opts in (`.design/config.json#openrouter_enabled: true` OR
|
|
196
211
|
// `OPENROUTER_API_KEY` present), the hook consults this adapter FIRST for a
|
|
@@ -506,6 +521,75 @@ export function loadBudget(): ResolvedBudget {
|
|
|
506
521
|
}
|
|
507
522
|
}
|
|
508
523
|
|
|
524
|
+
// ── runtime provenance status (Phase 59.5 P1) ───────────────────────────────
|
|
525
|
+
|
|
526
|
+
/**
|
|
527
|
+
* Phase 59.5 P1: provenance confidence of a runtime's tier→model row, as
|
|
528
|
+
* documented in reference/runtime-models.md and enumerated by
|
|
529
|
+
* reference/schemas/runtime-models.schema.json#status.
|
|
530
|
+
*/
|
|
531
|
+
export type RuntimeStatus = 'verified' | 'byok' | 'unverified';
|
|
532
|
+
|
|
533
|
+
/**
|
|
534
|
+
* Built-in verified allowlist: the 4 runtimes whose tier maps are confirmed
|
|
535
|
+
* against runtime-author docs (the runtime-models.md banner: "4 of 14 ...
|
|
536
|
+
* verified (claude, codex, gemini, qwen)"). Used as the fallback classifier
|
|
537
|
+
* when the parsed row carries no structured `status` field yet (the markdown
|
|
538
|
+
* JSON blocks do not emit `status` at the time of this plan; the schema is
|
|
539
|
+
* ready, the parser wiring is a deferred follow-up). Once a row DOES carry
|
|
540
|
+
* `status`, the parsed value takes precedence over this allowlist.
|
|
541
|
+
*/
|
|
542
|
+
const VERIFIED_RUNTIME_IDS: ReadonlySet<string> = new Set([
|
|
543
|
+
'claude',
|
|
544
|
+
'codex',
|
|
545
|
+
'gemini',
|
|
546
|
+
'qwen',
|
|
547
|
+
]);
|
|
548
|
+
|
|
549
|
+
/** Per-process memo of runtime-id → parsed `status` (null until first read). */
|
|
550
|
+
let _runtimeStatusMap: Map<string, RuntimeStatus> | null = null;
|
|
551
|
+
|
|
552
|
+
function isRuntimeStatus(v: unknown): v is RuntimeStatus {
|
|
553
|
+
return v === 'verified' || v === 'byok' || v === 'unverified';
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
/**
|
|
557
|
+
* Resolve a runtime's provenance status. Reads the parsed runtime-models
|
|
558
|
+
* doc once per process; if a row carries a structured `status` it wins,
|
|
559
|
+
* otherwise the built-in verified allowlist decides (verified vs unverified).
|
|
560
|
+
* Fail-open: any parser error → allowlist-only classification. Never throws.
|
|
561
|
+
*
|
|
562
|
+
* @param runtimeId runtime id (e.g. 'claude', 'cline'); falsy → 'unverified'.
|
|
563
|
+
*/
|
|
564
|
+
export function runtimeStatus(runtimeId: string | null | undefined): RuntimeStatus {
|
|
565
|
+
if (typeof runtimeId !== 'string' || runtimeId.length === 0) {
|
|
566
|
+
return 'unverified';
|
|
567
|
+
}
|
|
568
|
+
if (_runtimeStatusMap === null) {
|
|
569
|
+
_runtimeStatusMap = new Map();
|
|
570
|
+
try {
|
|
571
|
+
const parsed = runtimeModelsParser.parseRuntimeModels({ cwd: process.cwd() });
|
|
572
|
+
const rows = Array.isArray(parsed?.runtimes) ? parsed.runtimes : [];
|
|
573
|
+
for (const row of rows) {
|
|
574
|
+
if (row && typeof row.id === 'string' && isRuntimeStatus(row.status)) {
|
|
575
|
+
_runtimeStatusMap.set(row.id, row.status);
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
} catch {
|
|
579
|
+
// Fail open: parser error degrades to the verified allowlist below.
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
const parsedStatus = _runtimeStatusMap.get(runtimeId);
|
|
583
|
+
if (parsedStatus !== undefined) return parsedStatus;
|
|
584
|
+
return VERIFIED_RUNTIME_IDS.has(runtimeId) ? 'verified' : 'unverified';
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
/** True when the runtime row must NOT drive a HARD budget cap (P1 guard). */
|
|
588
|
+
export function isUnverifiedRuntime(runtimeId: string | null | undefined): boolean {
|
|
589
|
+
const s = runtimeStatus(runtimeId);
|
|
590
|
+
return s === 'byok' || s === 'unverified';
|
|
591
|
+
}
|
|
592
|
+
|
|
509
593
|
// ── cumulative phase spend (WR-02) ──────────────────────────────────────────
|
|
510
594
|
|
|
511
595
|
/**
|
|
@@ -1138,7 +1222,40 @@ export async function main(): Promise<void> {
|
|
|
1138
1222
|
// no router decision is supplied, behavior is identical to pre-25.
|
|
1139
1223
|
const perSpawnCap = resolvePerSpawnCap(budget, complexityClass);
|
|
1140
1224
|
|
|
1141
|
-
|
|
1225
|
+
// ── Phase 59.5 P1: BYOK/unverified provenance guard ────────────────────────
|
|
1226
|
+
//
|
|
1227
|
+
// Resolve the runtime id (router-supplied `runtime`, else env detection,
|
|
1228
|
+
// else 'claude', same precedence the cost-recording block uses below) so we
|
|
1229
|
+
// can consult its runtime-models provenance `status` BEFORE the hard-cap
|
|
1230
|
+
// branches. When the runtime row is byok/unverified the resolved per-runtime
|
|
1231
|
+
// model is best-effort (the user's actual provider may diverge from the
|
|
1232
|
+
// Anthropic-default fill), so an estimated cost computed against it must NOT
|
|
1233
|
+
// hard-block the user. We degrade enforce-mode to advisory ('warn') for THIS
|
|
1234
|
+
// spawn only: the per-spawn + per-phase 100% caps stop blocking and surface a
|
|
1235
|
+
// stderr warning instead, while the 80% auto-downgrade still applies (a tier
|
|
1236
|
+
// downgrade is non-blocking and strictly cheaper, so it is safe to keep).
|
|
1237
|
+
// Verified runtimes (claude/codex/gemini/qwen) are unaffected (full hard
|
|
1238
|
+
// enforcement). The project-level cap above is intentionally NOT degraded: it
|
|
1239
|
+
// is governed by total ledger spend, not a per-runtime resolved model.
|
|
1240
|
+
const guardRuntimeId =
|
|
1241
|
+
(typeof routerDecision?.runtime === 'string' && routerDecision.runtime.length > 0
|
|
1242
|
+
? routerDecision.runtime
|
|
1243
|
+
: runtimeDetect.detect()) ?? 'claude';
|
|
1244
|
+
const runtimeIsUnverified = isUnverifiedRuntime(guardRuntimeId);
|
|
1245
|
+
const effectiveEnforcementMode: ResolvedBudget['enforcement_mode'] =
|
|
1246
|
+
budget.enforcement_mode === 'enforce' && runtimeIsUnverified
|
|
1247
|
+
? 'warn'
|
|
1248
|
+
: budget.enforcement_mode;
|
|
1249
|
+
if (budget.enforcement_mode === 'enforce' && runtimeIsUnverified) {
|
|
1250
|
+
process.stderr.write(
|
|
1251
|
+
`gdd-budget-enforcer WARN: runtime '${guardRuntimeId}' has provenance status ` +
|
|
1252
|
+
`'${runtimeStatus(guardRuntimeId)}' (BYOK/unverified tier→model row); ` +
|
|
1253
|
+
`hard budget caps degraded to advisory for this spawn so an unverified ` +
|
|
1254
|
+
`cost estimate never hard-blocks you.\n`,
|
|
1255
|
+
);
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1258
|
+
if (effectiveEnforcementMode === 'enforce') {
|
|
1142
1259
|
// Branch C: 100% per-spawn cap hard block (class-specific or per_task).
|
|
1143
1260
|
if (estCost >= perSpawnCap) {
|
|
1144
1261
|
writeTelemetry({
|
|
@@ -1202,12 +1319,24 @@ export async function main(): Promise<void> {
|
|
|
1202
1319
|
toolInput._tier_override = 'haiku';
|
|
1203
1320
|
toolInput._tier_downgraded = true;
|
|
1204
1321
|
}
|
|
1205
|
-
} else if (budget.enforcement_mode === 'warn') {
|
|
1322
|
+
} else if (effectiveEnforcementMode === 'warn') {
|
|
1206
1323
|
if (estCost >= perSpawnCap) {
|
|
1207
1324
|
process.stderr.write(
|
|
1208
1325
|
`gdd-budget-enforcer WARN: per-spawn cap will be exceeded ($${estCost.toFixed(4)} >= $${perSpawnCap})\n`,
|
|
1209
1326
|
);
|
|
1210
1327
|
}
|
|
1328
|
+
// Phase 59.5 P1: when enforce was degraded to advisory for a byok/unverified
|
|
1329
|
+
// runtime, also surface the per-phase breach that the hard branch above
|
|
1330
|
+
// would otherwise have reported (it is skipped for unverified runtimes).
|
|
1331
|
+
if (
|
|
1332
|
+
budget.enforcement_mode === 'enforce' &&
|
|
1333
|
+
phaseSpend + estCost >= budget.per_phase_cap_usd
|
|
1334
|
+
) {
|
|
1335
|
+
process.stderr.write(
|
|
1336
|
+
`gdd-budget-enforcer WARN: per-phase cap will be exceeded for ${phase} ` +
|
|
1337
|
+
`($${(phaseSpend + estCost).toFixed(4)} >= $${budget.per_phase_cap_usd.toFixed(2)})\n`,
|
|
1338
|
+
);
|
|
1339
|
+
}
|
|
1211
1340
|
}
|
|
1212
1341
|
// enforcement_mode === 'log': telemetry only.
|
|
1213
1342
|
|
|
@@ -1230,11 +1359,9 @@ export async function main(): Promise<void> {
|
|
|
1230
1359
|
toolInput._tier_override ?? toolInput._default_tier ?? 'sonnet';
|
|
1231
1360
|
// Runtime tag: prefer the router's explicit `runtime` (D-08) field;
|
|
1232
1361
|
// fall back to env-var detection; default to 'claude' since the .ts
|
|
1233
|
-
// hook itself only runs inside Claude Code.
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
? routerDecision.runtime
|
|
1237
|
-
: runtimeDetect.detect()) ?? 'claude';
|
|
1362
|
+
// hook itself only runs inside Claude Code. Reuse the id already resolved
|
|
1363
|
+
// for the Phase 59.5 P1 provenance guard above (single resolution source).
|
|
1364
|
+
const runtimeId = guardRuntimeId;
|
|
1238
1365
|
|
|
1239
1366
|
// ── Plan 27.5-02 — bandit consultation ────────────────────────────────────
|
|
1240
1367
|
//
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hegemonart/get-design-done",
|
|
3
|
-
"version": "1.59.4",
|
|
3
|
+
"version": "1.59.6",
|
|
4
4
|
"description": "A design-quality pipeline for AI coding agents: brief, explore, plan, design, and verify UI work against your design system.",
|
|
5
5
|
"author": "Hegemon",
|
|
6
6
|
"homepage": "https://github.com/hegemonart/get-design-done",
|
package/reference/runtime-models.md
CHANGED
|
@@ -10,7 +10,7 @@ Single canonical map from Anthropic tier names (`opus|sonnet|haiku`) and runtime
|
|
|
10
10
|
>
|
|
11
11
|
> Unverified: kilo, copilot, cursor, windsurf, antigravity, augment, trae, codebuddy, cline, opencode.
|
|
12
12
|
>
|
|
13
|
-
> The schema (`reference/schemas/runtime-models.schema.json`)
|
|
13
|
+
> **Provenance status field.** Each row below is annotated with a structured `status` in its section heading: `verified` (confirmed against runtime-author docs), `byok` (BYOK / multi-provider, where the user-configured model may diverge from the Anthropic-default fill), or `unverified` (placeholder fill pending researcher confirmation). The schema (`reference/schemas/runtime-models.schema.json`) accepts an optional `status` enum of exactly these three values, so verified rows MAY omit it and remain shape-valid. The `hooks/budget-enforcer.ts` guard consults this status (or its built-in verified allowlist) so a `byok`/`unverified` row never drives a HARD budget cap: it degrades to advisory enforcement for that spawn. This makes the unverified-ness machine-readable, not just a content gap in prose.
|
|
14
14
|
|
|
15
15
|
This file is parsed by `scripts/lib/install/parse-runtime-models.cjs` and consumed by:
|
|
16
16
|
|
|
@@ -36,7 +36,7 @@ This file is parsed by `scripts/lib/install/parse-runtime-models.cjs` and consum
|
|
|
36
36
|
|
|
37
37
|
---
|
|
38
38
|
|
|
39
|
-
## claude - Claude Code
|
|
39
|
+
## claude - Claude Code (status: verified)
|
|
40
40
|
|
|
41
41
|
Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/en/docs/about-claude/models. Seed picks per CONTEXT.md D-02.
|
|
42
42
|
|
|
@@ -66,7 +66,7 @@ Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/
|
|
|
66
66
|
|
|
67
67
|
---
|
|
68
68
|
|
|
69
|
-
## codex - OpenAI Codex CLI
|
|
69
|
+
## codex - OpenAI Codex CLI (status: verified)
|
|
70
70
|
|
|
71
71
|
OpenAI's Codex CLI runtime. Public tier docs at https://platform.openai.com/docs/models. Seed picks per CONTEXT.md D-02.
|
|
72
72
|
|
|
@@ -96,7 +96,7 @@ OpenAI's Codex CLI runtime. Public tier docs at https://platform.openai.com/docs
|
|
|
96
96
|
|
|
97
97
|
---
|
|
98
98
|
|
|
99
|
-
## gemini - Gemini CLI
|
|
99
|
+
## gemini - Gemini CLI (status: verified)
|
|
100
100
|
|
|
101
101
|
Google's Gemini CLI runtime. Public tier docs at https://ai.google.dev/gemini-api/docs/models. Seed picks per CONTEXT.md D-02.
|
|
102
102
|
|
|
@@ -126,7 +126,7 @@ Google's Gemini CLI runtime. Public tier docs at https://ai.google.dev/gemini-ap
|
|
|
126
126
|
|
|
127
127
|
---
|
|
128
128
|
|
|
129
|
-
## qwen - Qwen Code
|
|
129
|
+
## qwen - Qwen Code (status: verified)
|
|
130
130
|
|
|
131
131
|
Alibaba's Qwen Code runtime. Public tier docs at https://github.com/QwenLM/qwen-code. Seed picks per CONTEXT.md D-02.
|
|
132
132
|
|
|
@@ -156,7 +156,7 @@ Alibaba's Qwen Code runtime. Public tier docs at https://github.com/QwenLM/qwen-
|
|
|
156
156
|
|
|
157
157
|
---
|
|
158
158
|
|
|
159
|
-
## kilo - Kilo Code
|
|
159
|
+
## kilo - Kilo Code (status: byok)
|
|
160
160
|
|
|
161
161
|
Kilo Code adapter - multi-provider, Anthropic-default fill until runtime-author docs confirm. Researcher fill needed (CONTEXT.md D-02).
|
|
162
162
|
|
|
@@ -186,7 +186,7 @@ Kilo Code adapter - multi-provider, Anthropic-default fill until runtime-author
|
|
|
186
186
|
|
|
187
187
|
---
|
|
188
188
|
|
|
189
|
-
## copilot - GitHub Copilot CLI
|
|
189
|
+
## copilot - GitHub Copilot CLI (status: byok)
|
|
190
190
|
|
|
191
191
|
GitHub Copilot CLI - multi-provider routing under the hood. Researcher fill needed (CONTEXT.md D-02).
|
|
192
192
|
|
|
@@ -216,7 +216,7 @@ GitHub Copilot CLI - multi-provider routing under the hood. Researcher fill need
|
|
|
216
216
|
|
|
217
217
|
---
|
|
218
218
|
|
|
219
|
-
## cursor - Cursor
|
|
219
|
+
## cursor - Cursor (status: byok)
|
|
220
220
|
|
|
221
221
|
Cursor IDE/CLI - multi-provider routing. Researcher fill needed (CONTEXT.md D-02).
|
|
222
222
|
|
|
@@ -246,7 +246,7 @@ Cursor IDE/CLI - multi-provider routing. Researcher fill needed (CONTEXT.md D-02
|
|
|
246
246
|
|
|
247
247
|
---
|
|
248
248
|
|
|
249
|
-
## windsurf - Windsurf
|
|
249
|
+
## windsurf - Windsurf (status: byok)
|
|
250
250
|
|
|
251
251
|
Windsurf (formerly Codeium) - multi-provider Cascade router. Researcher fill needed (CONTEXT.md D-02).
|
|
252
252
|
|
|
@@ -276,7 +276,7 @@ Windsurf (formerly Codeium) - multi-provider Cascade router. Researcher fill nee
|
|
|
276
276
|
|
|
277
277
|
---
|
|
278
278
|
|
|
279
|
-
## antigravity - Antigravity
|
|
279
|
+
## antigravity - Antigravity (status: unverified)
|
|
280
280
|
|
|
281
281
|
Antigravity - Google's agentic coding platform. Researcher fill needed (CONTEXT.md D-02).
|
|
282
282
|
|
|
@@ -306,7 +306,7 @@ Antigravity - Google's agentic coding platform. Researcher fill needed (CONTEXT.
|
|
|
306
306
|
|
|
307
307
|
---
|
|
308
308
|
|
|
309
|
-
## augment - Augment
|
|
309
|
+
## augment - Augment (status: byok)
|
|
310
310
|
|
|
311
311
|
Augment Code - multi-provider agentic IDE. Researcher fill needed (CONTEXT.md D-02).
|
|
312
312
|
|
|
@@ -336,7 +336,7 @@ Augment Code - multi-provider agentic IDE. Researcher fill needed (CONTEXT.md D-
|
|
|
336
336
|
|
|
337
337
|
---
|
|
338
338
|
|
|
339
|
-
## trae - Trae
|
|
339
|
+
## trae - Trae (status: unverified)
|
|
340
340
|
|
|
341
341
|
Trae - single-model session runtime per CONTEXT.md D-02 example. `single_tier: true` annotates the row. Researcher fill needed.
|
|
342
342
|
|
|
@@ -367,7 +367,7 @@ Trae - single-model session runtime per CONTEXT.md D-02 example. `single_tier: t
|
|
|
367
367
|
|
|
368
368
|
---
|
|
369
369
|
|
|
370
|
-
## codebuddy - CodeBuddy
|
|
370
|
+
## codebuddy - CodeBuddy (status: byok)
|
|
371
371
|
|
|
372
372
|
CodeBuddy (Tencent) - multi-provider routing. Researcher fill needed (CONTEXT.md D-02).
|
|
373
373
|
|
|
@@ -397,7 +397,7 @@ CodeBuddy (Tencent) - multi-provider routing. Researcher fill needed (CONTEXT.md
|
|
|
397
397
|
|
|
398
398
|
---
|
|
399
399
|
|
|
400
|
-
## cline - Cline
|
|
400
|
+
## cline - Cline (status: byok)
|
|
401
401
|
|
|
402
402
|
Cline (formerly Claude Dev) - multi-provider VS Code agent. Researcher fill needed (CONTEXT.md D-02).
|
|
403
403
|
|
|
@@ -427,7 +427,7 @@ Cline (formerly Claude Dev) - multi-provider VS Code agent. Researcher fill need
|
|
|
427
427
|
|
|
428
428
|
---
|
|
429
429
|
|
|
430
|
-
## opencode - OpenCode
|
|
430
|
+
## opencode - OpenCode (status: byok)
|
|
431
431
|
|
|
432
432
|
OpenCode - open-source AI coding agent, BYOK multi-provider. Researcher fill needed (CONTEXT.md D-02).
|
|
433
433
|
|
package/reference/schemas/generated.d.ts
CHANGED
|
@@ -1042,6 +1042,10 @@ export interface RuntimeEntry {
|
|
|
1042
1042
|
* When true, the runtime exposes a single model that maps to all three tiers (D-02). Downstream consumers (router, budget-enforcer) may render a UI affordance noting tier-selection has no cost effect for this runtime.
|
|
1043
1043
|
*/
|
|
1044
1044
|
single_tier?: boolean;
|
|
1045
|
+
/**
|
|
1046
|
+
* Provenance confidence of this runtime's tier map. 'verified' = confirmed against runtime-author docs (claude, codex, gemini, qwen). 'byok' = BYOK / multi-provider runtime whose user-configured model may diverge from the Anthropic-default fill. 'unverified' = placeholder fill pending researcher confirmation. Optional: rows omitting this field are treated as unverified-unless-stated by consumers, and verified rows MAY omit it. The budget-enforcer guard reads this (or its built-in verified allowlist) so a byok/unverified row never drives a HARD budget cap (degrades to advisory).
|
|
1047
|
+
*/
|
|
1048
|
+
status?: 'verified' | 'byok' | 'unverified';
|
|
1045
1049
|
/**
|
|
1046
1050
|
* Map of canonical Anthropic tier names (D-03) to the runtime's concrete model identifier. All three keys are required even when single_tier=true (assign the same model three times).
|
|
1047
1051
|
*/
|
package/reference/schemas/runtime-models.schema.json
CHANGED
|
@@ -47,6 +47,11 @@
|
|
|
47
47
|
"type": "boolean",
|
|
48
48
|
"description": "When true, the runtime exposes a single model that maps to all three tiers (D-02). Downstream consumers (router, budget-enforcer) may render a UI affordance noting tier-selection has no cost effect for this runtime."
|
|
49
49
|
},
|
|
50
|
+
"status": {
|
|
51
|
+
"type": "string",
|
|
52
|
+
"enum": ["verified", "byok", "unverified"],
|
|
53
|
+
"description": "Provenance confidence of this runtime's tier map. 'verified' = confirmed against runtime-author docs (claude, codex, gemini, qwen). 'byok' = BYOK / multi-provider runtime whose user-configured model may diverge from the Anthropic-default fill. 'unverified' = placeholder fill pending researcher confirmation. Optional: rows omitting this field are treated as unverified-unless-stated by consumers, and verified rows MAY omit it. The budget-enforcer guard reads this (or its built-in verified allowlist) so a byok/unverified row never drives a HARD budget cap (degrades to advisory)."
|
|
54
|
+
},
|
|
50
55
|
"tier_to_model": {
|
|
51
56
|
"type": "object",
|
|
52
57
|
"additionalProperties": false,
|
package/reference/skill-graph.md
CHANGED
|
@@ -9,7 +9,7 @@ is a `composes_with` edge (the source calls the target as sub-orchestration); a
|
|
|
9
9
|
a `next_skills` edge (a pipeline hint for what runs next). Stage grouping is best-effort and
|
|
10
10
|
inferred from the skill name; skills with no stage keyword fall under Utility.
|
|
11
11
|
|
|
12
|
-
Skills:
|
|
12
|
+
Skills: 96. Composition edges: 20 composes_with, 7 next_skills.
|
|
13
13
|
|
|
14
14
|
```mermaid
|
|
15
15
|
flowchart TD
|
|
@@ -78,6 +78,7 @@ flowchart TD
|
|
|
78
78
|
n_fast["fast"]
|
|
79
79
|
n_figma_extract["figma-extract"]
|
|
80
80
|
n_graphify["graphify"]
|
|
81
|
+
n_handoff["handoff"]
|
|
81
82
|
n_health["health"]
|
|
82
83
|
n_help["help"]
|
|
83
84
|
n_instinct["instinct"]
|
|
@@ -138,6 +139,8 @@ flowchart TD
|
|
|
138
139
|
n_explore --> n_list_assumptions
|
|
139
140
|
n_explore --> n_sketch
|
|
140
141
|
n_explore -.-> n_plan
|
|
142
|
+
n_handoff --> n_verify
|
|
143
|
+
n_handoff -.-> n_verify
|
|
141
144
|
n_new_cycle --> n_brief
|
|
142
145
|
n_new_project --> n_brief
|
|
143
146
|
n_new_project -.-> n_brief
|
package/scripts/lib/bandit-router/integration.cjs
CHANGED
|
@@ -35,6 +35,12 @@
|
|
|
35
35
|
|
|
36
36
|
const banditRouter = require('../bandit-router.cjs');
|
|
37
37
|
const adaptiveModeLib = require('../adaptive-mode.cjs');
|
|
38
|
+
// Phase 56 (CAL-01) per-agent risk calibration. recordOutcome feeds the same
|
|
39
|
+
// {agent, status} signal it gives the bandit into this table so calibration
|
|
40
|
+
// learns from the post-spawn outcome too. Lazy-tolerant: the call is wrapped in
|
|
41
|
+
// its own best-effort try/catch (D-04) so a calibration write can never break
|
|
42
|
+
// the bandit path.
|
|
43
|
+
const calibration = require('../risk/calibration.cjs');
|
|
38
44
|
|
|
39
45
|
const DELEGATE_NONE = banditRouter.DELEGATE_NONE; // 'none'
|
|
40
46
|
const VALID_DELEGATES = banditRouter.DEFAULT_DELEGATES; // ['none','gemini','codex','cursor','copilot','qwen']
|
|
@@ -299,6 +305,38 @@ function recordOutcome(input) {
|
|
|
299
305
|
}
|
|
300
306
|
}
|
|
301
307
|
|
|
308
|
+
// CAL-01: also fold the same outcome into the per-agent risk calibration
|
|
309
|
+
// table so the calibration layer (compute-risk feedback) learns from the
|
|
310
|
+
// identical post-spawn signal the bandit just saw. Independent best-effort
|
|
311
|
+
// try/catch (D-04): a calibration write failure must NEVER throw into or
|
|
312
|
+
// break the bandit path above. The bandit signal carries no emitted risk
|
|
313
|
+
// score, so `risk` degrades to 0 via normalizeRecord; status drives the
|
|
314
|
+
// correctness axis (completed → applied-correct, anything else → not-correct).
|
|
315
|
+
// Writes to calibration.DEFAULT_CALIBRATION_PATH ('.design/telemetry/
|
|
316
|
+
// calibration.json') under baseDir — the module's own canonical location.
|
|
317
|
+
try {
|
|
318
|
+
calibration.updateCalibration(
|
|
319
|
+
input.agent,
|
|
320
|
+
{
|
|
321
|
+
accepted: true,
|
|
322
|
+
post_apply_correct: input.status === 'completed',
|
|
323
|
+
},
|
|
324
|
+
{ root: input.baseDir, baseDir: input.baseDir },
|
|
325
|
+
);
|
|
326
|
+
} catch (err) {
|
|
327
|
+
if (process.env.GDD_BANDIT_DEBUG === '1') {
|
|
328
|
+
try {
|
|
329
|
+
process.stderr.write(
|
|
330
|
+
'[bandit-integration] recordOutcome calibration swallowed: ' +
|
|
331
|
+
(err && err.message ? err.message : String(err)) +
|
|
332
|
+
'\n',
|
|
333
|
+
);
|
|
334
|
+
} catch {
|
|
335
|
+
/* swallow */
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
|
|
302
340
|
return undefined;
|
|
303
341
|
}
|
|
304
342
|
|
package/scripts/lib/install/installer.cjs
CHANGED
|
@@ -334,6 +334,58 @@ function listSourceSkills(skillsRoot) {
|
|
|
334
334
|
});
|
|
335
335
|
}
|
|
336
336
|
|
|
337
|
+
/**
 * List the reference `.md` files that sit directly beside a skill's SKILL.md.
 *
 * Skill source directories can ship extra markdown next to SKILL.md
 * (e.g. `<name>-procedure.md`, `<name>-rules.md`, `cache-policy.md`) that
 * SKILL.md points at with relative links; those links dangle unless the
 * files are installed too. Only regular files at the top level of the
 * directory are reported: SKILL.md itself is excluded (exact-name match),
 * and nothing inside nested subdirectories is visited. The `.md` extension
 * check is case-insensitive.
 *
 * Best-effort by design: if the directory cannot be listed, the result is
 * an empty array rather than an exception, so one unreadable skill dir
 * cannot abort the whole install.
 *
 * @param {string} skillSrcDir absolute path to `<skillsRoot>/<name>`
 * @returns {string[]} basenames of sibling `.md` files (excluding SKILL.md),
 *   in the order `fs.readdirSync` reported them
 */
function listSiblingRefFiles(skillSrcDir) {
  let dirents;
  try {
    dirents = fs.readdirSync(skillSrcDir, { withFileTypes: true });
  } catch {
    // Unreadable or missing directory: report "no siblings" instead of failing.
    return [];
  }

  const siblingNames = [];
  for (const dirent of dirents) {
    // Directories, symlinks and other non-regular entries are never siblings.
    if (!dirent.isFile()) continue;
    // SKILL.md is staged separately by the caller.
    if (dirent.name === 'SKILL.md') continue;
    if (dirent.name.toLowerCase().endsWith('.md')) {
      siblingNames.push(dirent.name);
    }
  }
  return siblingNames;
}
|
|
367
|
+
|
|
368
|
+
/**
 * Prefix a passthrough sibling reference file with the plugin fingerprint
 * comment so that foreign-file protection and uninstall can identify the
 * installed copy as plugin-owned.
 *
 * Idempotent: content for which `isPluginOwned` already returns true is
 * handed back untouched, so re-installing never stacks headers.
 * Non-string input is treated as an empty document (the result is then
 * just the header).
 *
 * The header text is intended to match `merge.cjs#GDD_ADAPTER_FINGERPRINT`,
 * the marker SKILL converters inject via `shared.ensureAdapterHeader`, which
 * is what makes `isPluginOwned` recognize the sibling.
 *
 * @param {string} raw source sibling file content
 * @returns {string} `raw` with the fingerprint header prepended, or the
 *   unchanged text when it is already fingerprinted
 */
function fingerprintSiblingRef(raw) {
  const body = typeof raw !== 'string' ? '' : raw;

  // Already carries the marker: leave it byte-for-byte as is.
  if (isPluginOwned(body)) {
    return body;
  }

  const fingerprintHeader =
    '<!-- gdd: auto-generated from Claude SKILL.md. Reference adapter -->\n\n';
  return `${fingerprintHeader}${body}`;
}
|
|
388
|
+
|
|
337
389
|
/**
|
|
338
390
|
* Install all artifacts for a `multi-artifact` runtime.
|
|
339
391
|
*
|
|
@@ -395,6 +447,48 @@ function installMultiArtifact(runtime, configDir, dryRun, opts) {
|
|
|
395
447
|
action: writeResult.action,
|
|
396
448
|
...(writeResult.reason ? { reason: writeResult.reason } : {}),
|
|
397
449
|
});
|
|
450
|
+
|
|
451
|
+
// Batch H6: carry co-located sibling `*.md` reference files alongside
|
|
452
|
+
// SKILL.md. The skills layout only stages SKILL.md per skill, so
|
|
453
|
+
// reference siblings (e.g. `<name>-procedure.md`) are otherwise lost.
|
|
454
|
+
// Scoped to cursor (the audited flat-layout runtime); other runtimes
|
|
455
|
+
// keep their prior single-SKILL.md behavior. Siblings are passthrough
|
|
456
|
+
// copies fingerprinted so foreign-file protection + uninstall treat
|
|
457
|
+
// them as plugin-owned. Broader skillsKind-runtime carry is deferred
|
|
458
|
+
// (see converters/cursor.cjs KNOWN LIMITATION).
|
|
459
|
+
if (kind.kind === 'skills' && runtime.id === 'cursor' && item.srcPath) {
|
|
460
|
+
const skillSrcDir = path.dirname(item.srcPath);
|
|
461
|
+
const skillDestDir = path.dirname(destPath);
|
|
462
|
+
for (const sibling of listSiblingRefFiles(skillSrcDir)) {
|
|
463
|
+
let rawSibling;
|
|
464
|
+
try {
|
|
465
|
+
rawSibling = fs.readFileSync(
|
|
466
|
+
path.join(skillSrcDir, sibling),
|
|
467
|
+
'utf8',
|
|
468
|
+
);
|
|
469
|
+
} catch (err) {
|
|
470
|
+
perFile.push({
|
|
471
|
+
kind: 'skill-ref',
|
|
472
|
+
path: path.join(skillDestDir, sibling),
|
|
473
|
+
action: 'skipped-foreign',
|
|
474
|
+
reason: `Could not read sibling ${sibling}: ${err.message}`,
|
|
475
|
+
});
|
|
476
|
+
continue;
|
|
477
|
+
}
|
|
478
|
+
const siblingDest = path.join(skillDestDir, sibling);
|
|
479
|
+
const siblingWrite = writeFingerprinted(
|
|
480
|
+
siblingDest,
|
|
481
|
+
fingerprintSiblingRef(rawSibling),
|
|
482
|
+
dryRun,
|
|
483
|
+
);
|
|
484
|
+
perFile.push({
|
|
485
|
+
kind: 'skill-ref',
|
|
486
|
+
path: siblingDest,
|
|
487
|
+
action: siblingWrite.action,
|
|
488
|
+
...(siblingWrite.reason ? { reason: siblingWrite.reason } : {}),
|
|
489
|
+
});
|
|
490
|
+
}
|
|
491
|
+
}
|
|
398
492
|
}
|
|
399
493
|
}
|
|
400
494
|
|
|
@@ -489,7 +583,45 @@ function uninstallMultiArtifact(runtime, configDir, dryRun, opts) {
|
|
|
489
583
|
|
|
490
584
|
// If we removed a SKILL.md, remember to trim its now-empty parent.
|
|
491
585
|
if (kind.kind === 'skills') {
|
|
492
|
-
|
|
586
|
+
const skillDestDir = path.dirname(destPath);
|
|
587
|
+
skillDirsToTrim.push(skillDestDir);
|
|
588
|
+
|
|
589
|
+
// Batch H6: symmetric cleanup for the sibling reference files the
|
|
590
|
+
// cursor install carries alongside SKILL.md. Remove only the
|
|
591
|
+
// plugin-owned siblings so a now-empty dir can be trimmed below;
|
|
592
|
+
// user-authored siblings are left in place (foreign-file discipline).
|
|
593
|
+
if (runtime.id === 'cursor') {
|
|
594
|
+
for (const sibling of listSiblingRefFiles(skillDestDir)) {
|
|
595
|
+
const siblingPath = path.join(skillDestDir, sibling);
|
|
596
|
+
let siblingContent;
|
|
597
|
+
try {
|
|
598
|
+
siblingContent = fs.readFileSync(siblingPath, 'utf8');
|
|
599
|
+
} catch (err) {
|
|
600
|
+
perFile.push({
|
|
601
|
+
kind: 'skill-ref',
|
|
602
|
+
path: siblingPath,
|
|
603
|
+
action: 'skipped-foreign',
|
|
604
|
+
reason: `Could not read sibling ${sibling}: ${err.message}`,
|
|
605
|
+
});
|
|
606
|
+
continue;
|
|
607
|
+
}
|
|
608
|
+
if (!isPluginOwned(siblingContent)) {
|
|
609
|
+
perFile.push({
|
|
610
|
+
kind: 'skill-ref',
|
|
611
|
+
path: siblingPath,
|
|
612
|
+
action: 'skipped-foreign',
|
|
613
|
+
reason: `Existing ${sibling} was not authored by this plugin; not removing.`,
|
|
614
|
+
});
|
|
615
|
+
continue;
|
|
616
|
+
}
|
|
617
|
+
if (!dryRun) fs.unlinkSync(siblingPath);
|
|
618
|
+
perFile.push({
|
|
619
|
+
kind: 'skill-ref',
|
|
620
|
+
path: siblingPath,
|
|
621
|
+
action: 'removed',
|
|
622
|
+
});
|
|
623
|
+
}
|
|
624
|
+
}
|
|
493
625
|
}
|
|
494
626
|
}
|
|
495
627
|
}
|