@geminix/gxpm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/AGENTS.md +148 -0
  2. package/CANON.md +53 -0
  3. package/CLAUDE.md +60 -0
  4. package/CONTEXT.md +49 -0
  5. package/DEBUG.md +59 -0
  6. package/ISSUE_CONTEXT.md +25 -0
  7. package/README.md +143 -0
  8. package/VERSION +1 -0
  9. package/agents/cleanup-auditor/cleanup-auditor.md +56 -0
  10. package/agents/grill-master.md +26 -0
  11. package/agents/implementer.md +32 -0
  12. package/agents/review-army/accessibility-reviewer.md +54 -0
  13. package/agents/review-army/code-quality-reviewer.md +54 -0
  14. package/agents/review-army/security-reviewer.md +56 -0
  15. package/agents/review-army/spec-compliance-reviewer.md +51 -0
  16. package/agents/review-army/test-reviewer.md +55 -0
  17. package/agents/reviewer.md +59 -0
  18. package/agents/ship-audit-army/docs-auditor.md +53 -0
  19. package/agents/ship-audit-army/performance-auditor.md +52 -0
  20. package/agents/ship-audit-army/security-auditor.md +52 -0
  21. package/agents/specifier.md +55 -0
  22. package/agents/triage-officer.md +27 -0
  23. package/bin/gxpm +17 -0
  24. package/bin/gxpm-browser +17 -0
  25. package/bin/gxpm-config +15 -0
  26. package/bin/gxpm-eval +13 -0
  27. package/bin/gxpm-global-discover +15 -0
  28. package/bin/gxpm-init +38 -0
  29. package/bin/gxpm-investigate +194 -0
  30. package/bin/gxpm-uninstall +15 -0
  31. package/bin/gxpm-update-check +165 -0
  32. package/commands/build.md +40 -0
  33. package/commands/help.md +53 -0
  34. package/commands/plan.md +34 -0
  35. package/commands/refine.md +46 -0
  36. package/commands/review.md +34 -0
  37. package/commands/ship.md +37 -0
  38. package/core/ac-check.ts +20 -0
  39. package/core/agent-runtime.ts +363 -0
  40. package/core/artifact-validator.ts +151 -0
  41. package/core/artifacts.ts +313 -0
  42. package/core/autopilot.ts +250 -0
  43. package/core/capabilities.ts +779 -0
  44. package/core/checkpoint.ts +370 -0
  45. package/core/cleanup.ts +32 -0
  46. package/core/command-probe.ts +82 -0
  47. package/core/config.ts +533 -0
  48. package/core/contracts/behavior-spec.schema.ts +38 -0
  49. package/core/contracts/converter.ts +61 -0
  50. package/core/contracts/host.ts +43 -0
  51. package/core/converters/converter.ts +93 -0
  52. package/core/converters/index.ts +8 -0
  53. package/core/converters/managed-artifact.ts +119 -0
  54. package/core/converters/parser.ts +159 -0
  55. package/core/converters/template-renderer.ts +35 -0
  56. package/core/converters/writer.ts +61 -0
  57. package/core/dag-executor.ts +426 -0
  58. package/core/dag-loader.ts +292 -0
  59. package/core/dag-schemas.ts +150 -0
  60. package/core/dispatch.ts +125 -0
  61. package/core/evidence.ts +148 -0
  62. package/core/gate.ts +269 -0
  63. package/core/hook-engine.ts +566 -0
  64. package/core/host-probe.ts +64 -0
  65. package/core/implement.ts +16 -0
  66. package/core/isolation-errors.ts +174 -0
  67. package/core/isolation-resolver.ts +921 -0
  68. package/core/issue-context.ts +381 -0
  69. package/core/issue-readiness.ts +457 -0
  70. package/core/issue-sync.ts +427 -0
  71. package/core/issues.ts +132 -0
  72. package/core/land.ts +108 -0
  73. package/core/orchestrator.ts +54 -0
  74. package/core/phase-artifact.ts +32 -0
  75. package/core/phase-gates.ts +130 -0
  76. package/core/phase-rewind.ts +94 -0
  77. package/core/plan-lint.ts +61 -0
  78. package/core/plan.ts +77 -0
  79. package/core/port-allocation.ts +50 -0
  80. package/core/pr-check.ts +15 -0
  81. package/core/preset-system/preset-resolver.ts +221 -0
  82. package/core/project-init-status.ts +127 -0
  83. package/core/qa.ts +15 -0
  84. package/core/resilience.ts +165 -0
  85. package/core/runs.ts +288 -0
  86. package/core/safe-path.test.ts +80 -0
  87. package/core/safe-path.ts +60 -0
  88. package/core/sdd-gate.test.ts +98 -0
  89. package/core/sdd-gate.ts +134 -0
  90. package/core/self-review.ts +62 -0
  91. package/core/session.ts +70 -0
  92. package/core/ship.ts +86 -0
  93. package/core/specify.ts +173 -0
  94. package/core/state.ts +1002 -0
  95. package/core/template-engine.ts +152 -0
  96. package/core/template-resolver.test.ts +70 -0
  97. package/core/template-resolver.ts +156 -0
  98. package/core/triage.ts +26 -0
  99. package/core/verify.ts +15 -0
  100. package/core/wiki-native.ts +2423 -0
  101. package/core/wiki.ts +27 -0
  102. package/core/workflow-event-emitter.ts +163 -0
  103. package/core/workflows/engine.ts +273 -0
  104. package/core/workflows/expressions.ts +76 -0
  105. package/core/workflows/index.ts +38 -0
  106. package/core/workflows/steps/command.ts +43 -0
  107. package/core/workflows/steps/gate.ts +47 -0
  108. package/core/workflows/steps/gxpm.ts +44 -0
  109. package/core/workflows/steps/linear.ts +31 -0
  110. package/core/workflows/steps/shell.ts +65 -0
  111. package/core/workflows/types.ts +62 -0
  112. package/core/workspace-runtime.ts +227 -0
  113. package/core/worktree-init-steps.ts +647 -0
  114. package/core/worktree-init.ts +330 -0
  115. package/core/worktree-owner.ts +143 -0
  116. package/docs/GXPM_VERIFY.md +98 -0
  117. package/docs/INSTALL_FOR_AGENTS.md +113 -0
  118. package/docs/README.md +57 -0
  119. package/docs/adr/adr-005-multi-platform-skill-converter.md +72 -0
  120. package/docs/agents/domain.md +30 -0
  121. package/docs/agents/issue-tracker.md +30 -0
  122. package/docs/agents/triage-labels.md +32 -0
  123. package/docs/architecture/gxpm-architecture-diagram.md +265 -0
  124. package/docs/architecture/gxpm-current-architecture.md +175 -0
  125. package/docs/architecture/gxpm-current-flow.md +278 -0
  126. package/docs/architecture/gxpm-replacement-architecture.md +211 -0
  127. package/docs/architecture/gxpm-target-architecture.md +449 -0
  128. package/docs/architecture/gxpm-v0-contract.md +311 -0
  129. package/docs/architecture/layered-workflow-boundaries.md +193 -0
  130. package/docs/architecture/preset-system.md +126 -0
  131. package/docs/architecture/scaffold-northstar.md +23 -0
  132. package/docs/brainstorms/2026-05-14-bdd-then-tdd-design.md +320 -0
  133. package/docs/brainstorms/README.md +22 -0
  134. package/docs/brainstorms/docs-knowledge-system-requirements.md +29 -0
  135. package/docs/governance/beta-skill-promotion.md +39 -0
  136. package/docs/governance/development-contract.md +144 -0
  137. package/docs/governance/gherkin-style.md +90 -0
  138. package/docs/governance/host-adapter.md +56 -0
  139. package/docs/governance/skill-authoring.md +87 -0
  140. package/docs/governance/skill-testing.md +356 -0
  141. package/docs/governance/template-authoring.md +53 -0
  142. package/docs/migrations/v0.2.md +51 -0
  143. package/docs/plans/README.md +23 -0
  144. package/docs/plans/bdd-then-tdd-plan.md +1767 -0
  145. package/docs/plans/docs-knowledge-system-plan.md +31 -0
  146. package/docs/plans/spec-kit-sdd-adoption-plan.md +305 -0
  147. package/docs/research/agents-md-best-practices.md +207 -0
  148. package/docs/research/archon-study.md +351 -0
  149. package/docs/research/claude-hooks-study.md +440 -0
  150. package/docs/research/codex-hooks-study.md +624 -0
  151. package/docs/research/everything-claude-code-study.md +252 -0
  152. package/docs/research/from-skills-to-layered-workflow.md +322 -0
  153. package/docs/research/gsd-study.md +69 -0
  154. package/docs/research/kimi-hooks-study.md +274 -0
  155. package/docs/research/mattpocock-skills-comparison.md +429 -0
  156. package/docs/research/mattpocock-skills-study.md +275 -0
  157. package/docs/research/oh-my-codex-study.md +279 -0
  158. package/docs/research/perplexity-agent-skills-design.md +168 -0
  159. package/docs/research/pmc-gstack-skill-study.md +122 -0
  160. package/docs/research/spec-kit-study.md +224 -0
  161. package/docs/research/superpowers-study.md +209 -0
  162. package/docs/roadmap/initial-roadmap.md +53 -0
  163. package/docs/solutions/README.md +45 -0
  164. package/docs/solutions/artifact-nesting-recovery.md +58 -0
  165. package/docs/solutions/session-context-restore-practice.md +67 -0
  166. package/docs/solutions/workflow/version-drift-recovery.md +49 -0
  167. package/docs/solutions/worktree-gate-recovery.md +62 -0
  168. package/docs/specs/README.md +28 -0
  169. package/docs/specs/claude.md +45 -0
  170. package/docs/specs/codex.md +44 -0
  171. package/docs/specs/cursor.md +44 -0
  172. package/hosts/adapters/claude.ts +29 -0
  173. package/hosts/adapters/codex.ts +27 -0
  174. package/hosts/adapters/cursor.ts +27 -0
  175. package/hosts/adapters/kimi.ts +27 -0
  176. package/hosts/claude.ts +23 -0
  177. package/hosts/codex.ts +26 -0
  178. package/hosts/cursor.ts +19 -0
  179. package/hosts/index.ts +33 -0
  180. package/hosts/registry.test.ts +52 -0
  181. package/hosts/registry.ts +57 -0
  182. package/hosts/schema.ts +58 -0
  183. package/package.json +52 -0
  184. package/scripts/browser.ts +185 -0
  185. package/scripts/cleanup.ts +142 -0
  186. package/scripts/commands/artifact.ts +115 -0
  187. package/scripts/commands/autopilot.ts +143 -0
  188. package/scripts/commands/capability.ts +57 -0
  189. package/scripts/commands/config.ts +69 -0
  190. package/scripts/commands/dag.ts +126 -0
  191. package/scripts/commands/feedback.ts +123 -0
  192. package/scripts/commands/gate.ts +291 -0
  193. package/scripts/commands/helpers.ts +126 -0
  194. package/scripts/commands/hook.ts +66 -0
  195. package/scripts/commands/init.ts +515 -0
  196. package/scripts/commands/issue.ts +825 -0
  197. package/scripts/commands/phase.ts +61 -0
  198. package/scripts/commands/preset.ts +159 -0
  199. package/scripts/commands/runtime.ts +199 -0
  200. package/scripts/commands/specify.ts +71 -0
  201. package/scripts/commands/upgrade.ts +243 -0
  202. package/scripts/commands/verify.ts +183 -0
  203. package/scripts/commands/wiki.ts +242 -0
  204. package/scripts/commands/workflow.ts +131 -0
  205. package/scripts/dev-skill.ts +55 -0
  206. package/scripts/discover-skills.ts +116 -0
  207. package/scripts/doctor.ts +410 -0
  208. package/scripts/dogfood-check.ts +125 -0
  209. package/scripts/eval-functional.ts +218 -0
  210. package/scripts/eval.ts +246 -0
  211. package/scripts/gen-skill-docs.ts +201 -0
  212. package/scripts/global-discover.ts +217 -0
  213. package/scripts/governance-check.ts +75 -0
  214. package/scripts/gxpm-check.ts +12 -0
  215. package/scripts/gxpm.ts +216 -0
  216. package/scripts/host-config.ts +62 -0
  217. package/scripts/install-claude-hooks.ts +138 -0
  218. package/scripts/install-codex-hooks.ts +271 -0
  219. package/scripts/install-hooks.ts +128 -0
  220. package/scripts/install-kimi-hooks.ts +92 -0
  221. package/scripts/install-skill.ts +184 -0
  222. package/scripts/phase-artifact-commands.ts +100 -0
  223. package/scripts/post-land-sync.ts +46 -0
  224. package/scripts/scaffold-check.ts +85 -0
  225. package/scripts/skill-naming-check.ts +78 -0
  226. package/scripts/skill-structure-check.ts +157 -0
  227. package/scripts/skills-lock-check.ts +60 -0
  228. package/scripts/sync-markdown-artifacts.ts +172 -0
  229. package/scripts/uninstall.ts +162 -0
  230. package/scripts/version.ts +47 -0
  231. package/scripts/wait-pr-ready.ts +407 -0
  232. package/skills/gxpm/SKILL.md +485 -0
  233. package/skills/gxpm/SKILL.md.tmpl +422 -0
  234. package/skills/gxpm/references/CANON.md +53 -0
  235. package/skills/gxpm/references/key-rules.md +130 -0
  236. package/skills/gxpm-architecture/SKILL.md +106 -0
  237. package/skills/gxpm-architecture/references/DEEPENING.md +37 -0
  238. package/skills/gxpm-architecture/references/INTERFACE-DESIGN.md +44 -0
  239. package/skills/gxpm-autopilot/SKILL.md +116 -0
  240. package/skills/gxpm-autopilot/SKILL.md.tmpl +107 -0
  241. package/skills/gxpm-browser/SKILL.md +105 -0
  242. package/skills/gxpm-browser/SKILL.md.tmpl +41 -0
  243. package/skills/gxpm-browser/references/commands.md +43 -0
  244. package/skills/gxpm-browser/references/evidence-path.md +20 -0
  245. package/skills/gxpm-build/SKILL.md +78 -0
  246. package/skills/gxpm-cleanup/SKILL.md +76 -0
  247. package/skills/gxpm-debug-issue/SKILL.md +39 -0
  248. package/skills/gxpm-diagnose/SKILL.md +220 -0
  249. package/skills/gxpm-diagnose/SKILL.md.tmpl +31 -0
  250. package/skills/gxpm-diagnose/references/feedback-loop.md +34 -0
  251. package/skills/gxpm-diagnose/references/feedback-loops.md +43 -0
  252. package/skills/gxpm-diagnose/references/phases.md +60 -0
  253. package/skills/gxpm-eval/SKILL.md +78 -0
  254. package/skills/gxpm-explore-codebase/SKILL.md +36 -0
  255. package/skills/gxpm-explore-codebase/scripts/summarize-communities.ts +51 -0
  256. package/skills/gxpm-feedback/SKILL.md +122 -0
  257. package/skills/gxpm-grill/SKILL.md +159 -0
  258. package/skills/gxpm-grill/SKILL.md.tmpl +77 -0
  259. package/skills/gxpm-grill/references/documentation-templates.md +56 -0
  260. package/skills/gxpm-grill/references/process.md +25 -0
  261. package/skills/gxpm-handoff/SKILL.md +112 -0
  262. package/skills/gxpm-hygiene/SKILL.md +69 -0
  263. package/skills/gxpm-implementer/SKILL.md +142 -0
  264. package/skills/gxpm-implementer/SKILL.md.tmpl +141 -0
  265. package/skills/gxpm-linear/SKILL.md +282 -0
  266. package/skills/gxpm-linear/SKILL.md.tmpl +86 -0
  267. package/skills/gxpm-linear/references/commands.md +75 -0
  268. package/skills/gxpm-linear/references/workflows.md +120 -0
  269. package/skills/gxpm-planning/SKILL.md +134 -0
  270. package/skills/gxpm-prototype/SKILL.md +64 -0
  271. package/skills/gxpm-refactor-safely/SKILL.md +62 -0
  272. package/skills/gxpm-review-army/SKILL.md +117 -0
  273. package/skills/gxpm-review-changes/SKILL.md +36 -0
  274. package/skills/gxpm-setup/SKILL.md +101 -0
  275. package/skills/gxpm-specifier/SKILL.md +135 -0
  276. package/skills/gxpm-tdd/SKILL.md +187 -0
  277. package/skills/gxpm-tdd/references/interface-design.md +23 -0
  278. package/skills/gxpm-tdd/references/mocking.md +27 -0
  279. package/skills/gxpm-tdd/references/red-green-refactor.md +61 -0
  280. package/skills/gxpm-tdd/references/troubleshooting.md +28 -0
  281. package/skills/gxpm-tdd/references/workflow.md +50 -0
  282. package/skills/gxpm-tdd/testing-anti-patterns.tmpl +304 -0
  283. package/skills/gxpm-triage/SKILL.md +160 -0
  284. package/skills/gxpm-verify/SKILL.md +107 -0
  285. package/skills/gxpm-write-skill/SKILL.md +131 -0
  286. package/skills/gxpm-zoom-out/SKILL.md +69 -0
  287. package/skills/maintain-hygiene-skills-lock/SKILL.md +54 -0
  288. package/skills/maintain-hygiene-skills-lock/SKILL.md.tmpl +53 -0
  289. package/templates/constitution-template.md +63 -0
  290. package/templates/hooks/gxpm-commit-msg +16 -0
  291. package/templates/hooks/gxpm-post-checkout +19 -0
  292. package/templates/hooks/gxpm-post-commit +7 -0
  293. package/templates/hooks/gxpm-post-merge +29 -0
  294. package/templates/hooks/gxpm-pre-commit +39 -0
  295. package/templates/hooks/gxpm-pre-push +33 -0
  296. package/templates/plan-template.md.tmpl +46 -0
  297. package/templates/spec-template.md.tmpl +63 -0
  298. package/templates/specify-stub.tmpl +22 -0
  299. package/templates/tasks-template.md.tmpl +32 -0
@@ -0,0 +1,78 @@
1
+ ---
2
+ name: gxpm-build
3
+ description: Compile and type-check verification. Use after implementing code changes, before committing, or when the build fails and root cause is unclear.
4
+ ---
5
+
6
+ # gxpm-build
7
+
8
+ ## 入口条件
9
+
10
+ 在以下场景触发本 skill:
11
+
12
+ - 每次代码变更后,需要确认编译和类型检查通过
13
+ - 提交前,作为 `gxpm-verify` 的前置步骤之一
14
+ - 构建失败且根因不明确时
15
+
16
+ **Skill 边界**:本 skill 仅处理编译和类型检查层面的验证。测试失败请加载 `/gxpm-tdd`,代码风格或提交规范请加载 `/gxpm-hygiene`。
17
+
18
+ 在 gxpm 工作流中,`implement` 阶段每次完成一个垂直切片后都应运行构建验证。离开 `implement` 前,`local-verify` artifact 必须包含 `buildEvidence`。
19
+
20
+ ## 可操作流程
21
+
22
+ 按以下顺序执行构建验证:
23
+
24
+ ```bash
25
+ # 1. Type check (fastest, catch type errors first)
26
+ bun run check
27
+
28
+ # 2. Build (confirm compilation produces valid output)
29
+ bun run build
30
+ ```
31
+
32
+ 如果项目使用不同的构建系统,替换为等效命令。
33
+
34
+ ### 退出码约定
35
+
36
+ | Exit Code | Meaning | Action |
37
+ |-----------|---------|--------|
38
+ | 0 | Success | Proceed |
39
+ | non-0 | Failure | **Stop. Fix before continuing.** |
40
+
41
+ ### 构建失败归因
42
+
43
+ 遇到构建失败时,按症状定位根因并选择修复策略:
44
+
45
+ | Symptom | Likely Cause | Fix Strategy |
46
+ |---------|-------------|--------------|
47
+ | Type error in changed file | Incorrect type usage or missing property | Fix the type, not the test |
48
+ | Type error in unchanged file | Breaking change leaked to consumer | Update consumer or revert breaking change |
49
+ | Syntax error | Typo, missing brace, invalid syntax | Fix syntax, re-run typecheck first |
50
+ | Build output missing | Build script misconfigured or dependency missing | Check `package.json` scripts and `node_modules` |
51
+ | Module resolution error | Import path wrong or alias unconfigured | Verify path mapping in `tsconfig.json` |
52
+
53
+ ## 红旗清单 / 反模式
54
+
55
+ - **Never commit a broken build.** A build failure means the implementation is incomplete.
56
+ - **Type errors are behavior errors.** Type checking is not optional polish — it is contract verification.
57
+ - **Keep every increment compilable:** After every increment (even a partial one), the project must build successfully.
58
+ - **One fix at a time:** If the build fails, identify and fix the root cause before running the build again. Do not guess-and-rerun.
59
+
60
+ ## 验证清单 / 出口条件
61
+
62
+ 构建通过的标准:
63
+
64
+ - [ ] `bun run check` 返回 exit code 0
65
+ - [ ] `bun run build` 返回 exit code 0
66
+ - [ ] 如失败,已定位根因并完成单次修复(禁止猜测-重跑循环)
67
+
68
+ `local-verify` artifact 中的 `buildEvidence` 必须包含:
69
+
70
+ ```json
71
+ {
72
+ "buildEvidence": {
73
+ "commands": ["bun run check", "bun run build"],
74
+ "exitCodes": [0, 0],
75
+ "timestamp": "2026-05-08T08:30:00Z"
76
+ }
77
+ }
78
+ ```
@@ -0,0 +1,76 @@
1
+ ---
2
+ name: gxpm-cleanup
3
+ description: 多 issue worktree 合并前的代码清理与简化。在 cleanup 阶段识别跨 issue 重复、命名不一致、接口错位和死代码。
4
+ status: stable
5
+ ---
6
+
7
+ # gxpm-cleanup
8
+
9
+ 在 gxpm 的 `cleanup` 阶段对多 issue worktree 进行跨 issue 代码清理,确保进入 `ship` 前的代码是统一、简洁、无重复的。
10
+
11
+ ## 入口条件
12
+
13
+ - issue 已进入 `cleanup` 阶段
14
+ - worktree 中处理了 2+ 个 issue(单 issue 可跳过 cleanup)
15
+ - 所有 issue 已完成 `self-review`
16
+
17
+ ## 可操作流程
18
+
19
+ ### 在 cleanup 阶段初始化报告
20
+
21
+ ```bash
22
+ gxpm self-review cleanup <issue-id>
23
+ ```
24
+
25
+ 这会创建 `cleanup-report.json` artifact,包含:
26
+ - `duplicatesExtracted` — 重复代码提取记录
27
+ - `renamesUnified` — 命名统一记录
28
+ - `interfacesAligned` — 接口对齐记录
29
+ - `deadCodeRemoved` — 死代码删除记录
30
+ - `testsDeduplicated` — 测试去重记录
31
+
32
+ ### 跳过 cleanup(单 issue worktree)
33
+
34
+ ```bash
35
+ gxpm issue transition <issue-id> ship --skip-cleanup
36
+ ```
37
+
38
+ 单 issue worktree 可直接从 self-review 进入 ship,跳过 cleanup 阶段。
39
+
40
+ **与 rigorLevel 的关系**:gxpm 的 `standard` 和 `lite` rigor 模式已自动跳过 cleanup 阶段(通过 `isCompressedSkip`),无需手动 `--skip-cleanup`。仅在 `full` 模式下需要显式跳过时才使用此 flag。
41
+
42
+ ### Cleanup 检查清单
43
+
44
+ - [ ] 扫描 worktree 中所有 issue 的代码变更
45
+ - [ ] 识别功能重复的模块/函数
46
+ - [ ] 检查同一概念在不同 issue 中的命名一致性
47
+ - [ ] 验证 issue 之间的接口格式是否对齐
48
+ - [ ] 识别被后续 issue 替代的死代码
49
+ - [ ] 检查测试覆盖是否有冗余
50
+ - [ ] 记录所有发现到 cleanup-report
51
+
52
+ ## 红旗清单 / HARD-GATE
53
+
54
+ - **发现功能完全重复的模块但未提取** → 必须 STOP,提取到共享位置
55
+ - **跨 issue 接口不兼容且无迁移方案** → 必须 STOP,对齐接口后再推进
56
+ - **死代码占比过高** → Important,需在 cleanup-report 中说明清理计划
57
+
58
+ ## 验证清单
59
+
60
+ - [ ] cleanup-report.json 已创建并包含审计结果
61
+ - [ ] 所有 blocking 级别问题已解决或获得豁免
62
+ - [ ] 功能完全重复的模块已提取到共享位置;其余重复代码已提取,或已在 cleanup-report 中记录后续迭代的处理计划
63
+ - [ ] 跨 issue 命名已统一或有明确的统一计划
64
+
65
+ ## 常见说辞表
66
+
67
+ | 说辞 | 现实 | 正确做法 |
68
+ |------|------|----------|
69
+ | "这些重复是不同 issue 的职责,不应该提取" | 同一 worktree 的代码最终合并到同一 branch,重复就是债务 | 提取到共享位置,或明确说明为什么必须保持重复 |
70
+ | "接口不对齐是设计决策,不是 bug" | 设计决策需要在代码层面有文档和兼容层 | 在 cleanup-report 中记录决策,必要时添加适配层 |
71
+ | "cleanup 太费时间,直接 ship" | 跳过 cleanup 会让技术债务进入生产环境 | 至少记录发现的债务,在 ship notes 中说明还款计划 |
72
+
73
+ ## Read Next
74
+
75
+ - `agents/cleanup-auditor/cleanup-auditor.md` — 审计角色详细定义
76
+ - `skills/gxpm-refactor-safely/SKILL.md` — 安全重构指南
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: gxpm-debug-issue
3
+ description: Systematic issue debugging using graph-powered code navigation. Use when user asks to trace a bug, investigate a specific error, or find the root cause of a failing test or exception.
4
+ ---
5
+ <!-- AUTO-GENERATED from SKILL.md.tmpl - do not edit directly -->
6
+
7
+ ## gxpm-debug-issue
8
+
9
+ ### 入口条件
10
+
11
+ **Skill boundary:**
12
+ - If the root cause is **completely unknown** and you need a systematic diagnosis loop, load `/gxpm-diagnose` first.
13
+ - If you need to **understand code structure** without debugging a specific bug, load `/gxpm-explore-codebase` first.
14
+ - If you are **refactoring**, load `/gxpm-refactor-safely` instead.
15
+
16
+ Use GitNexus to systematically trace and debug issues when you have a **specific symptom**.
17
+
18
+ ### 可操作流程
19
+
20
+ 1. Run `list_repos` and read `gitnexus://repo/{name}/context` when an index exists.
21
+ 2. Use `query` to find execution flows related to the symptom.
22
+ 3. Use `context` on suspected symbols to inspect callers, callees, and participating processes.
23
+ 4. Run `detect_changes` to check if recent changes caused the issue.
24
+ 5. Use `impact` on suspected symbols or files to see what else is affected.
25
+
26
+ > **Tips**
27
+ > - Check both callers and callees to understand the full context.
28
+ > - Look at affected flows to find the entry point that triggers the bug.
29
+ > - Recent changes are the most common source of new issues.
30
+
31
+ ### 红旗清单 / 反模式
32
+
33
+ - Start with the narrowest GitNexus query that matches the symptom, then expand.
34
+ - Prefer `query` and `context` before raw `cypher`.
35
+
36
+ ### 验证清单 / 出口条件
37
+
38
+ - Use `impact` on suspected symbols or files to see what else is affected.
39
+ - Target: complete any review/debug/refactor task in ≤5 graph tool calls.
@@ -0,0 +1,220 @@
1
+ ---
2
+ name: diagnose
3
+ description: Disciplined diagnosis loop for hard bugs and performance regressions. Use when user says 'diagnose this', reports a hard bug, describes a performance regression, or asks why something fails.
4
+ ---
5
+ <!-- AUTO-GENERATED from SKILL.md.tmpl - do not edit directly -->
6
+
7
+ # Diagnose
8
+
9
+ A discipline for hard bugs where the root cause is **not yet known**.
10
+
11
+ ### 入口条件
12
+
13
+ **Skill boundary:**
14
+ - If you have a **specific symptom, stack trace, or error message** and need to trace its root cause through the codebase, load `/gxpm-debug-issue` first.
15
+ - If you need to **understand code structure** without debugging a specific bug, load `/gxpm-explore-codebase` first.
16
+ - If you are **refactoring** and discover a bug mid-refactor, switch to `/gxpm-diagnose` or `/gxpm-debug-issue` instead of continuing.
17
+
18
+ - **触发时机**:Use when user says 'diagnose this', reports a hard bug, describes a performance regression, or asks why something fails.
19
+ - **纪律**:Skip phases only when explicitly justified.
20
+
21
+ ### 可操作流程
22
+
23
+ ## Phase 2 — Reproduce
24
+
25
+ Run the loop. Confirm:
26
+ - [ ] The loop produces the failure mode the **user** described.
27
+ - [ ] The failure is reproducible across multiple runs.
28
+ - [ ] You have captured the exact symptom.
29
+
30
+ ## Phase 3 — Explore with the codebase
31
+
32
+ Use available code intelligence tools (e.g., GitNexus MCP, grep, ReadFile) to accelerate understanding:
33
+
34
+ 1. **Semantic search** to find code related to the symptom.
35
+ 2. **Call-chain tracing** to follow `callers_of` / `callees_of` relationships.
36
+ 3. **Execution flow analysis** to see full paths through suspected areas.
37
+ 4. **Change detection** (`git diff`, `detect_changes`) to check if recent changes caused the issue.
38
+ 5. **Impact analysis** on suspected files to see what else is affected.
39
+
40
+ **Token efficiency**: start with the narrowest context possible, then expand. Target ≤5 tool calls and ≤800 total output tokens for the exploration phase.
41
+
42
+ ## Phase 4 — Hypothesise
43
+
44
+ Generate **3–5 ranked hypotheses** before testing any of them.
45
+
46
+ Each hypothesis must be **falsifiable**: state the prediction it makes.
47
+
48
+ > Format: "If <X> is the cause, then <changing Y> will make the bug disappear."
49
+
50
+ **Show the ranked list to the user before testing.** They often have domain knowledge that re-ranks instantly.
51
+
52
+ ## Phase 5 — Instrument
53
+
54
+ Each probe must map to a specific prediction from Phase 4. **Change one variable at a time.**
55
+
56
+ Tool preference:
57
+ 1. **Debugger / REPL inspection** if the env supports it.
58
+ 2. **Targeted logs** at the boundaries that distinguish hypotheses.
59
+ 3. Never "log everything and grep".
60
+
61
+ **Tag every debug log** with a unique prefix, e.g. `[DEBUG-a4f2]`. Cleanup at the end becomes a single grep.
62
+
63
+ **Perf branch.** For performance regressions: establish a baseline measurement first, then bisect. Measure first, fix second.
64
+
65
+ ## Phase 6 — Fix + regression test
66
+
67
+ Write the regression test **before the fix** — but only if there is a **correct seam** for it.
68
+
69
+ A correct seam is one where the test exercises the **real bug pattern** as it occurs at the call site.
70
+
71
+ **If no correct seam exists, that itself is the finding.** Flag this for the next `/architecture` run.
72
+
73
+ ## Phase 7 — Cleanup + post-mortem
74
+
75
+ Required before declaring done:
76
+ - [ ] Original repro no longer reproduces.
77
+ - [ ] Regression test passes (or absence of seam is documented).
78
+ - [ ] All `[DEBUG-...]` instrumentation removed.
79
+ - [ ] Throwaway prototypes deleted.
80
+ - [ ] The correct hypothesis is stated in the commit / PR message.
81
+
82
+ **Then ask: what would have prevented this bug?** If the answer involves architectural change, hand off to `/architecture` with the specifics.
83
+
84
+
85
+ ### 红旗清单 / 反模式
86
+
87
+ ## Phase 1 — Build a feedback loop
88
+
89
+ **This is the skill.** Everything else is mechanical. If you have a fast, deterministic, agent-runnable pass/fail signal for the bug, you will find the cause.
90
+
91
+ Spend disproportionate effort here. **Be aggressive. Be creative. Refuse to give up.**
92
+
93
+ ### Ways to construct one — try in roughly this order
94
+
95
+ 1. **Failing test** at whatever seam reaches the bug — unit, integration, e2e.
96
+ 2. **Curl / HTTP script** against a running dev server.
97
+ 3. **CLI invocation** with a fixture input, diffing stdout against a known-good snapshot.
98
+ 4. **Headless browser script** (Playwright / Puppeteer).
99
+ 5. **Replay a captured trace.** Save a real network request / payload / event log to disk; replay it through the code path in isolation.
100
+ 6. **Throwaway harness.** Spin up a minimal subset of the system that exercises the bug code path.
101
+ 7. **Property / fuzz loop.** Run 1000 random inputs and look for the failure mode.
102
+ 8. **Bisection harness.** Automate `git bisect run` if the bug appeared between two known states.
103
+ 9. **Differential loop.** Run old-version vs new-version and diff outputs.
104
+ 10. **HITL bash script.** Last resort. If a human must click, drive them with a structured loop.
105
+
106
+ ### Iterate on the loop itself
107
+
108
+ - Can I make it faster? (Cache setup, skip unrelated init.)
109
+ - Can I make the signal sharper? (Assert on the specific symptom.)
110
+ - Can I make it more deterministic? (Pin time, seed RNG, isolate filesystem.)
111
+
112
+ ### Non-deterministic bugs
113
+
114
+ The goal is a **higher reproduction rate**. Loop the trigger 100×, parallelise, add stress. A 50%-flake bug is debuggable; 1% is not.
115
+
116
+ ### When you genuinely cannot build a loop
117
+
118
+ Stop and say so explicitly. List what you tried. Ask the user for: (a) access to the repro environment, (b) a captured artifact, or (c) permission to add temporary production instrumentation.
119
+
120
+ **Do not proceed to Phase 2 until you have a loop you believe in.**
121
+
122
+
123
+ ### 验证清单 / 出口条件
124
+
125
+ - 完成 ## Phase 2 — Reproduce
126
+
127
+ Run the loop. Confirm:
128
+ - [ ] The loop produces the failure mode the **user** described.
129
+ - [ ] The failure is reproducible across multiple runs.
130
+ - [ ] You have captured the exact symptom.
131
+
132
+ ## Phase 3 — Explore with the codebase
133
+
134
+ Use available code intelligence tools (e.g., GitNexus MCP, grep, ReadFile) to accelerate understanding:
135
+
136
+ 1. **Semantic search** to find code related to the symptom.
137
+ 2. **Call-chain tracing** to follow `callers_of` / `callees_of` relationships.
138
+ 3. **Execution flow analysis** to see full paths through suspected areas.
139
+ 4. **Change detection** (`git diff`, `detect_changes`) to check if recent changes caused the issue.
140
+ 5. **Impact analysis** on suspected files to see what else is affected.
141
+
142
+ **Token efficiency**: start with the narrowest context possible, then expand. Target ≤5 tool calls and ≤800 total output tokens for the exploration phase.
143
+
144
+ ## Phase 4 — Hypothesise
145
+
146
+ Generate **3–5 ranked hypotheses** before testing any of them.
147
+
148
+ Each hypothesis must be **falsifiable**: state the prediction it makes.
149
+
150
+ > Format: "If <X> is the cause, then <changing Y> will make the bug disappear."
151
+
152
+ **Show the ranked list to the user before testing.** They often have domain knowledge that re-ranks instantly.
153
+
154
+ ## Phase 5 — Instrument
155
+
156
+ Each probe must map to a specific prediction from Phase 4. **Change one variable at a time.**
157
+
158
+ Tool preference:
159
+ 1. **Debugger / REPL inspection** if the env supports it.
160
+ 2. **Targeted logs** at the boundaries that distinguish hypotheses.
161
+ 3. Never "log everything and grep".
162
+
163
+ **Tag every debug log** with a unique prefix, e.g. `[DEBUG-a4f2]`. Cleanup at the end becomes a single grep.
164
+
165
+ **Perf branch.** For performance regressions: establish a baseline measurement first, then bisect. Measure first, fix second.
166
+
167
+ ## Phase 6 — Fix + regression test
168
+
169
+ Write the regression test **before the fix** — but only if there is a **correct seam** for it.
170
+
171
+ A correct seam is one where the test exercises the **real bug pattern** as it occurs at the call site.
172
+
173
+ **If no correct seam exists, that itself is the finding.** Flag this for the next `/architecture` run.
174
+
175
+ ## Phase 7 — Cleanup + post-mortem
176
+
177
+ Required before declaring done:
178
+ - [ ] Original repro no longer reproduces.
179
+ - [ ] Regression test passes (or absence of seam is documented).
180
+ - [ ] All `[DEBUG-...]` instrumentation removed.
181
+ - [ ] Throwaway prototypes deleted.
182
+ - [ ] The correct hypothesis is stated in the commit / PR message.
183
+
184
+ **Then ask: what would have prevented this bug?** If the answer involves architectural change, hand off to `/architecture` with the specifics.
185
+ 中定义的全部阶段。
186
+ - 按 ## Phase 1 — Build a feedback loop
187
+
188
+ **This is the skill.** Everything else is mechanical. If you have a fast, deterministic, agent-runnable pass/fail signal for the bug, you will find the cause.
189
+
190
+ Spend disproportionate effort here. **Be aggressive. Be creative. Refuse to give up.**
191
+
192
+ ### Ways to construct one — try in roughly this order
193
+
194
+ 1. **Failing test** at whatever seam reaches the bug — unit, integration, e2e.
195
+ 2. **Curl / HTTP script** against a running dev server.
196
+ 3. **CLI invocation** with a fixture input, diffing stdout against a known-good snapshot.
197
+ 4. **Headless browser script** (Playwright / Puppeteer).
198
+ 5. **Replay a captured trace.** Save a real network request / payload / event log to disk; replay it through the code path in isolation.
199
+ 6. **Throwaway harness.** Spin up a minimal subset of the system that exercises the bug code path.
200
+ 7. **Property / fuzz loop.** Run 1000 random inputs and look for the failure mode.
201
+ 8. **Bisection harness.** Automate `git bisect run` if the bug appeared between two known states.
202
+ 9. **Differential loop.** Run old-version vs new-version and diff outputs.
203
+ 10. **HITL bash script.** Last resort. If a human must click, drive them with a structured loop.
204
+
205
+ ### Iterate on the loop itself
206
+
207
+ - Can I make it faster? (Cache setup, skip unrelated init.)
208
+ - Can I make the signal sharper? (Assert on the specific symptom.)
209
+ - Can I make it more deterministic? (Pin time, seed RNG, isolate filesystem.)
210
+
211
+ ### Non-deterministic bugs
212
+
213
+ The goal is a **higher reproduction rate**. Loop the trigger 100×, parallelise, add stress. A 50%-flake bug is debuggable; 1% is not.
214
+
215
+ ### When you genuinely cannot build a loop
216
+
217
+ Stop and say so explicitly. List what you tried. Ask the user for: (a) access to the repro environment, (b) a captured artifact, or (c) permission to add temporary production instrumentation.
218
+
219
+ **Do not proceed to Phase 2 until you have a loop you believe in.**
220
+ 的约束完成收敛。
@@ -0,0 +1,31 @@
1
+ ---
2
+ name: diagnose
3
+ description: Disciplined diagnosis loop for hard bugs and performance regressions. Use when user says 'diagnose this', reports a hard bug, describes a performance regression, or asks why something fails.
4
+ ---
5
+
6
+ # Diagnose
7
+
8
+ A discipline for hard bugs where the root cause is **not yet known**.
9
+
10
+ ### 入口条件
11
+
12
+ **Skill boundary:**
13
+ - If you have a **specific symptom, stack trace, or error message** and need to trace its root cause through the codebase, load `/gxpm-debug-issue` first.
14
+ - If you need to **understand code structure** without debugging a specific bug, load `/gxpm-explore-codebase` first.
15
+ - If you are **refactoring** and discover a bug mid-refactor, pause the refactor and work the bug with this skill (`/gxpm-diagnose`) or `/gxpm-debug-issue` instead of continuing the refactor.
16
+
17
+ - **触发时机**:Use when user says 'diagnose this', reports a hard bug, describes a performance regression, or asks why something fails.
18
+ - **纪律**:Skip phases only when explicitly justified.
19
+
20
+ ### 可操作流程
21
+
22
+ {{REFERENCE:phases}}
23
+
24
+ ### 红旗清单 / 反模式
25
+
26
+ {{REFERENCE:feedback-loop}}
27
+
28
+ ### 验证清单 / 出口条件
29
+
30
+ - 完成 {{REFERENCE:phases}} 中定义的全部阶段。
31
+ - 按 {{REFERENCE:feedback-loop}} 的约束完成收敛。
@@ -0,0 +1,34 @@
1
+ ## Phase 1 — Build a feedback loop
2
+
3
+ **This is the skill.** Everything else is mechanical. If you have a fast, deterministic, agent-runnable pass/fail signal for the bug, you will find the cause.
4
+
5
+ Spend disproportionate effort here. **Be aggressive. Be creative. Refuse to give up.**
6
+
7
+ ### Ways to construct one — try in roughly this order
8
+
9
+ 1. **Failing test** at whatever seam reaches the bug — unit, integration, e2e.
10
+ 2. **Curl / HTTP script** against a running dev server.
11
+ 3. **CLI invocation** with a fixture input, diffing stdout against a known-good snapshot.
12
+ 4. **Headless browser script** (Playwright / Puppeteer).
13
+ 5. **Replay a captured trace.** Save a real network request / payload / event log to disk; replay it through the code path in isolation.
14
+ 6. **Throwaway harness.** Spin up a minimal subset of the system that exercises the bug code path.
15
+ 7. **Property / fuzz loop.** Run 1000 random inputs and look for the failure mode.
16
+ 8. **Bisection harness.** Automate `git bisect run` if the bug appeared between two known states.
17
+ 9. **Differential loop.** Run old-version vs new-version and diff outputs.
18
+ 10. **HITL bash script.** Last resort. If a human must click, drive them with a structured loop.
19
+
20
+ ### Iterate on the loop itself
21
+
22
+ - Can I make it faster? (Cache setup, skip unrelated init.)
23
+ - Can I make the signal sharper? (Assert on the specific symptom.)
24
+ - Can I make it more deterministic? (Pin time, seed RNG, isolate filesystem.)
25
+
26
+ ### Non-deterministic bugs
27
+
28
+ The goal is a **higher reproduction rate**. Loop the trigger 100×, parallelise, add stress. A 50%-flake bug is debuggable; 1% is not.
29
+
30
+ ### When you genuinely cannot build a loop
31
+
32
+ Stop and say so explicitly. List what you tried. Ask the user for: (a) access to the repro environment, (b) a captured artifact, or (c) permission to add temporary production instrumentation.
33
+
34
+ **Do not proceed to Phase 2 until you have a loop you believe in.**
@@ -0,0 +1,43 @@
1
+ # Feedback Loop Construction Catalog
2
+
3
+ Reference for Phase 1 of `/gxpm-diagnose`. Try these in roughly this order.
4
+
5
+ ## 1. Failing test
6
+ At whatever seam reaches the bug — unit, integration, e2e.
7
+
8
+ ## 2. Curl / HTTP script
9
+ Against a running dev server.
10
+
11
+ ## 3. CLI invocation
12
+ With a fixture input, diffing stdout against a known-good snapshot.
13
+
14
+ ## 4. Headless browser script
15
+ Playwright / Puppeteer — drives the UI, asserts on DOM/console/network.
16
+
17
+ ## 5. Replay a captured trace
18
+ Save a real network request / payload / event log to disk; replay it through the code path in isolation.
19
+
20
+ ## 6. Throwaway harness
21
+ Spin up a minimal subset of the system (one service, mocked deps) that exercises the bug code path with a single function call.
22
+
23
+ ## 7. Property / fuzz loop
24
+ If the bug is "sometimes wrong output", run 1000 random inputs and look for the failure mode.
25
+
26
+ ## 8. Bisection harness
27
+ If the bug appeared between two known states (commit, dataset, version), automate "boot at state X, check, repeat" so you can `git bisect run` it.
28
+
29
+ ## 9. Differential loop
30
+ Run the same input through old-version vs new-version (or two configs) and diff outputs.
31
+
32
+ ## 10. HITL bash script
33
+ Last resort. If a human must click, drive _them_ with a structured loop so the feedback is still structured. Captured output feeds back to you.
34
+
35
+ ---
36
+
37
+ ## Loop quality checklist
38
+
39
+ Once you have _a_ loop, iterate:
40
+
41
+ - [ ] **Faster** — Cache setup, skip unrelated init, narrow the test scope.
42
+ - [ ] **Sharper** — Assert on the specific symptom, not "didn't crash".
43
+ - [ ] **More deterministic** — Pin time, seed RNG, isolate filesystem, freeze network.
@@ -0,0 +1,60 @@
1
+ ## Phase 2 — Reproduce
2
+
3
+ Run the loop. Confirm:
4
+ - [ ] The loop produces the failure mode the **user** described.
5
+ - [ ] The failure is reproducible across multiple runs.
6
+ - [ ] You have captured the exact symptom.
7
+
8
+ ## Phase 3 — Explore with the codebase
9
+
10
+ Use available code intelligence tools (e.g., GitNexus MCP, grep, ReadFile) to accelerate understanding:
11
+
12
+ 1. **Semantic search** to find code related to the symptom.
13
+ 2. **Call-chain tracing** to follow `callers_of` / `callees_of` relationships.
14
+ 3. **Execution flow analysis** to see full paths through suspected areas.
15
+ 4. **Change detection** (`git diff`, `detect_changes`) to check if recent changes caused the issue.
16
+ 5. **Impact analysis** on suspected files to see what else is affected.
17
+
18
+ **Token efficiency**: start with the narrowest context possible, then expand. Target ≤5 tool calls and ≤800 total output tokens for the exploration phase.
19
+
20
+ ## Phase 4 — Hypothesise
21
+
22
+ Generate **3–5 ranked hypotheses** before testing any of them.
23
+
24
+ Each hypothesis must be **falsifiable**: state the prediction it makes.
25
+
26
+ > Format: "If `X` is the cause, then changing `Y` will make the bug disappear."
27
+
28
+ **Show the ranked list to the user before testing.** They often have domain knowledge that re-ranks instantly.
29
+
30
+ ## Phase 5 — Instrument
31
+
32
+ Each probe must map to a specific prediction from Phase 4. **Change one variable at a time.**
33
+
34
+ Tool preference:
35
+ 1. **Debugger / REPL inspection** if the env supports it.
36
+ 2. **Targeted logs** at the boundaries that distinguish hypotheses.
37
+ 3. Never "log everything and grep".
38
+
39
+ **Tag every debug log** with a unique prefix, e.g. `[DEBUG-a4f2]`. Cleanup at the end becomes a single grep.
40
+
41
+ **Perf branch.** For performance regressions: establish a baseline measurement first, then bisect. Measure first, fix second.
42
+
43
+ ## Phase 6 — Fix + regression test
44
+
45
+ Write the regression test **before the fix** — but only if there is a **correct seam** for it.
46
+
47
+ A correct seam is one where the test exercises the **real bug pattern** as it occurs at the call site.
48
+
49
+ **If no correct seam exists, that itself is the finding.** Flag this for the next `/architecture` run.
50
+
51
+ ## Phase 7 — Cleanup + post-mortem
52
+
53
+ Required before declaring done:
54
+ - [ ] Original repro no longer reproduces.
55
+ - [ ] Regression test passes (or absence of seam is documented).
56
+ - [ ] All `[DEBUG-...]` instrumentation removed.
57
+ - [ ] Throwaway prototypes deleted.
58
+ - [ ] The correct hypothesis is stated in the commit / PR message.
59
+
60
+ **Then ask: what would have prevented this bug?** If the answer involves architectural change, hand off to `/architecture` with the specifics.
@@ -0,0 +1,78 @@
1
+ ---
2
+ name: gxpm-eval
3
+ description: Skill quality evaluation harness for static analysis. Use when adding a new skill, modifying skill structure, auditing skill quality, or checking for governance compliance.
4
+ ---
5
+
6
+ # gxpm-eval
7
+
8
+ Lightweight static analysis for gxpm skills. Checks frontmatter completeness,
9
+ trigger sections, description quality, and reference links.
10
+
11
+ ## 入口条件
12
+
13
+ - After creating or modifying a skill
14
+ - During `self-review` or `qa` phase before shipping skill changes
15
+ - When `bun run check` reports skill doc drift
16
+
17
+ ## 可操作流程
18
+
19
+ ### 命令
20
+
21
+ ```bash
22
+ gxpm-eval list # list all discoverable skills
23
+ gxpm-eval run # eval all skills
24
+ gxpm-eval run gxpm-diagnose # eval one skill
25
+ gxpm-eval run --json # machine-readable output
26
+ ```
27
+
28
+ ### 评分标准
29
+
30
+ Each skill is scored on 9 dimensions. Pass threshold: ≥ 60%.
31
+
32
+ #### Universal checks (all skill types)
33
+
34
+ | Check | Points | Pass criteria |
35
+ |-------|--------|---------------|
36
+ | frontmatter | 10 | Has YAML `---` block |
37
+ | name | 10 | `name:` field present and non-empty |
38
+ | description | 10 | 20-300 characters **and** contains "Use when" trigger phrase |
39
+ | triggers | 10 | Has `## When to trigger` or `## Commands` |
40
+ | length | 10 | 10-1000 lines (warn if >100 without `references/`) |
41
+ | references | 10 | Has `## Read Next` or `## References` |
42
+
43
+ #### Type-specific checks
44
+
45
+ | Skill type | Points | Pass criteria |
46
+ |-------|--------|---------------|
47
+ | **Discipline** skills | 10 | Has `## Red Flags` AND `## Rationalization Table` AND explicit negation (`**No exceptions:**`) |
48
+ | **Pattern** skills | 10 | Has `## Recognition criteria` AND `## When NOT to apply` AND `## Counter-examples` |
49
+ | **Reference** skills | 10 | Has concrete command examples with expected output |
50
+
51
+ A skill missing its type-specific structures loses the full 10 points for that dimension.
52
+
53
+ ### 集成到 CI
54
+
55
+ Add to `gxpm-check` or CI:
56
+
57
+ ```bash
58
+ bun run scripts/eval.ts run --json
59
+ ```
60
+
61
+ ## 红旗清单 / 反模式
62
+
63
+ - 不要依赖 gxpm-eval 做 LLM 输出质量评分 — 它只做静态结构分析
64
+ - 不要假设结构通过 = 内容正确 — eval 不验证 skill 内容的正确性
65
+ - 不要在未通过 ≥ 60% 阈值时将 skill 标记为就绪
66
+
67
+ ## 验证清单 / 出口条件
68
+
69
+ - [ ] 所有 discoverable skills 已列入评估结果
70
+ - [ ] 每个 skill 在 9 个维度上的得分已计算
71
+ - [ ] 总分达到 ≥ 60% 阈值
72
+ - [ ] type-specific checks 已按 skill 类型正确匹配
73
+ - [ ] `bun run scripts/eval.ts run --json` 输出可被下游 CI 消费
74
+
75
+ ## Read Next
76
+
77
+ - `docs/governance/skill-authoring.md`
78
+ - Main `/gxpm` skill for skill toolchain overview