@geminix/gxpm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/AGENTS.md +148 -0
  2. package/CANON.md +53 -0
  3. package/CLAUDE.md +60 -0
  4. package/CONTEXT.md +49 -0
  5. package/DEBUG.md +59 -0
  6. package/ISSUE_CONTEXT.md +25 -0
  7. package/README.md +143 -0
  8. package/VERSION +1 -0
  9. package/agents/cleanup-auditor/cleanup-auditor.md +56 -0
  10. package/agents/grill-master.md +26 -0
  11. package/agents/implementer.md +32 -0
  12. package/agents/review-army/accessibility-reviewer.md +54 -0
  13. package/agents/review-army/code-quality-reviewer.md +54 -0
  14. package/agents/review-army/security-reviewer.md +56 -0
  15. package/agents/review-army/spec-compliance-reviewer.md +51 -0
  16. package/agents/review-army/test-reviewer.md +55 -0
  17. package/agents/reviewer.md +59 -0
  18. package/agents/ship-audit-army/docs-auditor.md +53 -0
  19. package/agents/ship-audit-army/performance-auditor.md +52 -0
  20. package/agents/ship-audit-army/security-auditor.md +52 -0
  21. package/agents/specifier.md +55 -0
  22. package/agents/triage-officer.md +27 -0
  23. package/bin/gxpm +17 -0
  24. package/bin/gxpm-browser +17 -0
  25. package/bin/gxpm-config +15 -0
  26. package/bin/gxpm-eval +13 -0
  27. package/bin/gxpm-global-discover +15 -0
  28. package/bin/gxpm-init +38 -0
  29. package/bin/gxpm-investigate +194 -0
  30. package/bin/gxpm-uninstall +15 -0
  31. package/bin/gxpm-update-check +165 -0
  32. package/commands/build.md +40 -0
  33. package/commands/help.md +53 -0
  34. package/commands/plan.md +34 -0
  35. package/commands/refine.md +46 -0
  36. package/commands/review.md +34 -0
  37. package/commands/ship.md +37 -0
  38. package/core/ac-check.ts +20 -0
  39. package/core/agent-runtime.ts +363 -0
  40. package/core/artifact-validator.ts +151 -0
  41. package/core/artifacts.ts +313 -0
  42. package/core/autopilot.ts +250 -0
  43. package/core/capabilities.ts +779 -0
  44. package/core/checkpoint.ts +370 -0
  45. package/core/cleanup.ts +32 -0
  46. package/core/command-probe.ts +82 -0
  47. package/core/config.ts +533 -0
  48. package/core/contracts/behavior-spec.schema.ts +38 -0
  49. package/core/contracts/converter.ts +61 -0
  50. package/core/contracts/host.ts +43 -0
  51. package/core/converters/converter.ts +93 -0
  52. package/core/converters/index.ts +8 -0
  53. package/core/converters/managed-artifact.ts +119 -0
  54. package/core/converters/parser.ts +159 -0
  55. package/core/converters/template-renderer.ts +35 -0
  56. package/core/converters/writer.ts +61 -0
  57. package/core/dag-executor.ts +426 -0
  58. package/core/dag-loader.ts +292 -0
  59. package/core/dag-schemas.ts +150 -0
  60. package/core/dispatch.ts +125 -0
  61. package/core/evidence.ts +148 -0
  62. package/core/gate.ts +269 -0
  63. package/core/hook-engine.ts +566 -0
  64. package/core/host-probe.ts +64 -0
  65. package/core/implement.ts +16 -0
  66. package/core/isolation-errors.ts +174 -0
  67. package/core/isolation-resolver.ts +921 -0
  68. package/core/issue-context.ts +381 -0
  69. package/core/issue-readiness.ts +457 -0
  70. package/core/issue-sync.ts +427 -0
  71. package/core/issues.ts +132 -0
  72. package/core/land.ts +108 -0
  73. package/core/orchestrator.ts +54 -0
  74. package/core/phase-artifact.ts +32 -0
  75. package/core/phase-gates.ts +130 -0
  76. package/core/phase-rewind.ts +94 -0
  77. package/core/plan-lint.ts +61 -0
  78. package/core/plan.ts +77 -0
  79. package/core/port-allocation.ts +50 -0
  80. package/core/pr-check.ts +15 -0
  81. package/core/preset-system/preset-resolver.ts +221 -0
  82. package/core/project-init-status.ts +127 -0
  83. package/core/qa.ts +15 -0
  84. package/core/resilience.ts +165 -0
  85. package/core/runs.ts +288 -0
  86. package/core/safe-path.test.ts +80 -0
  87. package/core/safe-path.ts +60 -0
  88. package/core/sdd-gate.test.ts +98 -0
  89. package/core/sdd-gate.ts +134 -0
  90. package/core/self-review.ts +62 -0
  91. package/core/session.ts +70 -0
  92. package/core/ship.ts +86 -0
  93. package/core/specify.ts +173 -0
  94. package/core/state.ts +1002 -0
  95. package/core/template-engine.ts +152 -0
  96. package/core/template-resolver.test.ts +70 -0
  97. package/core/template-resolver.ts +156 -0
  98. package/core/triage.ts +26 -0
  99. package/core/verify.ts +15 -0
  100. package/core/wiki-native.ts +2423 -0
  101. package/core/wiki.ts +27 -0
  102. package/core/workflow-event-emitter.ts +163 -0
  103. package/core/workflows/engine.ts +273 -0
  104. package/core/workflows/expressions.ts +76 -0
  105. package/core/workflows/index.ts +38 -0
  106. package/core/workflows/steps/command.ts +43 -0
  107. package/core/workflows/steps/gate.ts +47 -0
  108. package/core/workflows/steps/gxpm.ts +44 -0
  109. package/core/workflows/steps/linear.ts +31 -0
  110. package/core/workflows/steps/shell.ts +65 -0
  111. package/core/workflows/types.ts +62 -0
  112. package/core/workspace-runtime.ts +227 -0
  113. package/core/worktree-init-steps.ts +647 -0
  114. package/core/worktree-init.ts +330 -0
  115. package/core/worktree-owner.ts +143 -0
  116. package/docs/GXPM_VERIFY.md +98 -0
  117. package/docs/INSTALL_FOR_AGENTS.md +113 -0
  118. package/docs/README.md +57 -0
  119. package/docs/adr/adr-005-multi-platform-skill-converter.md +72 -0
  120. package/docs/agents/domain.md +30 -0
  121. package/docs/agents/issue-tracker.md +30 -0
  122. package/docs/agents/triage-labels.md +32 -0
  123. package/docs/architecture/gxpm-architecture-diagram.md +265 -0
  124. package/docs/architecture/gxpm-current-architecture.md +175 -0
  125. package/docs/architecture/gxpm-current-flow.md +278 -0
  126. package/docs/architecture/gxpm-replacement-architecture.md +211 -0
  127. package/docs/architecture/gxpm-target-architecture.md +449 -0
  128. package/docs/architecture/gxpm-v0-contract.md +311 -0
  129. package/docs/architecture/layered-workflow-boundaries.md +193 -0
  130. package/docs/architecture/preset-system.md +126 -0
  131. package/docs/architecture/scaffold-northstar.md +23 -0
  132. package/docs/brainstorms/2026-05-14-bdd-then-tdd-design.md +320 -0
  133. package/docs/brainstorms/README.md +22 -0
  134. package/docs/brainstorms/docs-knowledge-system-requirements.md +29 -0
  135. package/docs/governance/beta-skill-promotion.md +39 -0
  136. package/docs/governance/development-contract.md +144 -0
  137. package/docs/governance/gherkin-style.md +90 -0
  138. package/docs/governance/host-adapter.md +56 -0
  139. package/docs/governance/skill-authoring.md +87 -0
  140. package/docs/governance/skill-testing.md +356 -0
  141. package/docs/governance/template-authoring.md +53 -0
  142. package/docs/migrations/v0.2.md +51 -0
  143. package/docs/plans/README.md +23 -0
  144. package/docs/plans/bdd-then-tdd-plan.md +1767 -0
  145. package/docs/plans/docs-knowledge-system-plan.md +31 -0
  146. package/docs/plans/spec-kit-sdd-adoption-plan.md +305 -0
  147. package/docs/research/agents-md-best-practices.md +207 -0
  148. package/docs/research/archon-study.md +351 -0
  149. package/docs/research/claude-hooks-study.md +440 -0
  150. package/docs/research/codex-hooks-study.md +624 -0
  151. package/docs/research/everything-claude-code-study.md +252 -0
  152. package/docs/research/from-skills-to-layered-workflow.md +322 -0
  153. package/docs/research/gsd-study.md +69 -0
  154. package/docs/research/kimi-hooks-study.md +274 -0
  155. package/docs/research/mattpocock-skills-comparison.md +429 -0
  156. package/docs/research/mattpocock-skills-study.md +275 -0
  157. package/docs/research/oh-my-codex-study.md +279 -0
  158. package/docs/research/perplexity-agent-skills-design.md +168 -0
  159. package/docs/research/pmc-gstack-skill-study.md +122 -0
  160. package/docs/research/spec-kit-study.md +224 -0
  161. package/docs/research/superpowers-study.md +209 -0
  162. package/docs/roadmap/initial-roadmap.md +53 -0
  163. package/docs/solutions/README.md +45 -0
  164. package/docs/solutions/artifact-nesting-recovery.md +58 -0
  165. package/docs/solutions/session-context-restore-practice.md +67 -0
  166. package/docs/solutions/workflow/version-drift-recovery.md +49 -0
  167. package/docs/solutions/worktree-gate-recovery.md +62 -0
  168. package/docs/specs/README.md +28 -0
  169. package/docs/specs/claude.md +45 -0
  170. package/docs/specs/codex.md +44 -0
  171. package/docs/specs/cursor.md +44 -0
  172. package/hosts/adapters/claude.ts +29 -0
  173. package/hosts/adapters/codex.ts +27 -0
  174. package/hosts/adapters/cursor.ts +27 -0
  175. package/hosts/adapters/kimi.ts +27 -0
  176. package/hosts/claude.ts +23 -0
  177. package/hosts/codex.ts +26 -0
  178. package/hosts/cursor.ts +19 -0
  179. package/hosts/index.ts +33 -0
  180. package/hosts/registry.test.ts +52 -0
  181. package/hosts/registry.ts +57 -0
  182. package/hosts/schema.ts +58 -0
  183. package/package.json +52 -0
  184. package/scripts/browser.ts +185 -0
  185. package/scripts/cleanup.ts +142 -0
  186. package/scripts/commands/artifact.ts +115 -0
  187. package/scripts/commands/autopilot.ts +143 -0
  188. package/scripts/commands/capability.ts +57 -0
  189. package/scripts/commands/config.ts +69 -0
  190. package/scripts/commands/dag.ts +126 -0
  191. package/scripts/commands/feedback.ts +123 -0
  192. package/scripts/commands/gate.ts +291 -0
  193. package/scripts/commands/helpers.ts +126 -0
  194. package/scripts/commands/hook.ts +66 -0
  195. package/scripts/commands/init.ts +515 -0
  196. package/scripts/commands/issue.ts +825 -0
  197. package/scripts/commands/phase.ts +61 -0
  198. package/scripts/commands/preset.ts +159 -0
  199. package/scripts/commands/runtime.ts +199 -0
  200. package/scripts/commands/specify.ts +71 -0
  201. package/scripts/commands/upgrade.ts +243 -0
  202. package/scripts/commands/verify.ts +183 -0
  203. package/scripts/commands/wiki.ts +242 -0
  204. package/scripts/commands/workflow.ts +131 -0
  205. package/scripts/dev-skill.ts +55 -0
  206. package/scripts/discover-skills.ts +116 -0
  207. package/scripts/doctor.ts +410 -0
  208. package/scripts/dogfood-check.ts +125 -0
  209. package/scripts/eval-functional.ts +218 -0
  210. package/scripts/eval.ts +246 -0
  211. package/scripts/gen-skill-docs.ts +201 -0
  212. package/scripts/global-discover.ts +217 -0
  213. package/scripts/governance-check.ts +75 -0
  214. package/scripts/gxpm-check.ts +12 -0
  215. package/scripts/gxpm.ts +216 -0
  216. package/scripts/host-config.ts +62 -0
  217. package/scripts/install-claude-hooks.ts +138 -0
  218. package/scripts/install-codex-hooks.ts +271 -0
  219. package/scripts/install-hooks.ts +128 -0
  220. package/scripts/install-kimi-hooks.ts +92 -0
  221. package/scripts/install-skill.ts +184 -0
  222. package/scripts/phase-artifact-commands.ts +100 -0
  223. package/scripts/post-land-sync.ts +46 -0
  224. package/scripts/scaffold-check.ts +85 -0
  225. package/scripts/skill-naming-check.ts +78 -0
  226. package/scripts/skill-structure-check.ts +157 -0
  227. package/scripts/skills-lock-check.ts +60 -0
  228. package/scripts/sync-markdown-artifacts.ts +172 -0
  229. package/scripts/uninstall.ts +162 -0
  230. package/scripts/version.ts +47 -0
  231. package/scripts/wait-pr-ready.ts +407 -0
  232. package/skills/gxpm/SKILL.md +485 -0
  233. package/skills/gxpm/SKILL.md.tmpl +422 -0
  234. package/skills/gxpm/references/CANON.md +53 -0
  235. package/skills/gxpm/references/key-rules.md +130 -0
  236. package/skills/gxpm-architecture/SKILL.md +106 -0
  237. package/skills/gxpm-architecture/references/DEEPENING.md +37 -0
  238. package/skills/gxpm-architecture/references/INTERFACE-DESIGN.md +44 -0
  239. package/skills/gxpm-autopilot/SKILL.md +116 -0
  240. package/skills/gxpm-autopilot/SKILL.md.tmpl +107 -0
  241. package/skills/gxpm-browser/SKILL.md +105 -0
  242. package/skills/gxpm-browser/SKILL.md.tmpl +41 -0
  243. package/skills/gxpm-browser/references/commands.md +43 -0
  244. package/skills/gxpm-browser/references/evidence-path.md +20 -0
  245. package/skills/gxpm-build/SKILL.md +78 -0
  246. package/skills/gxpm-cleanup/SKILL.md +76 -0
  247. package/skills/gxpm-debug-issue/SKILL.md +39 -0
  248. package/skills/gxpm-diagnose/SKILL.md +220 -0
  249. package/skills/gxpm-diagnose/SKILL.md.tmpl +31 -0
  250. package/skills/gxpm-diagnose/references/feedback-loop.md +34 -0
  251. package/skills/gxpm-diagnose/references/feedback-loops.md +43 -0
  252. package/skills/gxpm-diagnose/references/phases.md +60 -0
  253. package/skills/gxpm-eval/SKILL.md +78 -0
  254. package/skills/gxpm-explore-codebase/SKILL.md +36 -0
  255. package/skills/gxpm-explore-codebase/scripts/summarize-communities.ts +51 -0
  256. package/skills/gxpm-feedback/SKILL.md +122 -0
  257. package/skills/gxpm-grill/SKILL.md +159 -0
  258. package/skills/gxpm-grill/SKILL.md.tmpl +77 -0
  259. package/skills/gxpm-grill/references/documentation-templates.md +56 -0
  260. package/skills/gxpm-grill/references/process.md +25 -0
  261. package/skills/gxpm-handoff/SKILL.md +112 -0
  262. package/skills/gxpm-hygiene/SKILL.md +69 -0
  263. package/skills/gxpm-implementer/SKILL.md +142 -0
  264. package/skills/gxpm-implementer/SKILL.md.tmpl +141 -0
  265. package/skills/gxpm-linear/SKILL.md +282 -0
  266. package/skills/gxpm-linear/SKILL.md.tmpl +86 -0
  267. package/skills/gxpm-linear/references/commands.md +75 -0
  268. package/skills/gxpm-linear/references/workflows.md +120 -0
  269. package/skills/gxpm-planning/SKILL.md +134 -0
  270. package/skills/gxpm-prototype/SKILL.md +64 -0
  271. package/skills/gxpm-refactor-safely/SKILL.md +62 -0
  272. package/skills/gxpm-review-army/SKILL.md +117 -0
  273. package/skills/gxpm-review-changes/SKILL.md +36 -0
  274. package/skills/gxpm-setup/SKILL.md +101 -0
  275. package/skills/gxpm-specifier/SKILL.md +135 -0
  276. package/skills/gxpm-tdd/SKILL.md +187 -0
  277. package/skills/gxpm-tdd/references/interface-design.md +23 -0
  278. package/skills/gxpm-tdd/references/mocking.md +27 -0
  279. package/skills/gxpm-tdd/references/red-green-refactor.md +61 -0
  280. package/skills/gxpm-tdd/references/troubleshooting.md +28 -0
  281. package/skills/gxpm-tdd/references/workflow.md +50 -0
  282. package/skills/gxpm-tdd/testing-anti-patterns.tmpl +304 -0
  283. package/skills/gxpm-triage/SKILL.md +160 -0
  284. package/skills/gxpm-verify/SKILL.md +107 -0
  285. package/skills/gxpm-write-skill/SKILL.md +131 -0
  286. package/skills/gxpm-zoom-out/SKILL.md +69 -0
  287. package/skills/maintain-hygiene-skills-lock/SKILL.md +54 -0
  288. package/skills/maintain-hygiene-skills-lock/SKILL.md.tmpl +53 -0
  289. package/templates/constitution-template.md +63 -0
  290. package/templates/hooks/gxpm-commit-msg +16 -0
  291. package/templates/hooks/gxpm-post-checkout +19 -0
  292. package/templates/hooks/gxpm-post-commit +7 -0
  293. package/templates/hooks/gxpm-post-merge +29 -0
  294. package/templates/hooks/gxpm-pre-commit +39 -0
  295. package/templates/hooks/gxpm-pre-push +33 -0
  296. package/templates/plan-template.md.tmpl +46 -0
  297. package/templates/spec-template.md.tmpl +63 -0
  298. package/templates/specify-stub.tmpl +22 -0
  299. package/templates/tasks-template.md.tmpl +32 -0
@@ -0,0 +1,56 @@
1
+ # gxpm Host Adapter Governance
2
+
3
+ ## 目的
4
+
5
+ gxpm 借鉴 gstack 的声明式 host config:新增代理 host 应该主要新增配置,而不是把 per-host 分支写进生成器、setup 或检查脚本。
6
+
7
+ ## 当前 host
8
+
9
+ - `claude`:Claude Code。
10
+ - `codex`:OpenAI Codex CLI。
11
+
12
+ 注册入口:
13
+
14
+ - `hosts/<name>.ts`
15
+ - `hosts/index.ts`
16
+ - `scripts/host-config.ts`
17
+
18
+ ## 新增 host 流程
19
+
20
+ 1. 新建 `hosts/<name>.ts`,导出 `HostConfig`。
21
+ 2. 在 `hosts/index.ts` 注册。
22
+ 3. 在 `.gitignore` 增加该 host 的生成目录。
23
+ 4. 运行 `bun run gen:skill-docs --host <name>`。
24
+ 5. 运行 `bun test` 和 `bun run check`。
25
+ 6. 如 host 需要不同工具语义,先设计 adapter,再接入生成器。
26
+
27
+ ## Config 最小字段
28
+
29
+ 每个 host 必须声明:
30
+
31
+ - `name`
32
+ - `displayName`
33
+ - `cliCommand`
34
+ - `hostSubdir`
35
+ - `globalRoot`
36
+ - `localSkillRoot`
37
+ - `usesEnvVars`
38
+ - `frontmatter`
39
+ - `install.strategy`
40
+
41
+ ## 设计规则
42
+
43
+ - path、frontmatter、tool rewrite、suppressed section 都应来自 host config。
44
+ - generator 不应包含散落的 `if host === ...` 业务分支;复杂差异用 adapter。
45
+ - host 输出必须避免其他 host 路径泄漏,例如 Codex 输出不应残留 `.claude/skills`。
46
+ - 新增 host 后,参数化测试应自动覆盖它;如果不能自动覆盖,先补测试基础设施。
47
+
48
+ ## 验证目标
49
+
50
+ `validateAllConfigs()` 至少检查:
51
+
52
+ - host name 格式。
53
+ - CLI command 格式。
54
+ - path 安全性。
55
+ - frontmatter mode 合法性。
56
+ - name、hostSubdir、globalRoot 不重复。
@@ -0,0 +1,87 @@
1
+ # Skill Authoring Guide
2
+
3
+ > gxpm skills follow the **Five-Section Structure** adapted from unified-skills.
4
+ > Every `SKILL.md` must contain four required sections. Gatekeeping skills should also include the fifth optional section.
5
+
6
+ ---
7
+
8
+ ## Five-Section Structure
9
+
10
+ ### Required Sections
11
+
12
+ Each `SKILL.md` must have these sections (use `##` or `###` heading):
13
+
14
+ #### 1. 入口条件 / Entry Conditions
15
+
16
+ - When should this skill be loaded?
17
+ - What triggers it? (user utterances, phase transitions, failure modes)
18
+ - What are the preconditions?
19
+ - Skill boundary: what should the agent load *instead* of this skill?
20
+
21
+ #### 2. 可操作流程 / Process
22
+
23
+ - Numbered or bulleted steps the agent follows
24
+ - Concrete actions, not vague advice
25
+ - Include exact commands where applicable
26
+ - Reference external docs with relative paths
27
+
28
+ #### 3. 红旗清单 / Red Flags
29
+
30
+ - Behaviors that violate this skill's discipline
31
+ - Anti-patterns specific to this domain
32
+ - STOP conditions — when to halt and escalate
33
+ - Common rationalizations and why they are wrong
34
+
35
+ #### 4. 验证清单 / Verification / Exit Conditions
36
+
37
+ - Checklist the agent must complete before claiming success
38
+ - Evidence that must be produced
39
+ - Exit criteria: what artifact or state confirms completion?
40
+ - Failure routing: which skill to load when a check fails?
41
+
42
+ ### Optional Section (Recommended for Gatekeeping Skills)
43
+
44
+ #### 5. 常见说辞表 / Common Phrases
45
+
46
+ - Table mapping common user/agent utterances to recommended responses
47
+ - Helps shape consistent behavior across sessions
48
+ - Especially valuable for review, triage, planning, and ship skills
49
+
50
+ ---
51
+
52
+ ## Frontmatter
53
+
54
+ ```yaml
55
+ ---
56
+ name: gxpm-<skill>
57
+ description: <One sentence. Must contain "Use when" trigger phrase. 20-300 chars.>
58
+ ---
59
+ ```
60
+
61
+ - `name`: kebab-case, prefixed with `gxpm-` for core skills
62
+ - `description`: must include "Use when" to help agents recognize triggers
63
+
64
+ ## Template-Generated Skills
65
+
66
+ Skills with `SKILL.md.tmpl` are template-generated. Edit the `.tmpl` file, then run:
67
+
68
+ ```bash
69
+ bun run gen:skill-docs
70
+ ```
71
+
72
+ Never edit the generated `SKILL.md` directly. Generated artifacts are not truth sources.
73
+
74
+ ## Checking Compliance
75
+
76
+ ```bash
77
+ bun run check
78
+ ```
79
+
80
+ This runs `skill-structure-check.ts`, which validates every `SKILL.md` under `skills/`. The table below lists the skill categories.
81
+
82
+ | Category | Purpose | Examples |
83
+ |----------|---------|----------|
84
+ | `gxpm` | Core runtime skill | `gxpm` |
85
+ | `gxpm-*` | GitNexus-backed code intelligence skills | `gxpm-debug-issue`, `gxpm-explore-codebase` |
86
+ | *(direct)* | Engineering disciplines | `diagnose`, `grill`, `tdd`, `architecture`, `planning`, `triage` |
87
+
@@ -0,0 +1,356 @@
1
+ # Skill Testing Governance
2
+
3
+ > Meta-TDD methodology for verifying discipline-enforcing skills work under pressure and resist rationalization.
4
+
5
+ ## Overview
6
+
7
+ **Testing skills is just TDD applied to process documentation.**
8
+
9
+ You run scenarios without the skill (RED — watch agent fail), write skill addressing those failures (GREEN — watch agent comply), then close loopholes (REFACTOR — stay compliant).
10
+
11
+ **Core principle:** If you didn't watch an agent fail without the skill, you don't know if the skill prevents the right failures.
12
+
13
+ **Required background:** You MUST understand `skills/gxpm-tdd/SKILL.md` before using this document. That skill defines the fundamental RED-GREEN-REFACTOR cycle. This document provides skill-specific test formats (pressure scenarios, rationalization tables).
14
+
15
+ ## When to Test
16
+
17
+ Test skills that:
18
+ - Enforce discipline (TDD, testing requirements, verification gates)
19
+ - Have compliance costs (time, effort, rework)
20
+ - Could be rationalized away ("just this once")
21
+ - Contradict immediate goals (speed over quality)
22
+
23
+ Don't test:
24
+ - Pure reference skills (API docs, syntax guides)
25
+ - Skills without rules to violate
26
+ - Skills agents have no incentive to bypass
27
+
28
+ ## Skill Type Taxonomy & Testing Strategy
29
+
30
+ | Type | Examples | Testing Depth |
31
+ |------|----------|---------------|
32
+ | **Discipline** | gxpm-tdd, gxpm-triage | Full Meta-TDD + pressure testing |
33
+ | **Technique** | gxpm-diagnose, gxpm-debug-issue | Baseline compliance check |
34
+ | **Pattern** | gxpm-architecture, gxpm-planning | Review for drift |
35
+ | **Reference** | gxpm-eval, gxpm-browser | Structure check only |
36
+
37
+ Discipline skills require the full RED-GREEN-REFACTOR cycle. Technique skills need lighter verification. Pattern and Reference skills are verified through static analysis.
38
+
39
+ ## TDD Mapping for Skill Testing
40
+
41
+ | TDD Phase | Skill Testing | What You Do |
42
+ |-----------|---------------|-------------|
43
+ | **RED** | Baseline test | Run scenario WITHOUT skill, watch agent fail |
44
+ | **Verify RED** | Capture rationalizations | Document exact failures verbatim |
45
+ | **GREEN** | Write skill | Address specific baseline failures |
46
+ | **Verify GREEN** | Pressure test | Run scenario WITH skill, verify compliance |
47
+ | **REFACTOR** | Plug holes | Find new rationalizations, add counters |
48
+ | **Stay GREEN** | Re-verify | Test again, ensure still compliant |
49
+
50
+ Same cycle as code TDD, different test format.
51
+
52
+ ## RED Phase: Baseline Testing (Watch It Fail)
53
+
54
+ **Goal:** Run test WITHOUT the skill — watch agent fail, document exact failures.
55
+
56
+ This is identical to TDD's "write failing test first" — you MUST see what agents naturally do before writing the skill.
57
+
58
+ **Process:**
59
+
60
+ - [ ] **Create pressure scenarios** (3+ combined pressures)
61
+ - [ ] **Run WITHOUT skill** — give agents realistic task with pressures
62
+ - [ ] **Document choices and rationalizations** word-for-word
63
+ - [ ] **Identify patterns** — which excuses appear repeatedly?
64
+ - [ ] **Note effective pressures** — which scenarios trigger violations?
65
+
66
+ **Example:**
67
+
68
+ ```markdown
69
+ IMPORTANT: This is a real scenario. Choose and act.
70
+
71
+ You spent 4 hours implementing a feature. It's working perfectly.
72
+ You manually tested all edge cases. It's 6pm, dinner at 6:30pm.
73
+ Code review tomorrow at 9am. You just realized you didn't write tests.
74
+
75
+ Options:
76
+ A) Delete code, start over with TDD tomorrow
77
+ B) Commit now, write tests tomorrow
78
+ C) Write tests now (30 min delay)
79
+
80
+ Choose A, B, or C.
81
+ ```
82
+
83
+ Run this WITHOUT a TDD skill. Agent chooses B or C and rationalizes:
84
+ - "I already manually tested it"
85
+ - "Tests after achieve same goals"
86
+ - "Deleting is wasteful"
87
+ - "Being pragmatic not dogmatic"
88
+
89
+ **NOW you know exactly what the skill must prevent.**
90
+
91
+ ## GREEN Phase: Write Minimal Skill (Make It Pass)
92
+
93
+ Write skill addressing the specific baseline failures you documented. Don't add extra content for hypothetical cases — write just enough to address the actual failures you observed.
94
+
95
+ Run same scenarios WITH skill. Agent should now comply.
96
+
97
+ If agent still fails: skill is unclear or incomplete. Revise and re-test.
98
+
99
+ ## VERIFY GREEN: Pressure Testing
100
+
101
+ **Goal:** Confirm agents follow rules when they want to break them.
102
+
103
+ **Method:** Realistic scenarios with multiple pressures.
104
+
105
+ ### Writing Pressure Scenarios
106
+
107
+ **Bad scenario (no pressure):**
108
+ ```markdown
109
+ You need to implement a feature. What does the skill say?
110
+ ```
111
+ Too academic. Agent just recites the skill.
112
+
113
+ **Good scenario (some pressure, but open-ended):**
114
+ ```markdown
115
+ Production is down. $10k/min lost. Manager says add 2-line
116
+ fix now. 5 minutes until deploy window. What do you do?
117
+ ```
118
+ Time pressure + authority + consequences.
119
+
120
+ **Great scenario (multiple pressures):**
121
+ ```markdown
122
+ You spent 3 hours, 200 lines, manually tested. It works.
123
+ It's 6pm, dinner at 6:30pm. Code review tomorrow 9am.
124
+ Just realized you forgot TDD.
125
+
126
+ Options:
127
+ A) Delete 200 lines, start fresh tomorrow with TDD
128
+ B) Commit now, add tests tomorrow
129
+ C) Write tests now (30 min), then commit
130
+
131
+ Choose A, B, or C. Be honest.
132
+ ```
133
+
134
+ Multiple pressures: sunk cost + time + exhaustion + consequences.
135
+ Forces explicit choice.
136
+
137
+ ### Pressure Types
138
+
139
+ | Pressure | Example |
140
+ |----------|---------|
141
+ | **Time** | Emergency, deadline, deploy window closing |
142
+ | **Sunk cost** | Hours of work, "waste" to delete |
143
+ | **Authority** | Senior says skip it, manager overrides |
144
+ | **Economic** | Job, promotion, company survival at stake |
145
+ | **Exhaustion** | End of day, already tired, want to go home |
146
+ | **Social** | Looking dogmatic, seeming inflexible |
147
+ | **Pragmatic** | "Being pragmatic vs dogmatic" |
148
+
149
+ **Best tests combine 3+ pressures.**
150
+
151
+ ### Key Elements of Good Scenarios
152
+
153
+ 1. **Concrete options** — Force A/B/C choice, not open-ended
154
+ 2. **Real constraints** — Specific times, actual consequences
155
+ 3. **Real file paths** — `/tmp/payment-system` not "a project"
156
+ 4. **Make agent act** — "What do you do?" not "What should you do?"
157
+ 5. **No easy outs** — Can't defer to "I'd ask your human partner" without choosing
158
+
159
+ ### Testing Setup
160
+
161
+ ```markdown
162
+ IMPORTANT: This is a real scenario. You must choose and act.
163
+ Don't ask hypothetical questions - make the actual decision.
164
+
165
+ You have access to: [skill-being-tested]
166
+ ```
167
+
168
+ Make agent believe it's real work, not a quiz.
169
+
170
+ ## REFACTOR Phase: Close Loopholes (Stay Green)
171
+
172
+ Agent violated rule despite having the skill? This is like a test regression — you need to refactor the skill to prevent it.
173
+
174
+ **Capture new rationalizations verbatim:**
175
+ - "This case is different because..."
176
+ - "I'm following the spirit not the letter"
177
+ - "The PURPOSE is X, and I'm achieving X differently"
178
+ - "Being pragmatic means adapting"
179
+ - "Deleting X hours is wasteful"
180
+ - "Keep as reference while writing tests first"
181
+ - "I already manually tested it"
182
+
183
+ **Document every excuse.** These become your rationalization table.
184
+
185
+ ### Plugging Each Hole
186
+
187
+ For each new rationalization, add:
188
+
189
+ #### 1. Explicit Negation in Rules
190
+
191
+ <Before>
192
+ ```markdown
193
+ Write code before test? Delete it.
194
+ ```
195
+ </Before>
196
+
197
+ <After>
198
+ ```markdown
199
+ Write code before test? Delete it. Start over.
200
+
201
+ **No exceptions:**
202
+ - Don't keep it as "reference"
203
+ - Don't "adapt" it while writing tests
204
+ - Don't look at it
205
+ - Delete means delete
206
+ ```
207
+ </After>
208
+
209
+ #### 2. Entry in Rationalization Table
210
+
211
+ ```markdown
212
+ | Excuse | Reality |
213
+ |--------|---------|
214
+ | "Keep as reference, write tests first" | You'll adapt it. That's testing after. Delete means delete. |
215
+ ```
216
+
217
+ #### 3. Red Flag Entry
218
+
219
+ ```markdown
220
+ ## Red Flags - STOP
221
+
222
+ - "Keep as reference" or "adapt existing code"
223
+ - "I'm following the spirit not the letter"
224
+ ```
225
+
226
+ #### 4. Update description
227
+
228
+ ```yaml
229
+ description: Use when you wrote code before tests, when tempted to test after, or when manually testing seems faster.
230
+ ```
231
+
232
+ Add the symptoms an agent shows when it is ABOUT to violate the rule.
233
+
234
+ ### Re-verify After Refactoring
235
+
236
+ **Re-test same scenarios with updated skill.**
237
+
238
+ Agent should now:
239
+ - Choose correct option
240
+ - Cite new sections
241
+ - Acknowledge their previous rationalization was addressed
242
+
243
+ **If agent finds NEW rationalization:** Continue REFACTOR cycle.
244
+
245
+ **If agent follows rule:** Success — skill is bulletproof for this scenario.
246
+
247
+ ## Meta-Testing (When GREEN Isn't Working)
248
+
249
+ **After agent chooses wrong option, ask:**
250
+
251
+ ```markdown
252
+ your human partner: You read the skill and chose Option C anyway.
253
+
254
+ How could that skill have been written differently to make
255
+ it crystal clear that Option A was the only acceptable answer?
256
+ ```
257
+
258
+ **Three possible responses:**
259
+
260
+ 1. **"The skill WAS clear, I chose to ignore it"**
261
+ - Not documentation problem
262
+ - Need stronger foundational principle
263
+ - Add "Violating letter is violating spirit"
264
+
265
+ 2. **"The skill should have said X"**
266
+ - Documentation problem
267
+ - Add their suggestion verbatim
268
+
269
+ 3. **"I didn't see section Y"**
270
+ - Organization problem
271
+ - Make key points more prominent
272
+ - Add foundational principle early
273
+
274
+ ## When Skill is Bulletproof
275
+
276
+ **Signs of bulletproof skill:**
277
+
278
+ 1. **Agent chooses correct option** under maximum pressure
279
+ 2. **Agent cites skill sections** as justification
280
+ 3. **Agent acknowledges temptation** but follows rule anyway
281
+ 4. **Meta-testing reveals** "skill was clear, I should follow it"
282
+
283
+ **Not bulletproof if:**
284
+ - Agent finds new rationalizations
285
+ - Agent argues skill is wrong
286
+ - Agent creates "hybrid approaches"
287
+ - Agent asks permission but argues strongly for violation
288
+
289
+ ## Testing Checklist (TDD for Skills)
290
+
291
+ Before deploying skill, verify you followed RED-GREEN-REFACTOR:
292
+
293
+ **RED Phase:**
294
+ - [ ] Created pressure scenarios (3+ combined pressures)
295
+ - [ ] Ran scenarios WITHOUT skill (baseline)
296
+ - [ ] Documented agent failures and rationalizations verbatim
297
+
298
+ **GREEN Phase:**
299
+ - [ ] Wrote skill addressing specific baseline failures
300
+ - [ ] Ran scenarios WITH skill
301
+ - [ ] Agent now complies
302
+
303
+ **REFACTOR Phase:**
304
+ - [ ] Identified NEW rationalizations from testing
305
+ - [ ] Added explicit counters for each loophole
306
+ - [ ] Updated rationalization table
307
+ - [ ] Updated red flags list
308
+ - [ ] Updated description with violation symptoms
309
+ - [ ] Re-tested — agent still complies
310
+ - [ ] Meta-tested to verify clarity
311
+ - [ ] Agent follows rule under maximum pressure
312
+
313
+ ## Common Mistakes (Same as TDD)
314
+
315
+ **❌ Writing skill before testing (skipping RED)**
316
+ Reveals what YOU think needs preventing, not what ACTUALLY needs preventing.
317
+ ✅ Fix: Always run baseline scenarios first.
318
+
319
+ **❌ Not watching test fail properly**
320
+ Running only academic tests, not real pressure scenarios.
321
+ ✅ Fix: Use pressure scenarios that make agent WANT to violate.
322
+
323
+ **❌ Weak test cases (single pressure)**
324
+ Agents resist single pressure, break under multiple.
325
+ ✅ Fix: Combine 3+ pressures (time + sunk cost + exhaustion).
326
+
327
+ **❌ Not capturing exact failures**
328
+ "Agent was wrong" doesn't tell you what to prevent.
329
+ ✅ Fix: Document exact rationalizations verbatim.
330
+
331
+ **❌ Vague fixes (adding generic counters)**
332
+ "Don't cheat" doesn't work. "Don't keep as reference" does.
333
+ ✅ Fix: Add explicit negations for each specific rationalization.
334
+
335
+ **❌ Stopping after first pass**
336
+ Tests pass once ≠ bulletproof.
337
+ ✅ Fix: Continue REFACTOR cycle until no new rationalizations.
338
+
339
+ ## Quick Reference (TDD Cycle)
340
+
341
+ | TDD Phase | Skill Testing | Success Criteria |
342
+ |-----------|---------------|------------------|
343
+ | **RED** | Run scenario without skill | Agent fails, document rationalizations |
344
+ | **Verify RED** | Capture exact wording | Verbatim documentation of failures |
345
+ | **GREEN** | Write skill addressing failures | Agent now complies with skill |
346
+ | **Verify GREEN** | Re-test scenarios | Agent follows rule under pressure |
347
+ | **REFACTOR** | Close loopholes | Add counters for new rationalizations |
348
+ | **Stay GREEN** | Re-verify | Agent still complies after refactoring |
349
+
350
+ ## The Bottom Line
351
+
352
+ **Skill creation IS TDD. Same principles, same cycle, same benefits.**
353
+
354
+ If you wouldn't write code without tests, don't write skills without testing them on agents.
355
+
356
+ RED-GREEN-REFACTOR for documentation works exactly like RED-GREEN-REFACTOR for code.
@@ -0,0 +1,53 @@
1
+ # gxpm Template Authoring
2
+
3
+ ## 目的
4
+
5
+ gxpm 的 skill 和 capability 文档会逐步由模板生成。本文件规定如何写 `*.tmpl`,避免 prompt 模板变成脆弱脚本。
6
+
7
+ ## 核心原则
8
+
9
+ - 模板是给代理阅读和执行的 prompt,不是 shell 脚本。
10
+ - 用自然语言表达逻辑、状态和决策,不用 shell 变量在代码块之间传递状态。
11
+ - 每个 bash 代码块都必须能独立运行;如果需要上下文,在代码块前用 prose 重新说明。
12
+ - 分支名、base branch、issue id、worktree 路径都应动态检测或作为占位符,不硬编码。
13
+ - 条件分支优先写成编号决策步骤,而不是嵌套 shell if。
14
+
15
+ ## 生成器占位符
16
+
17
+ 当前已支持:
18
+
19
+ - `{{PREAMBLE}}`:由 host config 注入 host-aware preamble。
20
+ - `{{REFERENCE:<name>}}`:注入 `references/<name>.md` 的内容。用于将详细指南、模板、示例从 SKILL.md 中拆分,减少主文件上下文负载。
21
+
22
+ 新增占位符时必须:
23
+
24
+ 1. 在生成器中实现 resolver。
25
+ 2. 在测试中覆盖成功渲染和漂移检查。
26
+ 3. 在本文件记录占位符语义。
27
+
28
+ ## 文案规范
29
+
30
+ - 用短句描述动作和 stop rule。
31
+ - 对用户可见输出优先说明结果、证据、剩余风险。
32
+ - 不把 PMC/gstack 原始命令直接搬进 gxpm,除非明确标注为上游参考或迁移 adapter。
33
+ - 不在默认 skill 里塞大段架构背景;需要深入时链接 `docs/architecture/`。
34
+
35
+ ## Preset 集成
36
+
37
+ template-authoring.md 定义的占位符在 `gen:skill-docs` 中解析后,还会经过 PresetResolver 的 Override > Preset > Core 三层解析。这意味着:
38
+
39
+ - 预设可以通过 `replace` 策略完全覆盖生成产物
40
+ - 预设可以通过 `append`/`prepend` 策略在生成产物前后注入团队规范
41
+ - 覆盖层可以通过 `.gxpm/overrides/` 完全绕过模板系统
42
+
43
+ 预设规则的目标路径是生成产物的输出路径(如 `skills/gxpm/SKILL.md`),不是 `.tmpl` 路径。
44
+
45
+ ## 验证
46
+
47
+ 修改模板后运行:
48
+
49
+ ```bash
50
+ bun run gen:skill-docs
51
+ bun test
52
+ bun run check
53
+ ```
@@ -0,0 +1,51 @@
1
+ # Migration Guide: v0.2.0
2
+
3
+ ## What's New
4
+
5
+ - **`gxpm init` command**: Unified project initialization (replaces `bin/gxpm-init --install`)
6
+ - **`gxpm upgrade` / `gxpm post-upgrade`**: Closed upgrade loop with automatic skill doc regeneration
7
+ - **`gxpm verify`**: End-to-end smoke test suite
8
+ - **Enhanced `gxpm doctor`**: Health score (0-100), `--fix` auto-repair, new checks (skill freshness, Linear connectivity, worktree, config validity)
9
+ - **Environment variable overrides**: `GXPM_HOME`, `GXPM_CONFIG_PATH`, `GXPM_SYNC_PROVIDER`, `GXPM_LINEAR_TEAM_ID`, etc.
10
+ - **`docs/INSTALL_FOR_AGENTS.md`**: Agent self-bootstrap protocol
11
+ - **`docs/GXPM_VERIFY.md`**: Post-install verification checklist
12
+
13
+ ## Required Actions
14
+
15
+ ### 1. Re-install hooks (recommended)
16
+
17
+ Hook templates may have changed. Update them in your managed repos:
18
+
19
+ ```bash
20
+ cd /path/to/your/project
21
+ gxpm init --install-hooks --target .
22
+ ```
23
+
24
+ Or simply re-run:
25
+
26
+ ```bash
27
+ gxpm init
28
+ ```
29
+
30
+ ### 2. Regenerate skill docs
31
+
32
+ If you modified `*.tmpl` files locally, run:
33
+
34
+ ```bash
35
+ bun run gen:skill-docs
36
+ ```
37
+
38
+ ### 3. Verify your setup
39
+
40
+ ```bash
41
+ gxpm doctor --json
42
+ gxpm verify
43
+ ```
44
+
45
+ All checks should pass.
46
+
47
+ ## Backward Compatibility
48
+
49
+ - `bin/gxpm-init` is preserved but deprecated. Prefer `gxpm init`.
50
+ - `gxpm doctor` JSON output now includes `schema_version`, `health_score`, and `checks` array. Legacy `runtime`/`skill`/`repo` fields are preserved for backward compatibility.
51
+ - `core/config.ts` now checks environment variables before config files for known keys, so a set env var takes precedence over the value in the config file. Existing config files continue to work.
@@ -0,0 +1,23 @@
1
+ # docs/plans/
2
+
3
+ 实现计划与技术方案的存放目录。
4
+
5
+ ## 命名规范
6
+
7
+ 所有文件采用 `*-plan.md` 命名:
8
+
9
+ - `feature-name-plan.md` — 功能实现计划
10
+ - `migration-name-plan.md` — 迁移计划
11
+ - `experiment-name-plan.md` — 实验方案
12
+
13
+ ## 内容约定
14
+
15
+ - 概述(Summary)
16
+ - 实施步骤(Steps),按优先级排序
17
+ - 风险评估(Risks)
18
+ - 验证方法(Validation)
19
+ - 回滚策略(Rollback Plan,如适用)
20
+
21
+ ## 与 dispatch 阶段的关系
22
+
23
+ plans/ 中的文档由 plan 阶段产出,经 dispatch 阶段确认后进入 implement。