xtrm-tools 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333) hide show
  1. package/CHANGELOG.md +504 -0
  2. package/README.md +201 -0
  3. package/cli/dist/index.cjs +57378 -0
  4. package/cli/dist/index.cjs.map +1 -0
  5. package/cli/dist/index.d.cts +2 -0
  6. package/cli/package.json +47 -0
  7. package/config/.env.example +40 -0
  8. package/config/hooks.json +72 -0
  9. package/config/instructions/agents-top.md +30 -0
  10. package/config/instructions/claude-top.md +30 -0
  11. package/config/mcp_servers.json +57 -0
  12. package/config/mcp_servers_optional.json +53 -0
  13. package/config/pi/auth.json.template +14 -0
  14. package/config/pi/extensions/auto-session-name/index.ts +29 -0
  15. package/config/pi/extensions/auto-session-name/package.json +16 -0
  16. package/config/pi/extensions/auto-update/index.ts +71 -0
  17. package/config/pi/extensions/auto-update/package.json +16 -0
  18. package/config/pi/extensions/beads/index.ts +166 -0
  19. package/config/pi/extensions/beads/package.json +16 -0
  20. package/config/pi/extensions/bg-process/index.ts +230 -0
  21. package/config/pi/extensions/bg-process/package.json +16 -0
  22. package/config/pi/extensions/compact-header/index.ts +69 -0
  23. package/config/pi/extensions/compact-header/package.json +16 -0
  24. package/config/pi/extensions/core/adapter.ts +52 -0
  25. package/config/pi/extensions/core/guard-rules.ts +102 -0
  26. package/config/pi/extensions/core/lib.ts +3 -0
  27. package/config/pi/extensions/core/logger.ts +45 -0
  28. package/config/pi/extensions/core/runner.ts +71 -0
  29. package/config/pi/extensions/core/session-state.ts +59 -0
  30. package/config/pi/extensions/custom-footer/index.ts +160 -0
  31. package/config/pi/extensions/custom-footer/package.json +16 -0
  32. package/config/pi/extensions/custom-provider-qwen-cli/index.ts +363 -0
  33. package/config/pi/extensions/custom-provider-qwen-cli/package.json +1 -0
  34. package/config/pi/extensions/git-checkpoint/index.ts +53 -0
  35. package/config/pi/extensions/git-checkpoint/package.json +16 -0
  36. package/config/pi/extensions/minimal-mode/index.ts +201 -0
  37. package/config/pi/extensions/minimal-mode/package.json +16 -0
  38. package/config/pi/extensions/plan-mode/README.md +65 -0
  39. package/config/pi/extensions/plan-mode/index.ts +417 -0
  40. package/config/pi/extensions/plan-mode/package.json +12 -0
  41. package/config/pi/extensions/plan-mode/utils.ts +324 -0
  42. package/config/pi/extensions/quality-gates/index.ts +67 -0
  43. package/config/pi/extensions/quality-gates/package.json +16 -0
  44. package/config/pi/extensions/service-skills/index.ts +108 -0
  45. package/config/pi/extensions/service-skills/package.json +16 -0
  46. package/config/pi/extensions/session-flow/index.ts +131 -0
  47. package/config/pi/extensions/session-flow/package.json +16 -0
  48. package/config/pi/extensions/todo/index.ts +299 -0
  49. package/config/pi/extensions/todo/package.json +16 -0
  50. package/config/pi/extensions/xtrm-loader/index.ts +89 -0
  51. package/config/pi/extensions/xtrm-loader/package.json +16 -0
  52. package/config/pi/install-schema.json +44 -0
  53. package/config/pi/models.json.template +76 -0
  54. package/config/pi/pi-worktrees-settings.json +6 -0
  55. package/config/pi/settings.json.template +16 -0
  56. package/config/settings.json +70 -0
  57. package/hooks/README.md +75 -0
  58. package/hooks/agent_context.py +105 -0
  59. package/hooks/beads-claim-sync.mjs +166 -0
  60. package/hooks/beads-commit-gate.mjs +55 -0
  61. package/hooks/beads-compact-restore.mjs +69 -0
  62. package/hooks/beads-compact-save.mjs +51 -0
  63. package/hooks/beads-edit-gate.mjs +45 -0
  64. package/hooks/beads-gate-core.mjs +215 -0
  65. package/hooks/beads-gate-messages.mjs +87 -0
  66. package/hooks/beads-gate-utils.mjs +185 -0
  67. package/hooks/beads-memory-gate.mjs +61 -0
  68. package/hooks/beads-stop-gate.mjs +32 -0
  69. package/hooks/branch-state.mjs +39 -0
  70. package/hooks/gitnexus/gitnexus-hook.cjs +222 -0
  71. package/hooks/guard-rules.mjs +118 -0
  72. package/hooks/hooks.json +116 -0
  73. package/hooks/main-guard-post-push.mjs +71 -0
  74. package/hooks/main-guard.mjs +119 -0
  75. package/hooks/quality-check.cjs +1286 -0
  76. package/hooks/quality-check.py +345 -0
  77. package/hooks/serena-workflow-reminder.py +74 -0
  78. package/package.json +77 -0
  79. package/project-skills/quality-gates/.claude/hooks/hook-config.json +66 -0
  80. package/project-skills/quality-gates/.claude/hooks/quality-check.cjs +1286 -0
  81. package/project-skills/quality-gates/.claude/hooks/quality-check.py +334 -0
  82. package/project-skills/quality-gates/.claude/settings.json +3 -0
  83. package/project-skills/quality-gates/.claude/skills/using-quality-gates/SKILL.md +254 -0
  84. package/project-skills/quality-gates/README.md +109 -0
  85. package/project-skills/quality-gates/evals/evals.json +181 -0
  86. package/project-skills/quality-gates/workspace/iteration-1/FINAL-EVAL-SUMMARY.md +75 -0
  87. package/project-skills/quality-gates/workspace/iteration-1/edge-case-auto-fix-verification/with_skill/outputs/response.md +59 -0
  88. package/project-skills/quality-gates/workspace/iteration-1/edge-case-mixed-language-project/with_skill/outputs/response.md +60 -0
  89. package/project-skills/quality-gates/workspace/iteration-1/eval-summary.md +105 -0
  90. package/project-skills/quality-gates/workspace/iteration-1/partial-install-python-only/with_skill/outputs/response.md +93 -0
  91. package/project-skills/quality-gates/workspace/iteration-1/python-refactor-request/with_skill/outputs/response.md +104 -0
  92. package/project-skills/quality-gates/workspace/iteration-1/quality-gate-error-fix/with_skill/outputs/response.md +74 -0
  93. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-general-chat/with_skill/outputs/response.md +18 -0
  94. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-math-question/with_skill/outputs/response.md +18 -0
  95. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-unrelated-coding/with_skill/outputs/response.md +56 -0
  96. package/project-skills/quality-gates/workspace/iteration-1/tdd-guard-blocking-confusion/with_skill/outputs/response.md +67 -0
  97. package/project-skills/quality-gates/workspace/iteration-1/typescript-feature-with-tests/with_skill/outputs/response.md +97 -0
  98. package/project-skills/service-skills-set/.claude/git-hooks/doc_reminder.py +67 -0
  99. package/project-skills/service-skills-set/.claude/git-hooks/skill_staleness.py +194 -0
  100. package/project-skills/service-skills-set/.claude/service-registry.json +4 -0
  101. package/project-skills/service-skills-set/.claude/settings.json +37 -0
  102. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/SKILL.md +433 -0
  103. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/references/script_quality_standards.md +425 -0
  104. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/references/service_skill_system_guide.md +278 -0
  105. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/bootstrap.py +308 -0
  106. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/deep_dive.py +304 -0
  107. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/scaffolder.py +482 -0
  108. package/project-skills/service-skills-set/.claude/skills/scoping-service-skills/SKILL.md +231 -0
  109. package/project-skills/service-skills-set/.claude/skills/scoping-service-skills/scripts/scope.py +74 -0
  110. package/project-skills/service-skills-set/.claude/skills/updating-service-skills/SKILL.md +136 -0
  111. package/project-skills/service-skills-set/.claude/skills/updating-service-skills/scripts/drift_detector.py +222 -0
  112. package/project-skills/service-skills-set/.claude/skills/using-service-skills/SKILL.md +108 -0
  113. package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/cataloger.py +74 -0
  114. package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/skill_activator.py +152 -0
  115. package/project-skills/service-skills-set/README.md +93 -0
  116. package/project-skills/service-skills-set/install-service-skills.py +193 -0
  117. package/project-skills/service-skills-set/service-skills-readme.md +236 -0
  118. package/skills/README.txt +31 -0
  119. package/skills/clean-code/SKILL.md +201 -0
  120. package/skills/creating-service-skills/SKILL.md +433 -0
  121. package/skills/creating-service-skills/references/script_quality_standards.md +425 -0
  122. package/skills/creating-service-skills/references/service_skill_system_guide.md +278 -0
  123. package/skills/creating-service-skills/scripts/bootstrap.py +326 -0
  124. package/skills/creating-service-skills/scripts/deep_dive.py +304 -0
  125. package/skills/creating-service-skills/scripts/scaffolder.py +482 -0
  126. package/skills/delegating/SKILL.md +196 -0
  127. package/skills/delegating/config.yaml +210 -0
  128. package/skills/delegating/references/orchestration-protocols.md +41 -0
  129. package/skills/docker-expert/SKILL.md +409 -0
  130. package/skills/documenting/CHANGELOG.md +23 -0
  131. package/skills/documenting/README.md +148 -0
  132. package/skills/documenting/SKILL.md +113 -0
  133. package/skills/documenting/examples/example_pattern.md +70 -0
  134. package/skills/documenting/examples/example_reference.md +70 -0
  135. package/skills/documenting/examples/example_ssot_analytics.md +64 -0
  136. package/skills/documenting/examples/example_workflow.md +141 -0
  137. package/skills/documenting/references/changelog-format.md +97 -0
  138. package/skills/documenting/references/metadata-schema.md +136 -0
  139. package/skills/documenting/references/taxonomy.md +81 -0
  140. package/skills/documenting/references/versioning-rules.md +78 -0
  141. package/skills/documenting/scripts/bump_version.sh +60 -0
  142. package/skills/documenting/scripts/changelog/__init__.py +0 -0
  143. package/skills/documenting/scripts/changelog/add_entry.py +216 -0
  144. package/skills/documenting/scripts/changelog/bump_release.py +117 -0
  145. package/skills/documenting/scripts/changelog/init_changelog.py +54 -0
  146. package/skills/documenting/scripts/changelog/validate_changelog.py +128 -0
  147. package/skills/documenting/scripts/drift_detector.py +266 -0
  148. package/skills/documenting/scripts/generate_template.py +311 -0
  149. package/skills/documenting/scripts/list_by_category.sh +84 -0
  150. package/skills/documenting/scripts/orchestrator.py +255 -0
  151. package/skills/documenting/scripts/validate_metadata.py +242 -0
  152. package/skills/documenting/templates/CHANGELOG.md.template +13 -0
  153. package/skills/find-skills/SKILL.md +133 -0
  154. package/skills/gitnexus-debugging/SKILL.md +85 -0
  155. package/skills/gitnexus-exploring/SKILL.md +75 -0
  156. package/skills/gitnexus-impact-analysis/SKILL.md +94 -0
  157. package/skills/gitnexus-refactoring/SKILL.md +113 -0
  158. package/skills/hook-development/SKILL.md +797 -0
  159. package/skills/hook-development/examples/load-context.sh +55 -0
  160. package/skills/hook-development/examples/quality-check.js +1168 -0
  161. package/skills/hook-development/examples/validate-bash.sh +43 -0
  162. package/skills/hook-development/examples/validate-write.sh +38 -0
  163. package/skills/hook-development/references/advanced.md +527 -0
  164. package/skills/hook-development/references/migration.md +369 -0
  165. package/skills/hook-development/references/patterns.md +412 -0
  166. package/skills/hook-development/scripts/README.md +164 -0
  167. package/skills/hook-development/scripts/hook-linter.sh +153 -0
  168. package/skills/hook-development/scripts/test-hook.sh +252 -0
  169. package/skills/hook-development/scripts/validate-hook-schema.sh +159 -0
  170. package/skills/obsidian-cli/SKILL.md +106 -0
  171. package/skills/orchestrating-agents/SKILL.md +135 -0
  172. package/skills/orchestrating-agents/config.yaml +45 -0
  173. package/skills/orchestrating-agents/references/agent-context-integration.md +37 -0
  174. package/skills/orchestrating-agents/references/examples.md +45 -0
  175. package/skills/orchestrating-agents/references/handover-protocol.md +31 -0
  176. package/skills/orchestrating-agents/references/workflows.md +42 -0
  177. package/skills/orchestrating-agents/scripts/detect_neighbors.py +23 -0
  178. package/skills/prompt-improving/README.md +162 -0
  179. package/skills/prompt-improving/SKILL.md +74 -0
  180. package/skills/prompt-improving/references/analysis_commands.md +24 -0
  181. package/skills/prompt-improving/references/chain_of_thought.md +24 -0
  182. package/skills/prompt-improving/references/mcp_definitions.md +20 -0
  183. package/skills/prompt-improving/references/multishot.md +23 -0
  184. package/skills/prompt-improving/references/xml_core.md +60 -0
  185. package/skills/python-testing/SKILL.md +815 -0
  186. package/skills/scoping-service-skills/SKILL.md +231 -0
  187. package/skills/scoping-service-skills/scripts/scope.py +74 -0
  188. package/skills/senior-backend/SKILL.md +209 -0
  189. package/skills/senior-backend/references/api_design_patterns.md +103 -0
  190. package/skills/senior-backend/references/backend_security_practices.md +103 -0
  191. package/skills/senior-backend/references/database_optimization_guide.md +103 -0
  192. package/skills/senior-backend/scripts/api_load_tester.py +114 -0
  193. package/skills/senior-backend/scripts/api_scaffolder.py +114 -0
  194. package/skills/senior-backend/scripts/database_migration_tool.py +114 -0
  195. package/skills/senior-data-scientist/SKILL.md +226 -0
  196. package/skills/senior-data-scientist/references/experiment_design_frameworks.md +80 -0
  197. package/skills/senior-data-scientist/references/feature_engineering_patterns.md +80 -0
  198. package/skills/senior-data-scientist/references/statistical_methods_advanced.md +80 -0
  199. package/skills/senior-data-scientist/scripts/experiment_designer.py +100 -0
  200. package/skills/senior-data-scientist/scripts/feature_engineering_pipeline.py +100 -0
  201. package/skills/senior-data-scientist/scripts/model_evaluation_suite.py +100 -0
  202. package/skills/senior-devops/SKILL.md +209 -0
  203. package/skills/senior-devops/references/cicd_pipeline_guide.md +103 -0
  204. package/skills/senior-devops/references/deployment_strategies.md +103 -0
  205. package/skills/senior-devops/references/infrastructure_as_code.md +103 -0
  206. package/skills/senior-devops/scripts/deployment_manager.py +114 -0
  207. package/skills/senior-devops/scripts/pipeline_generator.py +114 -0
  208. package/skills/senior-devops/scripts/terraform_scaffolder.py +114 -0
  209. package/skills/senior-security/SKILL.md +209 -0
  210. package/skills/senior-security/references/cryptography_implementation.md +103 -0
  211. package/skills/senior-security/references/penetration_testing_guide.md +103 -0
  212. package/skills/senior-security/references/security_architecture_patterns.md +103 -0
  213. package/skills/senior-security/scripts/pentest_automator.py +114 -0
  214. package/skills/senior-security/scripts/security_auditor.py +114 -0
  215. package/skills/senior-security/scripts/threat_modeler.py +114 -0
  216. package/skills/skill-creator/LICENSE.txt +202 -0
  217. package/skills/skill-creator/SKILL.md +479 -0
  218. package/skills/skill-creator/agents/analyzer.md +274 -0
  219. package/skills/skill-creator/agents/comparator.md +202 -0
  220. package/skills/skill-creator/agents/grader.md +223 -0
  221. package/skills/skill-creator/assets/eval_review.html +146 -0
  222. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  223. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  224. package/skills/skill-creator/references/schemas.md +430 -0
  225. package/skills/skill-creator/scripts/__init__.py +0 -0
  226. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  227. package/skills/skill-creator/scripts/generate_report.py +326 -0
  228. package/skills/skill-creator/scripts/improve_description.py +248 -0
  229. package/skills/skill-creator/scripts/package_skill.py +136 -0
  230. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  231. package/skills/skill-creator/scripts/run_eval.py +310 -0
  232. package/skills/skill-creator/scripts/run_loop.py +332 -0
  233. package/skills/skill-creator/scripts/utils.py +47 -0
  234. package/skills/sync-docs/SKILL.md +132 -0
  235. package/skills/sync-docs/evals/evals.json +89 -0
  236. package/skills/sync-docs/references/doc-structure.md +99 -0
  237. package/skills/sync-docs/references/schema.md +103 -0
  238. package/skills/sync-docs/scripts/changelog/add_entry.py +216 -0
  239. package/skills/sync-docs/scripts/context_gatherer.py +240 -0
  240. package/skills/sync-docs/scripts/doc_structure_analyzer.py +495 -0
  241. package/skills/sync-docs/scripts/drift_detector.py +327 -0
  242. package/skills/sync-docs/scripts/validate_doc.py +365 -0
  243. package/skills/sync-docs/scripts/validate_metadata.py +185 -0
  244. package/skills/sync-docs-workspace/iteration-1/benchmark.json +293 -0
  245. package/skills/sync-docs-workspace/iteration-1/benchmark.md +13 -0
  246. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +27 -0
  247. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +210 -0
  248. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +28 -0
  249. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +1 -0
  250. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +101 -0
  251. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +28 -0
  252. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +5 -0
  253. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +5 -0
  254. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +27 -0
  255. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +198 -0
  256. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +28 -0
  257. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +1 -0
  258. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +94 -0
  259. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +28 -0
  260. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +1 -0
  261. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +27 -0
  262. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +237 -0
  263. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +28 -0
  264. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
  265. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +134 -0
  266. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +28 -0
  267. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +1 -0
  268. package/skills/sync-docs-workspace/iteration-2/benchmark.json +297 -0
  269. package/skills/sync-docs-workspace/iteration-2/benchmark.md +13 -0
  270. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +27 -0
  271. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +137 -0
  272. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +92 -0
  273. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +1 -0
  274. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +134 -0
  275. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +86 -0
  276. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +1 -0
  277. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +27 -0
  278. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +193 -0
  279. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +72 -0
  280. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +1 -0
  281. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +211 -0
  282. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +91 -0
  283. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +5 -0
  284. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +27 -0
  285. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +182 -0
  286. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
  287. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
  288. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +222 -0
  289. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +88 -0
  290. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
  291. package/skills/sync-docs-workspace/iteration-3/benchmark.json +298 -0
  292. package/skills/sync-docs-workspace/iteration-3/benchmark.md +13 -0
  293. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +27 -0
  294. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +125 -0
  295. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +97 -0
  296. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +5 -0
  297. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +144 -0
  298. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +78 -0
  299. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +5 -0
  300. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +27 -0
  301. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +104 -0
  302. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +91 -0
  303. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +5 -0
  304. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +79 -0
  305. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +82 -0
  306. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +5 -0
  307. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +27 -0
  308. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +302 -0
  309. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +33 -0
  310. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +114 -0
  311. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +118 -0
  312. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +38 -0
  313. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +158 -0
  314. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
  315. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +5 -0
  316. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +71 -0
  317. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +90 -0
  318. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
  319. package/skills/test-planning/SKILL.md +208 -0
  320. package/skills/test-planning/evals/evals.json +23 -0
  321. package/skills/updating-service-skills/SKILL.md +136 -0
  322. package/skills/updating-service-skills/scripts/drift_detector.py +222 -0
  323. package/skills/using-TDD/SKILL.md +410 -0
  324. package/skills/using-quality-gates/SKILL.md +254 -0
  325. package/skills/using-serena-lsp/README.md +8 -0
  326. package/skills/using-serena-lsp/REFERENCE.md +194 -0
  327. package/skills/using-serena-lsp/SKILL.md +82 -0
  328. package/skills/using-service-skills/SKILL.md +108 -0
  329. package/skills/using-service-skills/scripts/cataloger.py +74 -0
  330. package/skills/using-service-skills/scripts/skill_activator.py +152 -0
  331. package/skills/using-service-skills/scripts/test_skill_activator.py +58 -0
  332. package/skills/using-xtrm/SKILL.md +245 -0
  333. package/skills/xt-end/SKILL.md +128 -0
@@ -0,0 +1,332 @@
1
+ #!/usr/bin/env python3
2
+ """Run the eval + improve loop until all pass or max iterations reached.
3
+
4
+ Combines run_eval.py and improve_description.py in a loop, tracking history
5
+ and returning the best description found. Supports train/test split to prevent
6
+ overfitting.
7
+ """
8
+
9
+ import argparse
10
+ import json
11
+ import random
12
+ import sys
13
+ import tempfile
14
+ import time
15
+ import webbrowser
16
+ from pathlib import Path
17
+
18
+ import anthropic
19
+
20
+ from scripts.generate_report import generate_html
21
+ from scripts.improve_description import improve_description
22
+ from scripts.run_eval import find_project_root, run_eval
23
+ from scripts.utils import parse_skill_md
24
+
25
+
26
def split_eval_set(eval_set: list[dict], holdout: float, seed: int = 42) -> tuple[list[dict], list[dict]]:
    """Split eval set into train and test sets, stratified by should_trigger.

    The entries are partitioned by the truthiness of their ``"should_trigger"``
    key, each partition is shuffled deterministically, and the first
    ``max(1, int(len(partition) * holdout))`` entries of each partition are
    held out for testing.

    Args:
        eval_set: Eval entries; each must be a dict with a ``"should_trigger"`` key.
        holdout: Fraction of each partition to hold out for the test set.
        seed: Seed for the shuffle, so the same input yields the same split.

    Returns:
        A ``(train_set, test_set)`` tuple. Within each list the
        should-trigger entries come first, followed by the others.

    Raises:
        KeyError: If an entry has no ``"should_trigger"`` key.

    Note:
        Because at least one entry per partition is held out, a partition
        with a single entry ends up entirely in the test set.
    """
    # Use a private generator instead of random.seed(): reseeding the module-level
    # RNG would silently make every other user of `random` in the process
    # deterministic. random.Random(seed) yields the same shuffle sequence as
    # random.seed(seed) followed by random.shuffle(), so the split is unchanged.
    rng = random.Random(seed)

    # Separate by should_trigger (new lists, so the caller's list is not reordered)
    trigger = [e for e in eval_set if e["should_trigger"]]
    no_trigger = [e for e in eval_set if not e["should_trigger"]]

    # Shuffle each group; the order of these two calls determines the split
    rng.shuffle(trigger)
    rng.shuffle(no_trigger)

    # Calculate split points (always hold out at least one entry per group;
    # slicing an empty group simply yields an empty list)
    n_trigger_test = max(1, int(len(trigger) * holdout))
    n_no_trigger_test = max(1, int(len(no_trigger) * holdout))

    # Split
    test_set = trigger[:n_trigger_test] + no_trigger[:n_no_trigger_test]
    train_set = trigger[n_trigger_test:] + no_trigger[n_no_trigger_test:]

    return train_set, test_set
47
+
48
+
49
+ def run_loop(
50
+ eval_set: list[dict],
51
+ skill_path: Path,
52
+ description_override: str | None,
53
+ num_workers: int,
54
+ timeout: int,
55
+ max_iterations: int,
56
+ runs_per_query: int,
57
+ trigger_threshold: float,
58
+ holdout: float,
59
+ model: str,
60
+ verbose: bool,
61
+ live_report_path: Path | None = None,
62
+ log_dir: Path | None = None,
63
+ ) -> dict:
64
+ """Run the eval + improvement loop."""
65
+ project_root = find_project_root()
66
+ name, original_description, content = parse_skill_md(skill_path)
67
+ current_description = description_override or original_description
68
+
69
+ # Split into train/test if holdout > 0
70
+ if holdout > 0:
71
+ train_set, test_set = split_eval_set(eval_set, holdout)
72
+ if verbose:
73
+ print(f"Split: {len(train_set)} train, {len(test_set)} test (holdout={holdout})", file=sys.stderr)
74
+ else:
75
+ train_set = eval_set
76
+ test_set = []
77
+
78
+ client = anthropic.Anthropic()
79
+ history = []
80
+ exit_reason = "unknown"
81
+
82
+ for iteration in range(1, max_iterations + 1):
83
+ if verbose:
84
+ print(f"\n{'='*60}", file=sys.stderr)
85
+ print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr)
86
+ print(f"Description: {current_description}", file=sys.stderr)
87
+ print(f"{'='*60}", file=sys.stderr)
88
+
89
+ # Evaluate train + test together in one batch for parallelism
90
+ all_queries = train_set + test_set
91
+ t0 = time.time()
92
+ all_results = run_eval(
93
+ eval_set=all_queries,
94
+ skill_name=name,
95
+ description=current_description,
96
+ num_workers=num_workers,
97
+ timeout=timeout,
98
+ project_root=project_root,
99
+ runs_per_query=runs_per_query,
100
+ trigger_threshold=trigger_threshold,
101
+ model=model,
102
+ )
103
+ eval_elapsed = time.time() - t0
104
+
105
+ # Split results back into train/test by matching queries
106
+ train_queries_set = {q["query"] for q in train_set}
107
+ train_result_list = [r for r in all_results["results"] if r["query"] in train_queries_set]
108
+ test_result_list = [r for r in all_results["results"] if r["query"] not in train_queries_set]
109
+
110
+ train_passed = sum(1 for r in train_result_list if r["pass"])
111
+ train_total = len(train_result_list)
112
+ train_summary = {"passed": train_passed, "failed": train_total - train_passed, "total": train_total}
113
+ train_results = {"results": train_result_list, "summary": train_summary}
114
+
115
+ if test_set:
116
+ test_passed = sum(1 for r in test_result_list if r["pass"])
117
+ test_total = len(test_result_list)
118
+ test_summary = {"passed": test_passed, "failed": test_total - test_passed, "total": test_total}
119
+ test_results = {"results": test_result_list, "summary": test_summary}
120
+ else:
121
+ test_results = None
122
+ test_summary = None
123
+
124
+ history.append({
125
+ "iteration": iteration,
126
+ "description": current_description,
127
+ "train_passed": train_summary["passed"],
128
+ "train_failed": train_summary["failed"],
129
+ "train_total": train_summary["total"],
130
+ "train_results": train_results["results"],
131
+ "test_passed": test_summary["passed"] if test_summary else None,
132
+ "test_failed": test_summary["failed"] if test_summary else None,
133
+ "test_total": test_summary["total"] if test_summary else None,
134
+ "test_results": test_results["results"] if test_results else None,
135
+ # For backward compat with report generator
136
+ "passed": train_summary["passed"],
137
+ "failed": train_summary["failed"],
138
+ "total": train_summary["total"],
139
+ "results": train_results["results"],
140
+ })
141
+
142
+ # Write live report if path provided
143
+ if live_report_path:
144
+ partial_output = {
145
+ "original_description": original_description,
146
+ "best_description": current_description,
147
+ "best_score": "in progress",
148
+ "iterations_run": len(history),
149
+ "holdout": holdout,
150
+ "train_size": len(train_set),
151
+ "test_size": len(test_set),
152
+ "history": history,
153
+ }
154
+ live_report_path.write_text(generate_html(partial_output, auto_refresh=True, skill_name=name))
155
+
156
+ if verbose:
157
+ def print_eval_stats(label, results, elapsed):
158
+ pos = [r for r in results if r["should_trigger"]]
159
+ neg = [r for r in results if not r["should_trigger"]]
160
+ tp = sum(r["triggers"] for r in pos)
161
+ pos_runs = sum(r["runs"] for r in pos)
162
+ fn = pos_runs - tp
163
+ fp = sum(r["triggers"] for r in neg)
164
+ neg_runs = sum(r["runs"] for r in neg)
165
+ tn = neg_runs - fp
166
+ total = tp + tn + fp + fn
167
+ precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0
168
+ recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0
169
+ accuracy = (tp + tn) / total if total > 0 else 0.0
170
+ print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr)
171
+ for r in results:
172
+ status = "PASS" if r["pass"] else "FAIL"
173
+ rate_str = f"{r['triggers']}/{r['runs']}"
174
+ print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr)
175
+
176
+ print_eval_stats("Train", train_results["results"], eval_elapsed)
177
+ if test_summary:
178
+ print_eval_stats("Test ", test_results["results"], 0)
179
+
180
+ if train_summary["failed"] == 0:
181
+ exit_reason = f"all_passed (iteration {iteration})"
182
+ if verbose:
183
+ print(f"\nAll train queries passed on iteration {iteration}!", file=sys.stderr)
184
+ break
185
+
186
+ if iteration == max_iterations:
187
+ exit_reason = f"max_iterations ({max_iterations})"
188
+ if verbose:
189
+ print(f"\nMax iterations reached ({max_iterations}).", file=sys.stderr)
190
+ break
191
+
192
+ # Improve the description based on train results
193
+ if verbose:
194
+ print(f"\nImproving description...", file=sys.stderr)
195
+
196
+ t0 = time.time()
197
+ # Strip test scores from history so improvement model can't see them
198
+ blinded_history = [
199
+ {k: v for k, v in h.items() if not k.startswith("test_")}
200
+ for h in history
201
+ ]
202
+ new_description = improve_description(
203
+ client=client,
204
+ skill_name=name,
205
+ skill_content=content,
206
+ current_description=current_description,
207
+ eval_results=train_results,
208
+ history=blinded_history,
209
+ model=model,
210
+ log_dir=log_dir,
211
+ iteration=iteration,
212
+ )
213
+ improve_elapsed = time.time() - t0
214
+
215
+ if verbose:
216
+ print(f"Proposed ({improve_elapsed:.1f}s): {new_description}", file=sys.stderr)
217
+
218
+ current_description = new_description
219
+
220
+ # Find the best iteration by TEST score (or train if no test set)
221
+ if test_set:
222
+ best = max(history, key=lambda h: h["test_passed"] or 0)
223
+ best_score = f"{best['test_passed']}/{best['test_total']}"
224
+ else:
225
+ best = max(history, key=lambda h: h["train_passed"])
226
+ best_score = f"{best['train_passed']}/{best['train_total']}"
227
+
228
+ if verbose:
229
+ print(f"\nExit reason: {exit_reason}", file=sys.stderr)
230
+ print(f"Best score: {best_score} (iteration {best['iteration']})", file=sys.stderr)
231
+
232
+ return {
233
+ "exit_reason": exit_reason,
234
+ "original_description": original_description,
235
+ "best_description": best["description"],
236
+ "best_score": best_score,
237
+ "best_train_score": f"{best['train_passed']}/{best['train_total']}",
238
+ "best_test_score": f"{best['test_passed']}/{best['test_total']}" if test_set else None,
239
+ "final_description": current_description,
240
+ "iterations_run": len(history),
241
+ "holdout": holdout,
242
+ "train_size": len(train_set),
243
+ "test_size": len(test_set),
244
+ "history": history,
245
+ }
246
+
247
+
248
def main():
    """Command-line entry point for the eval + improve loop.

    Parses the CLI arguments, loads the eval set, checks that the skill
    directory contains a SKILL.md, optionally prepares a live HTML report
    (opened in the browser right away) and a timestamped results directory,
    then delegates to ``run_loop``. The loop's result is printed to stdout as
    JSON; progress and status messages go to stderr.

    Exits with status 1 when no SKILL.md is found under ``--skill-path``.
    """
    parser = argparse.ArgumentParser(description="Run eval + improve loop")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override starting description")
    parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
    parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
    parser.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
    parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)")
    parser.add_argument("--model", required=True, help="Model for improvement")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    parser.add_argument("--report", default="auto", help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)")
    parser.add_argument("--results-dir", default=None, help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here")
    args = parser.parse_args()

    # JSON text is UTF-8 by specification; decode it explicitly instead of
    # relying on the platform's locale encoding.
    eval_set = json.loads(Path(args.eval_set).read_text(encoding="utf-8"))
    skill_path = Path(args.skill_path)

    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    name, _, _ = parse_skill_md(skill_path)

    # Set up live report path
    if args.report != "none":
        if args.report == "auto":
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            live_report_path = Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
        else:
            live_report_path = Path(args.report)
        # Open the report immediately so the user can watch; the placeholder
        # page refreshes itself until the loop overwrites it.
        live_report_path.write_text("<html><body><h1>Starting optimization loop...</h1><meta http-equiv='refresh' content='5'></body></html>")
        webbrowser.open(str(live_report_path))
    else:
        live_report_path = None

    # Determine output directory (create before run_loop so logs can be written)
    if args.results_dir:
        timestamp = time.strftime("%Y-%m-%d_%H%M%S")
        results_dir = Path(args.results_dir) / timestamp
        results_dir.mkdir(parents=True, exist_ok=True)
    else:
        results_dir = None

    log_dir = results_dir / "logs" if results_dir else None

    output = run_loop(
        eval_set=eval_set,
        skill_path=skill_path,
        description_override=args.description,
        num_workers=args.num_workers,
        timeout=args.timeout,
        max_iterations=args.max_iterations,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        holdout=args.holdout,
        model=args.model,
        verbose=args.verbose,
        live_report_path=live_report_path,
        log_dir=log_dir,
    )

    # Save JSON output
    json_output = json.dumps(output, indent=2)
    print(json_output)
    if results_dir:
        (results_dir / "results.json").write_text(json_output)

    # Write final HTML report (without auto-refresh). Render it once and reuse
    # the same markup for the copy kept in the results directory.
    if live_report_path:
        final_html = generate_html(output, auto_refresh=False, skill_name=name)
        live_report_path.write_text(final_html)
        print(f"\nReport: {live_report_path}", file=sys.stderr)
        if results_dir:
            (results_dir / "report.html").write_text(final_html)

    if results_dir:
        print(f"Results saved to: {results_dir}", file=sys.stderr)


if __name__ == "__main__":
    main()
@@ -0,0 +1,47 @@
1
+ """Shared utilities for skill-creator scripts."""
2
+
3
+ from pathlib import Path
4
+
5
+
6
+
7
def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
    """Parse a SKILL.md file, returning (name, description, full_content).

    Only the ``name:`` and ``description:`` keys of the leading ``---``
    delimited frontmatter are interpreted; this is a minimal line-based
    reader, not a full YAML parser.

    Args:
        skill_path: Directory that contains the ``SKILL.md`` file.

    Returns:
        A ``(name, description, content)`` tuple. ``name`` and ``description``
        are empty strings when the corresponding key is absent. A block-scalar
        description (``>``, ``|`` and their chomping/indentation variants) is
        collapsed into a single space-joined line. ``content`` is the whole
        file text.

    Raises:
        ValueError: If the file does not start with ``---`` or the
            frontmatter is never closed by a second ``---``.
    """
    # Decode explicitly as UTF-8 so non-ASCII descriptions do not depend on
    # the platform's locale encoding.
    content = (skill_path / "SKILL.md").read_text(encoding="utf-8")
    lines = content.split("\n")

    if lines[0].strip() != "---":
        raise ValueError("SKILL.md missing frontmatter (no opening ---)")

    end_idx = None
    for i, line in enumerate(lines[1:], start=1):
        if line.strip() == "---":
            end_idx = i
            break

    if end_idx is None:
        raise ValueError("SKILL.md missing frontmatter (no closing ---)")

    name = ""
    description = ""
    frontmatter_lines = lines[1:end_idx]
    i = 0
    while i < len(frontmatter_lines):
        line = frontmatter_lines[i]
        if line.startswith("name:"):
            name = line[len("name:"):].strip().strip('"').strip("'")
        elif line.startswith("description:"):
            value = line[len("description:"):].strip()
            # YAML block scalar header: ">" or "|" optionally followed by a
            # chomping indicator (+/-) and/or an indentation indicator (1-9).
            is_block_scalar = value[:1] in (">", "|") and all(
                ch in "+-123456789" for ch in value[1:]
            )
            if is_block_scalar:
                continuation_lines: list[str] = []
                i += 1
                while i < len(frontmatter_lines):
                    candidate = frontmatter_lines[i]
                    stripped = candidate.strip()
                    # The scalar ends at the first non-blank line that is not
                    # indented; blank lines inside it only separate paragraphs.
                    if stripped and not candidate.startswith((" ", "\t")):
                        break
                    if stripped:
                        continuation_lines.append(stripped)
                    i += 1
                description = " ".join(continuation_lines)
                # i already points at the next key; skip the increment below.
                continue
            else:
                description = value.strip('"').strip("'")
        i += 1

    return name, description, content
@@ -0,0 +1,132 @@
1
+ ---
2
+ name: sync-docs
3
+ description: >-
4
+ Doc audit and structural sync for xtrm projects. Use whenever the README
5
+ feels too long, docs are out of sync after a sprint, the CHANGELOG is behind,
6
+ or the user asks to "sync docs", "doc audit", "split readme", "check docs
7
+ health", or "detect drift". Reads bd issues and git history, then runs
8
+ docs-only drift detection on README.md, CHANGELOG.md, and docs/ — creating
9
+ missing focused files instead of a monolithic README.
10
+ gemini-command: sync-docs
11
+ version: 1.1.0
12
+ ---
13
+
14
+ # sync-docs
15
+
16
+ Keeps project documentation in sync with code reality.
17
+
18
+ ## Overview
19
+
20
+ ```
21
+ Phase 1: Gather context — what changed recently?
22
+ Phase 2: Detect docs drift — which docs/ files are stale?
23
+ Phase 3: Analyze structure — what belongs outside README?
24
+ Phase 4: Plan + execute — fix docs and changelog
25
+ Phase 5: Validate — schema-check all docs/
26
+ ```
27
+
28
+ **Audit vs Execute mode:** If the user asked for an audit/report/check-only task, stop after Phase 3. Only run fixes when the user explicitly asks for changes.
29
+
30
+ ---
31
+
32
+ ## Phase 1: Gather Context
33
+
34
+ ```bash
35
+ # Global install
36
+ python3 "$HOME/.claude/skills/sync-docs/scripts/context_gatherer.py" [--since=30]
37
+
38
+ # From repository
39
+ python3 "skills/sync-docs/scripts/context_gatherer.py" [--since=30]
40
+ ```
41
+
42
+ Outputs JSON with:
43
+ - recently closed bd issues
44
+ - merged PRs from git history
45
+ - recent commits
46
+ - docs drift report from `sync-docs/scripts/drift_detector.py`
47
+
48
+ ---
49
+
50
+ ## Phase 2: Detect docs/ Drift
51
+
52
+ ```bash
53
+ python3 "skills/sync-docs/scripts/drift_detector.py" scan --since 30
54
+ # optional JSON:
55
+ python3 "skills/sync-docs/scripts/drift_detector.py" scan --since 30 --json
56
+ ```
57
+
58
+ A docs file is stale when frontmatter `source_of_truth_for` (or `tracks`) matches files changed in recent commits.
59
+
60
+ ---
61
+
62
+ ## Phase 3: Analyze Document Structure
63
+
64
+ ```bash
65
+ python3 "skills/sync-docs/scripts/doc_structure_analyzer.py"
66
+ ```
67
+
68
+ Checks:
69
+ 1. README bloat/extractable sections
70
+ 2. CHANGELOG staleness (date + version gap)
71
+ 3. Missing focused docs files
72
+ 4. Invalid docs schema (missing frontmatter)
73
+
74
+ Statuses: `BLOATED`, `EXTRACTABLE`, `MISSING`, `STALE`, `INVALID_SCHEMA`, `OK`.
75
+
76
+ If this is audit-only, stop here and report.
77
+
78
+ ---
79
+
80
+ ## Phase 4: Execute Fixes
81
+
82
+ | Situation | Action |
83
+ |---|---|
84
+ | README bloated | Extract large sections to focused docs files |
85
+ | Missing docs file | Generate scaffold via `validate_doc.py --generate` |
86
+ | Stale docs file | Update content + bump `version` + `updated` |
87
+ | Stale CHANGELOG | Add entry with local changelog script |
88
+ | Invalid schema | Fix frontmatter and regenerate INDEX |
89
+
90
+ ### Auto-fix known gaps
91
+
92
+ ```bash
93
+ python3 "skills/sync-docs/scripts/doc_structure_analyzer.py" --fix
94
+ python3 "skills/sync-docs/scripts/doc_structure_analyzer.py" --fix --bd-remember
95
+ ```
96
+
97
+ ### Create one docs scaffold
98
+
99
+ ```bash
100
+ python3 "skills/sync-docs/scripts/validate_doc.py" --generate docs/hooks.md \
101
+ --title "Hooks Reference" --scope "hooks" --category "reference" \
102
+ --source-for "hooks/**/*.mjs,policies/*.json"
103
+ ```
104
+
105
+ ### Validate and regenerate metadata/index
106
+
107
+ ```bash
108
+ python3 "skills/sync-docs/scripts/validate_metadata.py" docs/
109
+ ```
110
+
111
+ ### Add changelog entry
112
+
113
+ ```bash
114
+ python3 "skills/sync-docs/scripts/changelog/add_entry.py" \
115
+ CHANGELOG.md Added "Describe the documentation update"
116
+ ```
117
+
118
+ ---
119
+
120
+ ## Phase 5: Final Validation
121
+
122
+ ```bash
123
+ python3 "skills/sync-docs/scripts/validate_doc.py" docs/
124
+ python3 "skills/sync-docs/scripts/drift_detector.py" scan --since 30
125
+ ```
126
+
127
+ ---
128
+
129
+ ## docs/ as SSOT
130
+
131
+ `docs/` is the only source of truth for project documentation in this workflow.
132
+ Use frontmatter (`source_of_truth_for`) to link docs pages to code areas and detect drift.
@@ -0,0 +1,89 @@
1
+ {
2
+ "skill_name": "sync-docs",
3
+ "evals": [
4
+ {
5
+ "id": 1,
6
+ "prompt": "I just closed a bunch of bd issues this sprint and merged 3 PRs. The README.md is getting long — can you sync the docs and make sure everything's in order? Use sync-docs.",
7
+ "expected_output": "Runs context_gatherer.py and doc_structure_analyzer.py, reports what was found (closed issues, PRs, any drift), identifies README extraction candidates or MISSING docs/ files, and either fixes them or gives a clear plan with next steps.",
8
+ "files": [],
9
+ "assertions": [
10
+ {
11
+ "id": "ran-context-gatherer",
12
+ "description": "Ran context_gatherer.py and reported bd closed issues or merged PRs from the output",
13
+ "check": "result.md mentions context_gatherer or bd closed issues or merged PRs with specific data"
14
+ },
15
+ {
16
+ "id": "ran-structure-analyzer",
17
+ "description": "Ran doc_structure_analyzer.py and used its output to identify doc issues",
18
+ "check": "result.md references MISSING, STALE, EXTRACTABLE, or BLOATED status from the analyzer"
19
+ },
20
+ {
21
+ "id": "concrete-action",
22
+ "description": "Produced at least one concrete recommendation or action (not just a vague summary)",
23
+ "check": "result.md names a specific file (e.g. docs/hooks.md) or section with a specific next step"
24
+ },
25
+ {
26
+ "id": "used-skill-scripts",
27
+ "description": "Used the skill scripts rather than just reading files manually",
28
+ "check": "result.md shows script execution output, not just manual file reading"
29
+ }
30
+ ]
31
+ },
32
+ {
33
+ "id": 2,
34
+ "prompt": "Run sync-docs --fix on this project and remember what you did with bd.",
35
+ "expected_output": "Runs doc_structure_analyzer.py --fix --bd-remember, creates scaffold files for any missing docs/ subsystems, persists a bd memory with the summary, then validates the created files with validate_doc.py.",
36
+ "files": [],
37
+ "assertions": [
38
+ {
39
+ "id": "ran-fix-flag",
40
+ "description": "Ran doc_structure_analyzer.py with --fix flag",
41
+ "check": "result.md shows the --fix command was executed"
42
+ },
43
+ {
44
+ "id": "ran-bd-remember",
45
+ "description": "Ran with --bd-remember or manually ran bd remember with a summary",
46
+ "check": "result.md shows bd remember was called and reports the memory key"
47
+ },
48
+ {
49
+ "id": "scaffold-created",
50
+ "description": "At least one scaffold file was created in docs/",
51
+ "check": "result.md lists a docs/*.md file created, OR reports no gaps found (valid outcome)"
52
+ },
53
+ {
54
+ "id": "validated-schema",
55
+ "description": "Ran validate_doc.py on created files to confirm schema",
56
+ "check": "result.md shows validate_doc.py was run and reports pass/fail for created files"
57
+ }
58
+ ]
59
+ },
60
+ {
61
+ "id": 3,
62
+ "prompt": "Do a doc audit. I think the README has sections that should be in docs/ but I'm not sure which ones.",
63
+ "expected_output": "Runs the audit phases (1-3) of the sync-docs workflow: gathers context, runs drift detection, runs doc_structure_analyzer.py, and identifies EXTRACTABLE/BLOATED sections with specific suggestions for what goes in which docs/ file, then stops and reports recommendations without making changes.",
64
+ "files": [],
65
+ "assertions": [
66
+ {
67
+ "id": "ran-analyzer",
68
+ "description": "Ran doc_structure_analyzer.py and referenced its structured output",
69
+ "check": "result.md cites the analyzer output (EXTRACTABLE, BLOATED, line count, or specific section names from the report)"
70
+ },
71
+ {
72
+ "id": "named-specific-sections",
73
+ "description": "Named specific README sections with their suggested docs/ destination",
74
+ "check": "result.md lists at least 2 specific sections (e.g. '## Policy System → docs/policies.md') not just generic advice"
75
+ },
76
+ {
77
+ "id": "actionable-report",
78
+ "description": "Report is actionable — tells user exactly what to do next, not just observations",
79
+ "check": "result.md includes a prioritized list or clear next steps, not just 'the README could be shorter'"
80
+ },
81
+ {
82
+ "id": "no-edits-made",
83
+ "description": "Did not edit or create any files (audit only)",
84
+ "check": "result.md does not claim to have modified README.md or created docs/ files"
85
+ }
86
+ ]
87
+ }
88
+ ]
89
+ }