xtrm-tools 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (632) hide show
  1. package/.xtrm/config/README.md +10 -0
  2. package/{config/mcp_servers.json → .xtrm/config/claude.mcp.json} +0 -28
  3. package/{config → .xtrm/config}/instructions/agents-top.md +12 -3
  4. package/{config → .xtrm/config}/instructions/claude-top.md +12 -3
  5. package/{config → .xtrm/config}/pi/extensions/beads/index.ts +44 -13
  6. package/{config → .xtrm/config}/pi/extensions/custom-footer/index.ts +59 -82
  7. package/{config → .xtrm/config}/pi/extensions/xtrm-ui/index.ts +2 -2
  8. package/{config → .xtrm/config}/pi/install-schema.json +2 -2
  9. package/.xtrm/config/pi.mcp.json +39 -0
  10. package/.xtrm/config/settings.json +41 -0
  11. package/.xtrm/extensions/auto-session-name/index.ts +29 -0
  12. package/.xtrm/extensions/auto-session-name/package.json +16 -0
  13. package/.xtrm/extensions/auto-update/index.ts +71 -0
  14. package/.xtrm/extensions/auto-update/package.json +16 -0
  15. package/.xtrm/extensions/beads/index.ts +232 -0
  16. package/.xtrm/extensions/beads/package.json +19 -0
  17. package/.xtrm/extensions/compact-header/index.ts +69 -0
  18. package/.xtrm/extensions/compact-header/package.json +16 -0
  19. package/.xtrm/extensions/core/adapter.ts +52 -0
  20. package/.xtrm/extensions/core/guard-rules.ts +100 -0
  21. package/.xtrm/extensions/core/lib.ts +3 -0
  22. package/.xtrm/extensions/core/logger.ts +45 -0
  23. package/.xtrm/extensions/core/package.json +18 -0
  24. package/.xtrm/extensions/core/runner.ts +71 -0
  25. package/.xtrm/extensions/core/session-state.ts +59 -0
  26. package/.xtrm/extensions/custom-footer/index.ts +398 -0
  27. package/.xtrm/extensions/custom-footer/package.json +19 -0
  28. package/.xtrm/extensions/custom-provider-qwen-cli/index.ts +363 -0
  29. package/.xtrm/extensions/custom-provider-qwen-cli/package.json +1 -0
  30. package/.xtrm/extensions/git-checkpoint/index.ts +53 -0
  31. package/.xtrm/extensions/git-checkpoint/package.json +16 -0
  32. package/.xtrm/extensions/lsp-bootstrap/index.ts +134 -0
  33. package/.xtrm/extensions/lsp-bootstrap/package.json +17 -0
  34. package/.xtrm/extensions/pi-serena-compact/index.ts +121 -0
  35. package/.xtrm/extensions/pi-serena-compact/package.json +16 -0
  36. package/.xtrm/extensions/quality-gates/index.ts +66 -0
  37. package/.xtrm/extensions/quality-gates/package.json +19 -0
  38. package/.xtrm/extensions/service-skills/index.ts +108 -0
  39. package/.xtrm/extensions/service-skills/package.json +19 -0
  40. package/.xtrm/extensions/session-flow/index.ts +96 -0
  41. package/.xtrm/extensions/session-flow/package.json +19 -0
  42. package/.xtrm/extensions/xtrm-loader/index.ts +152 -0
  43. package/.xtrm/extensions/xtrm-loader/package.json +19 -0
  44. package/.xtrm/extensions/xtrm-ui/format.ts +93 -0
  45. package/.xtrm/extensions/xtrm-ui/index.ts +1044 -0
  46. package/.xtrm/extensions/xtrm-ui/package.json +10 -0
  47. package/.xtrm/extensions/xtrm-ui/themes/pidex-dark.json +85 -0
  48. package/.xtrm/extensions/xtrm-ui/themes/pidex-light.json +85 -0
  49. package/{hooks → .xtrm/hooks}/README.md +2 -1
  50. package/{hooks → .xtrm/hooks}/beads-commit-gate.mjs +4 -0
  51. package/.xtrm/hooks/beads-memory-gate.mjs +119 -0
  52. package/{plugins/xtrm-tools → .xtrm}/hooks/quality-check-env.mjs +1 -4
  53. package/.xtrm/hooks/statusline.mjs +156 -0
  54. package/{plugins/xtrm-tools → .xtrm}/hooks/using-xtrm-reminder.mjs +8 -7
  55. package/.xtrm/registry.json +1323 -0
  56. package/CHANGELOG.md +27 -0
  57. package/README.md +3 -1
  58. package/cli/dist/index.cjs +27158 -31320
  59. package/cli/dist/index.cjs.map +1 -1
  60. package/cli/package.json +2 -1
  61. package/package.json +12 -12
  62. package/.claude-plugin/marketplace.json +0 -19
  63. package/.claude-plugin/plugin.json +0 -9
  64. package/config/hooks.json +0 -83
  65. package/config/settings.json +0 -70
  66. package/hooks/beads-memory-gate.mjs +0 -94
  67. package/hooks/quality-check-env.mjs +0 -79
  68. package/hooks/statusline.mjs +0 -183
  69. package/hooks/tsconfig-cache.json +0 -4
  70. package/hooks/using-xtrm-reminder.mjs +0 -47
  71. package/plugins/xtrm-tools/.claude-plugin/plugin.json +0 -9
  72. package/plugins/xtrm-tools/.mcp.json +0 -18
  73. package/plugins/xtrm-tools/hooks/README.md +0 -61
  74. package/plugins/xtrm-tools/hooks/beads-claim-sync.mjs +0 -154
  75. package/plugins/xtrm-tools/hooks/beads-commit-gate.mjs +0 -70
  76. package/plugins/xtrm-tools/hooks/beads-compact-restore.mjs +0 -77
  77. package/plugins/xtrm-tools/hooks/beads-compact-save.mjs +0 -63
  78. package/plugins/xtrm-tools/hooks/beads-edit-gate.mjs +0 -85
  79. package/plugins/xtrm-tools/hooks/beads-gate-core.mjs +0 -236
  80. package/plugins/xtrm-tools/hooks/beads-gate-messages.mjs +0 -75
  81. package/plugins/xtrm-tools/hooks/beads-gate-utils.mjs +0 -176
  82. package/plugins/xtrm-tools/hooks/beads-memory-gate.mjs +0 -94
  83. package/plugins/xtrm-tools/hooks/beads-stop-gate.mjs +0 -53
  84. package/plugins/xtrm-tools/hooks/gitnexus/gitnexus-hook.cjs +0 -222
  85. package/plugins/xtrm-tools/hooks/hooks.json +0 -129
  86. package/plugins/xtrm-tools/hooks/quality-check.cjs +0 -1286
  87. package/plugins/xtrm-tools/hooks/quality-check.py +0 -345
  88. package/plugins/xtrm-tools/hooks/statusline.mjs +0 -183
  89. package/plugins/xtrm-tools/hooks/tsconfig-cache.json +0 -4
  90. package/plugins/xtrm-tools/hooks/worktree-boundary.mjs +0 -33
  91. package/plugins/xtrm-tools/hooks/xtrm-logger.mjs +0 -123
  92. package/plugins/xtrm-tools/hooks/xtrm-session-logger.mjs +0 -27
  93. package/plugins/xtrm-tools/hooks/xtrm-tool-logger.mjs +0 -53
  94. package/plugins/xtrm-tools/skills/README.txt +0 -31
  95. package/plugins/xtrm-tools/skills/clean-code/SKILL.md +0 -201
  96. package/plugins/xtrm-tools/skills/creating-service-skills/SKILL.md +0 -433
  97. package/plugins/xtrm-tools/skills/creating-service-skills/references/script_quality_standards.md +0 -425
  98. package/plugins/xtrm-tools/skills/creating-service-skills/references/service_skill_system_guide.md +0 -278
  99. package/plugins/xtrm-tools/skills/creating-service-skills/scripts/bootstrap.py +0 -326
  100. package/plugins/xtrm-tools/skills/creating-service-skills/scripts/deep_dive.py +0 -304
  101. package/plugins/xtrm-tools/skills/creating-service-skills/scripts/scaffolder.py +0 -482
  102. package/plugins/xtrm-tools/skills/delegating/SKILL.md +0 -196
  103. package/plugins/xtrm-tools/skills/delegating/config.yaml +0 -210
  104. package/plugins/xtrm-tools/skills/delegating/references/orchestration-protocols.md +0 -41
  105. package/plugins/xtrm-tools/skills/docker-expert/SKILL.md +0 -409
  106. package/plugins/xtrm-tools/skills/documenting/CHANGELOG.md +0 -23
  107. package/plugins/xtrm-tools/skills/documenting/README.md +0 -148
  108. package/plugins/xtrm-tools/skills/documenting/SKILL.md +0 -113
  109. package/plugins/xtrm-tools/skills/documenting/examples/example_pattern.md +0 -70
  110. package/plugins/xtrm-tools/skills/documenting/examples/example_reference.md +0 -70
  111. package/plugins/xtrm-tools/skills/documenting/examples/example_ssot_analytics.md +0 -64
  112. package/plugins/xtrm-tools/skills/documenting/examples/example_workflow.md +0 -141
  113. package/plugins/xtrm-tools/skills/documenting/references/changelog-format.md +0 -97
  114. package/plugins/xtrm-tools/skills/documenting/references/metadata-schema.md +0 -136
  115. package/plugins/xtrm-tools/skills/documenting/references/taxonomy.md +0 -81
  116. package/plugins/xtrm-tools/skills/documenting/references/versioning-rules.md +0 -78
  117. package/plugins/xtrm-tools/skills/documenting/scripts/bump_version.sh +0 -60
  118. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/__init__.py +0 -0
  119. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/add_entry.py +0 -216
  120. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/bump_release.py +0 -117
  121. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/init_changelog.py +0 -54
  122. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/validate_changelog.py +0 -128
  123. package/plugins/xtrm-tools/skills/documenting/scripts/drift_detector.py +0 -266
  124. package/plugins/xtrm-tools/skills/documenting/scripts/generate_template.py +0 -311
  125. package/plugins/xtrm-tools/skills/documenting/scripts/list_by_category.sh +0 -84
  126. package/plugins/xtrm-tools/skills/documenting/scripts/orchestrator.py +0 -255
  127. package/plugins/xtrm-tools/skills/documenting/scripts/validate_metadata.py +0 -242
  128. package/plugins/xtrm-tools/skills/documenting/templates/CHANGELOG.md.template +0 -13
  129. package/plugins/xtrm-tools/skills/documenting/tests/integration_test.sh +0 -70
  130. package/plugins/xtrm-tools/skills/documenting/tests/test_changelog.py +0 -201
  131. package/plugins/xtrm-tools/skills/documenting/tests/test_drift_detector.py +0 -80
  132. package/plugins/xtrm-tools/skills/documenting/tests/test_orchestrator.py +0 -52
  133. package/plugins/xtrm-tools/skills/documenting/tests/test_validate_metadata.py +0 -64
  134. package/plugins/xtrm-tools/skills/find-skills/SKILL.md +0 -133
  135. package/plugins/xtrm-tools/skills/gitnexus-exploring/SKILL.md +0 -75
  136. package/plugins/xtrm-tools/skills/gitnexus-impact-analysis/SKILL.md +0 -94
  137. package/plugins/xtrm-tools/skills/gitnexus-refactoring/SKILL.md +0 -113
  138. package/plugins/xtrm-tools/skills/hook-development/SKILL.md +0 -797
  139. package/plugins/xtrm-tools/skills/hook-development/examples/load-context.sh +0 -55
  140. package/plugins/xtrm-tools/skills/hook-development/examples/quality-check.js +0 -1168
  141. package/plugins/xtrm-tools/skills/hook-development/examples/validate-bash.sh +0 -43
  142. package/plugins/xtrm-tools/skills/hook-development/examples/validate-write.sh +0 -38
  143. package/plugins/xtrm-tools/skills/hook-development/references/advanced.md +0 -527
  144. package/plugins/xtrm-tools/skills/hook-development/references/migration.md +0 -369
  145. package/plugins/xtrm-tools/skills/hook-development/references/patterns.md +0 -412
  146. package/plugins/xtrm-tools/skills/hook-development/scripts/README.md +0 -164
  147. package/plugins/xtrm-tools/skills/hook-development/scripts/hook-linter.sh +0 -153
  148. package/plugins/xtrm-tools/skills/hook-development/scripts/test-hook.sh +0 -252
  149. package/plugins/xtrm-tools/skills/hook-development/scripts/validate-hook-schema.sh +0 -159
  150. package/plugins/xtrm-tools/skills/obsidian-cli/SKILL.md +0 -106
  151. package/plugins/xtrm-tools/skills/orchestrating-agents/SKILL.md +0 -135
  152. package/plugins/xtrm-tools/skills/orchestrating-agents/config.yaml +0 -45
  153. package/plugins/xtrm-tools/skills/orchestrating-agents/references/agent-context-integration.md +0 -37
  154. package/plugins/xtrm-tools/skills/orchestrating-agents/references/examples.md +0 -45
  155. package/plugins/xtrm-tools/skills/orchestrating-agents/references/handover-protocol.md +0 -31
  156. package/plugins/xtrm-tools/skills/orchestrating-agents/references/workflows.md +0 -42
  157. package/plugins/xtrm-tools/skills/orchestrating-agents/scripts/detect_neighbors.py +0 -23
  158. package/plugins/xtrm-tools/skills/planning/SKILL.md +0 -405
  159. package/plugins/xtrm-tools/skills/planning/evals/evals.json +0 -19
  160. package/plugins/xtrm-tools/skills/prompt-improving/README.md +0 -162
  161. package/plugins/xtrm-tools/skills/prompt-improving/SKILL.md +0 -74
  162. package/plugins/xtrm-tools/skills/prompt-improving/references/analysis_commands.md +0 -24
  163. package/plugins/xtrm-tools/skills/prompt-improving/references/chain_of_thought.md +0 -24
  164. package/plugins/xtrm-tools/skills/prompt-improving/references/mcp_definitions.md +0 -20
  165. package/plugins/xtrm-tools/skills/prompt-improving/references/multishot.md +0 -23
  166. package/plugins/xtrm-tools/skills/prompt-improving/references/xml_core.md +0 -60
  167. package/plugins/xtrm-tools/skills/python-testing/SKILL.md +0 -815
  168. package/plugins/xtrm-tools/skills/scoping-service-skills/SKILL.md +0 -231
  169. package/plugins/xtrm-tools/skills/scoping-service-skills/scripts/scope.py +0 -74
  170. package/plugins/xtrm-tools/skills/senior-backend/SKILL.md +0 -209
  171. package/plugins/xtrm-tools/skills/senior-backend/references/api_design_patterns.md +0 -103
  172. package/plugins/xtrm-tools/skills/senior-backend/references/backend_security_practices.md +0 -103
  173. package/plugins/xtrm-tools/skills/senior-backend/references/database_optimization_guide.md +0 -103
  174. package/plugins/xtrm-tools/skills/senior-backend/scripts/api_load_tester.py +0 -114
  175. package/plugins/xtrm-tools/skills/senior-backend/scripts/api_scaffolder.py +0 -114
  176. package/plugins/xtrm-tools/skills/senior-backend/scripts/database_migration_tool.py +0 -114
  177. package/plugins/xtrm-tools/skills/senior-data-scientist/SKILL.md +0 -226
  178. package/plugins/xtrm-tools/skills/senior-data-scientist/references/experiment_design_frameworks.md +0 -80
  179. package/plugins/xtrm-tools/skills/senior-data-scientist/references/feature_engineering_patterns.md +0 -80
  180. package/plugins/xtrm-tools/skills/senior-data-scientist/references/statistical_methods_advanced.md +0 -80
  181. package/plugins/xtrm-tools/skills/senior-data-scientist/scripts/experiment_designer.py +0 -100
  182. package/plugins/xtrm-tools/skills/senior-data-scientist/scripts/feature_engineering_pipeline.py +0 -100
  183. package/plugins/xtrm-tools/skills/senior-data-scientist/scripts/model_evaluation_suite.py +0 -100
  184. package/plugins/xtrm-tools/skills/senior-devops/SKILL.md +0 -209
  185. package/plugins/xtrm-tools/skills/senior-devops/references/cicd_pipeline_guide.md +0 -103
  186. package/plugins/xtrm-tools/skills/senior-devops/references/deployment_strategies.md +0 -103
  187. package/plugins/xtrm-tools/skills/senior-devops/references/infrastructure_as_code.md +0 -103
  188. package/plugins/xtrm-tools/skills/senior-devops/scripts/deployment_manager.py +0 -114
  189. package/plugins/xtrm-tools/skills/senior-devops/scripts/pipeline_generator.py +0 -114
  190. package/plugins/xtrm-tools/skills/senior-devops/scripts/terraform_scaffolder.py +0 -114
  191. package/plugins/xtrm-tools/skills/senior-security/SKILL.md +0 -209
  192. package/plugins/xtrm-tools/skills/senior-security/references/cryptography_implementation.md +0 -103
  193. package/plugins/xtrm-tools/skills/senior-security/references/penetration_testing_guide.md +0 -103
  194. package/plugins/xtrm-tools/skills/senior-security/references/security_architecture_patterns.md +0 -103
  195. package/plugins/xtrm-tools/skills/senior-security/scripts/pentest_automator.py +0 -114
  196. package/plugins/xtrm-tools/skills/senior-security/scripts/security_auditor.py +0 -114
  197. package/plugins/xtrm-tools/skills/senior-security/scripts/threat_modeler.py +0 -114
  198. package/plugins/xtrm-tools/skills/skill-creator/LICENSE.txt +0 -202
  199. package/plugins/xtrm-tools/skills/skill-creator/SKILL.md +0 -479
  200. package/plugins/xtrm-tools/skills/skill-creator/agents/analyzer.md +0 -274
  201. package/plugins/xtrm-tools/skills/skill-creator/agents/comparator.md +0 -202
  202. package/plugins/xtrm-tools/skills/skill-creator/agents/grader.md +0 -223
  203. package/plugins/xtrm-tools/skills/skill-creator/assets/eval_review.html +0 -146
  204. package/plugins/xtrm-tools/skills/skill-creator/eval-viewer/generate_review.py +0 -471
  205. package/plugins/xtrm-tools/skills/skill-creator/eval-viewer/viewer.html +0 -1325
  206. package/plugins/xtrm-tools/skills/skill-creator/references/schemas.md +0 -430
  207. package/plugins/xtrm-tools/skills/skill-creator/scripts/__init__.py +0 -0
  208. package/plugins/xtrm-tools/skills/skill-creator/scripts/aggregate_benchmark.py +0 -401
  209. package/plugins/xtrm-tools/skills/skill-creator/scripts/generate_report.py +0 -326
  210. package/plugins/xtrm-tools/skills/skill-creator/scripts/improve_description.py +0 -248
  211. package/plugins/xtrm-tools/skills/skill-creator/scripts/package_skill.py +0 -136
  212. package/plugins/xtrm-tools/skills/skill-creator/scripts/quick_validate.py +0 -103
  213. package/plugins/xtrm-tools/skills/skill-creator/scripts/run_eval.py +0 -310
  214. package/plugins/xtrm-tools/skills/skill-creator/scripts/run_loop.py +0 -332
  215. package/plugins/xtrm-tools/skills/skill-creator/scripts/utils.py +0 -47
  216. package/plugins/xtrm-tools/skills/sync-docs/SKILL.md +0 -286
  217. package/plugins/xtrm-tools/skills/sync-docs/evals/evals.json +0 -89
  218. package/plugins/xtrm-tools/skills/sync-docs/references/doc-structure.md +0 -99
  219. package/plugins/xtrm-tools/skills/sync-docs/references/schema.md +0 -103
  220. package/plugins/xtrm-tools/skills/sync-docs/scripts/changelog/add_entry.py +0 -216
  221. package/plugins/xtrm-tools/skills/sync-docs/scripts/context_gatherer.py +0 -240
  222. package/plugins/xtrm-tools/skills/sync-docs/scripts/doc_structure_analyzer.py +0 -495
  223. package/plugins/xtrm-tools/skills/sync-docs/scripts/drift_detector.py +0 -563
  224. package/plugins/xtrm-tools/skills/sync-docs/scripts/validate_doc.py +0 -365
  225. package/plugins/xtrm-tools/skills/sync-docs/scripts/validate_metadata.py +0 -185
  226. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/benchmark.json +0 -293
  227. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/benchmark.md +0 -13
  228. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +0 -27
  229. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +0 -210
  230. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +0 -28
  231. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  232. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +0 -101
  233. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +0 -28
  234. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  235. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +0 -5
  236. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +0 -27
  237. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +0 -198
  238. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +0 -28
  239. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  240. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +0 -94
  241. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +0 -28
  242. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +0 -1
  243. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +0 -27
  244. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +0 -237
  245. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +0 -28
  246. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  247. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +0 -134
  248. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +0 -28
  249. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +0 -1
  250. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/benchmark.json +0 -297
  251. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/benchmark.md +0 -13
  252. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +0 -27
  253. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +0 -137
  254. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +0 -92
  255. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  256. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +0 -134
  257. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +0 -86
  258. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +0 -1
  259. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +0 -27
  260. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +0 -193
  261. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +0 -72
  262. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  263. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +0 -211
  264. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +0 -91
  265. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  266. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +0 -27
  267. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +0 -182
  268. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  269. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  270. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +0 -222
  271. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +0 -88
  272. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  273. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/benchmark.json +0 -298
  274. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/benchmark.md +0 -13
  275. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +0 -27
  276. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +0 -125
  277. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +0 -97
  278. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +0 -5
  279. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +0 -144
  280. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +0 -78
  281. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  282. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +0 -27
  283. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +0 -104
  284. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +0 -91
  285. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +0 -5
  286. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +0 -79
  287. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +0 -82
  288. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  289. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +0 -27
  290. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +0 -302
  291. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +0 -33
  292. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +0 -114
  293. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +0 -118
  294. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +0 -38
  295. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +0 -158
  296. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  297. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +0 -5
  298. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +0 -71
  299. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +0 -90
  300. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  301. package/plugins/xtrm-tools/skills/test-planning/SKILL.md +0 -465
  302. package/plugins/xtrm-tools/skills/test-planning/evals/evals.json +0 -23
  303. package/plugins/xtrm-tools/skills/updating-service-skills/SKILL.md +0 -136
  304. package/plugins/xtrm-tools/skills/updating-service-skills/scripts/drift_detector.py +0 -222
  305. package/plugins/xtrm-tools/skills/using-quality-gates/SKILL.md +0 -254
  306. package/plugins/xtrm-tools/skills/using-serena-lsp/README.md +0 -8
  307. package/plugins/xtrm-tools/skills/using-serena-lsp/REFERENCE.md +0 -194
  308. package/plugins/xtrm-tools/skills/using-serena-lsp/SKILL.md +0 -82
  309. package/plugins/xtrm-tools/skills/using-service-skills/SKILL.md +0 -108
  310. package/plugins/xtrm-tools/skills/using-service-skills/scripts/cataloger.py +0 -74
  311. package/plugins/xtrm-tools/skills/using-service-skills/scripts/skill_activator.py +0 -152
  312. package/plugins/xtrm-tools/skills/using-service-skills/scripts/test_skill_activator.py +0 -58
  313. package/plugins/xtrm-tools/skills/using-tdd/SKILL.md +0 -410
  314. package/plugins/xtrm-tools/skills/using-xtrm/SKILL.md +0 -127
  315. package/plugins/xtrm-tools/skills/xt-debugging/SKILL.md +0 -149
  316. package/plugins/xtrm-tools/skills/xt-end/SKILL.md +0 -297
  317. package/plugins/xtrm-tools/skills/xt-merge/SKILL.md +0 -313
  318. package/project-skills/quality-gates/.claude/hooks/hook-config.json +0 -66
  319. package/project-skills/quality-gates/.claude/hooks/quality-check.cjs +0 -1286
  320. package/project-skills/quality-gates/.claude/hooks/quality-check.py +0 -334
  321. package/project-skills/quality-gates/.claude/settings.json +0 -3
  322. package/project-skills/quality-gates/.claude/skills/using-quality-gates/SKILL.md +0 -254
  323. package/project-skills/quality-gates/README.md +0 -109
  324. package/project-skills/quality-gates/evals/evals.json +0 -181
  325. package/project-skills/quality-gates/workspace/iteration-1/FINAL-EVAL-SUMMARY.md +0 -75
  326. package/project-skills/quality-gates/workspace/iteration-1/edge-case-auto-fix-verification/with_skill/outputs/response.md +0 -59
  327. package/project-skills/quality-gates/workspace/iteration-1/edge-case-mixed-language-project/with_skill/outputs/response.md +0 -60
  328. package/project-skills/quality-gates/workspace/iteration-1/eval-summary.md +0 -105
  329. package/project-skills/quality-gates/workspace/iteration-1/partial-install-python-only/with_skill/outputs/response.md +0 -93
  330. package/project-skills/quality-gates/workspace/iteration-1/python-refactor-request/with_skill/outputs/response.md +0 -104
  331. package/project-skills/quality-gates/workspace/iteration-1/quality-gate-error-fix/with_skill/outputs/response.md +0 -74
  332. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-general-chat/with_skill/outputs/response.md +0 -18
  333. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-math-question/with_skill/outputs/response.md +0 -18
  334. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-unrelated-coding/with_skill/outputs/response.md +0 -56
  335. package/project-skills/quality-gates/workspace/iteration-1/tdd-guard-blocking-confusion/with_skill/outputs/response.md +0 -67
  336. package/project-skills/quality-gates/workspace/iteration-1/typescript-feature-with-tests/with_skill/outputs/response.md +0 -97
  337. package/project-skills/service-skills-set/.claude/git-hooks/doc_reminder.py +0 -67
  338. package/project-skills/service-skills-set/.claude/git-hooks/skill_staleness.py +0 -194
  339. package/project-skills/service-skills-set/.claude/service-registry.json +0 -4
  340. package/project-skills/service-skills-set/.claude/settings.json +0 -37
  341. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/SKILL.md +0 -433
  342. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/references/script_quality_standards.md +0 -425
  343. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/references/service_skill_system_guide.md +0 -278
  344. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/bootstrap.py +0 -308
  345. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/deep_dive.py +0 -304
  346. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/scaffolder.py +0 -482
  347. package/project-skills/service-skills-set/.claude/skills/scoping-service-skills/SKILL.md +0 -231
  348. package/project-skills/service-skills-set/.claude/skills/scoping-service-skills/scripts/scope.py +0 -74
  349. package/project-skills/service-skills-set/.claude/skills/updating-service-skills/SKILL.md +0 -136
  350. package/project-skills/service-skills-set/.claude/skills/updating-service-skills/scripts/drift_detector.py +0 -222
  351. package/project-skills/service-skills-set/.claude/skills/using-service-skills/SKILL.md +0 -108
  352. package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/cataloger.py +0 -74
  353. package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/skill_activator.py +0 -152
  354. package/project-skills/service-skills-set/README.md +0 -93
  355. package/project-skills/service-skills-set/install-service-skills.py +0 -193
  356. package/project-skills/service-skills-set/service-skills-readme.md +0 -236
  357. package/skills/README.txt +0 -31
  358. package/skills/clean-code/SKILL.md +0 -201
  359. package/skills/creating-service-skills/SKILL.md +0 -433
  360. package/skills/creating-service-skills/references/script_quality_standards.md +0 -425
  361. package/skills/creating-service-skills/references/service_skill_system_guide.md +0 -278
  362. package/skills/creating-service-skills/scripts/bootstrap.py +0 -326
  363. package/skills/creating-service-skills/scripts/deep_dive.py +0 -304
  364. package/skills/creating-service-skills/scripts/scaffolder.py +0 -482
  365. package/skills/delegating/SKILL.md +0 -196
  366. package/skills/delegating/config.yaml +0 -210
  367. package/skills/delegating/references/orchestration-protocols.md +0 -41
  368. package/skills/docker-expert/SKILL.md +0 -409
  369. package/skills/documenting/CHANGELOG.md +0 -23
  370. package/skills/documenting/README.md +0 -148
  371. package/skills/documenting/SKILL.md +0 -113
  372. package/skills/documenting/examples/example_pattern.md +0 -70
  373. package/skills/documenting/examples/example_reference.md +0 -70
  374. package/skills/documenting/examples/example_ssot_analytics.md +0 -64
  375. package/skills/documenting/examples/example_workflow.md +0 -141
  376. package/skills/documenting/references/changelog-format.md +0 -97
  377. package/skills/documenting/references/metadata-schema.md +0 -136
  378. package/skills/documenting/references/taxonomy.md +0 -81
  379. package/skills/documenting/references/versioning-rules.md +0 -78
  380. package/skills/documenting/scripts/bump_version.sh +0 -60
  381. package/skills/documenting/scripts/changelog/__init__.py +0 -0
  382. package/skills/documenting/scripts/changelog/add_entry.py +0 -216
  383. package/skills/documenting/scripts/changelog/bump_release.py +0 -117
  384. package/skills/documenting/scripts/changelog/init_changelog.py +0 -54
  385. package/skills/documenting/scripts/changelog/validate_changelog.py +0 -128
  386. package/skills/documenting/scripts/drift_detector.py +0 -266
  387. package/skills/documenting/scripts/generate_template.py +0 -311
  388. package/skills/documenting/scripts/list_by_category.sh +0 -84
  389. package/skills/documenting/scripts/orchestrator.py +0 -255
  390. package/skills/documenting/scripts/validate_metadata.py +0 -242
  391. package/skills/documenting/templates/CHANGELOG.md.template +0 -13
  392. package/skills/find-skills/SKILL.md +0 -133
  393. package/skills/gitnexus-exploring/SKILL.md +0 -75
  394. package/skills/gitnexus-impact-analysis/SKILL.md +0 -94
  395. package/skills/gitnexus-refactoring/SKILL.md +0 -113
  396. package/skills/hook-development/SKILL.md +0 -797
  397. package/skills/hook-development/examples/load-context.sh +0 -55
  398. package/skills/hook-development/examples/quality-check.js +0 -1168
  399. package/skills/hook-development/examples/validate-bash.sh +0 -43
  400. package/skills/hook-development/examples/validate-write.sh +0 -38
  401. package/skills/hook-development/references/advanced.md +0 -527
  402. package/skills/hook-development/references/migration.md +0 -369
  403. package/skills/hook-development/references/patterns.md +0 -412
  404. package/skills/hook-development/scripts/README.md +0 -164
  405. package/skills/hook-development/scripts/hook-linter.sh +0 -153
  406. package/skills/hook-development/scripts/test-hook.sh +0 -252
  407. package/skills/hook-development/scripts/validate-hook-schema.sh +0 -159
  408. package/skills/obsidian-cli/SKILL.md +0 -106
  409. package/skills/orchestrating-agents/SKILL.md +0 -135
  410. package/skills/orchestrating-agents/config.yaml +0 -45
  411. package/skills/orchestrating-agents/references/agent-context-integration.md +0 -37
  412. package/skills/orchestrating-agents/references/examples.md +0 -45
  413. package/skills/orchestrating-agents/references/handover-protocol.md +0 -31
  414. package/skills/orchestrating-agents/references/workflows.md +0 -42
  415. package/skills/orchestrating-agents/scripts/detect_neighbors.py +0 -23
  416. package/skills/planning/SKILL.md +0 -405
  417. package/skills/planning/evals/evals.json +0 -19
  418. package/skills/prompt-improving/README.md +0 -162
  419. package/skills/prompt-improving/SKILL.md +0 -74
  420. package/skills/prompt-improving/references/analysis_commands.md +0 -24
  421. package/skills/prompt-improving/references/chain_of_thought.md +0 -24
  422. package/skills/prompt-improving/references/mcp_definitions.md +0 -20
  423. package/skills/prompt-improving/references/multishot.md +0 -23
  424. package/skills/prompt-improving/references/xml_core.md +0 -60
  425. package/skills/python-testing/SKILL.md +0 -815
  426. package/skills/scoping-service-skills/SKILL.md +0 -231
  427. package/skills/scoping-service-skills/scripts/scope.py +0 -74
  428. package/skills/senior-backend/SKILL.md +0 -209
  429. package/skills/senior-backend/references/api_design_patterns.md +0 -103
  430. package/skills/senior-backend/references/backend_security_practices.md +0 -103
  431. package/skills/senior-backend/references/database_optimization_guide.md +0 -103
  432. package/skills/senior-backend/scripts/api_load_tester.py +0 -114
  433. package/skills/senior-backend/scripts/api_scaffolder.py +0 -114
  434. package/skills/senior-backend/scripts/database_migration_tool.py +0 -114
  435. package/skills/senior-data-scientist/SKILL.md +0 -226
  436. package/skills/senior-data-scientist/references/experiment_design_frameworks.md +0 -80
  437. package/skills/senior-data-scientist/references/feature_engineering_patterns.md +0 -80
  438. package/skills/senior-data-scientist/references/statistical_methods_advanced.md +0 -80
  439. package/skills/senior-data-scientist/scripts/experiment_designer.py +0 -100
  440. package/skills/senior-data-scientist/scripts/feature_engineering_pipeline.py +0 -100
  441. package/skills/senior-data-scientist/scripts/model_evaluation_suite.py +0 -100
  442. package/skills/senior-devops/SKILL.md +0 -209
  443. package/skills/senior-devops/references/cicd_pipeline_guide.md +0 -103
  444. package/skills/senior-devops/references/deployment_strategies.md +0 -103
  445. package/skills/senior-devops/references/infrastructure_as_code.md +0 -103
  446. package/skills/senior-devops/scripts/deployment_manager.py +0 -114
  447. package/skills/senior-devops/scripts/pipeline_generator.py +0 -114
  448. package/skills/senior-devops/scripts/terraform_scaffolder.py +0 -114
  449. package/skills/senior-security/SKILL.md +0 -209
  450. package/skills/senior-security/references/cryptography_implementation.md +0 -103
  451. package/skills/senior-security/references/penetration_testing_guide.md +0 -103
  452. package/skills/senior-security/references/security_architecture_patterns.md +0 -103
  453. package/skills/senior-security/scripts/pentest_automator.py +0 -114
  454. package/skills/senior-security/scripts/security_auditor.py +0 -114
  455. package/skills/senior-security/scripts/threat_modeler.py +0 -114
  456. package/skills/skill-creator/LICENSE.txt +0 -202
  457. package/skills/skill-creator/SKILL.md +0 -479
  458. package/skills/skill-creator/agents/analyzer.md +0 -274
  459. package/skills/skill-creator/agents/comparator.md +0 -202
  460. package/skills/skill-creator/agents/grader.md +0 -223
  461. package/skills/skill-creator/assets/eval_review.html +0 -146
  462. package/skills/skill-creator/eval-viewer/generate_review.py +0 -471
  463. package/skills/skill-creator/eval-viewer/viewer.html +0 -1325
  464. package/skills/skill-creator/references/schemas.md +0 -430
  465. package/skills/skill-creator/scripts/__init__.py +0 -0
  466. package/skills/skill-creator/scripts/aggregate_benchmark.py +0 -401
  467. package/skills/skill-creator/scripts/generate_report.py +0 -326
  468. package/skills/skill-creator/scripts/improve_description.py +0 -248
  469. package/skills/skill-creator/scripts/package_skill.py +0 -136
  470. package/skills/skill-creator/scripts/quick_validate.py +0 -103
  471. package/skills/skill-creator/scripts/run_eval.py +0 -310
  472. package/skills/skill-creator/scripts/run_loop.py +0 -332
  473. package/skills/skill-creator/scripts/utils.py +0 -47
  474. package/skills/sync-docs/SKILL.md +0 -286
  475. package/skills/sync-docs/evals/evals.json +0 -89
  476. package/skills/sync-docs/references/doc-structure.md +0 -99
  477. package/skills/sync-docs/references/schema.md +0 -103
  478. package/skills/sync-docs/scripts/changelog/add_entry.py +0 -216
  479. package/skills/sync-docs/scripts/context_gatherer.py +0 -240
  480. package/skills/sync-docs/scripts/doc_structure_analyzer.py +0 -495
  481. package/skills/sync-docs/scripts/drift_detector.py +0 -563
  482. package/skills/sync-docs/scripts/validate_doc.py +0 -365
  483. package/skills/sync-docs/scripts/validate_metadata.py +0 -185
  484. package/skills/sync-docs-workspace/iteration-1/benchmark.json +0 -293
  485. package/skills/sync-docs-workspace/iteration-1/benchmark.md +0 -13
  486. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +0 -27
  487. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +0 -210
  488. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +0 -28
  489. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  490. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +0 -101
  491. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +0 -28
  492. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  493. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +0 -5
  494. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +0 -27
  495. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +0 -198
  496. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +0 -28
  497. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  498. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +0 -94
  499. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +0 -28
  500. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +0 -1
  501. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +0 -27
  502. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +0 -237
  503. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +0 -28
  504. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  505. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +0 -134
  506. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +0 -28
  507. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +0 -1
  508. package/skills/sync-docs-workspace/iteration-2/benchmark.json +0 -297
  509. package/skills/sync-docs-workspace/iteration-2/benchmark.md +0 -13
  510. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +0 -27
  511. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +0 -137
  512. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +0 -92
  513. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  514. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +0 -134
  515. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +0 -86
  516. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +0 -1
  517. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +0 -27
  518. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +0 -193
  519. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +0 -72
  520. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  521. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +0 -211
  522. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +0 -91
  523. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  524. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +0 -27
  525. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +0 -182
  526. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  527. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  528. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +0 -222
  529. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +0 -88
  530. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  531. package/skills/sync-docs-workspace/iteration-3/benchmark.json +0 -298
  532. package/skills/sync-docs-workspace/iteration-3/benchmark.md +0 -13
  533. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +0 -27
  534. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +0 -125
  535. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +0 -97
  536. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +0 -5
  537. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +0 -144
  538. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +0 -78
  539. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  540. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +0 -27
  541. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +0 -104
  542. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +0 -91
  543. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +0 -5
  544. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +0 -79
  545. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +0 -82
  546. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  547. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +0 -27
  548. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +0 -302
  549. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +0 -33
  550. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +0 -114
  551. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +0 -118
  552. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +0 -38
  553. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +0 -158
  554. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  555. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +0 -5
  556. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +0 -71
  557. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +0 -90
  558. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  559. package/skills/test-planning/SKILL.md +0 -465
  560. package/skills/test-planning/evals/evals.json +0 -23
  561. package/skills/updating-service-skills/SKILL.md +0 -136
  562. package/skills/updating-service-skills/scripts/drift_detector.py +0 -222
  563. package/skills/using-quality-gates/SKILL.md +0 -254
  564. package/skills/using-serena-lsp/README.md +0 -8
  565. package/skills/using-serena-lsp/REFERENCE.md +0 -194
  566. package/skills/using-serena-lsp/SKILL.md +0 -82
  567. package/skills/using-service-skills/SKILL.md +0 -108
  568. package/skills/using-service-skills/scripts/cataloger.py +0 -74
  569. package/skills/using-service-skills/scripts/skill_activator.py +0 -152
  570. package/skills/using-service-skills/scripts/test_skill_activator.py +0 -58
  571. package/skills/using-tdd/SKILL.md +0 -410
  572. package/skills/using-xtrm/SKILL.md +0 -127
  573. package/skills/xt-debugging/SKILL.md +0 -149
  574. package/skills/xt-end/SKILL.md +0 -297
  575. package/skills/xt-merge/SKILL.md +0 -313
  576. /package/{config → .xtrm/config}/.env.example +0 -0
  577. /package/{config/mcp_servers_optional.json → .xtrm/config/claude.mcp.optional.json} +0 -0
  578. /package/{hooks → .xtrm/config}/hooks.json +0 -0
  579. /package/{config → .xtrm/config}/pi/auth.json.template +0 -0
  580. /package/{config → .xtrm/config}/pi/extensions/auto-session-name/index.ts +0 -0
  581. /package/{config → .xtrm/config}/pi/extensions/auto-session-name/package.json +0 -0
  582. /package/{config → .xtrm/config}/pi/extensions/auto-update/index.ts +0 -0
  583. /package/{config → .xtrm/config}/pi/extensions/auto-update/package.json +0 -0
  584. /package/{config → .xtrm/config}/pi/extensions/beads/package.json +0 -0
  585. /package/{config → .xtrm/config}/pi/extensions/compact-header/index.ts +0 -0
  586. /package/{config → .xtrm/config}/pi/extensions/compact-header/package.json +0 -0
  587. /package/{config → .xtrm/config}/pi/extensions/core/adapter.ts +0 -0
  588. /package/{config → .xtrm/config}/pi/extensions/core/guard-rules.ts +0 -0
  589. /package/{config → .xtrm/config}/pi/extensions/core/lib.ts +0 -0
  590. /package/{config → .xtrm/config}/pi/extensions/core/logger.ts +0 -0
  591. /package/{config → .xtrm/config}/pi/extensions/core/package.json +0 -0
  592. /package/{config → .xtrm/config}/pi/extensions/core/runner.ts +0 -0
  593. /package/{config → .xtrm/config}/pi/extensions/core/session-state.ts +0 -0
  594. /package/{config → .xtrm/config}/pi/extensions/custom-footer/package.json +0 -0
  595. /package/{config → .xtrm/config}/pi/extensions/custom-provider-qwen-cli/index.ts +0 -0
  596. /package/{config → .xtrm/config}/pi/extensions/custom-provider-qwen-cli/package.json +0 -0
  597. /package/{config → .xtrm/config}/pi/extensions/git-checkpoint/index.ts +0 -0
  598. /package/{config → .xtrm/config}/pi/extensions/git-checkpoint/package.json +0 -0
  599. /package/{config → .xtrm/config}/pi/extensions/lsp-bootstrap/index.ts +0 -0
  600. /package/{config → .xtrm/config}/pi/extensions/lsp-bootstrap/package.json +0 -0
  601. /package/{config → .xtrm/config}/pi/extensions/pi-serena-compact/index.ts +0 -0
  602. /package/{config → .xtrm/config}/pi/extensions/pi-serena-compact/package.json +0 -0
  603. /package/{config → .xtrm/config}/pi/extensions/quality-gates/index.ts +0 -0
  604. /package/{config → .xtrm/config}/pi/extensions/quality-gates/package.json +0 -0
  605. /package/{config → .xtrm/config}/pi/extensions/service-skills/index.ts +0 -0
  606. /package/{config → .xtrm/config}/pi/extensions/service-skills/package.json +0 -0
  607. /package/{config → .xtrm/config}/pi/extensions/session-flow/index.ts +0 -0
  608. /package/{config → .xtrm/config}/pi/extensions/session-flow/package.json +0 -0
  609. /package/{config → .xtrm/config}/pi/extensions/xtrm-loader/index.ts +0 -0
  610. /package/{config → .xtrm/config}/pi/extensions/xtrm-loader/package.json +0 -0
  611. /package/{config → .xtrm/config}/pi/extensions/xtrm-ui/format.ts +0 -0
  612. /package/{config → .xtrm/config}/pi/extensions/xtrm-ui/package.json +0 -0
  613. /package/{config → .xtrm/config}/pi/extensions/xtrm-ui/themes/pidex-dark.json +0 -0
  614. /package/{config → .xtrm/config}/pi/extensions/xtrm-ui/themes/pidex-light.json +0 -0
  615. /package/{config → .xtrm/config}/pi/models.json.template +0 -0
  616. /package/{config → .xtrm/config}/pi/pi-worktrees-settings.json +0 -0
  617. /package/{config → .xtrm/config}/pi/settings.json.template +0 -0
  618. /package/{hooks → .xtrm/hooks}/beads-claim-sync.mjs +0 -0
  619. /package/{hooks → .xtrm/hooks}/beads-compact-restore.mjs +0 -0
  620. /package/{hooks → .xtrm/hooks}/beads-compact-save.mjs +0 -0
  621. /package/{hooks → .xtrm/hooks}/beads-edit-gate.mjs +0 -0
  622. /package/{hooks → .xtrm/hooks}/beads-gate-core.mjs +0 -0
  623. /package/{hooks → .xtrm/hooks}/beads-gate-messages.mjs +0 -0
  624. /package/{hooks → .xtrm/hooks}/beads-gate-utils.mjs +0 -0
  625. /package/{hooks → .xtrm/hooks}/beads-stop-gate.mjs +0 -0
  626. /package/{hooks → .xtrm/hooks}/gitnexus/gitnexus-hook.cjs +0 -0
  627. /package/{hooks → .xtrm/hooks}/quality-check.cjs +0 -0
  628. /package/{hooks → .xtrm/hooks}/quality-check.py +0 -0
  629. /package/{hooks → .xtrm/hooks}/worktree-boundary.mjs +0 -0
  630. /package/{hooks → .xtrm/hooks}/xtrm-logger.mjs +0 -0
  631. /package/{hooks → .xtrm/hooks}/xtrm-session-logger.mjs +0 -0
  632. /package/{hooks → .xtrm/hooks}/xtrm-tool-logger.mjs +0 -0
@@ -1,401 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Aggregate individual run results into benchmark summary statistics.
4
-
5
- Reads grading.json files from run directories and produces:
6
- - run_summary with mean, stddev, min, max for each metric
7
- - delta between with_skill and without_skill configurations
8
-
9
- Usage:
10
- python aggregate_benchmark.py <benchmark_dir>
11
-
12
- Example:
13
- python aggregate_benchmark.py benchmarks/2026-01-15T10-30-00/
14
-
15
- The script supports two directory layouts:
16
-
17
- Workspace layout (from skill-creator iterations):
18
- <benchmark_dir>/
19
- └── eval-N/
20
- ├── with_skill/
21
- │ ├── run-1/grading.json
22
- │ └── run-2/grading.json
23
- └── without_skill/
24
- ├── run-1/grading.json
25
- └── run-2/grading.json
26
-
27
- Legacy layout (with runs/ subdirectory):
28
- <benchmark_dir>/
29
- └── runs/
30
- └── eval-N/
31
- ├── with_skill/
32
- │ └── run-1/grading.json
33
- └── without_skill/
34
- └── run-1/grading.json
35
- """
36
-
37
- import argparse
38
- import json
39
- import math
40
- import sys
41
- from datetime import datetime, timezone
42
- from pathlib import Path
43
-
44
-
45
- def calculate_stats(values: list[float]) -> dict:
46
- """Calculate mean, stddev, min, max for a list of values."""
47
- if not values:
48
- return {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0}
49
-
50
- n = len(values)
51
- mean = sum(values) / n
52
-
53
- if n > 1:
54
- variance = sum((x - mean) ** 2 for x in values) / (n - 1)
55
- stddev = math.sqrt(variance)
56
- else:
57
- stddev = 0.0
58
-
59
- return {
60
- "mean": round(mean, 4),
61
- "stddev": round(stddev, 4),
62
- "min": round(min(values), 4),
63
- "max": round(max(values), 4)
64
- }
65
-
66
-
67
- def load_run_results(benchmark_dir: Path) -> dict:
68
- """
69
- Load all run results from a benchmark directory.
70
-
71
- Returns dict keyed by config name (e.g. "with_skill"/"without_skill",
72
- or "new_skill"/"old_skill"), each containing a list of run results.
73
- """
74
- # Support both layouts: eval dirs directly under benchmark_dir, or under runs/
75
- runs_dir = benchmark_dir / "runs"
76
- if runs_dir.exists():
77
- search_dir = runs_dir
78
- elif list(benchmark_dir.glob("eval-*")):
79
- search_dir = benchmark_dir
80
- else:
81
- print(f"No eval directories found in {benchmark_dir} or {benchmark_dir / 'runs'}")
82
- return {}
83
-
84
- results: dict[str, list] = {}
85
-
86
- for eval_idx, eval_dir in enumerate(sorted(search_dir.glob("eval-*"))):
87
- metadata_path = eval_dir / "eval_metadata.json"
88
- if metadata_path.exists():
89
- try:
90
- with open(metadata_path) as mf:
91
- eval_id = json.load(mf).get("eval_id", eval_idx)
92
- except (json.JSONDecodeError, OSError):
93
- eval_id = eval_idx
94
- else:
95
- try:
96
- eval_id = int(eval_dir.name.split("-")[1])
97
- except ValueError:
98
- eval_id = eval_idx
99
-
100
- # Discover config directories dynamically rather than hardcoding names
101
- for config_dir in sorted(eval_dir.iterdir()):
102
- if not config_dir.is_dir():
103
- continue
104
- # Skip non-config directories (inputs, outputs, etc.)
105
- if not list(config_dir.glob("run-*")):
106
- continue
107
- config = config_dir.name
108
- if config not in results:
109
- results[config] = []
110
-
111
- for run_dir in sorted(config_dir.glob("run-*")):
112
- run_number = int(run_dir.name.split("-")[1])
113
- grading_file = run_dir / "grading.json"
114
-
115
- if not grading_file.exists():
116
- print(f"Warning: grading.json not found in {run_dir}")
117
- continue
118
-
119
- try:
120
- with open(grading_file) as f:
121
- grading = json.load(f)
122
- except json.JSONDecodeError as e:
123
- print(f"Warning: Invalid JSON in {grading_file}: {e}")
124
- continue
125
-
126
- # Extract metrics
127
- result = {
128
- "eval_id": eval_id,
129
- "run_number": run_number,
130
- "pass_rate": grading.get("summary", {}).get("pass_rate", 0.0),
131
- "passed": grading.get("summary", {}).get("passed", 0),
132
- "failed": grading.get("summary", {}).get("failed", 0),
133
- "total": grading.get("summary", {}).get("total", 0),
134
- }
135
-
136
- # Extract timing — check grading.json first, then sibling timing.json
137
- timing = grading.get("timing", {})
138
- result["time_seconds"] = timing.get("total_duration_seconds", 0.0)
139
- timing_file = run_dir / "timing.json"
140
- if result["time_seconds"] == 0.0 and timing_file.exists():
141
- try:
142
- with open(timing_file) as tf:
143
- timing_data = json.load(tf)
144
- result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0)
145
- result["tokens"] = timing_data.get("total_tokens", 0)
146
- except json.JSONDecodeError:
147
- pass
148
-
149
- # Extract metrics if available
150
- metrics = grading.get("execution_metrics", {})
151
- result["tool_calls"] = metrics.get("total_tool_calls", 0)
152
- if not result.get("tokens"):
153
- result["tokens"] = metrics.get("output_chars", 0)
154
- result["errors"] = metrics.get("errors_encountered", 0)
155
-
156
- # Extract expectations — viewer requires fields: text, passed, evidence
157
- raw_expectations = grading.get("expectations", [])
158
- for exp in raw_expectations:
159
- if "text" not in exp or "passed" not in exp:
160
- print(f"Warning: expectation in {grading_file} missing required fields (text, passed, evidence): {exp}")
161
- result["expectations"] = raw_expectations
162
-
163
- # Extract notes from user_notes_summary
164
- notes_summary = grading.get("user_notes_summary", {})
165
- notes = []
166
- notes.extend(notes_summary.get("uncertainties", []))
167
- notes.extend(notes_summary.get("needs_review", []))
168
- notes.extend(notes_summary.get("workarounds", []))
169
- result["notes"] = notes
170
-
171
- results[config].append(result)
172
-
173
- return results
174
-
175
-
176
- def aggregate_results(results: dict) -> dict:
177
- """
178
- Aggregate run results into summary statistics.
179
-
180
- Returns run_summary with stats for each configuration and delta.
181
- """
182
- run_summary = {}
183
- configs = list(results.keys())
184
-
185
- for config in configs:
186
- runs = results.get(config, [])
187
-
188
- if not runs:
189
- run_summary[config] = {
190
- "pass_rate": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
191
- "time_seconds": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
192
- "tokens": {"mean": 0, "stddev": 0, "min": 0, "max": 0}
193
- }
194
- continue
195
-
196
- pass_rates = [r["pass_rate"] for r in runs]
197
- times = [r["time_seconds"] for r in runs]
198
- tokens = [r.get("tokens", 0) for r in runs]
199
-
200
- run_summary[config] = {
201
- "pass_rate": calculate_stats(pass_rates),
202
- "time_seconds": calculate_stats(times),
203
- "tokens": calculate_stats(tokens)
204
- }
205
-
206
- # Calculate delta between the first two configs (if two exist)
207
- if len(configs) >= 2:
208
- primary = run_summary.get(configs[0], {})
209
- baseline = run_summary.get(configs[1], {})
210
- else:
211
- primary = run_summary.get(configs[0], {}) if configs else {}
212
- baseline = {}
213
-
214
- delta_pass_rate = primary.get("pass_rate", {}).get("mean", 0) - baseline.get("pass_rate", {}).get("mean", 0)
215
- delta_time = primary.get("time_seconds", {}).get("mean", 0) - baseline.get("time_seconds", {}).get("mean", 0)
216
- delta_tokens = primary.get("tokens", {}).get("mean", 0) - baseline.get("tokens", {}).get("mean", 0)
217
-
218
- run_summary["delta"] = {
219
- "pass_rate": f"{delta_pass_rate:+.2f}",
220
- "time_seconds": f"{delta_time:+.1f}",
221
- "tokens": f"{delta_tokens:+.0f}"
222
- }
223
-
224
- return run_summary
225
-
226
-
227
- def generate_benchmark(benchmark_dir: Path, skill_name: str = "", skill_path: str = "") -> dict:
228
- """
229
- Generate complete benchmark.json from run results.
230
- """
231
- results = load_run_results(benchmark_dir)
232
- run_summary = aggregate_results(results)
233
-
234
- # Build runs array for benchmark.json
235
- runs = []
236
- for config in results:
237
- for result in results[config]:
238
- runs.append({
239
- "eval_id": result["eval_id"],
240
- "configuration": config,
241
- "run_number": result["run_number"],
242
- "result": {
243
- "pass_rate": result["pass_rate"],
244
- "passed": result["passed"],
245
- "failed": result["failed"],
246
- "total": result["total"],
247
- "time_seconds": result["time_seconds"],
248
- "tokens": result.get("tokens", 0),
249
- "tool_calls": result.get("tool_calls", 0),
250
- "errors": result.get("errors", 0)
251
- },
252
- "expectations": result["expectations"],
253
- "notes": result["notes"]
254
- })
255
-
256
- # Determine eval IDs from results
257
- eval_ids = sorted(set(
258
- r["eval_id"]
259
- for config in results.values()
260
- for r in config
261
- ))
262
-
263
- benchmark = {
264
- "metadata": {
265
- "skill_name": skill_name or "<skill-name>",
266
- "skill_path": skill_path or "<path/to/skill>",
267
- "executor_model": "<model-name>",
268
- "analyzer_model": "<model-name>",
269
- "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
270
- "evals_run": eval_ids,
271
- "runs_per_configuration": 3
272
- },
273
- "runs": runs,
274
- "run_summary": run_summary,
275
- "notes": [] # To be filled by analyzer
276
- }
277
-
278
- return benchmark
279
-
280
-
281
- def generate_markdown(benchmark: dict) -> str:
282
- """Generate human-readable benchmark.md from benchmark data."""
283
- metadata = benchmark["metadata"]
284
- run_summary = benchmark["run_summary"]
285
-
286
- # Determine config names (excluding "delta")
287
- configs = [k for k in run_summary if k != "delta"]
288
- config_a = configs[0] if len(configs) >= 1 else "config_a"
289
- config_b = configs[1] if len(configs) >= 2 else "config_b"
290
- label_a = config_a.replace("_", " ").title()
291
- label_b = config_b.replace("_", " ").title()
292
-
293
- lines = [
294
- f"# Skill Benchmark: {metadata['skill_name']}",
295
- "",
296
- f"**Model**: {metadata['executor_model']}",
297
- f"**Date**: {metadata['timestamp']}",
298
- f"**Evals**: {', '.join(map(str, metadata['evals_run']))} ({metadata['runs_per_configuration']} runs each per configuration)",
299
- "",
300
- "## Summary",
301
- "",
302
- f"| Metric | {label_a} | {label_b} | Delta |",
303
- "|--------|------------|---------------|-------|",
304
- ]
305
-
306
- a_summary = run_summary.get(config_a, {})
307
- b_summary = run_summary.get(config_b, {})
308
- delta = run_summary.get("delta", {})
309
-
310
- # Format pass rate
311
- a_pr = a_summary.get("pass_rate", {})
312
- b_pr = b_summary.get("pass_rate", {})
313
- lines.append(f"| Pass Rate | {a_pr.get('mean', 0)*100:.0f}% ± {a_pr.get('stddev', 0)*100:.0f}% | {b_pr.get('mean', 0)*100:.0f}% ± {b_pr.get('stddev', 0)*100:.0f}% | {delta.get('pass_rate', '—')} |")
314
-
315
- # Format time
316
- a_time = a_summary.get("time_seconds", {})
317
- b_time = b_summary.get("time_seconds", {})
318
- lines.append(f"| Time | {a_time.get('mean', 0):.1f}s ± {a_time.get('stddev', 0):.1f}s | {b_time.get('mean', 0):.1f}s ± {b_time.get('stddev', 0):.1f}s | {delta.get('time_seconds', '—')}s |")
319
-
320
- # Format tokens
321
- a_tokens = a_summary.get("tokens", {})
322
- b_tokens = b_summary.get("tokens", {})
323
- lines.append(f"| Tokens | {a_tokens.get('mean', 0):.0f} ± {a_tokens.get('stddev', 0):.0f} | {b_tokens.get('mean', 0):.0f} ± {b_tokens.get('stddev', 0):.0f} | {delta.get('tokens', '—')} |")
324
-
325
- # Notes section
326
- if benchmark.get("notes"):
327
- lines.extend([
328
- "",
329
- "## Notes",
330
- ""
331
- ])
332
- for note in benchmark["notes"]:
333
- lines.append(f"- {note}")
334
-
335
- return "\n".join(lines)
336
-
337
-
338
- def main():
339
- parser = argparse.ArgumentParser(
340
- description="Aggregate benchmark run results into summary statistics"
341
- )
342
- parser.add_argument(
343
- "benchmark_dir",
344
- type=Path,
345
- help="Path to the benchmark directory"
346
- )
347
- parser.add_argument(
348
- "--skill-name",
349
- default="",
350
- help="Name of the skill being benchmarked"
351
- )
352
- parser.add_argument(
353
- "--skill-path",
354
- default="",
355
- help="Path to the skill being benchmarked"
356
- )
357
- parser.add_argument(
358
- "--output", "-o",
359
- type=Path,
360
- help="Output path for benchmark.json (default: <benchmark_dir>/benchmark.json)"
361
- )
362
-
363
- args = parser.parse_args()
364
-
365
- if not args.benchmark_dir.exists():
366
- print(f"Directory not found: {args.benchmark_dir}")
367
- sys.exit(1)
368
-
369
- # Generate benchmark
370
- benchmark = generate_benchmark(args.benchmark_dir, args.skill_name, args.skill_path)
371
-
372
- # Determine output paths
373
- output_json = args.output or (args.benchmark_dir / "benchmark.json")
374
- output_md = output_json.with_suffix(".md")
375
-
376
- # Write benchmark.json
377
- with open(output_json, "w") as f:
378
- json.dump(benchmark, f, indent=2)
379
- print(f"Generated: {output_json}")
380
-
381
- # Write benchmark.md
382
- markdown = generate_markdown(benchmark)
383
- with open(output_md, "w") as f:
384
- f.write(markdown)
385
- print(f"Generated: {output_md}")
386
-
387
- # Print summary
388
- run_summary = benchmark["run_summary"]
389
- configs = [k for k in run_summary if k != "delta"]
390
- delta = run_summary.get("delta", {})
391
-
392
- print(f"\nSummary:")
393
- for config in configs:
394
- pr = run_summary[config]["pass_rate"]["mean"]
395
- label = config.replace("_", " ").title()
396
- print(f" {label}: {pr*100:.1f}% pass rate")
397
- print(f" Delta: {delta.get('pass_rate', '—')}")
398
-
399
-
400
- if __name__ == "__main__":
401
- main()