xtrm-tools 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (633) hide show
  1. package/.xtrm/config/README.md +10 -0
  2. package/{config/mcp_servers.json → .xtrm/config/claude.mcp.json} +0 -28
  3. package/{config → .xtrm/config}/instructions/agents-top.md +12 -3
  4. package/{config → .xtrm/config}/instructions/claude-top.md +12 -3
  5. package/{config → .xtrm/config}/pi/extensions/beads/index.ts +44 -13
  6. package/{config → .xtrm/config}/pi/extensions/custom-footer/index.ts +59 -82
  7. package/{config → .xtrm/config}/pi/extensions/xtrm-ui/index.ts +2 -2
  8. package/{config → .xtrm/config}/pi/install-schema.json +2 -2
  9. package/.xtrm/config/pi.mcp.json +39 -0
  10. package/.xtrm/config/settings.json +41 -0
  11. package/.xtrm/extensions/auto-session-name/index.ts +29 -0
  12. package/.xtrm/extensions/auto-session-name/package.json +16 -0
  13. package/.xtrm/extensions/auto-update/index.ts +71 -0
  14. package/.xtrm/extensions/auto-update/package.json +16 -0
  15. package/.xtrm/extensions/beads/index.ts +232 -0
  16. package/.xtrm/extensions/beads/package.json +19 -0
  17. package/.xtrm/extensions/compact-header/index.ts +69 -0
  18. package/.xtrm/extensions/compact-header/package.json +16 -0
  19. package/.xtrm/extensions/core/adapter.ts +52 -0
  20. package/.xtrm/extensions/core/guard-rules.ts +100 -0
  21. package/.xtrm/extensions/core/lib.ts +3 -0
  22. package/.xtrm/extensions/core/logger.ts +45 -0
  23. package/.xtrm/extensions/core/package.json +18 -0
  24. package/.xtrm/extensions/core/runner.ts +71 -0
  25. package/.xtrm/extensions/core/session-state.ts +59 -0
  26. package/.xtrm/extensions/custom-footer/index.ts +398 -0
  27. package/.xtrm/extensions/custom-footer/package.json +19 -0
  28. package/.xtrm/extensions/custom-provider-qwen-cli/index.ts +363 -0
  29. package/.xtrm/extensions/custom-provider-qwen-cli/package.json +1 -0
  30. package/.xtrm/extensions/git-checkpoint/index.ts +53 -0
  31. package/.xtrm/extensions/git-checkpoint/package.json +16 -0
  32. package/.xtrm/extensions/lsp-bootstrap/index.ts +134 -0
  33. package/.xtrm/extensions/lsp-bootstrap/package.json +17 -0
  34. package/.xtrm/extensions/pi-serena-compact/index.ts +121 -0
  35. package/.xtrm/extensions/pi-serena-compact/package.json +16 -0
  36. package/.xtrm/extensions/quality-gates/index.ts +66 -0
  37. package/.xtrm/extensions/quality-gates/package.json +19 -0
  38. package/.xtrm/extensions/service-skills/index.ts +108 -0
  39. package/.xtrm/extensions/service-skills/package.json +19 -0
  40. package/.xtrm/extensions/session-flow/index.ts +96 -0
  41. package/.xtrm/extensions/session-flow/package.json +19 -0
  42. package/.xtrm/extensions/xtrm-loader/index.ts +152 -0
  43. package/.xtrm/extensions/xtrm-loader/package.json +19 -0
  44. package/.xtrm/extensions/xtrm-ui/format.ts +93 -0
  45. package/.xtrm/extensions/xtrm-ui/index.ts +1044 -0
  46. package/.xtrm/extensions/xtrm-ui/package.json +10 -0
  47. package/.xtrm/extensions/xtrm-ui/themes/pidex-dark.json +85 -0
  48. package/.xtrm/extensions/xtrm-ui/themes/pidex-light.json +85 -0
  49. package/{hooks → .xtrm/hooks}/README.md +2 -1
  50. package/{hooks → .xtrm/hooks}/beads-commit-gate.mjs +4 -0
  51. package/.xtrm/hooks/beads-memory-gate.mjs +119 -0
  52. package/{plugins/xtrm-tools → .xtrm}/hooks/quality-check-env.mjs +1 -4
  53. package/.xtrm/hooks/statusline.mjs +156 -0
  54. package/{plugins/xtrm-tools → .xtrm}/hooks/using-xtrm-reminder.mjs +8 -7
  55. package/.xtrm/registry.json +1323 -0
  56. package/CHANGELOG.md +31 -0
  57. package/README.md +20 -3
  58. package/cli/dist/index.cjs +26796 -30901
  59. package/cli/dist/index.cjs.map +1 -1
  60. package/cli/package.json +6 -3
  61. package/package.json +15 -13
  62. package/scripts/ghgrep.mjs +358 -0
  63. package/.claude-plugin/marketplace.json +0 -19
  64. package/.claude-plugin/plugin.json +0 -9
  65. package/config/hooks.json +0 -83
  66. package/config/settings.json +0 -70
  67. package/hooks/beads-memory-gate.mjs +0 -94
  68. package/hooks/quality-check-env.mjs +0 -79
  69. package/hooks/statusline.mjs +0 -183
  70. package/hooks/tsconfig-cache.json +0 -4
  71. package/hooks/using-xtrm-reminder.mjs +0 -47
  72. package/plugins/xtrm-tools/.claude-plugin/plugin.json +0 -9
  73. package/plugins/xtrm-tools/.mcp.json +0 -18
  74. package/plugins/xtrm-tools/hooks/README.md +0 -61
  75. package/plugins/xtrm-tools/hooks/beads-claim-sync.mjs +0 -154
  76. package/plugins/xtrm-tools/hooks/beads-commit-gate.mjs +0 -70
  77. package/plugins/xtrm-tools/hooks/beads-compact-restore.mjs +0 -77
  78. package/plugins/xtrm-tools/hooks/beads-compact-save.mjs +0 -63
  79. package/plugins/xtrm-tools/hooks/beads-edit-gate.mjs +0 -85
  80. package/plugins/xtrm-tools/hooks/beads-gate-core.mjs +0 -236
  81. package/plugins/xtrm-tools/hooks/beads-gate-messages.mjs +0 -75
  82. package/plugins/xtrm-tools/hooks/beads-gate-utils.mjs +0 -176
  83. package/plugins/xtrm-tools/hooks/beads-memory-gate.mjs +0 -94
  84. package/plugins/xtrm-tools/hooks/beads-stop-gate.mjs +0 -53
  85. package/plugins/xtrm-tools/hooks/gitnexus/gitnexus-hook.cjs +0 -222
  86. package/plugins/xtrm-tools/hooks/hooks.json +0 -129
  87. package/plugins/xtrm-tools/hooks/quality-check.cjs +0 -1286
  88. package/plugins/xtrm-tools/hooks/quality-check.py +0 -345
  89. package/plugins/xtrm-tools/hooks/statusline.mjs +0 -183
  90. package/plugins/xtrm-tools/hooks/tsconfig-cache.json +0 -4
  91. package/plugins/xtrm-tools/hooks/worktree-boundary.mjs +0 -33
  92. package/plugins/xtrm-tools/hooks/xtrm-logger.mjs +0 -123
  93. package/plugins/xtrm-tools/hooks/xtrm-session-logger.mjs +0 -27
  94. package/plugins/xtrm-tools/hooks/xtrm-tool-logger.mjs +0 -53
  95. package/plugins/xtrm-tools/skills/README.txt +0 -31
  96. package/plugins/xtrm-tools/skills/clean-code/SKILL.md +0 -201
  97. package/plugins/xtrm-tools/skills/creating-service-skills/SKILL.md +0 -433
  98. package/plugins/xtrm-tools/skills/creating-service-skills/references/script_quality_standards.md +0 -425
  99. package/plugins/xtrm-tools/skills/creating-service-skills/references/service_skill_system_guide.md +0 -278
  100. package/plugins/xtrm-tools/skills/creating-service-skills/scripts/bootstrap.py +0 -326
  101. package/plugins/xtrm-tools/skills/creating-service-skills/scripts/deep_dive.py +0 -304
  102. package/plugins/xtrm-tools/skills/creating-service-skills/scripts/scaffolder.py +0 -482
  103. package/plugins/xtrm-tools/skills/delegating/SKILL.md +0 -196
  104. package/plugins/xtrm-tools/skills/delegating/config.yaml +0 -210
  105. package/plugins/xtrm-tools/skills/delegating/references/orchestration-protocols.md +0 -41
  106. package/plugins/xtrm-tools/skills/docker-expert/SKILL.md +0 -409
  107. package/plugins/xtrm-tools/skills/documenting/CHANGELOG.md +0 -23
  108. package/plugins/xtrm-tools/skills/documenting/README.md +0 -148
  109. package/plugins/xtrm-tools/skills/documenting/SKILL.md +0 -113
  110. package/plugins/xtrm-tools/skills/documenting/examples/example_pattern.md +0 -70
  111. package/plugins/xtrm-tools/skills/documenting/examples/example_reference.md +0 -70
  112. package/plugins/xtrm-tools/skills/documenting/examples/example_ssot_analytics.md +0 -64
  113. package/plugins/xtrm-tools/skills/documenting/examples/example_workflow.md +0 -141
  114. package/plugins/xtrm-tools/skills/documenting/references/changelog-format.md +0 -97
  115. package/plugins/xtrm-tools/skills/documenting/references/metadata-schema.md +0 -136
  116. package/plugins/xtrm-tools/skills/documenting/references/taxonomy.md +0 -81
  117. package/plugins/xtrm-tools/skills/documenting/references/versioning-rules.md +0 -78
  118. package/plugins/xtrm-tools/skills/documenting/scripts/bump_version.sh +0 -60
  119. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/__init__.py +0 -0
  120. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/add_entry.py +0 -216
  121. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/bump_release.py +0 -117
  122. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/init_changelog.py +0 -54
  123. package/plugins/xtrm-tools/skills/documenting/scripts/changelog/validate_changelog.py +0 -128
  124. package/plugins/xtrm-tools/skills/documenting/scripts/drift_detector.py +0 -266
  125. package/plugins/xtrm-tools/skills/documenting/scripts/generate_template.py +0 -311
  126. package/plugins/xtrm-tools/skills/documenting/scripts/list_by_category.sh +0 -84
  127. package/plugins/xtrm-tools/skills/documenting/scripts/orchestrator.py +0 -255
  128. package/plugins/xtrm-tools/skills/documenting/scripts/validate_metadata.py +0 -242
  129. package/plugins/xtrm-tools/skills/documenting/templates/CHANGELOG.md.template +0 -13
  130. package/plugins/xtrm-tools/skills/documenting/tests/integration_test.sh +0 -70
  131. package/plugins/xtrm-tools/skills/documenting/tests/test_changelog.py +0 -201
  132. package/plugins/xtrm-tools/skills/documenting/tests/test_drift_detector.py +0 -80
  133. package/plugins/xtrm-tools/skills/documenting/tests/test_orchestrator.py +0 -52
  134. package/plugins/xtrm-tools/skills/documenting/tests/test_validate_metadata.py +0 -64
  135. package/plugins/xtrm-tools/skills/find-skills/SKILL.md +0 -133
  136. package/plugins/xtrm-tools/skills/gitnexus-exploring/SKILL.md +0 -75
  137. package/plugins/xtrm-tools/skills/gitnexus-impact-analysis/SKILL.md +0 -94
  138. package/plugins/xtrm-tools/skills/gitnexus-refactoring/SKILL.md +0 -113
  139. package/plugins/xtrm-tools/skills/hook-development/SKILL.md +0 -797
  140. package/plugins/xtrm-tools/skills/hook-development/examples/load-context.sh +0 -55
  141. package/plugins/xtrm-tools/skills/hook-development/examples/quality-check.js +0 -1168
  142. package/plugins/xtrm-tools/skills/hook-development/examples/validate-bash.sh +0 -43
  143. package/plugins/xtrm-tools/skills/hook-development/examples/validate-write.sh +0 -38
  144. package/plugins/xtrm-tools/skills/hook-development/references/advanced.md +0 -527
  145. package/plugins/xtrm-tools/skills/hook-development/references/migration.md +0 -369
  146. package/plugins/xtrm-tools/skills/hook-development/references/patterns.md +0 -412
  147. package/plugins/xtrm-tools/skills/hook-development/scripts/README.md +0 -164
  148. package/plugins/xtrm-tools/skills/hook-development/scripts/hook-linter.sh +0 -153
  149. package/plugins/xtrm-tools/skills/hook-development/scripts/test-hook.sh +0 -252
  150. package/plugins/xtrm-tools/skills/hook-development/scripts/validate-hook-schema.sh +0 -159
  151. package/plugins/xtrm-tools/skills/obsidian-cli/SKILL.md +0 -106
  152. package/plugins/xtrm-tools/skills/orchestrating-agents/SKILL.md +0 -135
  153. package/plugins/xtrm-tools/skills/orchestrating-agents/config.yaml +0 -45
  154. package/plugins/xtrm-tools/skills/orchestrating-agents/references/agent-context-integration.md +0 -37
  155. package/plugins/xtrm-tools/skills/orchestrating-agents/references/examples.md +0 -45
  156. package/plugins/xtrm-tools/skills/orchestrating-agents/references/handover-protocol.md +0 -31
  157. package/plugins/xtrm-tools/skills/orchestrating-agents/references/workflows.md +0 -42
  158. package/plugins/xtrm-tools/skills/orchestrating-agents/scripts/detect_neighbors.py +0 -23
  159. package/plugins/xtrm-tools/skills/planning/SKILL.md +0 -405
  160. package/plugins/xtrm-tools/skills/planning/evals/evals.json +0 -19
  161. package/plugins/xtrm-tools/skills/prompt-improving/README.md +0 -162
  162. package/plugins/xtrm-tools/skills/prompt-improving/SKILL.md +0 -74
  163. package/plugins/xtrm-tools/skills/prompt-improving/references/analysis_commands.md +0 -24
  164. package/plugins/xtrm-tools/skills/prompt-improving/references/chain_of_thought.md +0 -24
  165. package/plugins/xtrm-tools/skills/prompt-improving/references/mcp_definitions.md +0 -20
  166. package/plugins/xtrm-tools/skills/prompt-improving/references/multishot.md +0 -23
  167. package/plugins/xtrm-tools/skills/prompt-improving/references/xml_core.md +0 -60
  168. package/plugins/xtrm-tools/skills/python-testing/SKILL.md +0 -815
  169. package/plugins/xtrm-tools/skills/scoping-service-skills/SKILL.md +0 -231
  170. package/plugins/xtrm-tools/skills/scoping-service-skills/scripts/scope.py +0 -74
  171. package/plugins/xtrm-tools/skills/senior-backend/SKILL.md +0 -209
  172. package/plugins/xtrm-tools/skills/senior-backend/references/api_design_patterns.md +0 -103
  173. package/plugins/xtrm-tools/skills/senior-backend/references/backend_security_practices.md +0 -103
  174. package/plugins/xtrm-tools/skills/senior-backend/references/database_optimization_guide.md +0 -103
  175. package/plugins/xtrm-tools/skills/senior-backend/scripts/api_load_tester.py +0 -114
  176. package/plugins/xtrm-tools/skills/senior-backend/scripts/api_scaffolder.py +0 -114
  177. package/plugins/xtrm-tools/skills/senior-backend/scripts/database_migration_tool.py +0 -114
  178. package/plugins/xtrm-tools/skills/senior-data-scientist/SKILL.md +0 -226
  179. package/plugins/xtrm-tools/skills/senior-data-scientist/references/experiment_design_frameworks.md +0 -80
  180. package/plugins/xtrm-tools/skills/senior-data-scientist/references/feature_engineering_patterns.md +0 -80
  181. package/plugins/xtrm-tools/skills/senior-data-scientist/references/statistical_methods_advanced.md +0 -80
  182. package/plugins/xtrm-tools/skills/senior-data-scientist/scripts/experiment_designer.py +0 -100
  183. package/plugins/xtrm-tools/skills/senior-data-scientist/scripts/feature_engineering_pipeline.py +0 -100
  184. package/plugins/xtrm-tools/skills/senior-data-scientist/scripts/model_evaluation_suite.py +0 -100
  185. package/plugins/xtrm-tools/skills/senior-devops/SKILL.md +0 -209
  186. package/plugins/xtrm-tools/skills/senior-devops/references/cicd_pipeline_guide.md +0 -103
  187. package/plugins/xtrm-tools/skills/senior-devops/references/deployment_strategies.md +0 -103
  188. package/plugins/xtrm-tools/skills/senior-devops/references/infrastructure_as_code.md +0 -103
  189. package/plugins/xtrm-tools/skills/senior-devops/scripts/deployment_manager.py +0 -114
  190. package/plugins/xtrm-tools/skills/senior-devops/scripts/pipeline_generator.py +0 -114
  191. package/plugins/xtrm-tools/skills/senior-devops/scripts/terraform_scaffolder.py +0 -114
  192. package/plugins/xtrm-tools/skills/senior-security/SKILL.md +0 -209
  193. package/plugins/xtrm-tools/skills/senior-security/references/cryptography_implementation.md +0 -103
  194. package/plugins/xtrm-tools/skills/senior-security/references/penetration_testing_guide.md +0 -103
  195. package/plugins/xtrm-tools/skills/senior-security/references/security_architecture_patterns.md +0 -103
  196. package/plugins/xtrm-tools/skills/senior-security/scripts/pentest_automator.py +0 -114
  197. package/plugins/xtrm-tools/skills/senior-security/scripts/security_auditor.py +0 -114
  198. package/plugins/xtrm-tools/skills/senior-security/scripts/threat_modeler.py +0 -114
  199. package/plugins/xtrm-tools/skills/skill-creator/LICENSE.txt +0 -202
  200. package/plugins/xtrm-tools/skills/skill-creator/SKILL.md +0 -479
  201. package/plugins/xtrm-tools/skills/skill-creator/agents/analyzer.md +0 -274
  202. package/plugins/xtrm-tools/skills/skill-creator/agents/comparator.md +0 -202
  203. package/plugins/xtrm-tools/skills/skill-creator/agents/grader.md +0 -223
  204. package/plugins/xtrm-tools/skills/skill-creator/assets/eval_review.html +0 -146
  205. package/plugins/xtrm-tools/skills/skill-creator/eval-viewer/generate_review.py +0 -471
  206. package/plugins/xtrm-tools/skills/skill-creator/eval-viewer/viewer.html +0 -1325
  207. package/plugins/xtrm-tools/skills/skill-creator/references/schemas.md +0 -430
  208. package/plugins/xtrm-tools/skills/skill-creator/scripts/__init__.py +0 -0
  209. package/plugins/xtrm-tools/skills/skill-creator/scripts/aggregate_benchmark.py +0 -401
  210. package/plugins/xtrm-tools/skills/skill-creator/scripts/generate_report.py +0 -326
  211. package/plugins/xtrm-tools/skills/skill-creator/scripts/improve_description.py +0 -248
  212. package/plugins/xtrm-tools/skills/skill-creator/scripts/package_skill.py +0 -136
  213. package/plugins/xtrm-tools/skills/skill-creator/scripts/quick_validate.py +0 -103
  214. package/plugins/xtrm-tools/skills/skill-creator/scripts/run_eval.py +0 -310
  215. package/plugins/xtrm-tools/skills/skill-creator/scripts/run_loop.py +0 -332
  216. package/plugins/xtrm-tools/skills/skill-creator/scripts/utils.py +0 -47
  217. package/plugins/xtrm-tools/skills/sync-docs/SKILL.md +0 -286
  218. package/plugins/xtrm-tools/skills/sync-docs/evals/evals.json +0 -89
  219. package/plugins/xtrm-tools/skills/sync-docs/references/doc-structure.md +0 -99
  220. package/plugins/xtrm-tools/skills/sync-docs/references/schema.md +0 -103
  221. package/plugins/xtrm-tools/skills/sync-docs/scripts/changelog/add_entry.py +0 -216
  222. package/plugins/xtrm-tools/skills/sync-docs/scripts/context_gatherer.py +0 -240
  223. package/plugins/xtrm-tools/skills/sync-docs/scripts/doc_structure_analyzer.py +0 -495
  224. package/plugins/xtrm-tools/skills/sync-docs/scripts/drift_detector.py +0 -563
  225. package/plugins/xtrm-tools/skills/sync-docs/scripts/validate_doc.py +0 -365
  226. package/plugins/xtrm-tools/skills/sync-docs/scripts/validate_metadata.py +0 -185
  227. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/benchmark.json +0 -293
  228. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/benchmark.md +0 -13
  229. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +0 -27
  230. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +0 -210
  231. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +0 -28
  232. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  233. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +0 -101
  234. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +0 -28
  235. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  236. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +0 -5
  237. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +0 -27
  238. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +0 -198
  239. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +0 -28
  240. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  241. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +0 -94
  242. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +0 -28
  243. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +0 -1
  244. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +0 -27
  245. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +0 -237
  246. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +0 -28
  247. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  248. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +0 -134
  249. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +0 -28
  250. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +0 -1
  251. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/benchmark.json +0 -297
  252. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/benchmark.md +0 -13
  253. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +0 -27
  254. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +0 -137
  255. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +0 -92
  256. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  257. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +0 -134
  258. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +0 -86
  259. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +0 -1
  260. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +0 -27
  261. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +0 -193
  262. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +0 -72
  263. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  264. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +0 -211
  265. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +0 -91
  266. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  267. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +0 -27
  268. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +0 -182
  269. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  270. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  271. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +0 -222
  272. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +0 -88
  273. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  274. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/benchmark.json +0 -298
  275. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/benchmark.md +0 -13
  276. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +0 -27
  277. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +0 -125
  278. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +0 -97
  279. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +0 -5
  280. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +0 -144
  281. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +0 -78
  282. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  283. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +0 -27
  284. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +0 -104
  285. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +0 -91
  286. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +0 -5
  287. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +0 -79
  288. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +0 -82
  289. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  290. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +0 -27
  291. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +0 -302
  292. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +0 -33
  293. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +0 -114
  294. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +0 -118
  295. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +0 -38
  296. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +0 -158
  297. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  298. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +0 -5
  299. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +0 -71
  300. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +0 -90
  301. package/plugins/xtrm-tools/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  302. package/plugins/xtrm-tools/skills/test-planning/SKILL.md +0 -465
  303. package/plugins/xtrm-tools/skills/test-planning/evals/evals.json +0 -23
  304. package/plugins/xtrm-tools/skills/updating-service-skills/SKILL.md +0 -136
  305. package/plugins/xtrm-tools/skills/updating-service-skills/scripts/drift_detector.py +0 -222
  306. package/plugins/xtrm-tools/skills/using-quality-gates/SKILL.md +0 -254
  307. package/plugins/xtrm-tools/skills/using-serena-lsp/README.md +0 -8
  308. package/plugins/xtrm-tools/skills/using-serena-lsp/REFERENCE.md +0 -194
  309. package/plugins/xtrm-tools/skills/using-serena-lsp/SKILL.md +0 -82
  310. package/plugins/xtrm-tools/skills/using-service-skills/SKILL.md +0 -108
  311. package/plugins/xtrm-tools/skills/using-service-skills/scripts/cataloger.py +0 -74
  312. package/plugins/xtrm-tools/skills/using-service-skills/scripts/skill_activator.py +0 -152
  313. package/plugins/xtrm-tools/skills/using-service-skills/scripts/test_skill_activator.py +0 -58
  314. package/plugins/xtrm-tools/skills/using-tdd/SKILL.md +0 -410
  315. package/plugins/xtrm-tools/skills/using-xtrm/SKILL.md +0 -127
  316. package/plugins/xtrm-tools/skills/xt-debugging/SKILL.md +0 -149
  317. package/plugins/xtrm-tools/skills/xt-end/SKILL.md +0 -297
  318. package/plugins/xtrm-tools/skills/xt-merge/SKILL.md +0 -313
  319. package/project-skills/quality-gates/.claude/hooks/hook-config.json +0 -66
  320. package/project-skills/quality-gates/.claude/hooks/quality-check.cjs +0 -1286
  321. package/project-skills/quality-gates/.claude/hooks/quality-check.py +0 -334
  322. package/project-skills/quality-gates/.claude/settings.json +0 -3
  323. package/project-skills/quality-gates/.claude/skills/using-quality-gates/SKILL.md +0 -254
  324. package/project-skills/quality-gates/README.md +0 -109
  325. package/project-skills/quality-gates/evals/evals.json +0 -181
  326. package/project-skills/quality-gates/workspace/iteration-1/FINAL-EVAL-SUMMARY.md +0 -75
  327. package/project-skills/quality-gates/workspace/iteration-1/edge-case-auto-fix-verification/with_skill/outputs/response.md +0 -59
  328. package/project-skills/quality-gates/workspace/iteration-1/edge-case-mixed-language-project/with_skill/outputs/response.md +0 -60
  329. package/project-skills/quality-gates/workspace/iteration-1/eval-summary.md +0 -105
  330. package/project-skills/quality-gates/workspace/iteration-1/partial-install-python-only/with_skill/outputs/response.md +0 -93
  331. package/project-skills/quality-gates/workspace/iteration-1/python-refactor-request/with_skill/outputs/response.md +0 -104
  332. package/project-skills/quality-gates/workspace/iteration-1/quality-gate-error-fix/with_skill/outputs/response.md +0 -74
  333. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-general-chat/with_skill/outputs/response.md +0 -18
  334. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-math-question/with_skill/outputs/response.md +0 -18
  335. package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-unrelated-coding/with_skill/outputs/response.md +0 -56
  336. package/project-skills/quality-gates/workspace/iteration-1/tdd-guard-blocking-confusion/with_skill/outputs/response.md +0 -67
  337. package/project-skills/quality-gates/workspace/iteration-1/typescript-feature-with-tests/with_skill/outputs/response.md +0 -97
  338. package/project-skills/service-skills-set/.claude/git-hooks/doc_reminder.py +0 -67
  339. package/project-skills/service-skills-set/.claude/git-hooks/skill_staleness.py +0 -194
  340. package/project-skills/service-skills-set/.claude/service-registry.json +0 -4
  341. package/project-skills/service-skills-set/.claude/settings.json +0 -37
  342. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/SKILL.md +0 -433
  343. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/references/script_quality_standards.md +0 -425
  344. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/references/service_skill_system_guide.md +0 -278
  345. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/bootstrap.py +0 -308
  346. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/deep_dive.py +0 -304
  347. package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/scaffolder.py +0 -482
  348. package/project-skills/service-skills-set/.claude/skills/scoping-service-skills/SKILL.md +0 -231
  349. package/project-skills/service-skills-set/.claude/skills/scoping-service-skills/scripts/scope.py +0 -74
  350. package/project-skills/service-skills-set/.claude/skills/updating-service-skills/SKILL.md +0 -136
  351. package/project-skills/service-skills-set/.claude/skills/updating-service-skills/scripts/drift_detector.py +0 -222
  352. package/project-skills/service-skills-set/.claude/skills/using-service-skills/SKILL.md +0 -108
  353. package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/cataloger.py +0 -74
  354. package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/skill_activator.py +0 -152
  355. package/project-skills/service-skills-set/README.md +0 -93
  356. package/project-skills/service-skills-set/install-service-skills.py +0 -193
  357. package/project-skills/service-skills-set/service-skills-readme.md +0 -236
  358. package/skills/README.txt +0 -31
  359. package/skills/clean-code/SKILL.md +0 -201
  360. package/skills/creating-service-skills/SKILL.md +0 -433
  361. package/skills/creating-service-skills/references/script_quality_standards.md +0 -425
  362. package/skills/creating-service-skills/references/service_skill_system_guide.md +0 -278
  363. package/skills/creating-service-skills/scripts/bootstrap.py +0 -326
  364. package/skills/creating-service-skills/scripts/deep_dive.py +0 -304
  365. package/skills/creating-service-skills/scripts/scaffolder.py +0 -482
  366. package/skills/delegating/SKILL.md +0 -196
  367. package/skills/delegating/config.yaml +0 -210
  368. package/skills/delegating/references/orchestration-protocols.md +0 -41
  369. package/skills/docker-expert/SKILL.md +0 -409
  370. package/skills/documenting/CHANGELOG.md +0 -23
  371. package/skills/documenting/README.md +0 -148
  372. package/skills/documenting/SKILL.md +0 -113
  373. package/skills/documenting/examples/example_pattern.md +0 -70
  374. package/skills/documenting/examples/example_reference.md +0 -70
  375. package/skills/documenting/examples/example_ssot_analytics.md +0 -64
  376. package/skills/documenting/examples/example_workflow.md +0 -141
  377. package/skills/documenting/references/changelog-format.md +0 -97
  378. package/skills/documenting/references/metadata-schema.md +0 -136
  379. package/skills/documenting/references/taxonomy.md +0 -81
  380. package/skills/documenting/references/versioning-rules.md +0 -78
  381. package/skills/documenting/scripts/bump_version.sh +0 -60
  382. package/skills/documenting/scripts/changelog/__init__.py +0 -0
  383. package/skills/documenting/scripts/changelog/add_entry.py +0 -216
  384. package/skills/documenting/scripts/changelog/bump_release.py +0 -117
  385. package/skills/documenting/scripts/changelog/init_changelog.py +0 -54
  386. package/skills/documenting/scripts/changelog/validate_changelog.py +0 -128
  387. package/skills/documenting/scripts/drift_detector.py +0 -266
  388. package/skills/documenting/scripts/generate_template.py +0 -311
  389. package/skills/documenting/scripts/list_by_category.sh +0 -84
  390. package/skills/documenting/scripts/orchestrator.py +0 -255
  391. package/skills/documenting/scripts/validate_metadata.py +0 -242
  392. package/skills/documenting/templates/CHANGELOG.md.template +0 -13
  393. package/skills/find-skills/SKILL.md +0 -133
  394. package/skills/gitnexus-exploring/SKILL.md +0 -75
  395. package/skills/gitnexus-impact-analysis/SKILL.md +0 -94
  396. package/skills/gitnexus-refactoring/SKILL.md +0 -113
  397. package/skills/hook-development/SKILL.md +0 -797
  398. package/skills/hook-development/examples/load-context.sh +0 -55
  399. package/skills/hook-development/examples/quality-check.js +0 -1168
  400. package/skills/hook-development/examples/validate-bash.sh +0 -43
  401. package/skills/hook-development/examples/validate-write.sh +0 -38
  402. package/skills/hook-development/references/advanced.md +0 -527
  403. package/skills/hook-development/references/migration.md +0 -369
  404. package/skills/hook-development/references/patterns.md +0 -412
  405. package/skills/hook-development/scripts/README.md +0 -164
  406. package/skills/hook-development/scripts/hook-linter.sh +0 -153
  407. package/skills/hook-development/scripts/test-hook.sh +0 -252
  408. package/skills/hook-development/scripts/validate-hook-schema.sh +0 -159
  409. package/skills/obsidian-cli/SKILL.md +0 -106
  410. package/skills/orchestrating-agents/SKILL.md +0 -135
  411. package/skills/orchestrating-agents/config.yaml +0 -45
  412. package/skills/orchestrating-agents/references/agent-context-integration.md +0 -37
  413. package/skills/orchestrating-agents/references/examples.md +0 -45
  414. package/skills/orchestrating-agents/references/handover-protocol.md +0 -31
  415. package/skills/orchestrating-agents/references/workflows.md +0 -42
  416. package/skills/orchestrating-agents/scripts/detect_neighbors.py +0 -23
  417. package/skills/planning/SKILL.md +0 -405
  418. package/skills/planning/evals/evals.json +0 -19
  419. package/skills/prompt-improving/README.md +0 -162
  420. package/skills/prompt-improving/SKILL.md +0 -74
  421. package/skills/prompt-improving/references/analysis_commands.md +0 -24
  422. package/skills/prompt-improving/references/chain_of_thought.md +0 -24
  423. package/skills/prompt-improving/references/mcp_definitions.md +0 -20
  424. package/skills/prompt-improving/references/multishot.md +0 -23
  425. package/skills/prompt-improving/references/xml_core.md +0 -60
  426. package/skills/python-testing/SKILL.md +0 -815
  427. package/skills/scoping-service-skills/SKILL.md +0 -231
  428. package/skills/scoping-service-skills/scripts/scope.py +0 -74
  429. package/skills/senior-backend/SKILL.md +0 -209
  430. package/skills/senior-backend/references/api_design_patterns.md +0 -103
  431. package/skills/senior-backend/references/backend_security_practices.md +0 -103
  432. package/skills/senior-backend/references/database_optimization_guide.md +0 -103
  433. package/skills/senior-backend/scripts/api_load_tester.py +0 -114
  434. package/skills/senior-backend/scripts/api_scaffolder.py +0 -114
  435. package/skills/senior-backend/scripts/database_migration_tool.py +0 -114
  436. package/skills/senior-data-scientist/SKILL.md +0 -226
  437. package/skills/senior-data-scientist/references/experiment_design_frameworks.md +0 -80
  438. package/skills/senior-data-scientist/references/feature_engineering_patterns.md +0 -80
  439. package/skills/senior-data-scientist/references/statistical_methods_advanced.md +0 -80
  440. package/skills/senior-data-scientist/scripts/experiment_designer.py +0 -100
  441. package/skills/senior-data-scientist/scripts/feature_engineering_pipeline.py +0 -100
  442. package/skills/senior-data-scientist/scripts/model_evaluation_suite.py +0 -100
  443. package/skills/senior-devops/SKILL.md +0 -209
  444. package/skills/senior-devops/references/cicd_pipeline_guide.md +0 -103
  445. package/skills/senior-devops/references/deployment_strategies.md +0 -103
  446. package/skills/senior-devops/references/infrastructure_as_code.md +0 -103
  447. package/skills/senior-devops/scripts/deployment_manager.py +0 -114
  448. package/skills/senior-devops/scripts/pipeline_generator.py +0 -114
  449. package/skills/senior-devops/scripts/terraform_scaffolder.py +0 -114
  450. package/skills/senior-security/SKILL.md +0 -209
  451. package/skills/senior-security/references/cryptography_implementation.md +0 -103
  452. package/skills/senior-security/references/penetration_testing_guide.md +0 -103
  453. package/skills/senior-security/references/security_architecture_patterns.md +0 -103
  454. package/skills/senior-security/scripts/pentest_automator.py +0 -114
  455. package/skills/senior-security/scripts/security_auditor.py +0 -114
  456. package/skills/senior-security/scripts/threat_modeler.py +0 -114
  457. package/skills/skill-creator/LICENSE.txt +0 -202
  458. package/skills/skill-creator/SKILL.md +0 -479
  459. package/skills/skill-creator/agents/analyzer.md +0 -274
  460. package/skills/skill-creator/agents/comparator.md +0 -202
  461. package/skills/skill-creator/agents/grader.md +0 -223
  462. package/skills/skill-creator/assets/eval_review.html +0 -146
  463. package/skills/skill-creator/eval-viewer/generate_review.py +0 -471
  464. package/skills/skill-creator/eval-viewer/viewer.html +0 -1325
  465. package/skills/skill-creator/references/schemas.md +0 -430
  466. package/skills/skill-creator/scripts/__init__.py +0 -0
  467. package/skills/skill-creator/scripts/aggregate_benchmark.py +0 -401
  468. package/skills/skill-creator/scripts/generate_report.py +0 -326
  469. package/skills/skill-creator/scripts/improve_description.py +0 -248
  470. package/skills/skill-creator/scripts/package_skill.py +0 -136
  471. package/skills/skill-creator/scripts/quick_validate.py +0 -103
  472. package/skills/skill-creator/scripts/run_eval.py +0 -310
  473. package/skills/skill-creator/scripts/run_loop.py +0 -332
  474. package/skills/skill-creator/scripts/utils.py +0 -47
  475. package/skills/sync-docs/SKILL.md +0 -286
  476. package/skills/sync-docs/evals/evals.json +0 -89
  477. package/skills/sync-docs/references/doc-structure.md +0 -99
  478. package/skills/sync-docs/references/schema.md +0 -103
  479. package/skills/sync-docs/scripts/changelog/add_entry.py +0 -216
  480. package/skills/sync-docs/scripts/context_gatherer.py +0 -240
  481. package/skills/sync-docs/scripts/doc_structure_analyzer.py +0 -495
  482. package/skills/sync-docs/scripts/drift_detector.py +0 -563
  483. package/skills/sync-docs/scripts/validate_doc.py +0 -365
  484. package/skills/sync-docs/scripts/validate_metadata.py +0 -185
  485. package/skills/sync-docs-workspace/iteration-1/benchmark.json +0 -293
  486. package/skills/sync-docs-workspace/iteration-1/benchmark.md +0 -13
  487. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +0 -27
  488. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +0 -210
  489. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +0 -28
  490. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  491. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +0 -101
  492. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +0 -28
  493. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  494. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +0 -5
  495. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +0 -27
  496. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +0 -198
  497. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +0 -28
  498. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  499. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +0 -94
  500. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +0 -28
  501. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +0 -1
  502. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +0 -27
  503. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +0 -237
  504. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +0 -28
  505. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  506. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +0 -134
  507. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +0 -28
  508. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +0 -1
  509. package/skills/sync-docs-workspace/iteration-2/benchmark.json +0 -297
  510. package/skills/sync-docs-workspace/iteration-2/benchmark.md +0 -13
  511. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +0 -27
  512. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +0 -137
  513. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +0 -92
  514. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  515. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +0 -134
  516. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +0 -86
  517. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +0 -1
  518. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +0 -27
  519. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +0 -193
  520. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +0 -72
  521. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  522. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +0 -211
  523. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +0 -91
  524. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  525. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +0 -27
  526. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +0 -182
  527. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  528. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  529. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +0 -222
  530. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +0 -88
  531. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  532. package/skills/sync-docs-workspace/iteration-3/benchmark.json +0 -298
  533. package/skills/sync-docs-workspace/iteration-3/benchmark.md +0 -13
  534. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +0 -27
  535. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +0 -125
  536. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +0 -97
  537. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +0 -5
  538. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +0 -144
  539. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +0 -78
  540. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  541. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +0 -27
  542. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +0 -104
  543. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +0 -91
  544. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +0 -5
  545. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +0 -79
  546. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +0 -82
  547. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  548. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +0 -27
  549. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +0 -302
  550. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +0 -33
  551. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +0 -114
  552. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +0 -118
  553. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +0 -38
  554. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +0 -158
  555. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  556. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +0 -5
  557. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +0 -71
  558. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +0 -90
  559. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  560. package/skills/test-planning/SKILL.md +0 -465
  561. package/skills/test-planning/evals/evals.json +0 -23
  562. package/skills/updating-service-skills/SKILL.md +0 -136
  563. package/skills/updating-service-skills/scripts/drift_detector.py +0 -222
  564. package/skills/using-quality-gates/SKILL.md +0 -254
  565. package/skills/using-serena-lsp/README.md +0 -8
  566. package/skills/using-serena-lsp/REFERENCE.md +0 -194
  567. package/skills/using-serena-lsp/SKILL.md +0 -82
  568. package/skills/using-service-skills/SKILL.md +0 -108
  569. package/skills/using-service-skills/scripts/cataloger.py +0 -74
  570. package/skills/using-service-skills/scripts/skill_activator.py +0 -152
  571. package/skills/using-service-skills/scripts/test_skill_activator.py +0 -58
  572. package/skills/using-tdd/SKILL.md +0 -410
  573. package/skills/using-xtrm/SKILL.md +0 -127
  574. package/skills/xt-debugging/SKILL.md +0 -149
  575. package/skills/xt-end/SKILL.md +0 -297
  576. package/skills/xt-merge/SKILL.md +0 -313
  577. /package/{config → .xtrm/config}/.env.example +0 -0
  578. /package/{config/mcp_servers_optional.json → .xtrm/config/claude.mcp.optional.json} +0 -0
  579. /package/{hooks → .xtrm/config}/hooks.json +0 -0
  580. /package/{config → .xtrm/config}/pi/auth.json.template +0 -0
  581. /package/{config → .xtrm/config}/pi/extensions/auto-session-name/index.ts +0 -0
  582. /package/{config → .xtrm/config}/pi/extensions/auto-session-name/package.json +0 -0
  583. /package/{config → .xtrm/config}/pi/extensions/auto-update/index.ts +0 -0
  584. /package/{config → .xtrm/config}/pi/extensions/auto-update/package.json +0 -0
  585. /package/{config → .xtrm/config}/pi/extensions/beads/package.json +0 -0
  586. /package/{config → .xtrm/config}/pi/extensions/compact-header/index.ts +0 -0
  587. /package/{config → .xtrm/config}/pi/extensions/compact-header/package.json +0 -0
  588. /package/{config → .xtrm/config}/pi/extensions/core/adapter.ts +0 -0
  589. /package/{config → .xtrm/config}/pi/extensions/core/guard-rules.ts +0 -0
  590. /package/{config → .xtrm/config}/pi/extensions/core/lib.ts +0 -0
  591. /package/{config → .xtrm/config}/pi/extensions/core/logger.ts +0 -0
  592. /package/{config → .xtrm/config}/pi/extensions/core/package.json +0 -0
  593. /package/{config → .xtrm/config}/pi/extensions/core/runner.ts +0 -0
  594. /package/{config → .xtrm/config}/pi/extensions/core/session-state.ts +0 -0
  595. /package/{config → .xtrm/config}/pi/extensions/custom-footer/package.json +0 -0
  596. /package/{config → .xtrm/config}/pi/extensions/custom-provider-qwen-cli/index.ts +0 -0
  597. /package/{config → .xtrm/config}/pi/extensions/custom-provider-qwen-cli/package.json +0 -0
  598. /package/{config → .xtrm/config}/pi/extensions/git-checkpoint/index.ts +0 -0
  599. /package/{config → .xtrm/config}/pi/extensions/git-checkpoint/package.json +0 -0
  600. /package/{config → .xtrm/config}/pi/extensions/lsp-bootstrap/index.ts +0 -0
  601. /package/{config → .xtrm/config}/pi/extensions/lsp-bootstrap/package.json +0 -0
  602. /package/{config → .xtrm/config}/pi/extensions/pi-serena-compact/index.ts +0 -0
  603. /package/{config → .xtrm/config}/pi/extensions/pi-serena-compact/package.json +0 -0
  604. /package/{config → .xtrm/config}/pi/extensions/quality-gates/index.ts +0 -0
  605. /package/{config → .xtrm/config}/pi/extensions/quality-gates/package.json +0 -0
  606. /package/{config → .xtrm/config}/pi/extensions/service-skills/index.ts +0 -0
  607. /package/{config → .xtrm/config}/pi/extensions/service-skills/package.json +0 -0
  608. /package/{config → .xtrm/config}/pi/extensions/session-flow/index.ts +0 -0
  609. /package/{config → .xtrm/config}/pi/extensions/session-flow/package.json +0 -0
  610. /package/{config → .xtrm/config}/pi/extensions/xtrm-loader/index.ts +0 -0
  611. /package/{config → .xtrm/config}/pi/extensions/xtrm-loader/package.json +0 -0
  612. /package/{config → .xtrm/config}/pi/extensions/xtrm-ui/format.ts +0 -0
  613. /package/{config → .xtrm/config}/pi/extensions/xtrm-ui/package.json +0 -0
  614. /package/{config → .xtrm/config}/pi/extensions/xtrm-ui/themes/pidex-dark.json +0 -0
  615. /package/{config → .xtrm/config}/pi/extensions/xtrm-ui/themes/pidex-light.json +0 -0
  616. /package/{config → .xtrm/config}/pi/models.json.template +0 -0
  617. /package/{config → .xtrm/config}/pi/pi-worktrees-settings.json +0 -0
  618. /package/{config → .xtrm/config}/pi/settings.json.template +0 -0
  619. /package/{hooks → .xtrm/hooks}/beads-claim-sync.mjs +0 -0
  620. /package/{hooks → .xtrm/hooks}/beads-compact-restore.mjs +0 -0
  621. /package/{hooks → .xtrm/hooks}/beads-compact-save.mjs +0 -0
  622. /package/{hooks → .xtrm/hooks}/beads-edit-gate.mjs +0 -0
  623. /package/{hooks → .xtrm/hooks}/beads-gate-core.mjs +0 -0
  624. /package/{hooks → .xtrm/hooks}/beads-gate-messages.mjs +0 -0
  625. /package/{hooks → .xtrm/hooks}/beads-gate-utils.mjs +0 -0
  626. /package/{hooks → .xtrm/hooks}/beads-stop-gate.mjs +0 -0
  627. /package/{hooks → .xtrm/hooks}/gitnexus/gitnexus-hook.cjs +0 -0
  628. /package/{hooks → .xtrm/hooks}/quality-check.cjs +0 -0
  629. /package/{hooks → .xtrm/hooks}/quality-check.py +0 -0
  630. /package/{hooks → .xtrm/hooks}/worktree-boundary.mjs +0 -0
  631. /package/{hooks → .xtrm/hooks}/xtrm-logger.mjs +0 -0
  632. /package/{hooks → .xtrm/hooks}/xtrm-session-logger.mjs +0 -0
  633. /package/{hooks → .xtrm/hooks}/xtrm-tool-logger.mjs +0 -0
@@ -1,1325 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Eval Review</title>
7
- <link rel="preconnect" href="https://fonts.googleapis.com">
8
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
- <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@500;600&family=Lora:wght@400;500&display=swap" rel="stylesheet">
10
- <script src="https://cdn.sheetjs.com/xlsx-0.20.3/package/dist/xlsx.full.min.js" integrity="sha384-EnyY0/GSHQGSxSgMwaIPzSESbqoOLSexfnSMN2AP+39Ckmn92stwABZynq1JyzdT" crossorigin="anonymous"></script>
11
- <style>
12
- :root {
13
- --bg: #faf9f5;
14
- --surface: #ffffff;
15
- --border: #e8e6dc;
16
- --text: #141413;
17
- --text-muted: #b0aea5;
18
- --accent: #d97757;
19
- --accent-hover: #c4613f;
20
- --green: #788c5d;
21
- --green-bg: #eef2e8;
22
- --red: #c44;
23
- --red-bg: #fceaea;
24
- --header-bg: #141413;
25
- --header-text: #faf9f5;
26
- --radius: 6px;
27
- }
28
-
29
- * { box-sizing: border-box; margin: 0; padding: 0; }
30
-
31
- body {
32
- font-family: 'Lora', Georgia, serif;
33
- background: var(--bg);
34
- color: var(--text);
35
- height: 100vh;
36
- display: flex;
37
- flex-direction: column;
38
- }
39
-
40
- /* ---- Header ---- */
41
- .header {
42
- background: var(--header-bg);
43
- color: var(--header-text);
44
- padding: 1rem 2rem;
45
- display: flex;
46
- justify-content: space-between;
47
- align-items: center;
48
- flex-shrink: 0;
49
- }
50
- .header h1 {
51
- font-family: 'Poppins', sans-serif;
52
- font-size: 1.25rem;
53
- font-weight: 600;
54
- }
55
- .header .instructions {
56
- font-size: 0.8rem;
57
- opacity: 0.7;
58
- margin-top: 0.25rem;
59
- }
60
- .header .progress {
61
- font-size: 0.875rem;
62
- opacity: 0.8;
63
- text-align: right;
64
- }
65
-
66
- /* ---- Main content ---- */
67
- .main {
68
- flex: 1;
69
- overflow-y: auto;
70
- padding: 1.5rem 2rem;
71
- display: flex;
72
- flex-direction: column;
73
- gap: 1.25rem;
74
- }
75
-
76
- /* ---- Sections ---- */
77
- .section {
78
- background: var(--surface);
79
- border: 1px solid var(--border);
80
- border-radius: var(--radius);
81
- flex-shrink: 0;
82
- }
83
- .section-header {
84
- font-family: 'Poppins', sans-serif;
85
- padding: 0.75rem 1rem;
86
- font-size: 0.75rem;
87
- font-weight: 500;
88
- text-transform: uppercase;
89
- letter-spacing: 0.05em;
90
- color: var(--text-muted);
91
- border-bottom: 1px solid var(--border);
92
- background: var(--bg);
93
- }
94
- .section-body {
95
- padding: 1rem;
96
- }
97
-
98
- /* ---- Config badge ---- */
99
- .config-badge {
100
- display: inline-block;
101
- padding: 0.2rem 0.625rem;
102
- border-radius: 9999px;
103
- font-family: 'Poppins', sans-serif;
104
- font-size: 0.6875rem;
105
- font-weight: 600;
106
- text-transform: uppercase;
107
- letter-spacing: 0.03em;
108
- margin-left: 0.75rem;
109
- vertical-align: middle;
110
- }
111
- .config-badge.config-primary {
112
- background: rgba(33, 150, 243, 0.12);
113
- color: #1976d2;
114
- }
115
- .config-badge.config-baseline {
116
- background: rgba(255, 193, 7, 0.15);
117
- color: #f57f17;
118
- }
119
-
120
- /* ---- Prompt ---- */
121
- .prompt-text {
122
- white-space: pre-wrap;
123
- font-size: 0.9375rem;
124
- line-height: 1.6;
125
- }
126
-
127
- /* ---- Outputs ---- */
128
- .output-file {
129
- border: 1px solid var(--border);
130
- border-radius: var(--radius);
131
- overflow: hidden;
132
- }
133
- .output-file + .output-file {
134
- margin-top: 1rem;
135
- }
136
- .output-file-header {
137
- padding: 0.5rem 0.75rem;
138
- font-size: 0.8rem;
139
- font-weight: 600;
140
- color: var(--text-muted);
141
- background: var(--bg);
142
- border-bottom: 1px solid var(--border);
143
- font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
144
- display: flex;
145
- justify-content: space-between;
146
- align-items: center;
147
- }
148
- .output-file-header .dl-btn {
149
- font-size: 0.7rem;
150
- color: var(--accent);
151
- text-decoration: none;
152
- cursor: pointer;
153
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
154
- font-weight: 500;
155
- opacity: 0.8;
156
- }
157
- .output-file-header .dl-btn:hover {
158
- opacity: 1;
159
- text-decoration: underline;
160
- }
161
- .output-file-content {
162
- padding: 0.75rem;
163
- overflow-x: auto;
164
- }
165
- .output-file-content pre {
166
- font-size: 0.8125rem;
167
- line-height: 1.5;
168
- white-space: pre-wrap;
169
- word-break: break-word;
170
- font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
171
- }
172
- .output-file-content img {
173
- max-width: 100%;
174
- height: auto;
175
- border-radius: 4px;
176
- }
177
- .output-file-content iframe {
178
- width: 100%;
179
- height: 600px;
180
- border: none;
181
- }
182
- .output-file-content table {
183
- border-collapse: collapse;
184
- font-size: 0.8125rem;
185
- width: 100%;
186
- }
187
- .output-file-content table td,
188
- .output-file-content table th {
189
- border: 1px solid var(--border);
190
- padding: 0.375rem 0.5rem;
191
- text-align: left;
192
- }
193
- .output-file-content table th {
194
- background: var(--bg);
195
- font-weight: 600;
196
- }
197
- .output-file-content .download-link {
198
- display: inline-flex;
199
- align-items: center;
200
- gap: 0.5rem;
201
- padding: 0.5rem 1rem;
202
- background: var(--bg);
203
- border: 1px solid var(--border);
204
- border-radius: 4px;
205
- color: var(--accent);
206
- text-decoration: none;
207
- font-size: 0.875rem;
208
- cursor: pointer;
209
- }
210
- .output-file-content .download-link:hover {
211
- background: var(--border);
212
- }
213
- .empty-state {
214
- color: var(--text-muted);
215
- font-style: italic;
216
- padding: 2rem;
217
- text-align: center;
218
- }
219
-
220
- /* ---- Feedback ---- */
221
- .prev-feedback {
222
- background: var(--bg);
223
- border: 1px solid var(--border);
224
- border-radius: 4px;
225
- padding: 0.625rem 0.75rem;
226
- margin-top: 0.75rem;
227
- font-size: 0.8125rem;
228
- color: var(--text-muted);
229
- line-height: 1.5;
230
- }
231
- .prev-feedback-label {
232
- font-size: 0.7rem;
233
- font-weight: 600;
234
- text-transform: uppercase;
235
- letter-spacing: 0.04em;
236
- margin-bottom: 0.25rem;
237
- color: var(--text-muted);
238
- }
239
- .feedback-textarea {
240
- width: 100%;
241
- min-height: 100px;
242
- padding: 0.75rem;
243
- border: 1px solid var(--border);
244
- border-radius: 4px;
245
- font-family: inherit;
246
- font-size: 0.9375rem;
247
- line-height: 1.5;
248
- resize: vertical;
249
- color: var(--text);
250
- }
251
- .feedback-textarea:focus {
252
- outline: none;
253
- border-color: var(--accent);
254
- box-shadow: 0 0 0 3px rgba(37, 99, 235, 0.1);
255
- }
256
- .feedback-status {
257
- font-size: 0.75rem;
258
- color: var(--text-muted);
259
- margin-top: 0.5rem;
260
- min-height: 1.1em;
261
- }
262
-
263
- /* ---- Grades (collapsible) ---- */
264
- .grades-toggle {
265
- display: flex;
266
- align-items: center;
267
- cursor: pointer;
268
- user-select: none;
269
- }
270
- .grades-toggle:hover {
271
- color: var(--accent);
272
- }
273
- .grades-toggle .arrow {
274
- margin-right: 0.5rem;
275
- transition: transform 0.15s;
276
- font-size: 0.75rem;
277
- }
278
- .grades-toggle .arrow.open {
279
- transform: rotate(90deg);
280
- }
281
- .grades-content {
282
- display: none;
283
- margin-top: 0.75rem;
284
- }
285
- .grades-content.open {
286
- display: block;
287
- }
288
- .grades-summary {
289
- font-size: 0.875rem;
290
- margin-bottom: 0.75rem;
291
- display: flex;
292
- align-items: center;
293
- gap: 0.5rem;
294
- }
295
- .grade-badge {
296
- display: inline-block;
297
- padding: 0.125rem 0.5rem;
298
- border-radius: 9999px;
299
- font-size: 0.75rem;
300
- font-weight: 600;
301
- }
302
- .grade-pass { background: var(--green-bg); color: var(--green); }
303
- .grade-fail { background: var(--red-bg); color: var(--red); }
304
- .assertion-list {
305
- list-style: none;
306
- }
307
- .assertion-item {
308
- padding: 0.625rem 0;
309
- border-bottom: 1px solid var(--border);
310
- font-size: 0.8125rem;
311
- }
312
- .assertion-item:last-child { border-bottom: none; }
313
- .assertion-status {
314
- font-weight: 600;
315
- margin-right: 0.5rem;
316
- }
317
- .assertion-status.pass { color: var(--green); }
318
- .assertion-status.fail { color: var(--red); }
319
- .assertion-evidence {
320
- color: var(--text-muted);
321
- font-size: 0.75rem;
322
- margin-top: 0.25rem;
323
- padding-left: 1.5rem;
324
- }
325
-
326
- /* ---- View tabs ---- */
327
- .view-tabs {
328
- display: flex;
329
- gap: 0;
330
- padding: 0 2rem;
331
- background: var(--bg);
332
- border-bottom: 1px solid var(--border);
333
- flex-shrink: 0;
334
- }
335
- .view-tab {
336
- font-family: 'Poppins', sans-serif;
337
- padding: 0.625rem 1.25rem;
338
- font-size: 0.8125rem;
339
- font-weight: 500;
340
- cursor: pointer;
341
- border: none;
342
- background: none;
343
- color: var(--text-muted);
344
- border-bottom: 2px solid transparent;
345
- transition: all 0.15s;
346
- }
347
- .view-tab:hover { color: var(--text); }
348
- .view-tab.active {
349
- color: var(--accent);
350
- border-bottom-color: var(--accent);
351
- }
352
- .view-panel { display: none; }
353
- .view-panel.active { display: flex; flex-direction: column; flex: 1; overflow: hidden; }
354
-
355
- /* ---- Benchmark view ---- */
356
- .benchmark-view {
357
- padding: 1.5rem 2rem;
358
- overflow-y: auto;
359
- flex: 1;
360
- }
361
- .benchmark-table {
362
- border-collapse: collapse;
363
- background: var(--surface);
364
- border: 1px solid var(--border);
365
- border-radius: var(--radius);
366
- font-size: 0.8125rem;
367
- width: 100%;
368
- margin-bottom: 1.5rem;
369
- }
370
- .benchmark-table th, .benchmark-table td {
371
- padding: 0.625rem 0.75rem;
372
- text-align: left;
373
- border: 1px solid var(--border);
374
- }
375
- .benchmark-table th {
376
- font-family: 'Poppins', sans-serif;
377
- background: var(--header-bg);
378
- color: var(--header-text);
379
- font-weight: 500;
380
- font-size: 0.75rem;
381
- text-transform: uppercase;
382
- letter-spacing: 0.04em;
383
- }
384
- .benchmark-table tr:hover { background: var(--bg); }
385
- .benchmark-table tr.benchmark-row-with { background: rgba(33, 150, 243, 0.06); }
386
- .benchmark-table tr.benchmark-row-without { background: rgba(255, 193, 7, 0.06); }
387
- .benchmark-table tr.benchmark-row-with:hover { background: rgba(33, 150, 243, 0.12); }
388
- .benchmark-table tr.benchmark-row-without:hover { background: rgba(255, 193, 7, 0.12); }
389
- .benchmark-table tr.benchmark-row-avg { font-weight: 600; border-top: 2px solid var(--border); }
390
- .benchmark-table tr.benchmark-row-avg.benchmark-row-with { background: rgba(33, 150, 243, 0.12); }
391
- .benchmark-table tr.benchmark-row-avg.benchmark-row-without { background: rgba(255, 193, 7, 0.12); }
392
- .benchmark-delta-positive { color: var(--green); font-weight: 600; }
393
- .benchmark-delta-negative { color: var(--red); font-weight: 600; }
394
- .benchmark-notes {
395
- background: var(--surface);
396
- border: 1px solid var(--border);
397
- border-radius: var(--radius);
398
- padding: 1rem;
399
- }
400
- .benchmark-notes h3 {
401
- font-family: 'Poppins', sans-serif;
402
- font-size: 0.875rem;
403
- margin-bottom: 0.75rem;
404
- }
405
- .benchmark-notes ul {
406
- list-style: disc;
407
- padding-left: 1.25rem;
408
- }
409
- .benchmark-notes li {
410
- font-size: 0.8125rem;
411
- line-height: 1.6;
412
- margin-bottom: 0.375rem;
413
- }
414
- .benchmark-empty {
415
- color: var(--text-muted);
416
- font-style: italic;
417
- text-align: center;
418
- padding: 3rem;
419
- }
420
-
421
- /* ---- Navigation ---- */
422
- .nav {
423
- display: flex;
424
- justify-content: space-between;
425
- align-items: center;
426
- padding: 1rem 2rem;
427
- border-top: 1px solid var(--border);
428
- background: var(--surface);
429
- flex-shrink: 0;
430
- }
431
- .nav-btn {
432
- font-family: 'Poppins', sans-serif;
433
- padding: 0.5rem 1.25rem;
434
- border: 1px solid var(--border);
435
- border-radius: var(--radius);
436
- background: var(--surface);
437
- cursor: pointer;
438
- font-size: 0.875rem;
439
- font-weight: 500;
440
- color: var(--text);
441
- transition: all 0.15s;
442
- }
443
- .nav-btn:hover:not(:disabled) {
444
- background: var(--bg);
445
- border-color: var(--text-muted);
446
- }
447
- .nav-btn:disabled {
448
- opacity: 0.4;
449
- cursor: not-allowed;
450
- }
451
- .done-btn {
452
- font-family: 'Poppins', sans-serif;
453
- padding: 0.5rem 1.5rem;
454
- border: 1px solid var(--border);
455
- border-radius: var(--radius);
456
- background: var(--surface);
457
- color: var(--text);
458
- cursor: pointer;
459
- font-size: 0.875rem;
460
- font-weight: 500;
461
- transition: all 0.15s;
462
- }
463
- .done-btn:hover {
464
- background: var(--bg);
465
- border-color: var(--text-muted);
466
- }
467
- .done-btn.ready {
468
- border: none;
469
- background: var(--accent);
470
- color: white;
471
- font-weight: 600;
472
- }
473
- .done-btn.ready:hover {
474
- background: var(--accent-hover);
475
- }
476
- /* ---- Done overlay ---- */
477
- .done-overlay {
478
- display: none;
479
- position: fixed;
480
- inset: 0;
481
- background: rgba(0, 0, 0, 0.5);
482
- z-index: 100;
483
- justify-content: center;
484
- align-items: center;
485
- }
486
- .done-overlay.visible {
487
- display: flex;
488
- }
489
- .done-card {
490
- background: var(--surface);
491
- border-radius: 12px;
492
- padding: 2rem 3rem;
493
- text-align: center;
494
- box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
495
- max-width: 500px;
496
- }
497
- .done-card h2 {
498
- font-size: 1.5rem;
499
- margin-bottom: 0.5rem;
500
- }
501
- .done-card p {
502
- color: var(--text-muted);
503
- margin-bottom: 1.5rem;
504
- line-height: 1.5;
505
- }
506
- .done-card .btn-row {
507
- display: flex;
508
- gap: 0.5rem;
509
- justify-content: center;
510
- }
511
- .done-card button {
512
- padding: 0.5rem 1.25rem;
513
- border: 1px solid var(--border);
514
- border-radius: var(--radius);
515
- background: var(--surface);
516
- cursor: pointer;
517
- font-size: 0.875rem;
518
- }
519
- .done-card button:hover {
520
- background: var(--bg);
521
- }
522
- /* ---- Toast ---- */
523
- .toast {
524
- position: fixed;
525
- bottom: 5rem;
526
- left: 50%;
527
- transform: translateX(-50%);
528
- background: var(--header-bg);
529
- color: var(--header-text);
530
- padding: 0.625rem 1.25rem;
531
- border-radius: var(--radius);
532
- font-size: 0.875rem;
533
- opacity: 0;
534
- transition: opacity 0.3s;
535
- pointer-events: none;
536
- z-index: 200;
537
- }
538
- .toast.visible {
539
- opacity: 1;
540
- }
541
- </style>
542
- </head>
543
- <body>
544
- <div id="app" style="height:100vh; display:flex; flex-direction:column;">
545
- <div class="header">
546
- <div>
547
- <h1>Eval Review: <span id="skill-name"></span></h1>
548
- <div class="instructions">Review each output and leave feedback below. Navigate with arrow keys or buttons. When done, copy feedback and paste into Claude Code.</div>
549
- </div>
550
- <div class="progress" id="progress"></div>
551
- </div>
552
-
553
- <!-- View tabs (only shown when benchmark data exists) -->
554
- <div class="view-tabs" id="view-tabs" style="display:none;">
555
- <button class="view-tab active" onclick="switchView('outputs')">Outputs</button>
556
- <button class="view-tab" onclick="switchView('benchmark')">Benchmark</button>
557
- </div>
558
-
559
- <!-- Outputs panel (qualitative review) -->
560
- <div class="view-panel active" id="panel-outputs">
561
- <div class="main">
562
- <!-- Prompt -->
563
- <div class="section">
564
- <div class="section-header">Prompt <span class="config-badge" id="config-badge" style="display:none;"></span></div>
565
- <div class="section-body">
566
- <div class="prompt-text" id="prompt-text"></div>
567
- </div>
568
- </div>
569
-
570
- <!-- Outputs -->
571
- <div class="section">
572
- <div class="section-header">Output</div>
573
- <div class="section-body" id="outputs-body">
574
- <div class="empty-state">No output files found</div>
575
- </div>
576
- </div>
577
-
578
- <!-- Previous Output (collapsible) -->
579
- <div class="section" id="prev-outputs-section" style="display:none;">
580
- <div class="section-header">
581
- <div class="grades-toggle" onclick="togglePrevOutputs()">
582
- <span class="arrow" id="prev-outputs-arrow">&#9654;</span>
583
- Previous Output
584
- </div>
585
- </div>
586
- <div class="grades-content" id="prev-outputs-content"></div>
587
- </div>
588
-
589
- <!-- Grades (collapsible) -->
590
- <div class="section" id="grades-section" style="display:none;">
591
- <div class="section-header">
592
- <div class="grades-toggle" onclick="toggleGrades()">
593
- <span class="arrow" id="grades-arrow">&#9654;</span>
594
- Formal Grades
595
- </div>
596
- </div>
597
- <div class="grades-content" id="grades-content"></div>
598
- </div>
599
-
600
- <!-- Feedback -->
601
- <div class="section">
602
- <div class="section-header">Your Feedback</div>
603
- <div class="section-body">
604
- <textarea
605
- class="feedback-textarea"
606
- id="feedback"
607
- placeholder="What do you think of this output? Any issues, suggestions, or things that look great?"
608
- ></textarea>
609
- <div class="feedback-status" id="feedback-status"></div>
610
- <div class="prev-feedback" id="prev-feedback" style="display:none;">
611
- <div class="prev-feedback-label">Previous feedback</div>
612
- <div id="prev-feedback-text"></div>
613
- </div>
614
- </div>
615
- </div>
616
- </div>
617
-
618
- <div class="nav" id="outputs-nav">
619
- <button class="nav-btn" id="prev-btn" onclick="navigate(-1)">&#8592; Previous</button>
620
- <button class="done-btn" id="done-btn" onclick="showDoneDialog()">Submit All Reviews</button>
621
- <button class="nav-btn" id="next-btn" onclick="navigate(1)">Next &#8594;</button>
622
- </div>
623
- </div><!-- end panel-outputs -->
624
-
625
- <!-- Benchmark panel (quantitative stats) -->
626
- <div class="view-panel" id="panel-benchmark">
627
- <div class="benchmark-view" id="benchmark-content">
628
- <div class="benchmark-empty">No benchmark data available. Run a benchmark to see quantitative results here.</div>
629
- </div>
630
- </div>
631
- </div>
632
-
633
- <!-- Done overlay -->
634
- <div class="done-overlay" id="done-overlay">
635
- <div class="done-card">
636
- <h2>Review Complete</h2>
637
- <p>Your feedback has been saved. Go back to your Claude Code session and tell Claude you're done reviewing.</p>
638
- <div class="btn-row">
639
- <button onclick="closeDoneDialog()">OK</button>
640
- </div>
641
- </div>
642
- </div>
643
-
644
- <!-- Toast -->
645
- <div class="toast" id="toast"></div>
646
-
647
- <script>
648
- // ---- Embedded data (injected by generate_review.py) ----
649
- /*__EMBEDDED_DATA__*/
650
-
651
- // ---- State ----
652
- let feedbackMap = {}; // run_id -> feedback text
653
- let currentIndex = 0;
654
- let visitedRuns = new Set();
655
-
656
- // ---- Init ----
657
- async function init() {
658
- // Load saved feedback from server — but only if this isn't a fresh
659
- // iteration (indicated by previous_feedback being present). When
660
- // previous feedback exists, the feedback.json on disk is stale from
661
- // the prior iteration and should not pre-fill the textareas.
662
- const hasPrevious = Object.keys(EMBEDDED_DATA.previous_feedback || {}).length > 0
663
- || Object.keys(EMBEDDED_DATA.previous_outputs || {}).length > 0;
664
- if (!hasPrevious) {
665
- try {
666
- const resp = await fetch("/api/feedback");
667
- const data = await resp.json();
668
- if (data.reviews) {
669
- for (const r of data.reviews) feedbackMap[r.run_id] = r.feedback;
670
- }
671
- } catch { /* first run, no feedback yet */ }
672
- }
673
-
674
- document.getElementById("skill-name").textContent = EMBEDDED_DATA.skill_name;
675
- showRun(0);
676
-
677
- // Wire up feedback auto-save
678
- const textarea = document.getElementById("feedback");
679
- let saveTimeout = null;
680
- textarea.addEventListener("input", () => {
681
- clearTimeout(saveTimeout);
682
- document.getElementById("feedback-status").textContent = "";
683
- saveTimeout = setTimeout(() => saveCurrentFeedback(), 800);
684
- });
685
- }
686
-
687
- // ---- Navigation ----
688
- function navigate(delta) {
689
- const newIndex = currentIndex + delta;
690
- if (newIndex >= 0 && newIndex < EMBEDDED_DATA.runs.length) {
691
- saveCurrentFeedback();
692
- showRun(newIndex);
693
- }
694
- }
695
-
696
- function updateNavButtons() {
697
- document.getElementById("prev-btn").disabled = currentIndex === 0;
698
- document.getElementById("next-btn").disabled =
699
- currentIndex === EMBEDDED_DATA.runs.length - 1;
700
- }
701
-
702
- // ---- Show a run ----
703
- function showRun(index) {
704
- currentIndex = index;
705
- const run = EMBEDDED_DATA.runs[index];
706
-
707
- // Progress
708
- document.getElementById("progress").textContent =
709
- `${index + 1} of ${EMBEDDED_DATA.runs.length}`;
710
-
711
- // Prompt
712
- document.getElementById("prompt-text").textContent = run.prompt;
713
-
714
- // Config badge
715
- const badge = document.getElementById("config-badge");
716
- const configMatch = run.id.match(/(with_skill|without_skill|new_skill|old_skill)/);
717
- if (configMatch) {
718
- const config = configMatch[1];
719
- const isBaseline = config === "without_skill" || config === "old_skill";
720
- badge.textContent = config.replace(/_/g, " ");
721
- badge.className = "config-badge " + (isBaseline ? "config-baseline" : "config-primary");
722
- badge.style.display = "inline-block";
723
- } else {
724
- badge.style.display = "none";
725
- }
726
-
727
- // Outputs
728
- renderOutputs(run);
729
-
730
- // Previous outputs
731
- renderPrevOutputs(run);
732
-
733
- // Grades
734
- renderGrades(run);
735
-
736
- // Previous feedback
737
- const prevFb = (EMBEDDED_DATA.previous_feedback || {})[run.id];
738
- const prevEl = document.getElementById("prev-feedback");
739
- if (prevFb) {
740
- document.getElementById("prev-feedback-text").textContent = prevFb;
741
- prevEl.style.display = "block";
742
- } else {
743
- prevEl.style.display = "none";
744
- }
745
-
746
- // Feedback
747
- document.getElementById("feedback").value = feedbackMap[run.id] || "";
748
- document.getElementById("feedback-status").textContent = "";
749
-
750
- updateNavButtons();
751
-
752
- // Track visited runs and promote done button when all visited
753
- visitedRuns.add(index);
754
- const doneBtn = document.getElementById("done-btn");
755
- if (visitedRuns.size >= EMBEDDED_DATA.runs.length) {
756
- doneBtn.classList.add("ready");
757
- }
758
-
759
- // Scroll main content to top
760
- document.querySelector(".main").scrollTop = 0;
761
- }
762
-
763
- // ---- Render outputs ----
764
- function renderOutputs(run) {
765
- const container = document.getElementById("outputs-body");
766
- container.innerHTML = "";
767
-
768
- const outputs = run.outputs || [];
769
- if (outputs.length === 0) {
770
- container.innerHTML = '<div class="empty-state">No output files</div>';
771
- return;
772
- }
773
-
774
- for (const file of outputs) {
775
- const fileDiv = document.createElement("div");
776
- fileDiv.className = "output-file";
777
-
778
- // Always show file header with download link
779
- const header = document.createElement("div");
780
- header.className = "output-file-header";
781
- const nameSpan = document.createElement("span");
782
- nameSpan.textContent = file.name;
783
- header.appendChild(nameSpan);
784
- const dlBtn = document.createElement("a");
785
- dlBtn.className = "dl-btn";
786
- dlBtn.textContent = "Download";
787
- dlBtn.download = file.name;
788
- dlBtn.href = getDownloadUri(file);
789
- header.appendChild(dlBtn);
790
- fileDiv.appendChild(header);
791
-
792
- const content = document.createElement("div");
793
- content.className = "output-file-content";
794
-
795
- if (file.type === "text") {
796
- const pre = document.createElement("pre");
797
- pre.textContent = file.content;
798
- content.appendChild(pre);
799
- } else if (file.type === "image") {
800
- const img = document.createElement("img");
801
- img.src = file.data_uri;
802
- img.alt = file.name;
803
- content.appendChild(img);
804
- } else if (file.type === "pdf") {
805
- const iframe = document.createElement("iframe");
806
- iframe.src = file.data_uri;
807
- content.appendChild(iframe);
808
- } else if (file.type === "xlsx") {
809
- renderXlsx(content, file.data_b64);
810
- } else if (file.type === "binary") {
811
- const a = document.createElement("a");
812
- a.className = "download-link";
813
- a.href = file.data_uri;
814
- a.download = file.name;
815
- a.textContent = "Download " + file.name;
816
- content.appendChild(a);
817
- } else if (file.type === "error") {
818
- const pre = document.createElement("pre");
819
- pre.textContent = file.content;
820
- pre.style.color = "var(--red)";
821
- content.appendChild(pre);
822
- }
823
-
824
- fileDiv.appendChild(content);
825
- container.appendChild(fileDiv);
826
- }
827
- }
828
-
829
- // ---- XLSX rendering via SheetJS ----
830
- function renderXlsx(container, b64Data) {
831
- try {
832
- const raw = Uint8Array.from(atob(b64Data), c => c.charCodeAt(0));
833
- const wb = XLSX.read(raw, { type: "array" });
834
-
835
- for (let i = 0; i < wb.SheetNames.length; i++) {
836
- const sheetName = wb.SheetNames[i];
837
- const ws = wb.Sheets[sheetName];
838
-
839
- if (wb.SheetNames.length > 1) {
840
- const sheetLabel = document.createElement("div");
841
- sheetLabel.style.cssText =
842
- "font-weight:600; font-size:0.8rem; color:#b0aea5; margin-top:0.5rem; margin-bottom:0.25rem;";
843
- sheetLabel.textContent = "Sheet: " + sheetName;
844
- container.appendChild(sheetLabel);
845
- }
846
-
847
- const htmlStr = XLSX.utils.sheet_to_html(ws, { editable: false });
848
- const wrapper = document.createElement("div");
849
- wrapper.innerHTML = htmlStr;
850
- container.appendChild(wrapper);
851
- }
852
- } catch (err) {
853
- container.textContent = "Error rendering spreadsheet: " + err.message;
854
- }
855
- }
856
-
857
- // ---- Grades ----
858
- function renderGrades(run) {
859
- const section = document.getElementById("grades-section");
860
- const content = document.getElementById("grades-content");
861
-
862
- if (!run.grading) {
863
- section.style.display = "none";
864
- return;
865
- }
866
-
867
- const grading = run.grading;
868
- section.style.display = "block";
869
- // Reset to collapsed
870
- content.classList.remove("open");
871
- document.getElementById("grades-arrow").classList.remove("open");
872
-
873
- const summary = grading.summary || {};
874
- const expectations = grading.expectations || [];
875
-
876
- let html = '<div style="padding: 1rem;">';
877
-
878
- // Summary line
879
- const passRate = summary.pass_rate != null
880
- ? Math.round(summary.pass_rate * 100) + "%"
881
- : "?";
882
- const badgeClass = summary.pass_rate >= 0.8 ? "grade-pass" : summary.pass_rate >= 0.5 ? "" : "grade-fail";
883
- html += '<div class="grades-summary">';
884
- html += '<span class="grade-badge ' + badgeClass + '">' + passRate + '</span>';
885
- html += '<span>' + (summary.passed || 0) + ' passed, ' + (summary.failed || 0) + ' failed of ' + (summary.total || 0) + '</span>';
886
- html += '</div>';
887
-
888
- // Assertions list
889
- html += '<ul class="assertion-list">';
890
- for (const exp of expectations) {
891
- const statusClass = exp.passed ? "pass" : "fail";
892
- const statusIcon = exp.passed ? "\u2713" : "\u2717";
893
- html += '<li class="assertion-item">';
894
- html += '<span class="assertion-status ' + statusClass + '">' + statusIcon + '</span>';
895
- html += '<span>' + escapeHtml(exp.text) + '</span>';
896
- if (exp.evidence) {
897
- html += '<div class="assertion-evidence">' + escapeHtml(exp.evidence) + '</div>';
898
- }
899
- html += '</li>';
900
- }
901
- html += '</ul>';
902
-
903
- html += '</div>';
904
- content.innerHTML = html;
905
- }
906
-
907
- function toggleGrades() {
908
- const content = document.getElementById("grades-content");
909
- const arrow = document.getElementById("grades-arrow");
910
- content.classList.toggle("open");
911
- arrow.classList.toggle("open");
912
- }
913
-
914
- // ---- Previous outputs (collapsible) ----
915
- function renderPrevOutputs(run) {
916
- const section = document.getElementById("prev-outputs-section");
917
- const content = document.getElementById("prev-outputs-content");
918
- const prevOutputs = (EMBEDDED_DATA.previous_outputs || {})[run.id];
919
-
920
- if (!prevOutputs || prevOutputs.length === 0) {
921
- section.style.display = "none";
922
- return;
923
- }
924
-
925
- section.style.display = "block";
926
- // Reset to collapsed
927
- content.classList.remove("open");
928
- document.getElementById("prev-outputs-arrow").classList.remove("open");
929
-
930
- // Render the files into the content area
931
- content.innerHTML = "";
932
- const wrapper = document.createElement("div");
933
- wrapper.style.padding = "1rem";
934
-
935
- for (const file of prevOutputs) {
936
- const fileDiv = document.createElement("div");
937
- fileDiv.className = "output-file";
938
-
939
- const header = document.createElement("div");
940
- header.className = "output-file-header";
941
- const nameSpan = document.createElement("span");
942
- nameSpan.textContent = file.name;
943
- header.appendChild(nameSpan);
944
- const dlBtn = document.createElement("a");
945
- dlBtn.className = "dl-btn";
946
- dlBtn.textContent = "Download";
947
- dlBtn.download = file.name;
948
- dlBtn.href = getDownloadUri(file);
949
- header.appendChild(dlBtn);
950
- fileDiv.appendChild(header);
951
-
952
- const fc = document.createElement("div");
953
- fc.className = "output-file-content";
954
-
955
- if (file.type === "text") {
956
- const pre = document.createElement("pre");
957
- pre.textContent = file.content;
958
- fc.appendChild(pre);
959
- } else if (file.type === "image") {
960
- const img = document.createElement("img");
961
- img.src = file.data_uri;
962
- img.alt = file.name;
963
- fc.appendChild(img);
964
- } else if (file.type === "pdf") {
965
- const iframe = document.createElement("iframe");
966
- iframe.src = file.data_uri;
967
- fc.appendChild(iframe);
968
- } else if (file.type === "xlsx") {
969
- renderXlsx(fc, file.data_b64);
970
- } else if (file.type === "binary") {
971
- const a = document.createElement("a");
972
- a.className = "download-link";
973
- a.href = file.data_uri;
974
- a.download = file.name;
975
- a.textContent = "Download " + file.name;
976
- fc.appendChild(a);
977
- }
978
-
979
- fileDiv.appendChild(fc);
980
- wrapper.appendChild(fileDiv);
981
- }
982
-
983
- content.appendChild(wrapper);
984
- }
985
-
986
- function togglePrevOutputs() {
987
- const content = document.getElementById("prev-outputs-content");
988
- const arrow = document.getElementById("prev-outputs-arrow");
989
- content.classList.toggle("open");
990
- arrow.classList.toggle("open");
991
- }
992
-
993
- // ---- Feedback (saved to server -> feedback.json) ----
994
- function saveCurrentFeedback() {
995
- const run = EMBEDDED_DATA.runs[currentIndex];
996
- const text = document.getElementById("feedback").value;
997
-
998
- if (text.trim() === "") {
999
- delete feedbackMap[run.id];
1000
- } else {
1001
- feedbackMap[run.id] = text;
1002
- }
1003
-
1004
- // Build reviews array from map
1005
- const reviews = [];
1006
- for (const [run_id, feedback] of Object.entries(feedbackMap)) {
1007
- if (feedback.trim()) {
1008
- reviews.push({ run_id, feedback, timestamp: new Date().toISOString() });
1009
- }
1010
- }
1011
-
1012
- fetch("/api/feedback", {
1013
- method: "POST",
1014
- headers: { "Content-Type": "application/json" },
1015
- body: JSON.stringify({ reviews, status: "in_progress" }),
1016
- }).then(() => {
1017
- document.getElementById("feedback-status").textContent = "Saved";
1018
- }).catch(() => {
1019
- // Static mode or server unavailable — no-op on auto-save,
1020
- // feedback will be downloaded on final submit
1021
- document.getElementById("feedback-status").textContent = "Will download on submit";
1022
- });
1023
- }
1024
-
1025
- // ---- Done ----
1026
- function showDoneDialog() {
1027
- // Save current textarea to feedbackMap (but don't POST yet)
1028
- const run = EMBEDDED_DATA.runs[currentIndex];
1029
- const text = document.getElementById("feedback").value;
1030
- if (text.trim() === "") {
1031
- delete feedbackMap[run.id];
1032
- } else {
1033
- feedbackMap[run.id] = text;
1034
- }
1035
-
1036
- // POST once with status: complete — include ALL runs so the model
1037
- // can distinguish "no feedback" (looks good) from "not reviewed"
1038
- const reviews = [];
1039
- const ts = new Date().toISOString();
1040
- for (const r of EMBEDDED_DATA.runs) {
1041
- reviews.push({ run_id: r.id, feedback: feedbackMap[r.id] || "", timestamp: ts });
1042
- }
1043
- const payload = JSON.stringify({ reviews, status: "complete" }, null, 2);
1044
- fetch("/api/feedback", {
1045
- method: "POST",
1046
- headers: { "Content-Type": "application/json" },
1047
- body: payload,
1048
- }).then(() => {
1049
- document.getElementById("done-overlay").classList.add("visible");
1050
- }).catch(() => {
1051
- // Server not available (static mode) — download as file
1052
- const blob = new Blob([payload], { type: "application/json" });
1053
- const url = URL.createObjectURL(blob);
1054
- const a = document.createElement("a");
1055
- a.href = url;
1056
- a.download = "feedback.json";
1057
- a.click();
1058
- URL.revokeObjectURL(url);
1059
- document.getElementById("done-overlay").classList.add("visible");
1060
- });
1061
- }
1062
-
1063
- function closeDoneDialog() {
1064
- // Reset status back to in_progress
1065
- saveCurrentFeedback();
1066
- document.getElementById("done-overlay").classList.remove("visible");
1067
- }
1068
-
1069
- // ---- Toast ----
1070
- function showToast(message) {
1071
- const toast = document.getElementById("toast");
1072
- toast.textContent = message;
1073
- toast.classList.add("visible");
1074
- setTimeout(() => toast.classList.remove("visible"), 2000);
1075
- }
1076
-
1077
- // ---- Keyboard nav ----
1078
- document.addEventListener("keydown", (e) => {
1079
- // Don't capture when typing in textarea
1080
- if (e.target.tagName === "TEXTAREA") return;
1081
-
1082
- if (e.key === "ArrowLeft" || e.key === "ArrowUp") {
1083
- e.preventDefault();
1084
- navigate(-1);
1085
- } else if (e.key === "ArrowRight" || e.key === "ArrowDown") {
1086
- e.preventDefault();
1087
- navigate(1);
1088
- }
1089
- });
1090
-
1091
- // ---- Util ----
1092
- function getDownloadUri(file) {
1093
- if (file.data_uri) return file.data_uri;
1094
- if (file.data_b64) return "data:application/octet-stream;base64," + file.data_b64;
1095
- if (file.type === "text") return "data:text/plain;charset=utf-8," + encodeURIComponent(file.content);
1096
- return "#";
1097
- }
1098
-
1099
- function escapeHtml(text) {
1100
- const div = document.createElement("div");
1101
- div.textContent = text;
1102
- return div.innerHTML;
1103
- }
1104
-
1105
- // ---- View switching ----
1106
- function switchView(view) {
1107
- document.querySelectorAll(".view-tab").forEach(t => t.classList.remove("active"));
1108
- document.querySelectorAll(".view-panel").forEach(p => p.classList.remove("active"));
1109
- document.querySelector(`[onclick="switchView('${view}')"]`).classList.add("active");
1110
- document.getElementById("panel-" + view).classList.add("active");
1111
- }
1112
-
1113
- // ---- Benchmark rendering ----
1114
- function renderBenchmark() {
1115
- const data = EMBEDDED_DATA.benchmark;
1116
- if (!data) return;
1117
-
1118
- // Show the tabs
1119
- document.getElementById("view-tabs").style.display = "flex";
1120
-
1121
- const container = document.getElementById("benchmark-content");
1122
- const summary = data.run_summary || {};
1123
- const metadata = data.metadata || {};
1124
- const notes = data.notes || [];
1125
-
1126
- let html = "";
1127
-
1128
- // Header
1129
- html += "<h2 style='font-family: Poppins, sans-serif; margin-bottom: 0.5rem;'>Benchmark Results</h2>";
1130
- html += "<p style='color: var(--text-muted); font-size: 0.875rem; margin-bottom: 1.25rem;'>";
1131
- if (metadata.skill_name) html += "<strong>" + escapeHtml(metadata.skill_name) + "</strong> &mdash; ";
1132
- if (metadata.timestamp) html += metadata.timestamp + " &mdash; ";
1133
- if (metadata.evals_run) html += "Evals: " + metadata.evals_run.join(", ") + " &mdash; ";
1134
- html += (metadata.runs_per_configuration || "?") + " runs per configuration";
1135
- html += "</p>";
1136
-
1137
- // Summary table
1138
- html += '<table class="benchmark-table">';
1139
-
1140
- function fmtStat(stat, pct) {
1141
- if (!stat) return "—";
1142
- const suffix = pct ? "%" : "";
1143
- const m = pct ? (stat.mean * 100).toFixed(0) : stat.mean.toFixed(1);
1144
- const s = pct ? (stat.stddev * 100).toFixed(0) : stat.stddev.toFixed(1);
1145
- return m + suffix + " ± " + s + suffix;
1146
- }
1147
-
1148
- function deltaClass(val) {
1149
- if (!val) return "";
1150
- const n = parseFloat(val);
1151
- if (n > 0) return "benchmark-delta-positive";
1152
- if (n < 0) return "benchmark-delta-negative";
1153
- return "";
1154
- }
1155
-
1156
- // Discover config names dynamically (everything except "delta")
1157
- const configs = Object.keys(summary).filter(k => k !== "delta");
1158
- const configA = configs[0] || "config_a";
1159
- const configB = configs[1] || "config_b";
1160
- const labelA = configA.replace(/_/g, " ").replace(/\b\w/g, c => c.toUpperCase());
1161
- const labelB = configB.replace(/_/g, " ").replace(/\b\w/g, c => c.toUpperCase());
1162
- const a = summary[configA] || {};
1163
- const b = summary[configB] || {};
1164
- const delta = summary.delta || {};
1165
-
1166
- html += "<thead><tr><th>Metric</th><th>" + escapeHtml(labelA) + "</th><th>" + escapeHtml(labelB) + "</th><th>Delta</th></tr></thead>";
1167
- html += "<tbody>";
1168
-
1169
- html += "<tr><td><strong>Pass Rate</strong></td>";
1170
- html += "<td>" + fmtStat(a.pass_rate, true) + "</td>";
1171
- html += "<td>" + fmtStat(b.pass_rate, true) + "</td>";
1172
- html += '<td class="' + deltaClass(delta.pass_rate) + '">' + (delta.pass_rate || "—") + "</td></tr>";
1173
-
1174
- // Time (only show row if data exists)
1175
- if (a.time_seconds || b.time_seconds) {
1176
- html += "<tr><td><strong>Time (s)</strong></td>";
1177
- html += "<td>" + fmtStat(a.time_seconds, false) + "</td>";
1178
- html += "<td>" + fmtStat(b.time_seconds, false) + "</td>";
1179
- html += '<td class="' + deltaClass(delta.time_seconds) + '">' + (delta.time_seconds ? delta.time_seconds + "s" : "—") + "</td></tr>";
1180
- }
1181
-
1182
- // Tokens (only show row if data exists)
1183
- if (a.tokens || b.tokens) {
1184
- html += "<tr><td><strong>Tokens</strong></td>";
1185
- html += "<td>" + fmtStat(a.tokens, false) + "</td>";
1186
- html += "<td>" + fmtStat(b.tokens, false) + "</td>";
1187
- html += '<td class="' + deltaClass(delta.tokens) + '">' + (delta.tokens || "—") + "</td></tr>";
1188
- }
1189
-
1190
- html += "</tbody></table>";
1191
-
1192
- // Per-eval breakdown (if runs data available)
1193
- const runs = data.runs || [];
1194
- if (runs.length > 0) {
1195
- const evalIds = [...new Set(runs.map(r => r.eval_id))].sort((a, b) => a - b);
1196
-
1197
- html += "<h3 style='font-family: Poppins, sans-serif; margin-bottom: 0.75rem;'>Per-Eval Breakdown</h3>";
1198
-
1199
- const hasTime = runs.some(r => r.result && r.result.time_seconds != null);
1200
- const hasErrors = runs.some(r => r.result && r.result.errors > 0);
1201
-
1202
- for (const evalId of evalIds) {
1203
- const evalRuns = runs.filter(r => r.eval_id === evalId);
1204
- const evalName = evalRuns[0] && evalRuns[0].eval_name ? evalRuns[0].eval_name : "Eval " + evalId;
1205
-
1206
- html += "<h4 style='font-family: Poppins, sans-serif; margin: 1rem 0 0.5rem; color: var(--text);'>" + escapeHtml(evalName) + "</h4>";
1207
- html += '<table class="benchmark-table">';
1208
- html += "<thead><tr><th>Config</th><th>Run</th><th>Pass Rate</th>";
1209
- if (hasTime) html += "<th>Time (s)</th>";
1210
- if (hasErrors) html += "<th>Crashes During Execution</th>";
1211
- html += "</tr></thead>";
1212
- html += "<tbody>";
1213
-
1214
- // Group by config and render with average rows
1215
- const configGroups = [...new Set(evalRuns.map(r => r.configuration))];
1216
- for (let ci = 0; ci < configGroups.length; ci++) {
1217
- const config = configGroups[ci];
1218
- const configRuns = evalRuns.filter(r => r.configuration === config);
1219
- if (configRuns.length === 0) continue;
1220
-
1221
- const rowClass = ci === 0 ? "benchmark-row-with" : "benchmark-row-without";
1222
- const configLabel = config.replace(/_/g, " ").replace(/\b\w/g, c => c.toUpperCase());
1223
-
1224
- for (const run of configRuns) {
1225
- const r = run.result || {};
1226
- const prClass = r.pass_rate >= 0.8 ? "benchmark-delta-positive" : r.pass_rate < 0.5 ? "benchmark-delta-negative" : "";
1227
- html += '<tr class="' + rowClass + '">';
1228
- html += "<td>" + configLabel + "</td>";
1229
- html += "<td>" + run.run_number + "</td>";
1230
- html += '<td class="' + prClass + '">' + ((r.pass_rate || 0) * 100).toFixed(0) + "% (" + (r.passed || 0) + "/" + (r.total || 0) + ")</td>";
1231
- if (hasTime) html += "<td>" + (r.time_seconds != null ? r.time_seconds.toFixed(1) : "—") + "</td>";
1232
- if (hasErrors) html += "<td>" + (r.errors || 0) + "</td>";
1233
- html += "</tr>";
1234
- }
1235
-
1236
- // Average row
1237
- const rates = configRuns.map(r => (r.result || {}).pass_rate || 0);
1238
- const avgRate = rates.reduce((a, b) => a + b, 0) / rates.length;
1239
- const avgPrClass = avgRate >= 0.8 ? "benchmark-delta-positive" : avgRate < 0.5 ? "benchmark-delta-negative" : "";
1240
- html += '<tr class="benchmark-row-avg ' + rowClass + '">';
1241
- html += "<td>" + configLabel + "</td>";
1242
- html += "<td>Avg</td>";
1243
- html += '<td class="' + avgPrClass + '">' + (avgRate * 100).toFixed(0) + "%</td>";
1244
- if (hasTime) {
1245
- const times = configRuns.map(r => (r.result || {}).time_seconds).filter(t => t != null);
1246
- html += "<td>" + (times.length ? (times.reduce((a, b) => a + b, 0) / times.length).toFixed(1) : "—") + "</td>";
1247
- }
1248
- if (hasErrors) html += "<td></td>";
1249
- html += "</tr>";
1250
- }
1251
- html += "</tbody></table>";
1252
-
1253
- // Per-assertion detail for this eval
1254
- const runsWithExpectations = {};
1255
- for (const config of configGroups) {
1256
- runsWithExpectations[config] = evalRuns.filter(r => r.configuration === config && r.expectations && r.expectations.length > 0);
1257
- }
1258
- const hasAnyExpectations = Object.values(runsWithExpectations).some(runs => runs.length > 0);
1259
- if (hasAnyExpectations) {
1260
- // Collect all unique assertion texts across all configs
1261
- const allAssertions = [];
1262
- const seen = new Set();
1263
- for (const config of configGroups) {
1264
- for (const run of runsWithExpectations[config]) {
1265
- for (const exp of (run.expectations || [])) {
1266
- if (!seen.has(exp.text)) {
1267
- seen.add(exp.text);
1268
- allAssertions.push(exp.text);
1269
- }
1270
- }
1271
- }
1272
- }
1273
-
1274
- html += '<table class="benchmark-table" style="margin-top: 0.5rem;">';
1275
- html += "<thead><tr><th>Assertion</th>";
1276
- for (const config of configGroups) {
1277
- const label = config.replace(/_/g, " ").replace(/\b\w/g, c => c.toUpperCase());
1278
- html += "<th>" + escapeHtml(label) + "</th>";
1279
- }
1280
- html += "</tr></thead><tbody>";
1281
-
1282
- for (const assertionText of allAssertions) {
1283
- html += "<tr><td>" + escapeHtml(assertionText) + "</td>";
1284
-
1285
- for (const config of configGroups) {
1286
- html += "<td>";
1287
- for (const run of runsWithExpectations[config]) {
1288
- const exp = (run.expectations || []).find(e => e.text === assertionText);
1289
- if (exp) {
1290
- const cls = exp.passed ? "benchmark-delta-positive" : "benchmark-delta-negative";
1291
- const icon = exp.passed ? "\u2713" : "\u2717";
1292
- html += '<span class="' + cls + '" title="Run ' + run.run_number + ': ' + escapeHtml(exp.evidence || "") + '">' + icon + "</span> ";
1293
- } else {
1294
- html += "— ";
1295
- }
1296
- }
1297
- html += "</td>";
1298
- }
1299
- html += "</tr>";
1300
- }
1301
- html += "</tbody></table>";
1302
- }
1303
- }
1304
- }
1305
-
1306
- // Notes
1307
- if (notes.length > 0) {
1308
- html += '<div class="benchmark-notes">';
1309
- html += "<h3>Analysis Notes</h3>";
1310
- html += "<ul>";
1311
- for (const note of notes) {
1312
- html += "<li>" + escapeHtml(note) + "</li>";
1313
- }
1314
- html += "</ul></div>";
1315
- }
1316
-
1317
- container.innerHTML = html;
1318
- }
1319
-
1320
- // ---- Start ----
1321
- init();
1322
- renderBenchmark();
1323
- </script>
1324
- </body>
1325
- </html>