@pennyfarthing/core 10.0.0 → 10.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (493)
  1. package/LICENSE +14 -0
  2. package/package.json +22 -12
  3. package/pennyfarthing-dist/agents/README.md +348 -0
  4. package/pennyfarthing-dist/agents/architect.md +180 -0
  5. package/pennyfarthing-dist/agents/dev.md +169 -0
  6. package/pennyfarthing-dist/agents/devops.md +203 -0
  7. package/pennyfarthing-dist/agents/handoff.md +235 -0
  8. package/pennyfarthing-dist/agents/orchestrator.md +182 -0
  9. package/pennyfarthing-dist/agents/pm.md +152 -0
  10. package/pennyfarthing-dist/agents/reviewer-preflight.md +129 -0
  11. package/pennyfarthing-dist/agents/reviewer.md +197 -0
  12. package/pennyfarthing-dist/agents/sm-file-summary.md +79 -0
  13. package/pennyfarthing-dist/agents/sm-finish.md +82 -0
  14. package/pennyfarthing-dist/agents/sm-handoff.md +129 -0
  15. package/pennyfarthing-dist/agents/sm-setup.md +251 -0
  16. package/pennyfarthing-dist/agents/sm.md +298 -0
  17. package/pennyfarthing-dist/agents/tea.md +161 -0
  18. package/pennyfarthing-dist/agents/tech-writer.md +226 -0
  19. package/pennyfarthing-dist/agents/testing-runner.md +184 -0
  20. package/pennyfarthing-dist/agents/ux-designer.md +236 -0
  21. package/pennyfarthing-dist/agents/workflow-status-check.md +96 -0
  22. package/pennyfarthing-dist/commands/architect.md +9 -0
  23. package/pennyfarthing-dist/commands/benchmark-control.md +69 -0
  24. package/pennyfarthing-dist/commands/benchmark.md +485 -0
  25. package/pennyfarthing-dist/commands/brainstorming.md +91 -0
  26. package/pennyfarthing-dist/commands/check.md +156 -0
  27. package/pennyfarthing-dist/commands/chore.md +218 -0
  28. package/pennyfarthing-dist/commands/close-epic.md +139 -0
  29. package/pennyfarthing-dist/commands/continue-session.md +218 -0
  30. package/pennyfarthing-dist/commands/create-branches-from-story.md +358 -0
  31. package/pennyfarthing-dist/commands/create-theme.md +29 -0
  32. package/pennyfarthing-dist/commands/dev.md +9 -0
  33. package/pennyfarthing-dist/commands/devops.md +9 -0
  34. package/pennyfarthing-dist/commands/fix-blocker.md +22 -0
  35. package/pennyfarthing-dist/commands/git-cleanup.md +57 -0
  36. package/pennyfarthing-dist/commands/health-check.md +143 -0
  37. package/pennyfarthing-dist/commands/help.md +264 -0
  38. package/pennyfarthing-dist/commands/job-fair.md +102 -0
  39. package/pennyfarthing-dist/commands/list-themes.md +21 -0
  40. package/pennyfarthing-dist/commands/new-work.md +23 -0
  41. package/pennyfarthing-dist/commands/orchestrator.md +9 -0
  42. package/pennyfarthing-dist/commands/parallel-work.md +73 -0
  43. package/pennyfarthing-dist/commands/party-mode.md +77 -0
  44. package/pennyfarthing-dist/commands/patch.md +210 -0
  45. package/pennyfarthing-dist/commands/permissions.md +193 -0
  46. package/pennyfarthing-dist/commands/pm.md +9 -0
  47. package/pennyfarthing-dist/commands/prime.md +136 -0
  48. package/pennyfarthing-dist/commands/release.md +74 -0
  49. package/pennyfarthing-dist/commands/repo-status.md +49 -0
  50. package/pennyfarthing-dist/commands/retro.md +200 -0
  51. package/pennyfarthing-dist/commands/reviewer.md +9 -0
  52. package/pennyfarthing-dist/commands/run-ci.md +116 -0
  53. package/pennyfarthing-dist/commands/set-theme.md +56 -0
  54. package/pennyfarthing-dist/commands/setup.md +65 -0
  55. package/pennyfarthing-dist/commands/show-theme.md +21 -0
  56. package/pennyfarthing-dist/commands/sm.md +9 -0
  57. package/pennyfarthing-dist/commands/solo.md +447 -0
  58. package/pennyfarthing-dist/commands/sprint-planning.md +109 -0
  59. package/pennyfarthing-dist/commands/sprint.md +142 -0
  60. package/pennyfarthing-dist/commands/standalone.md +194 -0
  61. package/pennyfarthing-dist/commands/start-epic.md +168 -0
  62. package/pennyfarthing-dist/commands/sync-epic-to-jira.md +184 -0
  63. package/pennyfarthing-dist/commands/sync-work-with-sprint.md +373 -0
  64. package/pennyfarthing-dist/commands/tea.md +9 -0
  65. package/pennyfarthing-dist/commands/tech-writer.md +9 -0
  66. package/pennyfarthing-dist/commands/theme-maker.md +676 -0
  67. package/pennyfarthing-dist/commands/update-domain-docs.md +83 -0
  68. package/pennyfarthing-dist/commands/ux-designer.md +9 -0
  69. package/pennyfarthing-dist/commands/work.md +25 -0
  70. package/pennyfarthing-dist/commands/workflow.md +21 -0
  71. package/pennyfarthing-dist/guides/agent-behavior.md +92 -0
  72. package/pennyfarthing-dist/guides/agent-coordination.md +475 -0
  73. package/pennyfarthing-dist/guides/agent-tag-taxonomy.md +432 -0
  74. package/pennyfarthing-dist/guides/agent-template-strategic.md +148 -0
  75. package/pennyfarthing-dist/guides/agent-template-tactical.md +162 -0
  76. package/pennyfarthing-dist/guides/hooks.md +230 -0
  77. package/pennyfarthing-dist/guides/measurement-framework.md +210 -0
  78. package/pennyfarthing-dist/guides/patterns/approval-gates-pattern.md +766 -0
  79. package/pennyfarthing-dist/guides/patterns/fan-out-fan-in-pattern.md +574 -0
  80. package/pennyfarthing-dist/guides/patterns/helper-delegation-pattern.md +488 -0
  81. package/pennyfarthing-dist/guides/patterns/tdd-flow-pattern.md +402 -0
  82. package/pennyfarthing-dist/guides/permission-protocol.md +188 -0
  83. package/pennyfarthing-dist/guides/persona-loading.md +46 -0
  84. package/pennyfarthing-dist/guides/prompt-patterns.md +338 -0
  85. package/pennyfarthing-dist/guides/scale-levels.md +114 -0
  86. package/pennyfarthing-dist/guides/session-artifacts.md +193 -0
  87. package/pennyfarthing-dist/guides/session-schema.md +346 -0
  88. package/pennyfarthing-dist/guides/skill-schema.md +412 -0
  89. package/pennyfarthing-dist/guides/workflow-schema.md +257 -0
  90. package/pennyfarthing-dist/guides/workflow-step-schema.md +512 -0
  91. package/pennyfarthing-dist/guides/worktree-mode.md +113 -0
  92. package/pennyfarthing-dist/guides/xml-tags.md +627 -0
  93. package/pennyfarthing-dist/output-styles/teaching.md +33 -0
  94. package/pennyfarthing-dist/output-styles/terse.md +20 -0
  95. package/pennyfarthing-dist/output-styles/verbose.md +28 -0
  96. package/pennyfarthing-dist/personas/themes/a-team.yaml +331 -0
  97. package/pennyfarthing-dist/personas/themes/alice-in-wonderland.yaml +324 -0
  98. package/pennyfarthing-dist/personas/themes/battlestar-galactica.yaml +282 -0
  99. package/pennyfarthing-dist/personas/themes/blade-runner.yaml +289 -0
  100. package/pennyfarthing-dist/personas/themes/catch-22.yaml +304 -0
  101. package/pennyfarthing-dist/personas/themes/control.yaml +201 -0
  102. package/pennyfarthing-dist/personas/themes/cowboy-bebop.yaml +315 -0
  103. package/pennyfarthing-dist/personas/themes/discworld.yaml +334 -0
  104. package/pennyfarthing-dist/personas/themes/doctor-who.yaml +284 -0
  105. package/pennyfarthing-dist/personas/themes/dune.yaml +301 -0
  106. package/pennyfarthing-dist/personas/themes/firefly.yaml +320 -0
  107. package/pennyfarthing-dist/personas/themes/game-of-thrones.yaml +284 -0
  108. package/pennyfarthing-dist/personas/themes/harry-potter.yaml +316 -0
  109. package/pennyfarthing-dist/personas/themes/hitchhikers-guide.yaml +323 -0
  110. package/pennyfarthing-dist/personas/themes/lord-of-the-rings.yaml +326 -0
  111. package/pennyfarthing-dist/personas/themes/mad-max.yaml +349 -0
  112. package/pennyfarthing-dist/personas/themes/mash.yaml +329 -0
  113. package/pennyfarthing-dist/personas/themes/princess-bride.yaml +344 -0
  114. package/pennyfarthing-dist/personas/themes/sandman.yaml +282 -0
  115. package/pennyfarthing-dist/personas/themes/star-trek-tng.yaml +358 -0
  116. package/pennyfarthing-dist/personas/themes/star-wars.yaml +297 -0
  117. package/pennyfarthing-dist/personas/themes/the-expanse.yaml +337 -0
  118. package/pennyfarthing-dist/personas/themes/the-matrix.yaml +342 -0
  119. package/pennyfarthing-dist/personas/themes/watchmen.yaml +285 -0
  120. package/pennyfarthing-dist/personas/themes/west-wing.yaml +285 -0
  121. package/pennyfarthing-dist/personas/themes/x-files.yaml +296 -0
  122. package/pennyfarthing-dist/scripts/README.md +87 -0
  123. package/pennyfarthing-dist/scripts/core/README.md +25 -0
  124. package/pennyfarthing-dist/scripts/core/agent-session.sh +390 -0
  125. package/pennyfarthing-dist/scripts/core/check-context.sh +194 -0
  126. package/pennyfarthing-dist/scripts/core/handoff-marker.sh +112 -0
  127. package/pennyfarthing-dist/scripts/core/phase-check-start.sh +90 -0
  128. package/pennyfarthing-dist/scripts/core/prime.sh +30 -0
  129. package/pennyfarthing-dist/scripts/cyclist/is-cyclist.sh +21 -0
  130. package/pennyfarthing-dist/scripts/git/README.md +25 -0
  131. package/pennyfarthing-dist/scripts/git/create-feature-branches.sh +267 -0
  132. package/pennyfarthing-dist/scripts/git/git-status-all.sh +152 -0
  133. package/pennyfarthing-dist/scripts/git/install-git-hooks.sh +79 -0
  134. package/pennyfarthing-dist/scripts/git/release.sh +246 -0
  135. package/pennyfarthing-dist/scripts/git/worktree-manager.sh +497 -0
  136. package/pennyfarthing-dist/scripts/health/drift-detection.sh +156 -0
  137. package/pennyfarthing-dist/scripts/hooks/README.md +32 -0
  138. package/pennyfarthing-dist/scripts/hooks/__pycache__/question_reflector_check.cpython-314.pyc +0 -0
  139. package/pennyfarthing-dist/scripts/hooks/bell-mode-hook.sh +106 -0
  140. package/pennyfarthing-dist/scripts/hooks/context-circuit-breaker.sh +95 -0
  141. package/pennyfarthing-dist/scripts/hooks/context-warning.sh +65 -0
  142. package/pennyfarthing-dist/scripts/hooks/otel-auto-config.sh +35 -0
  143. package/pennyfarthing-dist/scripts/hooks/post-merge.sh +150 -0
  144. package/pennyfarthing-dist/scripts/hooks/pre-commit.sh +190 -0
  145. package/pennyfarthing-dist/scripts/hooks/pre-edit-check.sh +71 -0
  146. package/pennyfarthing-dist/scripts/hooks/pre-push.sh +42 -0
  147. package/pennyfarthing-dist/scripts/hooks/question-reflector-check.sh +20 -0
  148. package/pennyfarthing-dist/scripts/hooks/question_reflector_check.py +499 -0
  149. package/pennyfarthing-dist/scripts/hooks/schema-validation.sh +30 -0
  150. package/pennyfarthing-dist/scripts/hooks/session-start.sh +97 -0
  151. package/pennyfarthing-dist/scripts/hooks/session-stop.sh +65 -0
  152. package/pennyfarthing-dist/scripts/hooks/sprint-yaml-validation.sh +78 -0
  153. package/pennyfarthing-dist/scripts/hooks/welcome-hook.sh +94 -0
  154. package/pennyfarthing-dist/scripts/jira/README.md +36 -0
  155. package/pennyfarthing-dist/scripts/jira/create-jira-epic.sh +95 -0
  156. package/pennyfarthing-dist/scripts/jira/create-jira-story.sh +91 -0
  157. package/pennyfarthing-dist/scripts/jira/jira-claim-story.sh +22 -0
  158. package/pennyfarthing-dist/scripts/jira/jira-lib.sh +464 -0
  159. package/pennyfarthing-dist/scripts/jira/jira-reconcile.sh +260 -0
  160. package/pennyfarthing-dist/scripts/jira/jira-sync-story.sh +18 -0
  161. package/pennyfarthing-dist/scripts/jira/jira-sync.sh +16 -0
  162. package/pennyfarthing-dist/scripts/jira/sync-epic-jira.sh +16 -0
  163. package/pennyfarthing-dist/scripts/jira/sync-epic-to-jira.sh +16 -0
  164. package/pennyfarthing-dist/scripts/lib/README.md +29 -0
  165. package/pennyfarthing-dist/scripts/lib/background-tasks.sh +177 -0
  166. package/pennyfarthing-dist/scripts/lib/checkpoint.sh +136 -0
  167. package/pennyfarthing-dist/scripts/lib/common.sh +212 -0
  168. package/pennyfarthing-dist/scripts/lib/file-lock.sh +269 -0
  169. package/pennyfarthing-dist/scripts/lib/find-root.sh +58 -0
  170. package/pennyfarthing-dist/scripts/lib/logging.sh +186 -0
  171. package/pennyfarthing-dist/scripts/lib/retry.sh +76 -0
  172. package/pennyfarthing-dist/scripts/maintenance/migrate-theme-schema.mjs +102 -0
  173. package/pennyfarthing-dist/scripts/maintenance/sidecar-health.sh +91 -0
  174. package/pennyfarthing-dist/scripts/misc/README.md +44 -0
  175. package/pennyfarthing-dist/scripts/misc/add-short-names.sh +13 -0
  176. package/pennyfarthing-dist/scripts/misc/add_short_names.py +226 -0
  177. package/pennyfarthing-dist/scripts/misc/backlog.sh +77 -0
  178. package/pennyfarthing-dist/scripts/misc/check-status.sh +247 -0
  179. package/pennyfarthing-dist/scripts/misc/find-related-work.sh +231 -0
  180. package/pennyfarthing-dist/scripts/misc/generate-skill-docs.sh +107 -0
  181. package/pennyfarthing-dist/scripts/misc/log-skill-usage.sh +74 -0
  182. package/pennyfarthing-dist/scripts/misc/migrate-bmad-workflow.sh +10 -0
  183. package/pennyfarthing-dist/scripts/misc/migrate_bmad_workflow.py +319 -0
  184. package/pennyfarthing-dist/scripts/misc/repo-scan.sh +141 -0
  185. package/pennyfarthing-dist/scripts/misc/repo-utils.sh +778 -0
  186. package/pennyfarthing-dist/scripts/misc/run-ci.sh +212 -0
  187. package/pennyfarthing-dist/scripts/misc/run-timestamp.sh +7 -0
  188. package/pennyfarthing-dist/scripts/misc/session-cleanup.sh +319 -0
  189. package/pennyfarthing-dist/scripts/misc/skill-usage-report.sh +193 -0
  190. package/pennyfarthing-dist/scripts/misc/statusline.sh +257 -0
  191. package/pennyfarthing-dist/scripts/misc/uninstall.sh +275 -0
  192. package/pennyfarthing-dist/scripts/misc/validate-subagent-frontmatter.sh +160 -0
  193. package/pennyfarthing-dist/scripts/portraits/generate-portraits.py +417 -0
  194. package/pennyfarthing-dist/scripts/portraits/generate-portraits.sh +54 -0
  195. package/pennyfarthing-dist/scripts/sprint/README.md +29 -0
  196. package/pennyfarthing-dist/scripts/sprint/archive-story.sh +133 -0
  197. package/pennyfarthing-dist/scripts/sprint/available-stories.sh +91 -0
  198. package/pennyfarthing-dist/scripts/sprint/check-story.sh +158 -0
  199. package/pennyfarthing-dist/scripts/sprint/get-epic-field.sh +52 -0
  200. package/pennyfarthing-dist/scripts/sprint/get-story-field.sh +63 -0
  201. package/pennyfarthing-dist/scripts/sprint/list-future.sh +145 -0
  202. package/pennyfarthing-dist/scripts/sprint/new-sprint.sh +110 -0
  203. package/pennyfarthing-dist/scripts/sprint/promote-epic.sh +148 -0
  204. package/pennyfarthing-dist/scripts/sprint/sprint-common.sh +415 -0
  205. package/pennyfarthing-dist/scripts/sprint/sprint-info.sh +33 -0
  206. package/pennyfarthing-dist/scripts/sprint/sprint-metrics.sh +230 -0
  207. package/pennyfarthing-dist/scripts/sprint/sprint-status.sh +134 -0
  208. package/pennyfarthing-dist/scripts/sprint/validate-sprint-yaml.sh +139 -0
  209. package/pennyfarthing-dist/scripts/story/README.md +23 -0
  210. package/pennyfarthing-dist/scripts/story/create-story.sh +19 -0
  211. package/pennyfarthing-dist/scripts/story/size-story.sh +18 -0
  212. package/pennyfarthing-dist/scripts/story/story-template.sh +18 -0
  213. package/pennyfarthing-dist/scripts/test/README.md +23 -0
  214. package/pennyfarthing-dist/scripts/test/ensure-swebench-data.sh +59 -0
  215. package/pennyfarthing-dist/scripts/test/ground-truth-judge.py +220 -0
  216. package/pennyfarthing-dist/scripts/test/swebench-judge.py +374 -0
  217. package/pennyfarthing-dist/scripts/test/test-cache.sh +165 -0
  218. package/pennyfarthing-dist/scripts/test/test-setup.sh +337 -0
  219. package/pennyfarthing-dist/scripts/tests/check.test.sh +582 -0
  220. package/pennyfarthing-dist/scripts/tests/dev-story-workflow-import.test.sh +515 -0
  221. package/pennyfarthing-dist/scripts/tests/epics-and-stories-workflow-import.test.sh +599 -0
  222. package/pennyfarthing-dist/scripts/tests/handoff-phase-update.test.sh +332 -0
  223. package/pennyfarthing-dist/scripts/tests/implementation-readiness-workflow-import.test.sh +573 -0
  224. package/pennyfarthing-dist/scripts/tests/migrate-bmad-workflow.test.sh +859 -0
  225. package/pennyfarthing-dist/scripts/tests/prd-workflow-import.test.sh +662 -0
  226. package/pennyfarthing-dist/scripts/tests/project-context-workflow-import.test.sh +589 -0
  227. package/pennyfarthing-dist/scripts/tests/test-character-voice.sh +106 -0
  228. package/pennyfarthing-dist/scripts/tests/test-drift-detection.sh +597 -0
  229. package/pennyfarthing-dist/scripts/tests/test-post-merge-hook.sh +514 -0
  230. package/pennyfarthing-dist/scripts/tests/test-session-checkpoint.sh +517 -0
  231. package/pennyfarthing-dist/scripts/tests/test-solo-command.sh +331 -0
  232. package/pennyfarthing-dist/scripts/tests/ux-design-workflow-import.test.sh +647 -0
  233. package/pennyfarthing-dist/scripts/theme/README.md +22 -0
  234. package/pennyfarthing-dist/scripts/theme/compute-theme-tiers.sh +13 -0
  235. package/pennyfarthing-dist/scripts/theme/compute_theme_tiers.py +402 -0
  236. package/pennyfarthing-dist/scripts/theme/list-themes.sh +30 -0
  237. package/pennyfarthing-dist/scripts/theme/update-theme-tiers.sh +97 -0
  238. package/pennyfarthing-dist/scripts/validation/validate-agent-schema.sh +576 -0
  239. package/pennyfarthing-dist/scripts/workflow/README.md +28 -0
  240. package/pennyfarthing-dist/scripts/workflow/check.py +502 -0
  241. package/pennyfarthing-dist/scripts/workflow/check.sh +24 -0
  242. package/pennyfarthing-dist/scripts/workflow/complete-step.py +304 -0
  243. package/pennyfarthing-dist/scripts/workflow/finish-story.sh +154 -0
  244. package/pennyfarthing-dist/scripts/workflow/fix-session-phase.sh +222 -0
  245. package/pennyfarthing-dist/scripts/workflow/get-workflow-type.py +61 -0
  246. package/pennyfarthing-dist/scripts/workflow/get-workflow-type.sh +13 -0
  247. package/pennyfarthing-dist/scripts/workflow/list-workflows.sh +124 -0
  248. package/pennyfarthing-dist/scripts/workflow/phase-owner.sh +34 -0
  249. package/pennyfarthing-dist/scripts/workflow/resume-workflow.sh +157 -0
  250. package/pennyfarthing-dist/scripts/workflow/show-workflow.sh +132 -0
  251. package/pennyfarthing-dist/scripts/workflow/start-workflow.sh +250 -0
  252. package/pennyfarthing-dist/scripts/workflow/workflow-status.sh +161 -0
  253. package/pennyfarthing-dist/skills/agentic-patterns/SKILL.md +246 -0
  254. package/pennyfarthing-dist/skills/changelog/SKILL.md +385 -0
  255. package/pennyfarthing-dist/skills/code-review/SKILL.md +172 -0
  256. package/pennyfarthing-dist/skills/context-engineering/SKILL.md +277 -0
  257. package/pennyfarthing-dist/skills/cyclist/SKILL.md +88 -0
  258. package/pennyfarthing-dist/skills/dev-patterns/SKILL.md +461 -0
  259. package/pennyfarthing-dist/skills/finalize-run/SKILL.md +261 -0
  260. package/pennyfarthing-dist/skills/jira/SKILL.md +508 -0
  261. package/pennyfarthing-dist/skills/judge/SKILL.md +644 -0
  262. package/pennyfarthing-dist/skills/just/SKILL.md +414 -0
  263. package/pennyfarthing-dist/skills/mermaid/SKILL.md +256 -0
  264. package/pennyfarthing-dist/skills/otel/skill.md +227 -0
  265. package/pennyfarthing-dist/skills/permissions/skill.md +157 -0
  266. package/pennyfarthing-dist/skills/persona-benchmark/SKILL.md +187 -0
  267. package/pennyfarthing-dist/skills/skill-registry.schema.json +107 -0
  268. package/pennyfarthing-dist/skills/skill-registry.yaml +393 -0
  269. package/pennyfarthing-dist/skills/sprint/scripts/archive-story.sh +101 -0
  270. package/pennyfarthing-dist/skills/sprint/scripts/available-stories.sh +97 -0
  271. package/pennyfarthing-dist/skills/sprint/scripts/check-story.sh +164 -0
  272. package/pennyfarthing-dist/skills/sprint/scripts/create-jira-epic.sh +101 -0
  273. package/pennyfarthing-dist/skills/sprint/scripts/new-sprint.sh +116 -0
  274. package/pennyfarthing-dist/skills/sprint/scripts/promote-epic.sh +164 -0
  275. package/pennyfarthing-dist/skills/sprint/scripts/sprint-info.sh +39 -0
  276. package/pennyfarthing-dist/skills/sprint/scripts/sprint-status.sh +147 -0
  277. package/pennyfarthing-dist/skills/sprint/scripts/sync-epic-jira.sh +93 -0
  278. package/pennyfarthing-dist/skills/sprint/skill.md +465 -0
  279. package/pennyfarthing-dist/skills/story/scripts/create-story.sh +159 -0
  280. package/pennyfarthing-dist/skills/story/scripts/size-story.sh +198 -0
  281. package/pennyfarthing-dist/skills/story/scripts/story-template.sh +162 -0
  282. package/pennyfarthing-dist/skills/story/skill.md +219 -0
  283. package/pennyfarthing-dist/skills/systematic-debugging/SKILL.md +446 -0
  284. package/pennyfarthing-dist/skills/testing/SKILL.md +121 -0
  285. package/pennyfarthing-dist/skills/testing/references/troubleshooting.md +124 -0
  286. package/pennyfarthing-dist/skills/theme/skill.md +141 -0
  287. package/pennyfarthing-dist/skills/theme-creation/SKILL.md +178 -0
  288. package/pennyfarthing-dist/skills/workflow/scripts/list-workflows.sh +91 -0
  289. package/pennyfarthing-dist/skills/workflow/scripts/resume-workflow.sh +163 -0
  290. package/pennyfarthing-dist/skills/workflow/scripts/show-workflow.sh +138 -0
  291. package/pennyfarthing-dist/skills/workflow/scripts/start-workflow.sh +273 -0
  292. package/pennyfarthing-dist/skills/workflow/scripts/workflow-status.sh +167 -0
  293. package/pennyfarthing-dist/skills/workflow/skill.md +345 -0
  294. package/pennyfarthing-dist/skills/yq/SKILL.md +272 -0
  295. package/pennyfarthing-dist/templates/LEADERBOARD.schema.yaml +187 -0
  296. package/pennyfarthing-dist/templates/LEADERBOARD.template.md +59 -0
  297. package/pennyfarthing-dist/templates/agent-scopes.yaml.template +276 -0
  298. package/pennyfarthing-dist/templates/pennyfarthing-settings.yaml.template +61 -0
  299. package/pennyfarthing-dist/templates/persona-config.yaml.template +22 -0
  300. package/pennyfarthing-dist/templates/preferences.yaml.template +15 -0
  301. package/pennyfarthing-dist/templates/settings.local.json.template +130 -0
  302. package/pennyfarthing-dist/templates/setup-env.sh.template +18 -0
  303. package/pennyfarthing-dist/templates/shared-context.md.template +70 -0
  304. package/pennyfarthing-dist/templates/sidecar/decisions.md.template +40 -0
  305. package/pennyfarthing-dist/templates/sidecar/gotchas.md.template +37 -0
  306. package/pennyfarthing-dist/templates/sidecar/patterns.md.template +34 -0
  307. package/pennyfarthing-dist/workflows/agent-docs.yaml +70 -0
  308. package/pennyfarthing-dist/workflows/architecture/steps/step-01-initialize.md +113 -0
  309. package/pennyfarthing-dist/workflows/architecture/steps/step-01b-continue.md +105 -0
  310. package/pennyfarthing-dist/workflows/architecture/steps/step-02-context.md +127 -0
  311. package/pennyfarthing-dist/workflows/architecture/steps/step-03-patterns.md +145 -0
  312. package/pennyfarthing-dist/workflows/architecture/steps/step-04-components.md +150 -0
  313. package/pennyfarthing-dist/workflows/architecture/steps/step-05-interfaces.md +145 -0
  314. package/pennyfarthing-dist/workflows/architecture/steps/step-06-risks.md +154 -0
  315. package/pennyfarthing-dist/workflows/architecture/steps/step-07-document.md +172 -0
  316. package/pennyfarthing-dist/workflows/architecture/templates/architecture-decision.md +102 -0
  317. package/pennyfarthing-dist/workflows/architecture.yaml +65 -0
  318. package/pennyfarthing-dist/workflows/bdd.yaml +60 -0
  319. package/pennyfarthing-dist/workflows/brainstorming/brain-methods.csv +62 -0
  320. package/pennyfarthing-dist/workflows/brainstorming/checklist.md +44 -0
  321. package/pennyfarthing-dist/workflows/brainstorming/instructions.md +736 -0
  322. package/pennyfarthing-dist/workflows/brainstorming/workflow.yaml +49 -0
  323. package/pennyfarthing-dist/workflows/code-review/checklist.md +23 -0
  324. package/pennyfarthing-dist/workflows/code-review/instructions.md +234 -0
  325. package/pennyfarthing-dist/workflows/code-review/workflow.yaml +51 -0
  326. package/pennyfarthing-dist/workflows/dev-story/checklist.md +80 -0
  327. package/pennyfarthing-dist/workflows/dev-story/instructions.xml +410 -0
  328. package/pennyfarthing-dist/workflows/dev-story/workflow.yaml +50 -0
  329. package/pennyfarthing-dist/workflows/epics-and-stories/steps/step-01-validate-prerequisites.md +281 -0
  330. package/pennyfarthing-dist/workflows/epics-and-stories/steps/step-02-design-epics.md +256 -0
  331. package/pennyfarthing-dist/workflows/epics-and-stories/steps/step-03-create-stories.md +298 -0
  332. package/pennyfarthing-dist/workflows/epics-and-stories/steps/step-04-final-validation.md +177 -0
  333. package/pennyfarthing-dist/workflows/epics-and-stories/steps/step-05-import-to-future.md +145 -0
  334. package/pennyfarthing-dist/workflows/epics-and-stories/templates/epics-template.md +57 -0
  335. package/pennyfarthing-dist/workflows/epics-and-stories/workflow.yaml +28 -0
  336. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-01-analyze.md +103 -0
  337. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-02-categorize.md +147 -0
  338. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-03-execute.md +215 -0
  339. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-04-verify.md +97 -0
  340. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-05-complete.md +78 -0
  341. package/pennyfarthing-dist/workflows/git-cleanup.yaml +59 -0
  342. package/pennyfarthing-dist/workflows/implementation-readiness/steps/step-01-document-discovery.md +211 -0
  343. package/pennyfarthing-dist/workflows/implementation-readiness/steps/step-02-prd-analysis.md +199 -0
  344. package/pennyfarthing-dist/workflows/implementation-readiness/steps/step-03-epic-coverage-validation.md +202 -0
  345. package/pennyfarthing-dist/workflows/implementation-readiness/steps/step-04-ux-alignment.md +162 -0
  346. package/pennyfarthing-dist/workflows/implementation-readiness/steps/step-05-epic-quality-review.md +280 -0
  347. package/pennyfarthing-dist/workflows/implementation-readiness/steps/step-06-final-assessment.md +158 -0
  348. package/pennyfarthing-dist/workflows/implementation-readiness/templates/readiness-report-template.md +4 -0
  349. package/pennyfarthing-dist/workflows/implementation-readiness/workflow.yaml +40 -0
  350. package/pennyfarthing-dist/workflows/interactive-debug/steps/step-01-connect.md +257 -0
  351. package/pennyfarthing-dist/workflows/interactive-debug/steps/step-02-explore.md +107 -0
  352. package/pennyfarthing-dist/workflows/interactive-debug/steps/step-03-fix.md +127 -0
  353. package/pennyfarthing-dist/workflows/interactive-debug/steps/step-04-commit.md +122 -0
  354. package/pennyfarthing-dist/workflows/interactive-debug/workflow.yaml +51 -0
  355. package/pennyfarthing-dist/workflows/patch.yaml +67 -0
  356. package/pennyfarthing-dist/workflows/prd/data/domain-complexity.csv +13 -0
  357. package/pennyfarthing-dist/workflows/prd/data/prd-purpose.md +197 -0
  358. package/pennyfarthing-dist/workflows/prd/data/project-types.csv +11 -0
  359. package/pennyfarthing-dist/workflows/prd/steps-c/step-01-init.md +197 -0
  360. package/pennyfarthing-dist/workflows/prd/steps-c/step-01b-continue.md +159 -0
  361. package/pennyfarthing-dist/workflows/prd/steps-c/step-02-discovery.md +230 -0
  362. package/pennyfarthing-dist/workflows/prd/steps-c/step-03-success.md +232 -0
  363. package/pennyfarthing-dist/workflows/prd/steps-c/step-04-journeys.md +219 -0
  364. package/pennyfarthing-dist/workflows/prd/steps-c/step-05-domain.md +213 -0
  365. package/pennyfarthing-dist/workflows/prd/steps-c/step-06-innovation.md +232 -0
  366. package/pennyfarthing-dist/workflows/prd/steps-c/step-07-project-type.md +243 -0
  367. package/pennyfarthing-dist/workflows/prd/steps-c/step-08-scoping.md +234 -0
  368. package/pennyfarthing-dist/workflows/prd/steps-c/step-09-functional.md +237 -0
  369. package/pennyfarthing-dist/workflows/prd/steps-c/step-10-nonfunctional.md +248 -0
  370. package/pennyfarthing-dist/workflows/prd/steps-c/step-11-polish.md +223 -0
  371. package/pennyfarthing-dist/workflows/prd/steps-c/step-12-complete.md +186 -0
  372. package/pennyfarthing-dist/workflows/prd/steps-e/step-e-01-discovery.md +253 -0
  373. package/pennyfarthing-dist/workflows/prd/steps-e/step-e-01b-legacy-conversion.md +214 -0
  374. package/pennyfarthing-dist/workflows/prd/steps-e/step-e-02-review.md +255 -0
  375. package/pennyfarthing-dist/workflows/prd/steps-e/step-e-03-edit.md +259 -0
  376. package/pennyfarthing-dist/workflows/prd/steps-e/step-e-04-complete.md +174 -0
  377. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-01-discovery.md +224 -0
  378. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-02-format-detection.md +197 -0
  379. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-02b-parity-check.md +215 -0
  380. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-03-density-validation.md +180 -0
  381. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-04-brief-coverage-validation.md +220 -0
  382. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-05-measurability-validation.md +234 -0
  383. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-06-traceability-validation.md +223 -0
  384. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-07-implementation-leakage-validation.md +211 -0
  385. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-08-domain-compliance-validation.md +249 -0
  386. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-09-project-type-validation.md +269 -0
  387. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-10-smart-validation.md +215 -0
  388. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-11-holistic-quality-validation.md +270 -0
  389. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-12-completeness-validation.md +248 -0
  390. package/pennyfarthing-dist/workflows/prd/steps-v/step-v-13-report-complete.md +238 -0
  391. package/pennyfarthing-dist/workflows/prd/templates/prd-template.md +10 -0
  392. package/pennyfarthing-dist/workflows/prd/workflow.yaml +42 -0
  393. package/pennyfarthing-dist/workflows/product-brief/steps/step-01-init.md +195 -0
  394. package/pennyfarthing-dist/workflows/product-brief/steps/step-01b-continue.md +180 -0
  395. package/pennyfarthing-dist/workflows/product-brief/steps/step-02-vision.md +221 -0
  396. package/pennyfarthing-dist/workflows/product-brief/steps/step-03-users.md +224 -0
  397. package/pennyfarthing-dist/workflows/product-brief/steps/step-04-metrics.md +228 -0
  398. package/pennyfarthing-dist/workflows/product-brief/steps/step-05-scope.md +243 -0
  399. package/pennyfarthing-dist/workflows/product-brief/steps/step-06-complete.md +216 -0
  400. package/pennyfarthing-dist/workflows/product-brief/templates/product-brief.template.md +10 -0
  401. package/pennyfarthing-dist/workflows/product-brief/workflow.yaml +31 -0
  402. package/pennyfarthing-dist/workflows/project-context/project-context-template.md +21 -0
  403. package/pennyfarthing-dist/workflows/project-context/steps/step-01-discover.md +206 -0
  404. package/pennyfarthing-dist/workflows/project-context/steps/step-02-generate.md +349 -0
  405. package/pennyfarthing-dist/workflows/project-context/steps/step-03-complete.md +306 -0
  406. package/pennyfarthing-dist/workflows/project-context/workflow.yaml +27 -0
  407. package/pennyfarthing-dist/workflows/project-setup/steps/step-01-discover.md +157 -0
  408. package/pennyfarthing-dist/workflows/project-setup/steps/step-02-clone-repos.md +217 -0
  409. package/pennyfarthing-dist/workflows/project-setup/steps/step-03-repos-yaml.md +159 -0
  410. package/pennyfarthing-dist/workflows/project-setup/steps/step-04-claude-md.md +186 -0
  411. package/pennyfarthing-dist/workflows/project-setup/steps/step-05-shared-context.md +185 -0
  412. package/pennyfarthing-dist/workflows/project-setup/steps/step-06-task-runner.md +279 -0
  413. package/pennyfarthing-dist/workflows/project-setup/steps/step-07-theme.md +200 -0
  414. package/pennyfarthing-dist/workflows/project-setup/steps/step-08-theme-packs.md +142 -0
  415. package/pennyfarthing-dist/workflows/project-setup/steps/step-09-cyclist.md +245 -0
  416. package/pennyfarthing-dist/workflows/project-setup/steps/step-10-complete.md +204 -0
  417. package/pennyfarthing-dist/workflows/project-setup/workflow.yaml +41 -0
  418. package/pennyfarthing-dist/workflows/quick-dev/steps/step-01-mode-detection.md +177 -0
  419. package/pennyfarthing-dist/workflows/quick-dev/steps/step-02-context-gathering.md +143 -0
  420. package/pennyfarthing-dist/workflows/quick-dev/steps/step-03-execute.md +138 -0
  421. package/pennyfarthing-dist/workflows/quick-dev/steps/step-04-self-check.md +135 -0
  422. package/pennyfarthing-dist/workflows/quick-dev/steps/step-05-adversarial-review.md +129 -0
  423. package/pennyfarthing-dist/workflows/quick-dev/steps/step-06-resolve-findings.md +163 -0
  424. package/pennyfarthing-dist/workflows/quick-dev/workflow.yaml +27 -0
  425. package/pennyfarthing-dist/workflows/quick-spec/steps/step-01-understand.md +201 -0
  426. package/pennyfarthing-dist/workflows/quick-spec/steps/step-02-investigate.md +156 -0
  427. package/pennyfarthing-dist/workflows/quick-spec/steps/step-03-generate.md +140 -0
  428. package/pennyfarthing-dist/workflows/quick-spec/steps/step-04-review.md +203 -0
  429. package/pennyfarthing-dist/workflows/quick-spec/tech-spec-template.md +74 -0
  430. package/pennyfarthing-dist/workflows/quick-spec/workflow.yaml +27 -0
  431. package/pennyfarthing-dist/workflows/release/steps/step-01-preflight.md +105 -0
  432. package/pennyfarthing-dist/workflows/release/steps/step-02-bump.md +95 -0
  433. package/pennyfarthing-dist/workflows/release/steps/step-03-changelog.md +125 -0
  434. package/pennyfarthing-dist/workflows/release/steps/step-04-readme.md +101 -0
  435. package/pennyfarthing-dist/workflows/release/steps/step-05-claude-md.md +102 -0
  436. package/pennyfarthing-dist/workflows/release/steps/step-06-retro.md +59 -0
  437. package/pennyfarthing-dist/workflows/release/steps/step-07-commit.md +109 -0
  438. package/pennyfarthing-dist/workflows/release/steps/step-08-merge.md +65 -0
  439. package/pennyfarthing-dist/workflows/release/steps/step-09-push.md +75 -0
  440. package/pennyfarthing-dist/workflows/release/steps/step-10-publish.md +93 -0
  441. package/pennyfarthing-dist/workflows/release/steps/step-11-finalize.md +71 -0
  442. package/pennyfarthing-dist/workflows/release.yaml +62 -0
  443. package/pennyfarthing-dist/workflows/research/steps-domain/step-01-init.md +159 -0
  444. package/pennyfarthing-dist/workflows/research/steps-domain/step-02-domain-analysis.md +253 -0
  445. package/pennyfarthing-dist/workflows/research/steps-domain/step-03-competitive-landscape.md +263 -0
  446. package/pennyfarthing-dist/workflows/research/steps-domain/step-04-regulatory-focus.md +232 -0
  447. package/pennyfarthing-dist/workflows/research/steps-domain/step-05-technical-trends.md +260 -0
  448. package/pennyfarthing-dist/workflows/research/steps-domain/step-06-research-synthesis.md +477 -0
  449. package/pennyfarthing-dist/workflows/research/steps-market/step-01-init.md +205 -0
  450. package/pennyfarthing-dist/workflows/research/steps-market/step-02-customer-behavior.md +262 -0
  451. package/pennyfarthing-dist/workflows/research/steps-market/step-02-customer-insights.md +227 -0
  452. package/pennyfarthing-dist/workflows/research/steps-market/step-03-customer-pain-points.md +275 -0
  453. package/pennyfarthing-dist/workflows/research/steps-market/step-04-customer-decisions.md +286 -0
  454. package/pennyfarthing-dist/workflows/research/steps-market/step-05-competitive-analysis.md +203 -0
  455. package/pennyfarthing-dist/workflows/research/steps-market/step-06-research-completion.md +510 -0
  456. package/pennyfarthing-dist/workflows/research/steps-technical/step-01-init.md +159 -0
  457. package/pennyfarthing-dist/workflows/research/steps-technical/step-02-technical-overview.md +264 -0
  458. package/pennyfarthing-dist/workflows/research/steps-technical/step-03-integration-patterns.md +274 -0
  459. package/pennyfarthing-dist/workflows/research/steps-technical/step-04-architectural-patterns.md +228 -0
  460. package/pennyfarthing-dist/workflows/research/steps-technical/step-05-implementation-research.md +267 -0
  461. package/pennyfarthing-dist/workflows/research/steps-technical/step-06-research-synthesis.md +522 -0
  462. package/pennyfarthing-dist/workflows/research/templates/research.template.md +29 -0
  463. package/pennyfarthing-dist/workflows/research/workflow.yaml +45 -0
  464. package/pennyfarthing-dist/workflows/retrospective/checklist.md +31 -0
  465. package/pennyfarthing-dist/workflows/retrospective/instructions.md +1443 -0
  466. package/pennyfarthing-dist/workflows/retrospective/workflow.yaml +50 -0
  467. package/pennyfarthing-dist/workflows/sprint-planning/checklist.md +33 -0
  468. package/pennyfarthing-dist/workflows/sprint-planning/sprint-status-template.yaml +55 -0
  469. package/pennyfarthing-dist/workflows/sprint-planning/steps/step-01-parse-epic-files.md +69 -0
  470. package/pennyfarthing-dist/workflows/sprint-planning/steps/step-02-build-sprint-status.md +61 -0
  471. package/pennyfarthing-dist/workflows/sprint-planning/steps/step-03-status-detection.md +80 -0
  472. package/pennyfarthing-dist/workflows/sprint-planning/steps/step-04-generate-status-file.md +90 -0
  473. package/pennyfarthing-dist/workflows/sprint-planning/steps/step-05-validate-and-report.md +78 -0
  474. package/pennyfarthing-dist/workflows/sprint-planning/workflow.yaml +34 -0
  475. package/pennyfarthing-dist/workflows/tdd.yaml +50 -0
  476. package/pennyfarthing-dist/workflows/trivial.yaml +40 -0
  477. package/pennyfarthing-dist/workflows/ux-design/steps/step-01-init.md +141 -0
  478. package/pennyfarthing-dist/workflows/ux-design/steps/step-01b-continue.md +133 -0
  479. package/pennyfarthing-dist/workflows/ux-design/steps/step-02-discovery.md +196 -0
  480. package/pennyfarthing-dist/workflows/ux-design/steps/step-03-core-experience.md +222 -0
  481. package/pennyfarthing-dist/workflows/ux-design/steps/step-04-emotional-response.md +225 -0
  482. package/pennyfarthing-dist/workflows/ux-design/steps/step-05-inspiration.md +240 -0
  483. package/pennyfarthing-dist/workflows/ux-design/steps/step-06-design-system.md +258 -0
  484. package/pennyfarthing-dist/workflows/ux-design/steps/step-07-defining-experience.md +260 -0
  485. package/pennyfarthing-dist/workflows/ux-design/steps/step-08-visual-foundation.md +230 -0
  486. package/pennyfarthing-dist/workflows/ux-design/steps/step-09-design-directions.md +230 -0
  487. package/pennyfarthing-dist/workflows/ux-design/steps/step-10-user-journeys.md +247 -0
  488. package/pennyfarthing-dist/workflows/ux-design/steps/step-11-component-strategy.md +254 -0
  489. package/pennyfarthing-dist/workflows/ux-design/steps/step-12-ux-patterns.md +243 -0
  490. package/pennyfarthing-dist/workflows/ux-design/steps/step-13-responsive-accessibility.md +270 -0
  491. package/pennyfarthing-dist/workflows/ux-design/steps/step-14-complete.md +234 -0
  492. package/pennyfarthing-dist/workflows/ux-design/ux-design-template.md +13 -0
  493. package/pennyfarthing-dist/workflows/ux-design/workflow.yaml +41 -0
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Ground-truth judge for SWE-bench scenarios.
4
+
5
+ Compares Claude's proposed fix against the actual SWE-bench patch.
6
+ Scores based on:
7
+ - File identification (20%)
8
+ - Function/location identification (20%)
9
+ - Fix logic match (40%)
10
+ - Completeness (20%)
11
+ """
12
+
13
+ import json
14
+ import re
15
+ import sys
16
+ from pathlib import Path
17
+ from difflib import SequenceMatcher
18
+
19
+ # Add parent to path for pennyfarthing_scripts imports
20
+ sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
21
+
22
+ from pennyfarthing_scripts.swebench import (
23
+ extract_patch_info,
24
+ extract_problem_keywords,
25
+ find_scenario,
26
+ get_meaningful_patterns,
27
+ load_swebench_data,
28
+ )
29
+
30
+
31
def score_response(response_text, ground_truth):
    """Score a response against the ground-truth patch.

    Four heuristic components are computed with case-insensitive substring
    and similarity checks, then summed:

    - ``file_identification`` (max 20): patched file names mentioned.
    - ``location_identification`` (max 20): patched function/class names
      mentioned.
    - ``fix_logic_match`` (max 40): key patterns (max 20) plus added patch
      lines (max 20) that appear in the response.
    - ``completeness`` (max 20): code block, test mention, problem keywords
      and explanatory wording.

    Args:
        response_text: The proposed fix as text.
        ground_truth: Scenario mapping. ``patch`` is required;
            ``problem_statement`` is optional.

    Returns:
        A dict with the four component scores, ``total`` (rounded to one
        decimal) and a ``details`` dict describing what was matched.

    Raises:
        KeyError: If ``ground_truth`` has no ``patch`` entry.
    """
    patch_info = extract_patch_info(ground_truth['patch'])

    scores = {
        'file_identification': 0,
        'location_identification': 0,
        'fix_logic_match': 0,
        'completeness': 0,
        'details': {}
    }

    response_lower = response_text.lower()

    # 1. FILE IDENTIFICATION (20 points)
    files_found = 0
    for f in patch_info.files:
        # Accept either the bare file name or the full path.
        filename = Path(f).name
        if filename.lower() in response_lower or f.lower() in response_lower:
            files_found += 1

    if patch_info.files:
        file_score = (files_found / len(patch_info.files)) * 20
        scores['file_identification'] = min(20, file_score)
        scores['details']['files_expected'] = patch_info.files
        scores['details']['files_found'] = files_found
    else:
        scores['file_identification'] = 20  # No specific file in patch

    # 2. LOCATION IDENTIFICATION (20 points)
    # Look for function/class names mentioned in the patch
    locations_found = 0
    for func in patch_info.functions:
        func_match = re.search(r'(def|class)\s+(\w+)', func)
        if func_match:
            func_name = func_match.group(2)
            if func_name.lower() in response_lower:
                locations_found += 1
        elif func.strip() and func.strip().split()[0].lower() in response_lower:
            # No def/class header: fall back to the first token. It must be
            # lower-cased because the response has been lower-cased too.
            locations_found += 1

    if patch_info.functions:
        loc_score = (locations_found / len(patch_info.functions)) * 20
        scores['location_identification'] = min(20, loc_score)
        scores['details']['locations_expected'] = patch_info.functions[:3]
        scores['details']['locations_found'] = locations_found
    else:
        scores['location_identification'] = 10  # Partial credit

    # 3. FIX LOGIC MATCH (40 points)
    # Check if key code patterns from the fix appear in the response
    meaningful_patterns = get_meaningful_patterns(patch_info.key_patterns)

    patterns_found = 0
    for pattern in meaningful_patterns:
        if pattern.lower() in response_lower:
            patterns_found += 1

    if meaningful_patterns:
        pattern_score = (patterns_found / len(meaningful_patterns)) * 20
        scores['details']['patterns_expected'] = meaningful_patterns[:10]
        scores['details']['patterns_found'] = patterns_found
    else:
        pattern_score = 10

    # Check for actual code additions (first 5 only).
    # The response is normalized once, outside the loop.
    response_normalized = re.sub(r'\s+', ' ', response_lower)
    response_lines = [
        re.sub(r'\s+', ' ', line).strip()
        for line in response_lower.splitlines()
        if line.strip()
    ]

    additions_matched = 0
    for addition in patch_info.additions[:5]:
        addition_normalized = re.sub(r'\s+', ' ', addition.lower())
        if addition_normalized in response_normalized:
            additions_matched += 1
            continue

        # Fuzzy match line by line. Comparing one patch line against the
        # whole response caps the ratio near 2*len(line)/len(response), so
        # the 0.6 threshold could never be reached on realistic answers.
        needle = addition_normalized.strip()
        if needle and any(
            SequenceMatcher(None, line, needle).ratio() > 0.6
            for line in response_lines
        ):
            additions_matched += 1

    if patch_info.additions:
        addition_score = (additions_matched / min(5, len(patch_info.additions))) * 20
        scores['details']['additions_matched'] = additions_matched
    else:
        addition_score = 10

    scores['fix_logic_match'] = min(40, pattern_score + addition_score)

    # 4. COMPLETENESS (20 points)
    # Does the response have all the elements of a good fix?
    completeness_score = 0

    # Has code block?
    if '```' in response_text:
        completeness_score += 5

    # Has test considerations?
    if 'test' in response_lower:
        completeness_score += 5

    # Mentions the specific error/issue?
    problem_keywords = extract_problem_keywords(ground_truth.get('problem_statement', ''))
    keywords_found = sum(1 for kw in problem_keywords if kw.lower() in response_lower)
    if problem_keywords:
        completeness_score += min(5, (keywords_found / len(problem_keywords)) * 5)
    else:
        completeness_score += 2.5

    # Has explanation of why fix works?
    explanation_words = ['because', 'this fixes', 'this resolves', 'the issue', 'the problem', 'solution']
    if any(word in response_lower for word in explanation_words):
        completeness_score += 5

    scores['completeness'] = min(20, completeness_score)

    # Total
    scores['total'] = round(
        scores['file_identification'] +
        scores['location_identification'] +
        scores['fix_logic_match'] +
        scores['completeness'],
        1,
    )

    return scores
154
+
155
+
156
def main():
    """Command-line entry point for the ground-truth judge.

    Expects ``<scenario_name> <response_file>`` in ``sys.argv``. It loads the
    SWE-bench data, scores the ``result`` field of the response JSON with
    ``score_response``, prints a report, and saves the result as JSON next
    to the response file.

    Returns:
        The scores dict produced by ``score_response``.

    Exits with status 1 on bad usage, an unknown scenario, or a response
    file without a ``result`` field.
    """
    if len(sys.argv) < 3:
        print("Usage: ground-truth-judge.py <scenario_name> <response_file>")
        print("Example: ground-truth-judge.py flask-5014 run_20260102T134237Z.json")
        sys.exit(1)

    scenario_name = sys.argv[1]
    response_file = sys.argv[2]

    # Load SWE-bench data
    swebench_data = load_swebench_data()

    # Find scenario
    scenario = find_scenario(swebench_data, scenario_name)
    if not scenario:
        print(f"Error: Scenario '{scenario_name}' not found in SWE-bench data")
        sys.exit(1)

    # Load response (explicit encoding so it does not depend on the locale)
    with open(response_file, 'r', encoding='utf-8') as f:
        response_data = json.load(f)

    response_text = response_data.get('result', '')
    if not response_text:
        print("Error: No 'result' field in response file")
        sys.exit(1)

    # Score
    scores = score_response(response_text, scenario)

    # Output
    print(f"\n{'='*60}")
    print(f"GROUND TRUTH EVALUATION: {scenario_name}")
    print(f"{'='*60}")
    print(f"\nScores:")
    print(f" File Identification: {scores['file_identification']:5.1f}/20")
    print(f" Location Identification: {scores['location_identification']:5.1f}/20")
    print(f" Fix Logic Match: {scores['fix_logic_match']:5.1f}/40")
    print(f" Completeness: {scores['completeness']:5.1f}/20")
    print(f" {'─'*40}")
    print(f" TOTAL: {scores['total']:5.1f}/100")

    print(f"\nDetails:")
    for key, value in scores['details'].items():
        print(f" {key}: {value}")

    # Output JSON for programmatic use
    output = {
        'scenario': scenario_name,
        'instance_id': scenario.get('instance_id'),
        'scores': scores,
        'ground_truth_patch_preview': scenario.get('patch', '')[:300]
    }

    # Save judge output. Only the file name is rewritten: replacing 'run_'
    # in the whole path would also alter directory names, and a name without
    # 'run_' would leave the path unchanged and overwrite the response file.
    response_path = Path(response_file)
    if 'run_' in response_path.name:
        output_name = response_path.name.replace('run_', 'gt_judge_')
    else:
        output_name = 'gt_judge_' + response_path.name
    output_path = str(response_path.with_name(output_name))

    with open(output_path, 'w') as f:
        json.dump(output, f, indent=2)
    print(f"\nSaved to: {output_path}")

    return scores
217
+
218
+
219
+ if __name__ == '__main__':
220
+ main()
@@ -0,0 +1,374 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SWE-bench scenario judge using:
4
+ 1. Scenario-specific scoring rubric from YAML
5
+ 2. Ground-truth validation from actual SWE-bench patches
6
+
7
+ Scoring structure:
8
+ - root_cause (30%): IDENTIFIES_BUG_LOCATION (15) + EXPLAINS_WHY_BROKEN (15)
9
+ - fix_quality (40%): FIX_ADDRESSES_ISSUE (20) + FIX_IS_MINIMAL (10) + FIX_SYNTAX_CORRECT (10)
10
+ - completeness (20%): EDGE_CASES (10) + TEST_COVERAGE (10)
11
+ - persona (10%): IN_CHARACTER (10)
12
+ """
13
+
14
+ import json
15
+ import re
16
+ import sys
17
+ from pathlib import Path
18
+ from difflib import SequenceMatcher
19
+
20
+ # Add parent to path for pennyfarthing_scripts imports
21
+ sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
22
+
23
+ from pennyfarthing_scripts.swebench import (
24
+ extract_patch_info,
25
+ find_scenario,
26
+ load_swebench_data,
27
+ )
28
+
29
+
30
def score_identifies_bug_location(response, ground_truth):
    """Score IDENTIFIES_BUG_LOCATION (15 pts) using ground truth.

    Half of the points come from mentioning the patched files and half from
    mentioning the patched functions/classes. All checks are
    case-insensitive substring tests.

    Returns:
        ``(score, details)``: the score capped at 15 and a list of
        human-readable notes.
    """
    patch = extract_patch_info(ground_truth.get('patch', ''))
    haystack = response.lower()

    points = 0
    notes = []

    # Files (7.5 pts): the bare file name or the full path counts as a hit.
    file_hits = sum(
        1
        for path in patch.files
        if Path(path).name.lower() in haystack or path.lower() in haystack
    )
    if patch.files:
        points += (file_hits / len(patch.files)) * 7.5
        notes.append(f"Files: {file_hits}/{len(patch.files)} found")

    # Functions/classes (7.5 pts): only entries with a def/class header count.
    func_hits = 0
    for header in patch.functions:
        found = re.search(r'(def|class)\s+(\w+)', header)
        if found and found.group(2).lower() in haystack:
            func_hits += 1

    if not patch.functions:
        # Partial credit if no specific function in patch
        points += 3.75
    else:
        points += min(7.5, (func_hits / len(patch.functions)) * 7.5)
        notes.append(f"Functions: {func_hits}/{len(patch.functions)} found")

    return min(15, points), notes
67
+
68
+
69
def score_explains_why_broken(response, ground_truth):
    """Score EXPLAINS_WHY_BROKEN (15 pts).

    Up to 7.5 points are given for explanatory wording, and up to 7.5 for
    mentioning key terms taken from the problem statement. Key terms are
    quoted/backticked spans and ``*Error`` / ``*Exception`` names.

    Args:
        response: The proposed fix as text.
        ground_truth: Scenario mapping; ``problem_statement`` may be
            missing or ``None``.

    Returns:
        ``(score, details)``: the score capped at 15 and a list of notes.
    """
    response_lower = response.lower()
    # `or ''` also covers an explicit None value, which .get() would return.
    problem = (ground_truth.get('problem_statement') or '').lower()

    score = 0
    details = []

    # Extract key terms from problem statement
    key_terms = re.findall(r'[`\'"]([^`\'"]+)[`\'"]', problem)
    key_terms += re.findall(r'\b\w+Error\b|\b\w+Exception\b', problem, re.IGNORECASE)
    # Deduplicate in first-seen order. Slicing a set would keep an arbitrary
    # 10 terms under hash randomisation, so scores would vary between runs.
    key_terms = list(dict.fromkeys(key_terms))[:10]

    # Check for explanation of the issue
    explanation_markers = ['because', 'this happens', 'the issue', 'the problem', 'fails when', 'breaks when', 'causes']
    has_explanation = any(marker in response_lower for marker in explanation_markers)
    if has_explanation:
        score += 7.5
        details.append("Has explanation of why broken")

    # Check for key terms from problem
    terms_found = sum(1 for term in key_terms if term.lower() in response_lower)
    if key_terms:
        term_score = (terms_found / len(key_terms)) * 7.5
        score += term_score
        details.append(f"Key terms: {terms_found}/{len(key_terms)}")
    else:
        score += 3.75

    return min(15, score), details
99
+
100
+
101
def score_fix_addresses_issue(response, ground_truth):
    """Score FIX_ADDRESSES_ISSUE (20 pts) using ground truth patch.

    Up to 15 points are given for reproducing the first five added lines of
    the patch. A whitespace-normalized, case-insensitive exact match counts
    as a full match; a fuzzy match against any single response line counts
    as half. A further 5 points are given if the response contains a fenced
    code block.

    Returns:
        ``(score, details)``: the score capped at 20 and a list of notes.
    """
    patch_info = extract_patch_info(ground_truth.get('patch', ''))
    response_lower = response.lower()

    score = 0
    details = []

    # Normalize the response once; it does not change between additions.
    response_norm = re.sub(r'\s+', ' ', response_lower)
    response_lines = [
        re.sub(r'\s+', ' ', line).strip()
        for line in response_lower.splitlines()
        if line.strip()
    ]

    # Check if key additions from patch appear in response
    additions_matched = 0
    for addition in patch_info.additions[:5]:
        # Normalize whitespace
        addition_norm = re.sub(r'\s+', ' ', addition.lower())

        if addition_norm in response_norm:
            additions_matched += 1
            continue

        # Fuzzy match line by line. Comparing one patch line against the
        # whole response caps the ratio near 2*len(line)/len(response), so
        # the 0.7 threshold could never be reached on realistic answers.
        needle = addition_norm.strip()
        if needle and any(
            SequenceMatcher(None, line, needle).ratio() > 0.7
            for line in response_lines
        ):
            additions_matched += 0.5

    if patch_info.additions:
        addition_score = (additions_matched / min(5, len(patch_info.additions))) * 15
        score += addition_score
        details.append(f"Code matches: {additions_matched}/{min(5, len(patch_info.additions))}")

    # Check for code block with fix
    if '```' in response:
        score += 5
        details.append("Has code block")

    return min(20, score), details
136
+
137
+
138
def score_fix_is_minimal(response, ground_truth):
    """Score FIX_IS_MINIMAL (10 pts).

    Compares the number of lines inside the response's fenced code blocks
    with the number of added plus deleted lines in the ground-truth patch.

    Returns:
        ``(score, details)``: 10, 5 or 2 depending on the size ratio, or 5
        with no details when the patch has no added or deleted lines.
    """
    patch_info = extract_patch_info(ground_truth.get('patch', ''))
    notes = []

    patch_size = len(patch_info.additions) + len(patch_info.deletions)

    # Sum the line counts of every fenced code block in the response.
    code_size = 0
    for snippet in re.findall(r'```[\w]*\n(.*?)```', response, re.DOTALL):
        code_size += len(snippet.strip().split('\n'))

    if not patch_size > 0:
        return min(10, 5), notes

    # A response without any code lines is treated as ratio 1.
    if code_size > 0:
        ratio = code_size / patch_size
    else:
        ratio = 1

    # Within 2x of the patch is minimal, within 4x is somewhat verbose.
    if ratio <= 2:
        points, label = 10, "Minimal"
    elif ratio <= 4:
        points, label = 5, "Somewhat verbose"
    else:
        points, label = 2, "Over-engineered"
    notes.append(f"{label}: {code_size} lines (patch: {patch_size})")

    return min(10, points), notes
168
+
169
+
170
def score_fix_syntax_correct(response):
    """Score FIX_SYNTAX_CORRECT (10 pts).

    Fenced code blocks tagged as Python are compiled. If there are none,
    untagged fenced blocks are compiled instead. Blocks tagged with another
    language are ignored.

    Returns:
        ``(10, ["Syntax valid"])`` when every checked block compiles,
        ``(5, ["Syntax errors detected"])`` when one does not, and
        ``(5, ["No code blocks to validate"])`` when nothing is checked.
    """
    import textwrap

    # Walk the fences in order so opening and closing fences stay paired.
    # Searching for untagged fences on their own can pair a closing fence
    # with the next opening fence and "compile" the prose between them.
    python_blocks = []
    untagged_blocks = []
    for info, body in re.findall(r'```([^\n`]*)\n(.*?)```', response, re.DOTALL):
        words = info.split()
        tag = words[0].lower() if words else ''
        if tag in ('python', 'py', 'python3'):
            python_blocks.append(body)
        elif not tag:
            untagged_blocks.append(body)

    code_blocks = python_blocks or untagged_blocks
    if not code_blocks:
        return 5, ["No code blocks to validate"]

    for block in code_blocks:
        try:
            # Dedent so a uniformly indented excerpt is not rejected with an
            # IndentationError. compile() only parses; nothing is executed.
            compile(textwrap.dedent(block), '<string>', 'exec')
        except (SyntaxError, ValueError):
            # ValueError: e.g. source containing null bytes.
            return 5, ["Syntax errors detected"]

    return 10, ["Syntax valid"]
201
+
202
+
203
def score_edge_cases(response):
    """Score EDGE_CASES (10 pts).

    Each edge-case marker phrase found in the response (case-insensitive
    substring test) is worth 2 points, capped at 10.

    Returns:
        ``(score, details)``, where details holds one note with the number
        of markers found.
    """
    text = response.lower()
    markers = (
        'edge case', 'corner case', 'what if', 'consider', 'also', 'none',
        'empty', 'null', 'zero', 'negative', 'boundary',
    )
    hits = len([marker for marker in markers if marker in text])
    return min(10, hits * 2), [f"Edge case markers: {hits}"]
217
+
218
+
219
+ def score_test_coverage(response):
220
+ """Score TEST_COVERAGE (10 pts)."""
221
+ response_lower = response.lower()
222
+
223
+ score = 0
224
+ details = []
225
+
226
+ # Check for test-related content
227
+ has_test_section = 'test' in response_lower
228
+ has_test_function = 'def test_' in response_lower or 'test_' in response
229
+ has_assert = 'assert' in response_lower or 'pytest' in response_lower
230
+
231
+ if has_test_function:
232
+ score += 5
233
+ details.append("Has test function")
234
+ if has_assert:
235
+ score += 3
236
+ details.append("Has assertions")
237
+ if has_test_section:
238
+ score += 2
239
+ details.append("Has test section")
240
+
241
+ return min(10, score), details
242
+
243
+
244
def score_in_character(response, persona="senior developer"):
    """Score IN_CHARACTER (10 pts).

    Counts professional-tone phrases in the response (case-insensitive
    substring test), 2 points each, capped at 10. The ``persona`` argument
    is accepted but not used by the scoring.

    Returns:
        ``(score, details)``, where details holds one note with the number
        of phrases found.
    """
    text = response.lower()

    # For control baseline, check professional tone
    phrases = ('i recommend', 'we should', 'this approach', 'the fix', 'analysis', 'root cause')
    hits = 0
    for phrase in phrases:
        if phrase in text:
            hits += 1

    return min(10, hits * 2), [f"Professional markers: {hits}"]
259
+
260
+
261
def judge_response(scenario_name, response_text, swebench_data):
    """Full judgment using scenario rubric + ground truth.

    Looks up the scenario, runs every criterion scorer, and groups the
    results into the four rubric categories (root_cause, fix_quality,
    completeness, persona).

    Returns:
        A dict with ``scenario``, ``instance_id``, per-category ``scores``
        (each with its criteria and a ``subtotal``), the rounded ``total``,
        per-category ``details`` and ``ground_truth_files``. If the
        scenario is unknown, a dict with only an ``error`` message.
    """
    ground_truth = find_scenario(swebench_data, scenario_name)
    if not ground_truth:
        return {'error': f'Scenario {scenario_name} not found in SWE-bench data'}

    # category -> [(criterion, (points, notes)), ...] in rubric order.
    graded = {
        # root_cause (30%)
        'root_cause': [
            ('IDENTIFIES_BUG_LOCATION', score_identifies_bug_location(response_text, ground_truth)),
            ('EXPLAINS_WHY_BROKEN', score_explains_why_broken(response_text, ground_truth)),
        ],
        # fix_quality (40%)
        'fix_quality': [
            ('FIX_ADDRESSES_ISSUE', score_fix_addresses_issue(response_text, ground_truth)),
            ('FIX_IS_MINIMAL', score_fix_is_minimal(response_text, ground_truth)),
            ('FIX_SYNTAX_CORRECT', score_fix_syntax_correct(response_text)),
        ],
        # completeness (20%)
        'completeness': [
            ('EDGE_CASES', score_edge_cases(response_text)),
            ('TEST_COVERAGE', score_test_coverage(response_text)),
        ],
        # persona (10%)
        'persona': [
            ('IN_CHARACTER', score_in_character(response_text)),
        ],
    }

    scores = {}
    all_details = {}
    total = 0
    for category, results in graded.items():
        entry = {}
        notes = []
        subtotal = 0
        for criterion, (points, criterion_notes) in results:
            entry[criterion] = points
            subtotal += points
            notes = notes + criterion_notes
        entry['subtotal'] = subtotal
        scores[category] = entry
        all_details[category] = notes
        total += subtotal

    patch_info = extract_patch_info(ground_truth.get('patch', ''))
    return {
        'scenario': scenario_name,
        'instance_id': ground_truth.get('instance_id'),
        'scores': scores,
        'total': round(total, 1),
        'details': all_details,
        'ground_truth_files': patch_info.files
    }
328
+
329
+
330
def main():
    """Command-line entry point for the SWE-bench judge.

    Expects ``<scenario_name> <response_file>`` in ``sys.argv``. It reads the
    response text from the ``result`` or ``response_text`` field of the
    response JSON, judges it with ``judge_response``, prints a report, and
    saves the result as JSON next to the response file.

    Exits with status 1 on bad usage, a response file without response
    text, or a scenario that ``judge_response`` reports as an error.
    """
    if len(sys.argv) < 3:
        print("Usage: swebench-judge.py <scenario_name> <response_file>")
        sys.exit(1)

    scenario_name = sys.argv[1]
    response_file = sys.argv[2]

    # Load data
    swebench_data = load_swebench_data()

    # Explicit encoding so reading does not depend on the locale.
    with open(response_file, 'r', encoding='utf-8') as f:
        response_data = json.load(f)

    # Handle different JSON structures
    response_text = response_data.get('result', '') or response_data.get('response_text', '')
    if not response_text:
        print("Error: No 'result' or 'response_text' field in response file")
        sys.exit(1)

    # Judge
    result = judge_response(scenario_name, response_text, swebench_data)

    # judge_response returns only an 'error' key for an unknown scenario;
    # report it instead of failing on the missing 'scores' key below.
    if 'error' in result:
        print(f"Error: {result['error']}")
        sys.exit(1)

    # Display
    print(f"\n{'='*60}")
    print(f"SWE-BENCH JUDGE: {scenario_name}")
    print(f"{'='*60}")

    for category, scores in result['scores'].items():
        print(f"\n{category.upper()} ({scores['subtotal']:.1f} pts)")
        for criterion, score in scores.items():
            if criterion != 'subtotal':
                print(f" {criterion}: {score:.1f}")

    print(f"\n{'─'*40}")
    print(f"TOTAL: {result['total']}/100")

    print(f"\nGround truth files: {result['ground_truth_files']}")

    # Save. Only the file name is rewritten: replacing 'run_' in the whole
    # path would also alter directory names, and a name without 'run_' would
    # leave the path unchanged and overwrite the response file.
    response_path = Path(response_file)
    if 'run_' in response_path.name:
        output_name = response_path.name.replace('run_', 'swebench_judge_')
    else:
        output_name = 'swebench_judge_' + response_path.name
    output_path = str(response_path.with_name(output_name))

    with open(output_path, 'w') as f:
        json.dump(result, f, indent=2)
    print(f"\nSaved to: {output_path}")
371
+
372
+
373
+ if __name__ == '__main__':
374
+ main()