agentsys 5.0.2 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264)
  1. package/.claude-plugin/marketplace.json +21 -14
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/AGENTS.md +2 -1
  4. package/CHANGELOG.md +24 -1
  5. package/README.md +7 -6
  6. package/adapters/codex/skills/agnix/SKILL.md +0 -1
  7. package/adapters/codex/skills/audit-project/SKILL.md +0 -1
  8. package/adapters/codex/skills/audit-project-agents/SKILL.md +0 -1
  9. package/adapters/codex/skills/audit-project-github/SKILL.md +0 -1
  10. package/adapters/codex/skills/consult/SKILL.md +133 -59
  11. package/adapters/codex/skills/debate/SKILL.md +214 -0
  12. package/adapters/codex/skills/delivery-approval/SKILL.md +0 -1
  13. package/adapters/codex/skills/deslop/SKILL.md +0 -1
  14. package/adapters/codex/skills/drift-detect/SKILL.md +0 -1
  15. package/adapters/codex/skills/enhance/SKILL.md +0 -1
  16. package/adapters/codex/skills/learn/SKILL.md +0 -1
  17. package/adapters/codex/skills/next-task/SKILL.md +0 -1
  18. package/adapters/codex/skills/perf/SKILL.md +0 -1
  19. package/adapters/codex/skills/repo-map/SKILL.md +0 -1
  20. package/adapters/codex/skills/ship/SKILL.md +0 -1
  21. package/adapters/codex/skills/ship-ci-review-loop/SKILL.md +0 -1
  22. package/adapters/codex/skills/ship-deployment/SKILL.md +0 -1
  23. package/adapters/codex/skills/ship-error-handling/SKILL.md +0 -1
  24. package/adapters/codex/skills/sync-docs/SKILL.md +0 -1
  25. package/adapters/opencode/agents/agent-enhancer.md +0 -1
  26. package/adapters/opencode/agents/agnix-agent.md +0 -1
  27. package/adapters/opencode/agents/ci-fixer.md +0 -1
  28. package/adapters/opencode/agents/ci-monitor.md +0 -1
  29. package/adapters/opencode/agents/claudemd-enhancer.md +0 -1
  30. package/adapters/opencode/agents/consult-agent.md +123 -31
  31. package/adapters/opencode/agents/cross-file-enhancer.md +0 -1
  32. package/adapters/opencode/agents/debate-orchestrator.md +169 -0
  33. package/adapters/opencode/agents/delivery-validator.md +0 -1
  34. package/adapters/opencode/agents/deslop-agent.md +0 -1
  35. package/adapters/opencode/agents/docs-enhancer.md +0 -1
  36. package/adapters/opencode/agents/exploration-agent.md +0 -1
  37. package/adapters/opencode/agents/hooks-enhancer.md +0 -1
  38. package/adapters/opencode/agents/implementation-agent.md +0 -1
  39. package/adapters/opencode/agents/learn-agent.md +0 -1
  40. package/adapters/opencode/agents/map-validator.md +0 -1
  41. package/adapters/opencode/agents/perf-analyzer.md +0 -1
  42. package/adapters/opencode/agents/perf-code-paths.md +0 -1
  43. package/adapters/opencode/agents/perf-investigation-logger.md +0 -1
  44. package/adapters/opencode/agents/perf-orchestrator.md +0 -1
  45. package/adapters/opencode/agents/perf-theory-gatherer.md +0 -1
  46. package/adapters/opencode/agents/perf-theory-tester.md +0 -1
  47. package/adapters/opencode/agents/plan-synthesizer.md +0 -1
  48. package/adapters/opencode/agents/planning-agent.md +0 -1
  49. package/adapters/opencode/agents/plugin-enhancer.md +0 -1
  50. package/adapters/opencode/agents/prompt-enhancer.md +0 -1
  51. package/adapters/opencode/agents/simple-fixer.md +0 -1
  52. package/adapters/opencode/agents/skills-enhancer.md +0 -1
  53. package/adapters/opencode/agents/sync-docs-agent.md +0 -1
  54. package/adapters/opencode/agents/task-discoverer.md +0 -1
  55. package/adapters/opencode/agents/test-coverage-checker.md +0 -1
  56. package/adapters/opencode/agents/worktree-manager.md +0 -1
  57. package/adapters/opencode/commands/agnix.md +0 -1
  58. package/adapters/opencode/commands/audit-project-agents.md +0 -1
  59. package/adapters/opencode/commands/audit-project-github.md +0 -1
  60. package/adapters/opencode/commands/audit-project.md +0 -1
  61. package/adapters/opencode/commands/consult.md +134 -59
  62. package/adapters/opencode/commands/debate.md +224 -0
  63. package/adapters/opencode/commands/delivery-approval.md +0 -1
  64. package/adapters/opencode/commands/deslop.md +0 -1
  65. package/adapters/opencode/commands/drift-detect.md +0 -1
  66. package/adapters/opencode/commands/enhance.md +0 -1
  67. package/adapters/opencode/commands/learn.md +0 -1
  68. package/adapters/opencode/commands/next-task.md +0 -1
  69. package/adapters/opencode/commands/perf.md +0 -1
  70. package/adapters/opencode/commands/repo-map.md +0 -1
  71. package/adapters/opencode/commands/ship-ci-review-loop.md +0 -1
  72. package/adapters/opencode/commands/ship-deployment.md +0 -1
  73. package/adapters/opencode/commands/ship-error-handling.md +0 -1
  74. package/adapters/opencode/commands/ship.md +0 -1
  75. package/adapters/opencode/commands/sync-docs.md +0 -1
  76. package/adapters/opencode/skills/agnix/SKILL.md +1 -2
  77. package/adapters/opencode/skills/consult/SKILL.md +41 -27
  78. package/adapters/opencode/skills/debate/SKILL.md +245 -0
  79. package/adapters/opencode/skills/deslop/SKILL.md +1 -2
  80. package/adapters/opencode/skills/discover-tasks/SKILL.md +1 -2
  81. package/adapters/opencode/skills/drift-analysis/SKILL.md +1 -2
  82. package/adapters/opencode/skills/enhance-agent-prompts/SKILL.md +1 -2
  83. package/adapters/opencode/skills/enhance-claude-memory/SKILL.md +1 -2
  84. package/adapters/opencode/skills/enhance-cross-file/SKILL.md +1 -2
  85. package/adapters/opencode/skills/enhance-docs/SKILL.md +1 -2
  86. package/adapters/opencode/skills/enhance-hooks/SKILL.md +1 -2
  87. package/adapters/opencode/skills/enhance-orchestrator/SKILL.md +1 -2
  88. package/adapters/opencode/skills/enhance-plugins/SKILL.md +1 -2
  89. package/adapters/opencode/skills/enhance-prompts/SKILL.md +1 -2
  90. package/adapters/opencode/skills/enhance-skills/SKILL.md +1 -2
  91. package/adapters/opencode/skills/learn/SKILL.md +1 -2
  92. package/adapters/opencode/skills/orchestrate-review/SKILL.md +0 -1
  93. package/adapters/opencode/skills/perf-analyzer/SKILL.md +1 -2
  94. package/adapters/opencode/skills/perf-baseline-manager/SKILL.md +1 -2
  95. package/adapters/opencode/skills/perf-benchmarker/SKILL.md +1 -2
  96. package/adapters/opencode/skills/perf-code-paths/SKILL.md +1 -2
  97. package/adapters/opencode/skills/perf-investigation-logger/SKILL.md +1 -2
  98. package/adapters/opencode/skills/perf-profiler/SKILL.md +1 -2
  99. package/adapters/opencode/skills/perf-theory-gatherer/SKILL.md +1 -2
  100. package/adapters/opencode/skills/perf-theory-tester/SKILL.md +1 -2
  101. package/adapters/opencode/skills/repo-mapping/SKILL.md +1 -2
  102. package/adapters/opencode/skills/sync-docs/SKILL.md +1 -2
  103. package/adapters/opencode/skills/validate-delivery/SKILL.md +1 -2
  104. package/lib/adapter-transforms.js +24 -4
  105. package/package.json +1 -1
  106. package/plugins/agnix/.claude-plugin/plugin.json +1 -1
  107. package/plugins/agnix/skills/agnix/SKILL.md +1 -1
  108. package/plugins/audit-project/.claude-plugin/plugin.json +1 -1
  109. package/plugins/audit-project/lib/adapter-transforms.js +24 -4
  110. package/plugins/consult/.claude-plugin/plugin.json +1 -1
  111. package/plugins/consult/agents/consult-agent.md +123 -30
  112. package/plugins/consult/commands/consult.md +136 -60
  113. package/plugins/consult/skills/consult/SKILL.md +39 -24
  114. package/plugins/debate/.claude-plugin/plugin.json +21 -0
  115. package/plugins/debate/agents/debate-orchestrator.md +175 -0
  116. package/plugins/debate/commands/debate.md +221 -0
  117. package/plugins/debate/lib/adapter-transforms.js +298 -0
  118. package/plugins/debate/lib/collectors/codebase.js +392 -0
  119. package/plugins/debate/lib/collectors/docs-patterns.js +713 -0
  120. package/plugins/debate/lib/collectors/documentation.js +219 -0
  121. package/plugins/debate/lib/collectors/github.js +330 -0
  122. package/plugins/debate/lib/collectors/index.js +126 -0
  123. package/plugins/debate/lib/config/index.js +14 -0
  124. package/plugins/debate/lib/cross-platform/index.js +539 -0
  125. package/plugins/debate/lib/discovery/index.js +352 -0
  126. package/plugins/debate/lib/drift-detect/collectors.js +37 -0
  127. package/plugins/debate/lib/enhance/agent-analyzer.js +421 -0
  128. package/plugins/debate/lib/enhance/agent-patterns.js +571 -0
  129. package/plugins/debate/lib/enhance/auto-suppression.js +622 -0
  130. package/plugins/debate/lib/enhance/benchmark.js +417 -0
  131. package/plugins/debate/lib/enhance/cross-file-analyzer.js +930 -0
  132. package/plugins/debate/lib/enhance/cross-file-patterns.js +370 -0
  133. package/plugins/debate/lib/enhance/docs-analyzer.js +325 -0
  134. package/plugins/debate/lib/enhance/docs-patterns.js +671 -0
  135. package/plugins/debate/lib/enhance/fixer.js +721 -0
  136. package/plugins/debate/lib/enhance/hook-analyzer.js +135 -0
  137. package/plugins/debate/lib/enhance/hook-patterns.js +40 -0
  138. package/plugins/debate/lib/enhance/index.js +127 -0
  139. package/plugins/debate/lib/enhance/plugin-analyzer.js +402 -0
  140. package/plugins/debate/lib/enhance/plugin-patterns.js +326 -0
  141. package/plugins/debate/lib/enhance/projectmemory-analyzer.js +551 -0
  142. package/plugins/debate/lib/enhance/projectmemory-patterns.js +617 -0
  143. package/plugins/debate/lib/enhance/prompt-analyzer.js +457 -0
  144. package/plugins/debate/lib/enhance/prompt-patterns.js +1484 -0
  145. package/plugins/debate/lib/enhance/reporter.js +1348 -0
  146. package/plugins/debate/lib/enhance/security-patterns.js +284 -0
  147. package/plugins/debate/lib/enhance/skill-analyzer.js +182 -0
  148. package/plugins/debate/lib/enhance/skill-patterns.js +147 -0
  149. package/plugins/debate/lib/enhance/suppression.js +352 -0
  150. package/plugins/debate/lib/enhance/tool-patterns.js +373 -0
  151. package/plugins/debate/lib/index.js +270 -0
  152. package/plugins/debate/lib/patterns/cli-enhancers.js +611 -0
  153. package/plugins/debate/lib/patterns/pipeline.js +948 -0
  154. package/plugins/debate/lib/patterns/review-patterns.js +558 -0
  155. package/plugins/debate/lib/patterns/slop-analyzers.js +2305 -0
  156. package/plugins/debate/lib/patterns/slop-patterns.js +1187 -0
  157. package/plugins/debate/lib/perf/analyzer/index.js +22 -0
  158. package/plugins/debate/lib/perf/argument-parser.js +105 -0
  159. package/plugins/debate/lib/perf/baseline-comparator.js +50 -0
  160. package/plugins/debate/lib/perf/baseline-store.js +127 -0
  161. package/plugins/debate/lib/perf/benchmark-runner.js +404 -0
  162. package/plugins/debate/lib/perf/breaking-point-finder.js +52 -0
  163. package/plugins/debate/lib/perf/breaking-point-runner.js +60 -0
  164. package/plugins/debate/lib/perf/checkpoint.js +123 -0
  165. package/plugins/debate/lib/perf/code-paths.js +86 -0
  166. package/plugins/debate/lib/perf/consolidation.js +37 -0
  167. package/plugins/debate/lib/perf/constraint-runner.js +71 -0
  168. package/plugins/debate/lib/perf/experiment-runner.js +32 -0
  169. package/plugins/debate/lib/perf/index.js +41 -0
  170. package/plugins/debate/lib/perf/investigation-state.js +874 -0
  171. package/plugins/debate/lib/perf/optimization-runner.js +79 -0
  172. package/plugins/debate/lib/perf/profilers/go.js +22 -0
  173. package/plugins/debate/lib/perf/profilers/index.js +46 -0
  174. package/plugins/debate/lib/perf/profilers/java.js +23 -0
  175. package/plugins/debate/lib/perf/profilers/node.js +27 -0
  176. package/plugins/debate/lib/perf/profilers/python.js +23 -0
  177. package/plugins/debate/lib/perf/profilers/rust.js +23 -0
  178. package/plugins/debate/lib/perf/profiling-runner.js +75 -0
  179. package/plugins/debate/lib/perf/schemas.js +140 -0
  180. package/plugins/debate/lib/platform/detect-platform.js +413 -0
  181. package/plugins/debate/lib/platform/detection-configs.js +93 -0
  182. package/plugins/debate/lib/platform/state-dir.js +132 -0
  183. package/plugins/debate/lib/platform/verify-tools.js +182 -0
  184. package/plugins/debate/lib/repo-map/cache.js +152 -0
  185. package/plugins/debate/lib/repo-map/concurrency.js +29 -0
  186. package/plugins/debate/lib/repo-map/index.js +222 -0
  187. package/plugins/debate/lib/repo-map/installer.js +212 -0
  188. package/plugins/debate/lib/repo-map/queries/go.js +27 -0
  189. package/plugins/debate/lib/repo-map/queries/index.js +100 -0
  190. package/plugins/debate/lib/repo-map/queries/java.js +38 -0
  191. package/plugins/debate/lib/repo-map/queries/javascript.js +55 -0
  192. package/plugins/debate/lib/repo-map/queries/python.js +24 -0
  193. package/plugins/debate/lib/repo-map/queries/rust.js +73 -0
  194. package/plugins/debate/lib/repo-map/queries/typescript.js +38 -0
  195. package/plugins/debate/lib/repo-map/runner.js +1364 -0
  196. package/plugins/debate/lib/repo-map/updater.js +562 -0
  197. package/plugins/debate/lib/repo-map/usage-analyzer.js +407 -0
  198. package/plugins/debate/lib/schemas/plugin-manifest.schema.json +57 -0
  199. package/plugins/debate/lib/schemas/validator.js +247 -0
  200. package/plugins/debate/lib/sources/custom-handler.js +199 -0
  201. package/plugins/debate/lib/sources/policy-questions.js +246 -0
  202. package/plugins/debate/lib/sources/source-cache.js +165 -0
  203. package/plugins/debate/lib/state/workflow-state.js +576 -0
  204. package/plugins/debate/lib/types/agent-frontmatter.d.ts +134 -0
  205. package/plugins/debate/lib/types/command-frontmatter.d.ts +107 -0
  206. package/plugins/debate/lib/types/hook-frontmatter.d.ts +115 -0
  207. package/plugins/debate/lib/types/index.d.ts +84 -0
  208. package/plugins/debate/lib/types/plugin-manifest.d.ts +102 -0
  209. package/plugins/debate/lib/types/skill-frontmatter.d.ts +89 -0
  210. package/plugins/debate/lib/utils/atomic-write.js +94 -0
  211. package/plugins/debate/lib/utils/cache-manager.js +159 -0
  212. package/plugins/debate/lib/utils/command-parser.js +0 -0
  213. package/plugins/debate/lib/utils/context-optimizer.js +300 -0
  214. package/plugins/debate/lib/utils/deprecation.js +37 -0
  215. package/plugins/debate/lib/utils/shell-escape.js +88 -0
  216. package/plugins/debate/lib/utils/state-helpers.js +61 -0
  217. package/plugins/debate/skills/debate/SKILL.md +264 -0
  218. package/plugins/deslop/.claude-plugin/plugin.json +1 -1
  219. package/plugins/deslop/lib/adapter-transforms.js +24 -4
  220. package/plugins/deslop/skills/deslop/SKILL.md +1 -1
  221. package/plugins/drift-detect/.claude-plugin/plugin.json +1 -1
  222. package/plugins/drift-detect/lib/adapter-transforms.js +24 -4
  223. package/plugins/drift-detect/skills/drift-analysis/SKILL.md +1 -1
  224. package/plugins/enhance/.claude-plugin/plugin.json +1 -1
  225. package/plugins/enhance/lib/adapter-transforms.js +24 -4
  226. package/plugins/enhance/skills/enhance-agent-prompts/SKILL.md +1 -1
  227. package/plugins/enhance/skills/enhance-claude-memory/SKILL.md +1 -1
  228. package/plugins/enhance/skills/enhance-cross-file/SKILL.md +1 -1
  229. package/plugins/enhance/skills/enhance-docs/SKILL.md +1 -1
  230. package/plugins/enhance/skills/enhance-hooks/SKILL.md +1 -1
  231. package/plugins/enhance/skills/enhance-orchestrator/SKILL.md +1 -1
  232. package/plugins/enhance/skills/enhance-plugins/SKILL.md +1 -1
  233. package/plugins/enhance/skills/enhance-prompts/SKILL.md +1 -1
  234. package/plugins/enhance/skills/enhance-skills/SKILL.md +1 -1
  235. package/plugins/learn/.claude-plugin/plugin.json +1 -1
  236. package/plugins/learn/agents/learn-agent.md +1 -1
  237. package/plugins/learn/lib/adapter-transforms.js +24 -4
  238. package/plugins/learn/skills/learn/SKILL.md +1 -1
  239. package/plugins/next-task/.claude-plugin/plugin.json +1 -1
  240. package/plugins/next-task/agents/exploration-agent.md +1 -1
  241. package/plugins/next-task/lib/adapter-transforms.js +24 -4
  242. package/plugins/next-task/skills/discover-tasks/SKILL.md +1 -1
  243. package/plugins/next-task/skills/validate-delivery/SKILL.md +1 -1
  244. package/plugins/perf/.claude-plugin/plugin.json +1 -1
  245. package/plugins/perf/lib/adapter-transforms.js +24 -4
  246. package/plugins/perf/skills/perf-analyzer/SKILL.md +1 -1
  247. package/plugins/perf/skills/perf-baseline-manager/SKILL.md +1 -1
  248. package/plugins/perf/skills/perf-benchmarker/SKILL.md +1 -1
  249. package/plugins/perf/skills/perf-code-paths/SKILL.md +1 -1
  250. package/plugins/perf/skills/perf-investigation-logger/SKILL.md +1 -1
  251. package/plugins/perf/skills/perf-profiler/SKILL.md +1 -1
  252. package/plugins/perf/skills/perf-theory-gatherer/SKILL.md +1 -1
  253. package/plugins/perf/skills/perf-theory-tester/SKILL.md +1 -1
  254. package/plugins/repo-map/.claude-plugin/plugin.json +1 -1
  255. package/plugins/repo-map/lib/adapter-transforms.js +24 -4
  256. package/plugins/ship/.claude-plugin/plugin.json +1 -1
  257. package/plugins/ship/lib/adapter-transforms.js +24 -4
  258. package/plugins/sync-docs/.claude-plugin/plugin.json +1 -1
  259. package/plugins/sync-docs/lib/adapter-transforms.js +24 -4
  260. package/plugins/sync-docs/skills/sync-docs/SKILL.md +1 -1
  261. package/scripts/gen-adapters.js +6 -7
  262. package/scripts/generate-docs.js +4 -2
  263. package/scripts/plugins.txt +1 -0
  264. package/site/content.json +6 -6
@@ -0,0 +1,417 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+
4
/**
 * Run pattern detection benchmarks against a manifest of test fixtures.
 *
 * The manifest's `fixtures` object maps a fixture path (resolved relative to
 * the manifest's directory) to its expectations: `analyzer` (a key of
 * `analyzers`), plus optional `expectedPatterns` and `mustNotTrigger` arrays
 * of pattern IDs. Fixtures that are missing, name an unknown analyzer, or
 * whose analyzer throws are recorded in `byFixture` as `{ error }` and do not
 * contribute to the metrics.
 *
 * @param {string} manifestPath - Path to the manifest.json file
 * @param {Object} analyzers - Map of analyzer names to analyzer functions; each is called with the fixture's full path
 * @returns {Object} Benchmark results with byPattern, byFixture, and summary metrics
 * @throws {Error} If the manifest cannot be read or is not valid JSON
 */
function runPatternBenchmarks(manifestPath, analyzers) {
  const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
  const fixturesDir = path.dirname(manifestPath);

  const results = {
    byPattern: {},
    byFixture: {},
    summary: {
      totalFixtures: 0,
      totalExpectedFindings: 0,
      totalActualFindings: 0,
      truePositives: 0,
      falsePositives: 0,
      falseNegatives: 0
    }
  };

  // Anything other than a plain object of fixtures is treated as "no fixtures".
  const fixtures = manifest?.fixtures && typeof manifest.fixtures === 'object' && !Array.isArray(manifest.fixtures)
    ? manifest.fixtures
    : {};

  for (const [fixturePath, expectations] of Object.entries(fixtures)) {
    const fullPath = path.join(fixturesDir, fixturePath);

    if (!fs.existsSync(fullPath)) {
      results.byFixture[fixturePath] = { error: 'File not found' };
      continue;
    }

    results.summary.totalFixtures++;

    // A null fixture entry or a missing analyzers map is reported per fixture
    // instead of aborting the whole run with a TypeError.
    const analyzerName = expectations?.analyzer;
    const analyzer = analyzers?.[analyzerName];
    if (!analyzer) {
      results.byFixture[fixturePath] = { error: `Unknown analyzer: ${analyzerName}` };
      continue;
    }

    let findings;
    try {
      findings = analyzer(fullPath);
    } catch (err) {
      results.byFixture[fixturePath] = { error: err.message };
      continue;
    }

    // Keep the raw ID list (with repeats) for reporting; use sets for matching.
    const foundPatternsArray = extractPatternIds(findings);
    const foundPatterns = new Set(foundPatternsArray);
    const expectedPatterns = new Set(expectations.expectedPatterns || []);
    const mustNotTrigger = new Set(expectations.mustNotTrigger || []);

    const fixtureResult = {
      expected: Array.from(expectedPatterns),
      found: foundPatternsArray,
      truePositives: [],
      falsePositives: [],
      falseNegatives: [],
      mustNotTriggerViolations: []
    };

    // Expected patterns are either true positives (found) or false negatives.
    for (const pattern of expectedPatterns) {
      if (foundPatterns.has(pattern)) {
        fixtureResult.truePositives.push(pattern);
        results.summary.truePositives++;
        updatePatternStats(results.byPattern, pattern, 'tp');
      } else {
        fixtureResult.falseNegatives.push(pattern);
        results.summary.falseNegatives++;
        updatePatternStats(results.byPattern, pattern, 'fn');
      }
    }

    // False positives: found but NOT expected.
    for (const pattern of foundPatterns) {
      if (!expectedPatterns.has(pattern)) {
        fixtureResult.falsePositives.push(pattern);
        results.summary.falsePositives++;
        updatePatternStats(results.byPattern, pattern, 'fp');
      }
    }

    // Must-not-trigger violations are listed per fixture only.
    for (const pattern of mustNotTrigger) {
      if (foundPatterns.has(pattern)) {
        fixtureResult.mustNotTriggerViolations.push(pattern);
      }
    }

    results.summary.totalExpectedFindings += expectedPatterns.size;
    // A Set exposes `size`, not `length`; using `length` turned this total into NaN.
    results.summary.totalActualFindings += foundPatterns.size;
    results.byFixture[fixturePath] = fixtureResult;
  }

  // Per-pattern precision/recall/F1; `|| 0` maps the 0/0 (NaN) case to 0.
  for (const stats of Object.values(results.byPattern)) {
    stats.precision = stats.tp / (stats.tp + stats.fp) || 0;
    stats.recall = stats.tp / (stats.tp + stats.fn) || 0;
    stats.f1 = 2 * (stats.precision * stats.recall) / (stats.precision + stats.recall) || 0;
  }

  // Overall metrics across all fixtures.
  const { truePositives, falsePositives, falseNegatives } = results.summary;
  results.summary.precision = truePositives / (truePositives + falsePositives) || 0;
  results.summary.recall = truePositives / (truePositives + falseNegatives) || 0;
  results.summary.f1 = 2 * (results.summary.precision * results.summary.recall) /
    (results.summary.precision + results.summary.recall) || 0;

  return results;
}
122
+
123
/**
 * Run fix effectiveness benchmarks against before/after file pairs.
 *
 * Each entry of the manifest's `fixPairs` object names a `before` file, an
 * `after` file (both relative to the manifest's directory), a `pattern`, and
 * optionally an `analyzer` (defaults to 'prompt'). The fixer function
 * `fix<PascalCasePattern>` is applied to the before content; the result is
 * compared (whitespace-insensitively) to the after file and, when the analyzer
 * is available, both versions are analyzed via temporary `.temp-before.md` /
 * `.temp-after.md` files written into the fixtures directory.
 *
 * Problems with a single pair (missing files, no fixer, fixer or analyzer
 * failure) are recorded as `{ error }` in `byPair` and do not abort the run.
 *
 * @param {string} manifestPath - Path to the manifest.json file
 * @param {Object} options - Options containing fixer and analyzers
 * @param {Object} options.fixer - Fixer module with fix functions
 * @param {Object} options.analyzers - Map of analyzer names to analyzer functions
 * @returns {Object} Fix benchmark results with byPair and summary metrics
 * @throws {Error} If the manifest cannot be read or is not valid JSON
 */
function runFixBenchmarks(manifestPath, options = {}) {
  // Missing collaborators degrade to "no fixer" errors / skipped analysis
  // rather than a TypeError on the first property lookup.
  const fixer = options?.fixer ?? {};
  const analyzers = options?.analyzers ?? {};
  const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
  const fixturesDir = path.dirname(manifestPath);

  const results = {
    byPair: {},
    summary: {
      totalPairs: 0,
      fixesApplied: 0,
      findingsRemoved: 0,
      regressions: 0,
      matchesExpected: 0
    }
  };

  for (const [pairName, pairConfig] of Object.entries(manifest?.fixPairs || {})) {
    const beforePath = path.join(fixturesDir, pairConfig.before);
    const afterPath = path.join(fixturesDir, pairConfig.after);

    if (!fs.existsSync(beforePath) || !fs.existsSync(afterPath)) {
      results.byPair[pairName] = { error: 'Before or after file not found' };
      continue;
    }

    results.summary.totalPairs++;

    const beforeContent = fs.readFileSync(beforePath, 'utf8');
    const expectedAfter = fs.readFileSync(afterPath, 'utf8');

    // Look up the fixer by naming convention: pattern `foo_bar` -> `fixFooBar`.
    const fixFn = fixer[`fix${toPascalCase(pairConfig.pattern)}`];
    if (typeof fixFn !== 'function') {
      results.byPair[pairName] = { error: `No fixer for pattern: ${pairConfig.pattern}` };
      continue;
    }

    let actualAfter;
    try {
      actualAfter = fixFn(beforeContent);
    } catch (err) {
      results.byPair[pairName] = { error: `Fix failed: ${err.message}` };
      continue;
    }
    // Everything downstream (temp file write, whitespace comparison) needs a string.
    if (typeof actualAfter !== 'string') {
      results.byPair[pairName] = { error: `Fix failed: fixer returned ${typeof actualAfter} instead of a string` };
      continue;
    }
    results.summary.fixesApplied++;

    // Check whether the finding disappears after the fix.
    const analyzer = analyzers[pairConfig.analyzer || 'prompt'];
    let beforeFindings = [];
    let afterFindings = [];

    if (analyzer) {
      // Analyzers take a file path, so both versions are written to temp files.
      const tempBefore = path.join(fixturesDir, '.temp-before.md');
      const tempAfter = path.join(fixturesDir, '.temp-after.md');

      try {
        fs.writeFileSync(tempBefore, beforeContent);
        fs.writeFileSync(tempAfter, actualAfter);

        beforeFindings = extractPatternIds(analyzer(tempBefore));
        afterFindings = extractPatternIds(analyzer(tempAfter));
      } catch (err) {
        // Record the failure for this pair only, as runPatternBenchmarks does per fixture.
        results.byPair[pairName] = { error: `Analysis failed: ${err.message}` };
        continue;
      } finally {
        // Best-effort cleanup; a temp file that was never created is fine.
        try { fs.unlinkSync(tempBefore); } catch { /* cleanup - ignore */ }
        try { fs.unlinkSync(tempAfter); } catch { /* cleanup - ignore */ }
      }
    }

    const beforeHadPattern = beforeFindings.includes(pairConfig.pattern);
    const afterHasPattern = afterFindings.includes(pairConfig.pattern);
    const pairResult = {
      pattern: pairConfig.pattern,
      beforeHadPattern,
      afterHasPattern,
      findingRemoved: beforeHadPattern && !afterHasPattern,
      matchesExpected: normalizeWhitespace(actualAfter) === normalizeWhitespace(expectedAfter),
      // Pattern IDs present after the fix that were absent before it.
      newFindings: afterFindings.filter(f => !beforeFindings.includes(f))
    };

    if (pairResult.findingRemoved) {
      results.summary.findingsRemoved++;
    }
    if (pairResult.matchesExpected) {
      results.summary.matchesExpected++;
    }
    if (pairResult.newFindings.length > 0) {
      results.summary.regressions++;
    }

    results.byPair[pairName] = pairResult;
  }

  return results;
}
225
+
226
/**
 * Render benchmark results as a markdown report.
 *
 * Sections, in order: overall summary, per-pattern health (sorted by ascending
 * precision), fix effectiveness (only when `fixResults` is given), and an
 * "Issues Found" list of fixtures with false negatives, false positives or
 * must-not-trigger violations (only when there is at least one).
 *
 * @param {Object} patternResults - Results from runPatternBenchmarks
 * @param {Object|null} fixResults - Optional results from runFixBenchmarks
 * @returns {string} Markdown-formatted benchmark report
 */
function generateReport(patternResults, fixResults = null) {
  const asPercent = (ratio, digits) => (ratio * 100).toFixed(digits);
  const hasEntries = (list) => list?.length > 0;
  const overall = patternResults.summary;

  const report = [
    '# Pattern Validation Benchmark Report',
    '',
    `**Generated**: ${new Date().toISOString()}`,
    '',
    '## Summary',
    '',
    '| Metric | Value |',
    '|--------|-------|',
    `| Total Fixtures | ${overall.totalFixtures} |`,
    `| True Positives | ${overall.truePositives} |`,
    `| False Positives | ${overall.falsePositives} |`,
    `| False Negatives | ${overall.falseNegatives} |`,
    `| **Precision** | **${asPercent(overall.precision, 1)}%** |`,
    `| **Recall** | **${asPercent(overall.recall, 1)}%** |`,
    `| **F1 Score** | **${asPercent(overall.f1, 1)}%** |`,
    '',
    '## Pattern Health',
    '',
    '| Pattern | TP | FP | FN | Precision | Recall | F1 |',
    '|---------|----|----|----|-----------:|-------:|----:|'
  ];

  // Worst precision first, so the least reliable patterns lead the table.
  const byAscendingPrecision = Object.entries(patternResults.byPattern)
    .sort(([, left], [, right]) => left.precision - right.precision);

  for (const [name, stats] of byAscendingPrecision) {
    const cells = [
      name,
      stats.tp,
      stats.fp,
      stats.fn,
      `${asPercent(stats.precision, 0)}%`,
      `${asPercent(stats.recall, 0)}%`,
      `${asPercent(stats.f1, 0)}%`
    ];
    report.push(`| ${cells.join(' | ')} |`);
  }
  report.push('');

  if (fixResults) {
    const fixes = fixResults.summary;
    report.push(
      '## Fix Effectiveness',
      '',
      '| Metric | Value |',
      '|--------|-------|',
      `| Total Fix Pairs | ${fixes.totalPairs} |`,
      `| Fixes Applied | ${fixes.fixesApplied} |`,
      `| Findings Removed | ${fixes.findingsRemoved} |`,
      `| Matches Expected | ${fixes.matchesExpected} |`,
      `| Regressions | ${fixes.regressions} |`,
      '',
      '### Fix Pair Details',
      '',
      '| Pair | Pattern | Removed | Matches | Regressions |',
      '|------|---------|---------|---------|-------------|'
    );

    for (const [pairName, outcome] of Object.entries(fixResults.byPair)) {
      // Errored pairs get a placeholder row with the message in the last column.
      if (outcome.error) {
        report.push(`| ${pairName} | - | ERROR | - | ${outcome.error} |`);
        continue;
      }
      const removedCell = outcome.findingRemoved ? 'Yes' : 'No';
      const matchesCell = outcome.matchesExpected ? 'Yes' : 'No';
      const regressionsCell = outcome.newFindings.length > 0
        ? outcome.newFindings.join(', ')
        : 'None';
      report.push(`| ${pairName} | ${outcome.pattern} | ${removedCell} | ${matchesCell} | ${regressionsCell} |`);
    }
    report.push('');
  }

  // Issue categories in display order: [heading label, result property].
  const issueKinds = [
    ['False Negatives', 'falseNegatives'],
    ['False Positives', 'falsePositives'],
    ['Must-Not-Trigger Violations', 'mustNotTriggerViolations']
  ];

  const problemFixtures = Object.entries(patternResults.byFixture)
    .filter(([, outcome]) =>
      hasEntries(outcome.falseNegatives) ||
      hasEntries(outcome.falsePositives) ||
      hasEntries(outcome.mustNotTriggerViolations)
    );

  if (problemFixtures.length > 0) {
    report.push('## Issues Found', '');

    for (const [fixtureName, outcome] of problemFixtures) {
      report.push(`### ${fixtureName}`);
      for (const [label, key] of issueKinds) {
        if (hasEntries(outcome[key])) {
          report.push(`- **${label}**: ${outcome[key].join(', ')}`);
        }
      }
      report.push('');
    }
  }

  return report.join('\n');
}
332
+
333
/**
 * Collect the `patternId` of every finding in an analyzer result.
 *
 * Accepts either a flat array of findings or a result object whose
 * array-valued properties hold findings (non-array properties are ignored).
 * Entries that are null/undefined or have no truthy `patternId` are skipped.
 * IDs are returned in encounter order and are not de-duplicated.
 *
 * @param {Array|Object|null|undefined} findings - Analyzer output
 * @returns {string[]} Pattern IDs found; empty for any other input
 */
function extractPatternIds(findings) {
  let groups = [];
  if (Array.isArray(findings)) {
    groups = [findings];
  } else if (findings && typeof findings === 'object') {
    // Analyzer result objects keep their findings in one or more issue arrays.
    groups = Object.values(findings).filter((value) => Array.isArray(value));
  }

  const ids = [];
  for (const group of groups) {
    for (const item of group) {
      // `?.` tolerates null/undefined holes instead of throwing a TypeError.
      if (item?.patternId) ids.push(item.patternId);
    }
  }
  return ids;
}
353
+
354
/**
 * Increment one counter for a pattern, creating its zeroed stats record
 * (`tp`, `fp`, `fn`) on first use. Mutates `byPattern` in place.
 *
 * @param {Object} byPattern - Map of pattern ID to its stats record
 * @param {string} pattern - Pattern ID to update
 * @param {string} type - Counter to increment: 'tp', 'fp' or 'fn'
 */
function updatePatternStats(byPattern, pattern, type) {
  const record = byPattern[pattern] || (byPattern[pattern] = { tp: 0, fp: 0, fn: 0 });
  record[type]++;
}
360
+
361
/**
 * Collapse every run of whitespace into a single space and strip the ends,
 * so two texts can be compared without regard to spacing or line breaks.
 *
 * @param {string} str - Text to normalize
 * @returns {string} Normalized text
 */
function normalizeWhitespace(str) {
  const collapsed = str.replace(/\s+/g, ' ');
  return collapsed.trim();
}
364
+
365
/**
 * Convert an underscore-separated identifier to PascalCase by upper-casing
 * the first character of each segment and concatenating the segments.
 * Characters after the first in each segment are left unchanged.
 *
 * @param {string} str - Identifier such as `missing_output_format`
 * @returns {string} PascalCase form, e.g. `MissingOutputFormat`
 */
function toPascalCase(str) {
  let pascal = '';
  for (const segment of str.split('_')) {
    pascal += segment.charAt(0).toUpperCase() + segment.slice(1);
  }
  return pascal;
}
371
+
372
/**
 * Fail (by throwing) when benchmark results fall short of the given
 * thresholds; intended for use as a CI gate. All violated thresholds are
 * reported together in a single error, one per line.
 *
 * @param {Object} results - Results from runPatternBenchmarks
 * @param {Object} thresholds - Threshold configuration
 * @param {number} [thresholds.minPrecision=0.8] - Minimum precision (0-1)
 * @param {number} [thresholds.minRecall=0.8] - Minimum recall (0-1)
 * @param {number} [thresholds.minF1=0.8] - Minimum F1 score (0-1)
 * @param {number} [thresholds.maxFalsePositives=10] - Maximum allowed false positives
 * @throws {Error} If any threshold is not met
 */
function assertThresholds(results, thresholds = {}) {
  const {
    minPrecision = 0.8,
    minRecall = 0.8,
    minF1 = 0.8,
    maxFalsePositives = 10
  } = thresholds;
  const { summary } = results;

  // Ratio metrics share one message shape: [label, actual value, minimum].
  const ratioChecks = [
    ['Precision', summary.precision, minPrecision],
    ['Recall', summary.recall, minRecall],
    ['F1', summary.f1, minF1]
  ];

  const violations = ratioChecks
    .filter(([, actual, minimum]) => actual < minimum)
    .map(([label, actual, minimum]) =>
      `${label} ${(actual * 100).toFixed(1)}% below threshold ${minimum * 100}%`);

  if (summary.falsePositives > maxFalsePositives) {
    violations.push(`False positives ${summary.falsePositives} exceeds max ${maxFalsePositives}`);
  }

  if (violations.length === 0) {
    return;
  }
  throw new Error(`Benchmark thresholds not met:\n${violations.join('\n')}`);
}
409
+
410
+ module.exports = {
411
+ runPatternBenchmarks,
412
+ runFixBenchmarks,
413
+ generateReport,
414
+ assertThresholds,
415
+ extractPatternIds,
416
+ toPascalCase
417
+ };