claude-flow-novice 1.5.2 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/.claude/agents/architecture/system-architect.md +3 -44
  2. package/.claude/agents/benchmarking-tests/test-agent-code-heavy.md +747 -0
  3. package/.claude/agents/benchmarking-tests/test-agent-metadata.md +181 -0
  4. package/.claude/agents/benchmarking-tests/test-agent-minimal.md +67 -0
  5. package/.claude/agents/data/ml/data-ml-model.md +5 -119
  6. package/.claude/agents/development/backend/dev-backend-api.md +4 -115
  7. package/.claude/agents/devops/ci-cd/ops-cicd-github.md +4 -114
  8. package/.claude/agents/documentation/api-docs/docs-api-openapi.md +4 -113
  9. package/.claude/agents/github/multi-repo-swarm.md +1 -28
  10. package/.claude/agents/github/pr-manager.md +1 -29
  11. package/.claude/agents/github/project-board-sync.md +1 -32
  12. package/.claude/agents/github/release-manager.md +1 -32
  13. package/.claude/agents/github/release-swarm.md +1 -33
  14. package/.claude/agents/github/repo-architect.md +1 -34
  15. package/.claude/agents/github/swarm-issue.md +1 -26
  16. package/.claude/agents/github/swarm-pr.md +1 -30
  17. package/.claude/agents/github/sync-coordinator.md +1 -30
  18. package/.claude/agents/github/workflow-automation.md +1 -31
  19. package/.claude/agents/neural/neural-pattern-agent.md +2 -50
  20. package/.claude/agents/specialized/mobile/spec-mobile-react-native.md +6 -142
  21. package/.claude/agents/sublinear/consciousness-evolution-agent.md +2 -18
  22. package/.claude/agents/sublinear/matrix-solver-agent.md +2 -16
  23. package/.claude/agents/sublinear/nanosecond-scheduler-agent.md +2 -19
  24. package/.claude/agents/sublinear/pagerank-agent.md +2 -19
  25. package/.claude/agents/sublinear/phi-calculator-agent.md +2 -19
  26. package/.claude/agents/sublinear/psycho-symbolic-agent.md +2 -19
  27. package/.claude/agents/sublinear/sublinear.md +2 -1
  28. package/.claude/agents/sublinear/temporal-advantage-agent.md +2 -16
  29. package/.claude/agents/testing/e2e/playwright-agent.md +7 -0
  30. package/.claude-flow-novice/.claude/agents/architecture/system-architect.md +3 -44
  31. package/.claude-flow-novice/.claude/agents/benchmarking-tests/test-agent-code-heavy.md +747 -0
  32. package/.claude-flow-novice/.claude/agents/benchmarking-tests/test-agent-metadata.md +181 -0
  33. package/.claude-flow-novice/.claude/agents/benchmarking-tests/test-agent-minimal.md +67 -0
  34. package/.claude-flow-novice/.claude/agents/data/ml/data-ml-model.md +5 -119
  35. package/.claude-flow-novice/.claude/agents/development/backend/dev-backend-api.md +4 -115
  36. package/.claude-flow-novice/.claude/agents/devops/ci-cd/ops-cicd-github.md +4 -114
  37. package/.claude-flow-novice/.claude/agents/documentation/api-docs/docs-api-openapi.md +4 -113
  38. package/.claude-flow-novice/.claude/agents/github/multi-repo-swarm.md +1 -28
  39. package/.claude-flow-novice/.claude/agents/github/pr-manager.md +1 -29
  40. package/.claude-flow-novice/.claude/agents/github/project-board-sync.md +1 -32
  41. package/.claude-flow-novice/.claude/agents/github/release-manager.md +1 -32
  42. package/.claude-flow-novice/.claude/agents/github/release-swarm.md +1 -33
  43. package/.claude-flow-novice/.claude/agents/github/repo-architect.md +1 -34
  44. package/.claude-flow-novice/.claude/agents/github/swarm-issue.md +1 -26
  45. package/.claude-flow-novice/.claude/agents/github/swarm-pr.md +1 -30
  46. package/.claude-flow-novice/.claude/agents/github/sync-coordinator.md +1 -30
  47. package/.claude-flow-novice/.claude/agents/github/workflow-automation.md +1 -31
  48. package/.claude-flow-novice/.claude/agents/neural/neural-pattern-agent.md +2 -50
  49. package/.claude-flow-novice/.claude/agents/specialized/mobile/spec-mobile-react-native.md +6 -142
  50. package/.claude-flow-novice/.claude/agents/sublinear/consciousness-evolution-agent.md +2 -18
  51. package/.claude-flow-novice/.claude/agents/sublinear/matrix-solver-agent.md +2 -16
  52. package/.claude-flow-novice/.claude/agents/sublinear/nanosecond-scheduler-agent.md +2 -19
  53. package/.claude-flow-novice/.claude/agents/sublinear/pagerank-agent.md +2 -19
  54. package/.claude-flow-novice/.claude/agents/sublinear/phi-calculator-agent.md +2 -19
  55. package/.claude-flow-novice/.claude/agents/sublinear/psycho-symbolic-agent.md +2 -19
  56. package/.claude-flow-novice/.claude/agents/sublinear/sublinear.md +2 -1
  57. package/.claude-flow-novice/.claude/agents/sublinear/temporal-advantage-agent.md +2 -16
  58. package/.claude-flow-novice/.claude/agents/testing/e2e/playwright-agent.md +7 -0
  59. package/.claude-flow-novice/dist/src/cli/simple-commands/init/CLAUDE.md +188 -0
  60. package/.claude-flow-novice/dist/src/cli/simple-commands/init/claude-flow-universal +81 -0
  61. package/.claude-flow-novice/dist/src/cli/simple-commands/init/claude-flow.bat +18 -0
  62. package/.claude-flow-novice/dist/src/cli/simple-commands/init/claude-flow.ps1 +24 -0
  63. package/.claude-flow-novice/dist/src/cli/simple-commands/init/claude-md.js +982 -0
  64. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/analysis/bottleneck-detect.md +162 -0
  65. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/automation/auto-agent.md +122 -0
  66. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/coordination/swarm-init.md +85 -0
  67. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/github/github-swarm.md +121 -0
  68. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/helpers/standard-checkpoint-hooks.sh +179 -0
  69. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/notification.md +113 -0
  70. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/post-command.md +116 -0
  71. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/post-edit.md +117 -0
  72. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/post-task.md +112 -0
  73. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/pre-command.md +113 -0
  74. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/pre-edit.md +113 -0
  75. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/pre-search.md +112 -0
  76. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/pre-task.md +111 -0
  77. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/session-end.md +118 -0
  78. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/session-restore.md +118 -0
  79. package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/session-start.md +117 -0
  80. package/.claude-flow-novice/dist/src/cli/simple-commands/init/coordination-md.js +340 -0
  81. package/.claude-flow-novice/dist/src/cli/simple-commands/init/coordination.md +16 -0
  82. package/.claude-flow-novice/dist/src/cli/simple-commands/init/enhanced-templates.js +2347 -0
  83. package/.claude-flow-novice/dist/src/cli/simple-commands/init/github-safe-enhanced.js +331 -0
  84. package/.claude-flow-novice/dist/src/cli/simple-commands/init/github-safe.js +106 -0
  85. package/.claude-flow-novice/dist/src/cli/simple-commands/init/memory-bank-md.js +259 -0
  86. package/.claude-flow-novice/dist/src/cli/simple-commands/init/memory-bank.md +16 -0
  87. package/.claude-flow-novice/dist/src/cli/simple-commands/init/readme-files.js +72 -0
  88. package/.claude-flow-novice/dist/src/cli/simple-commands/init/safe-hook-patterns.js +430 -0
  89. package/.claude-flow-novice/dist/src/cli/simple-commands/init/settings.json +109 -0
  90. package/.claude-flow-novice/dist/src/cli/simple-commands/init/settings.json.enhanced +35 -0
  91. package/.claude-flow-novice/dist/src/cli/simple-commands/init/sparc-modes.js +1401 -0
  92. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/CLAUDE.md +188 -0
  93. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/claude-flow-universal +81 -0
  94. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/claude-flow.bat +18 -0
  95. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/claude-flow.ps1 +24 -0
  96. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/claude-md.js +982 -0
  97. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/analysis/bottleneck-detect.md +162 -0
  98. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/automation/auto-agent.md +122 -0
  99. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/coordination/swarm-init.md +85 -0
  100. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/github/github-swarm.md +121 -0
  101. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/helpers/standard-checkpoint-hooks.sh +179 -0
  102. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/notification.md +113 -0
  103. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/post-command.md +116 -0
  104. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/post-edit.md +117 -0
  105. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/post-task.md +112 -0
  106. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/pre-command.md +113 -0
  107. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/pre-edit.md +113 -0
  108. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/pre-search.md +112 -0
  109. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/pre-task.md +111 -0
  110. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/session-end.md +118 -0
  111. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/session-restore.md +118 -0
  112. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/session-start.md +117 -0
  113. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/coordination-md.js +340 -0
  114. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/coordination.md +16 -0
  115. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/enhanced-templates.js +2347 -0
  116. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/github-safe-enhanced.js +331 -0
  117. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/github-safe.js +106 -0
  118. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/memory-bank-md.js +259 -0
  119. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/memory-bank.md +16 -0
  120. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/readme-files.js +72 -0
  121. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/safe-hook-patterns.js +430 -0
  122. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/settings.json +109 -0
  123. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/settings.json.enhanced +35 -0
  124. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/sparc-modes.js +1401 -0
  125. package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/verification-claude-md.js +432 -0
  126. package/.claude-flow-novice/dist/src/cli/simple-commands/init/verification-claude-md.js +432 -0
  127. package/.claude-flow-novice/dist/src/slash-commands/benchmark-prompts.js +281 -0
  128. package/CLAUDE.md +1927 -127
  129. package/package.json +3 -3
  130. package/src/cli/simple-commands/init/index.js +39 -4
  131. package/src/cli/simple-commands/init/templates/CLAUDE.md +8 -10
  132. package/src/slash-commands/benchmark-prompts.js +281 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-flow-novice",
3
- "version": "1.5.2",
3
+ "version": "1.5.3",
4
4
  "description": "Standalone Claude Flow for beginners - AI agent orchestration made easy with enhanced TDD testing pipeline. Enhanced init command creates complete agent system, MCP configuration with 30 essential tools, and automated hooks with single-file testing, real-time coverage analysis, and advanced validation. Fully standalone with zero external dependencies, complete project setup in one command.",
5
5
  "mcpName": "io.github.ruvnet/claude-flow",
6
6
  "main": ".claude-flow-novice/dist/index.js",
@@ -39,9 +39,9 @@
39
39
  "optimize:validate:hardware": "node scripts/optimization/config-validator.js validate hardware",
40
40
  "optimize:validate:monitoring": "node scripts/optimization/config-validator.js validate monitoring",
41
41
  "build": "scripts/build/unified-builder.sh safe",
42
- "build:swc": "swc src -d .claude-flow-novice/dist --only='**/*.ts' --config-file .swcrc && cp -r src/slash-commands .claude-flow-novice/dist/src/ && cp -r src/cli/simple-commands/hooks .claude-flow-novice/dist/src/cli/simple-commands/ && cp -r .claude/agents .claude-flow-novice/.claude/",
42
+ "build:swc": "swc src -d .claude-flow-novice/dist --only='**/*.ts' --config-file .swcrc && cp -r src/slash-commands .claude-flow-novice/dist/src/ && cp -r src/cli/simple-commands/hooks .claude-flow-novice/dist/src/cli/simple-commands/ && cp -r src/cli/simple-commands/init/templates .claude-flow-novice/dist/src/cli/simple-commands/init/ && cp -r .claude/agents .claude-flow-novice/.claude/",
43
43
  "build:types": "tsc --project config/typescript/tsconfig.json --emitDeclarationOnly --outDir .claude-flow-novice/dist --skipLibCheck",
44
- "build:watch": "swc src -d .claude-flow-novice/dist --watch --config-file .swcrc && cp -r src/slash-commands .claude-flow-novice/dist/src/ && cp -r src/cli/simple-commands/hooks .claude-flow-novice/dist/src/cli/simple-commands/ && npm run copy:agents",
44
+ "build:watch": "swc src -d .claude-flow-novice/dist --watch --config-file .swcrc && cp -r src/slash-commands .claude-flow-novice/dist/src/ && cp -r src/cli/simple-commands/hooks .claude-flow-novice/dist/src/cli/simple-commands/ && cp -r src/cli/simple-commands/init/templates .claude-flow-novice/dist/src/cli/simple-commands/init/ && npm run copy:agents",
45
45
  "build:legacy": "scripts/build/unified-builder.sh migration",
46
46
  "build:workaround": "scripts/build/unified-builder.sh workaround",
47
47
  "build:force": "scripts/build/unified-builder.sh force",
@@ -1,6 +1,9 @@
1
1
  // init/index.js - Initialize Claude Code integration files
2
2
  import { printSuccess, printError, printWarning, exit } from '../../utils.js';
3
3
  import { existsSync } from 'fs';
4
+ import { promises as fs } from 'fs';
5
+ import { fileURLToPath } from 'url';
6
+ import { dirname, join } from 'path';
4
7
  import process from 'process';
5
8
  import { spawn, execSync } from 'child_process';
6
9
  import { promisify } from 'util';
@@ -83,6 +86,37 @@ import {
83
86
  import { createAgentsReadme, createSessionsReadme } from './templates/readme-files.js';
84
87
  import { initializeHiveMind, getHiveMindStatus, rollbackHiveMindInit } from './hive-mind-init.js';
85
88
 
89
+ // Get the directory path of this module for resolving template files
90
+ const __filename = fileURLToPath(import.meta.url);
91
+ const __dirname = dirname(__filename);
92
+
93
/**
 * Read the CLAUDE.md template file.
 *
 * Candidate locations are tried in order and the first one that can be read
 * wins. When none can be read, the content is generated by
 * createOptimizedSparcClaudeMd() instead, so initialization never fails
 * merely because the template is missing.
 *
 * @returns {Promise<string>} The template content
 */
async function readClaudeMdTemplate() {
  const possiblePaths = [
    // templates/CLAUDE.md next to this module (the source layout; the build
    // script also copies the whole templates directory into dist).
    join(__dirname, 'templates', 'CLAUDE.md'),
    // CLAUDE.md directly beside this module (flat dist layout).
    join(__dirname, 'CLAUDE.md'),
  ];

  for (const templatePath of possiblePaths) {
    try {
      return await fs.readFile(templatePath, 'utf8');
    } catch (error) {
      // A missing file is the expected miss and just moves on to the next
      // candidate. Anything else (permissions, I/O) is surfaced so that a
      // broken install is not silently masked by the generated fallback.
      const isMissing = error && (error.code === 'ENOENT' || error.code === 'ENOTDIR');
      if (!isMissing) {
        console.warn(`Warning: Could not read template ${templatePath}: ${error.message}`);
      }
    }
  }

  // Fallback to generating if template file is not found in any location
  console.warn('Warning: Template file not found in any location, using generated content');
  return createOptimizedSparcClaudeMd();
}
119
+
86
120
  /**
87
121
  * Check if Claude Code CLI is installed
88
122
  */
@@ -1313,12 +1347,13 @@ async function enhancedClaudeFlowInit(flags, subArgs = []) {
1313
1347
  return;
1314
1348
  }
1315
1349
 
1316
- // Create CLAUDE.md
1350
+ // Create CLAUDE.md from template
1317
1351
  if (!dryRun) {
1318
- await fs.writeFile(`${workingDir}/CLAUDE.md`, createOptimizedSparcClaudeMd(), 'utf8');
1319
- printSuccess('✓ Created CLAUDE.md (Claude Flow v2.0.0 - Optimized)');
1352
+ const claudeMdContent = await readClaudeMdTemplate();
1353
+ await fs.writeFile(`${workingDir}/CLAUDE.md`, claudeMdContent, 'utf8');
1354
+ printSuccess('✓ Created CLAUDE.md (Claude Flow v2.0.0 - Optimized Template)');
1320
1355
  } else {
1321
- console.log('[DRY RUN] Would create CLAUDE.md (Claude Flow v2.0.0 - Optimized)');
1356
+ console.log('[DRY RUN] Would create CLAUDE.md (Claude Flow v2.0.0 - Optimized Template)');
1322
1357
  }
1323
1358
 
1324
1359
  // Create .claude directory structure
@@ -177,14 +177,12 @@ claude mcp add claude-flow-novice npx claude-flow-novice mcp start
177
177
  - `/fullstack "goal"` - Launch full-stack development team with consensus validation
178
178
  - `/swarm`, `/sparc`, `/hooks` - Other slash commands (auto-discovered)
179
179
 
180
- ## DEVELOPMENT FLOW
181
- 1. Execute - Primary swarm (3-8 agents) produces deliverables with confidence score
182
- 1a. Only when the swarm believes its done move to step 2.
183
- 1b. If swarm does not believe it's done (confidence scores < 75%, repeat step 1)
184
- 2. Verify - Consensus swarm (2-4 validators) runs comprehensive checks with Byzantine voting
185
- 3. Decision - PASS (≥90% agreement + critical criteria) OR FAIL
186
- 4. Action -
180
+ ## DEVELOPMENT FLOW LOOP
181
+ 1. Execute - Primary swarm (3-8 agents) produces deliverables with confidence score. Each agent MUST be using the enhanced post edit pipeline after file edits.
182
+ 2. When the swarm believes it's done with all tasks, move to step 4
183
+ 3. If the swarm does not believe it's done (confidence scores < 75%), relaunch agents for step 1
184
+ 4. Verify - Consensus swarm (2-4 validators) runs comprehensive checks with Byzantine voting
185
+ 5. Decision - PASS (≥90% agreement + critical criteria) OR FAIL
186
+ 6. Action -
187
187
  - PASS → Store results → Move to next task
188
- - FAIL → Round++ → If <10: inject feedback → Relaunch swarm | If ≥10: Escalate to human
189
- 5. Repeat - Iterative improvement with accumulated context from all previous rounds
190
- 6. Escalate - Full history + feedback + recommendations after 10 rounds
188
+ - FAIL → Round++ → If <10: inject feedback → Relaunch swarm on step 1 | If ≥10: Escalate to human
@@ -0,0 +1,281 @@
1
+ const path = require('path');
2
+ const { AgentBenchmarkSystem } = require('../../benchmark/agent-benchmarking/index');
3
+
4
/**
 * Benchmark agent prompt formats
 *
 * Entry point for the slash command: dispatches on the first argument to the
 * matching sub-command handler and converts any thrown error into a
 * `{ success: false, error }` result.
 *
 * @command /benchmark-prompts
 * @description Run automated benchmarks comparing different agent prompt formatting styles
 * @example /benchmark-prompts run
 * @example /benchmark-prompts run --rounds 5 --parallel
 * @example /benchmark-prompts analyze
 * @example /benchmark-prompts report markdown
 * @param {string[]} [args] - Sub-command followed by its options; a missing
 *   or non-array value is treated as an empty list, which shows the help.
 * @returns {Promise<object>} The handler's result object, or
 *   `{ success: false, error }` when a handler throws.
 */
async function benchmarkPrompts(args) {
  // Normalize first: reading args[0] on undefined would throw before the
  // try/catch below gets a chance to report the problem.
  const argv = Array.isArray(args) ? args : [];
  const system = new AgentBenchmarkSystem();

  const command = argv[0] || 'help';

  try {
    // `return await` is deliberate: it keeps handler rejections inside this try.
    switch (command) {
      case 'run':
        return await runBenchmark(system, argv);

      case 'test':
        return await testFormat(system, argv);

      case 'analyze':
        return await analyzeBenchmark(system);

      case 'report':
        return await generateReport(system, argv);

      case 'compare':
        return await compareBenchmark(system);

      case 'list':
        return await listBenchmarks(system);

      case 'reset':
        return await resetBenchmarks(system);

      case 'help':
      default:
        return showHelp();
    }
  } catch (error) {
    console.error(`\n❌ Error: ${error.message}\n`);
    if (argv.includes('--verbose')) {
      console.error(error.stack);
    }
    return { success: false, error: error.message };
  }
}
54
+
55
/**
 * Run the full benchmark suite with options parsed from the command arguments.
 *
 * Recognized flags: `--rounds <n>`, `--parallel`, `--verbose`,
 * `--scenarios <value>`. Unrecognized arguments are ignored.
 *
 * @param {object} system - Benchmark system; its `run(options)` method is awaited.
 * @param {string[]} args - Full argument list; args[0] is the sub-command itself.
 * @returns {Promise<object>} `{ success: true, result }` on completion, or
 *   `{ success: false }` when `--rounds` is not a positive integer.
 */
async function runBenchmark(system, args) {
  console.log('\n🎯 Running Agent Prompt Format Benchmark\n');

  const options = {
    rounds: 3,
    parallel: false,
    verbose: false,
    scenarios: null
  };

  // Parse options (index 0 is the sub-command, so flags start at 1)
  for (let i = 1; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--rounds' && args[i + 1]) {
      const rawRounds = String(args[i + 1]);
      const rounds = /^\d+$/.test(rawRounds) ? Number.parseInt(rawRounds, 10) : NaN;
      // Reject instead of forwarding NaN / 0 to the benchmark system.
      if (!Number.isInteger(rounds) || rounds < 1) {
        console.error(`\n❌ Invalid --rounds value "${rawRounds}". Use a positive integer.\n`);
        return { success: false };
      }
      options.rounds = rounds;
      i++; // skip the consumed value
    } else if (arg === '--parallel') {
      options.parallel = true;
    } else if (arg === '--verbose') {
      options.verbose = true;
    } else if (arg === '--scenarios' && args[i + 1]) {
      options.scenarios = args[i + 1];
      i++; // skip the consumed value
    }
  }

  console.log('Configuration:');
  console.log(` Rounds: ${options.rounds}`);
  console.log(` Parallel: ${options.parallel ? 'Yes' : 'No'}`);
  console.log(` Scenarios: ${options.scenarios || 'All'}\n`);

  const result = await system.run(options);

  console.log('\n✅ Benchmark complete!');
  console.log('\nNext steps:');
  console.log(' /benchmark-prompts analyze - View statistical analysis');
  console.log(' /benchmark-prompts report - Generate detailed reports');
  console.log(' /benchmark-prompts compare - Interactive comparison\n');

  return { success: true, result };
}
96
+
97
/**
 * Run the benchmark for a named prompt format.
 *
 * The format name is validated, but the visible code does not forward it to
 * `system.run`, so every format is still executed (the printed note says so).
 *
 * @param {object} system - Benchmark system; its `run(options)` method is awaited.
 * @param {string[]} args - args[1] is the format name; args[2] is an optional
 *   round count (defaults to 3 when absent or not a positive integer).
 * @returns {Promise<object>} `{ success: true, result }`, or
 *   `{ success: false }` for an unknown format.
 */
async function testFormat(system, args) {
  const format = args[1];
  if (!format || !['minimal', 'metadata', 'code-heavy'].includes(format)) {
    console.error('\n❌ Invalid format. Use: minimal, metadata, or code-heavy\n');
    return { success: false };
  }

  console.log(`\n🧪 Testing ${format} format\n`);

  // Explicit radix, and anything that is not a positive integer (a flag such
  // as --verbose in this position, 0, a negative number) falls back to 3.
  const requestedRounds = Number.parseInt(args[2], 10);
  const rounds = Number.isInteger(requestedRounds) && requestedRounds > 0 ? requestedRounds : 3;

  // Run benchmark with only specified format
  const options = {
    rounds,
    parallel: false,
    verbose: args.includes('--verbose')
  };

  // This would need modification to support single format testing
  console.log('Note: Currently runs all formats. Single format testing coming soon.\n');

  const result = await system.run(options);
  return { success: true, result };
}
119
+
120
/**
 * Analyze previously collected benchmark results.
 *
 * @param {object} system - Benchmark system; its `analyze()` method is awaited.
 * @returns {Promise<object>} `{ success: true, result }` when `analyze()`
 *   yields a truthy value, otherwise `{ success: false }`.
 */
async function analyzeBenchmark(system) {
  console.log('\n📊 Analyzing Benchmark Results\n');

  const analysisOutcome = await system.analyze();

  if (analysisOutcome) {
    console.log('\n✅ Analysis complete\n');
    return { success: true, result: analysisOutcome };
  }

  // A falsy outcome is reported as failure without further output here.
  return { success: false };
}
132
+
133
/**
 * Generate benchmark reports in the requested output format.
 *
 * @param {object} system - Benchmark system; `generateReport(format)` is awaited.
 * @param {string[]} args - args[1] selects the format (`all`, `markdown`,
 *   `csv` or `json`); `all` is used when it is absent.
 * @returns {Promise<object>} `{ success: true, result }`, or
 *   `{ success: false }` for an unsupported format or a falsy report result.
 */
async function generateReport(system, args) {
  const requestedFormat = args[1] || 'all';
  const supportedFormats = new Set(['all', 'markdown', 'csv', 'json']);

  if (!supportedFormats.has(requestedFormat)) {
    console.error('\n❌ Invalid format. Use: all, markdown, csv, or json\n');
    return { success: false };
  }

  console.log(`\n📄 Generating ${requestedFormat} report(s)\n`);

  const report = await system.generateReport(requestedFormat);

  if (report) {
    console.log('\n✅ Report generation complete\n');
    return { success: true, result: report };
  }

  return { success: false };
}
152
+
153
/**
 * Print a per-format comparison table, the winner and the recommendations
 * taken from the latest analysis.
 *
 * Missing pieces of the analysis are tolerated: an absent metric is shown as
 * `n/a`, an absent winner as `N/A`, and a single recommendation given as a
 * string is printed as one bullet rather than one bullet per character.
 *
 * @param {object} system - Benchmark system; its `analyze()` method is awaited.
 * @returns {Promise<object>} `{ success: true, results, analysis }`, or
 *   `{ success: false }` when `analyze()` yields a falsy value.
 * @throws {Error} When the analysis object carries no `results`.
 */
async function compareBenchmark(system) {
  console.log('\n📊 Interactive Benchmark Comparison\n');

  // Load and analyze latest results
  const result = await system.analyze();

  if (!result) {
    return { success: false };
  }

  const { results, analysis } = result;
  if (!results) {
    throw new Error('Benchmark analysis returned no results to compare');
  }

  const formats = results.formats || {};
  const summary = results.summary || {};

  // Render one metric cell; non-numeric values become "n/a" instead of throwing.
  const formatMetric = (value, digits, unit) =>
    typeof value === 'number' && Number.isFinite(value) ? `${value.toFixed(digits)}${unit}` : 'n/a';

  const columnWidth = 15;
  const rule = '─'.repeat(80);

  // Print comparison table
  console.log('Format Comparison:');
  console.log(rule);
  console.log(
    ['Format', 'Quality', 'Speed', 'Consistency'].map((title) => title.padEnd(columnWidth)).join('') + 'Success'
  );
  console.log(rule);

  for (const [formatName, formatData] of Object.entries(formats)) {
    const agg = (formatData && formatData.aggregated) || {};
    console.log(
      formatName.padEnd(columnWidth) +
        formatMetric(agg.overallQuality, 1, '%').padEnd(columnWidth) +
        formatMetric(agg.overallResponseTime, 0, 'ms').padEnd(columnWidth) +
        formatMetric(agg.overallConsistency, 1, '%').padEnd(columnWidth) +
        formatMetric(agg.successRate, 1, '%')
    );
  }
  console.log(rule);

  const winner = typeof summary.winner === 'string' && summary.winner !== '' ? summary.winner.toUpperCase() : 'N/A';
  console.log('\n🏆 Winner: ' + winner);

  // Iterating a bare string would yield single characters, so wrap it.
  let recommendations = summary.recommendation;
  if (recommendations === undefined || recommendations === null) {
    recommendations = [];
  } else if (typeof recommendations === 'string') {
    recommendations = [recommendations];
  }

  console.log('\nRecommendations:');
  for (const rec of recommendations) {
    console.log(` • ${rec}`);
  }
  console.log('');

  return { success: true, results, analysis };
}
192
+
193
/**
 * Print the stored benchmark results under a heading.
 *
 * @param {object} system - Benchmark system; its `listResults()` method is awaited.
 * @returns {Promise<{success: boolean}>} Always `{ success: true }` once
 *   `listResults()` resolves.
 */
async function listBenchmarks(system) {
  const heading = '\n📂 Benchmark Results\n';
  console.log(heading);

  await system.listResults();

  // Trailing blank line separates the listing from whatever is printed next.
  console.log('');
  return { success: true };
}
199
+
200
/**
 * Delete all stored benchmark results.
 *
 * NOTE: only a warning is printed — no confirmation is requested before
 * `system.reset()` is invoked.
 *
 * @param {object} system - Benchmark system; its `reset()` method is awaited.
 * @returns {Promise<{success: boolean}>} Always `{ success: true }` once
 *   `reset()` resolves.
 */
async function resetBenchmarks(system) {
  const warning = '\n⚠️ This will delete all benchmark results.\n';
  console.log(warning);

  // In interactive mode, would ask for confirmation
  // For now, proceeding with reset
  await system.reset();

  console.log('');
  return { success: true };
}
210
+
211
/**
 * Print the usage text for the /benchmark-prompts command.
 *
 * @returns {{success: boolean}} Always `{ success: true }`.
 */
function showHelp() {
  // The text is runtime output; it is kept verbatim.
  const helpText = `
╔════════════════════════════════════════════════════════════════════════════╗
║ Agent Prompt Format Benchmark System ║
╚════════════════════════════════════════════════════════════════════════════╝

Compare different agent prompt formatting styles:
• Minimal Format - Simple frontmatter only
• Metadata Format - Full configuration with hooks and capabilities
• Code-Heavy Format - Extensive code examples

COMMANDS:
/benchmark-prompts run [options] Run full benchmark suite
/benchmark-prompts test <format> Test specific format
/benchmark-prompts analyze Analyze collected results
/benchmark-prompts report [format] Generate reports (all|markdown|csv|json)
/benchmark-prompts compare Interactive comparison view
/benchmark-prompts list List all benchmark results
/benchmark-prompts reset Clear all benchmark data

OPTIONS:
--rounds <n> Number of test rounds (default: 3)
--parallel Run tests in parallel
--scenarios <list> Comma-separated scenario IDs to test
--verbose Detailed output
--export Export results

EXAMPLES:
/benchmark-prompts run
/benchmark-prompts run --rounds 5 --parallel
/benchmark-prompts test minimal --verbose
/benchmark-prompts analyze
/benchmark-prompts report markdown
/benchmark-prompts compare

TEST SCENARIOS:
• simple-code-analysis - Basic algorithm optimization
• memory-leak-detection - Memory leak identification
• database-query-optimization - Query performance optimization
• caching-strategy - Cache architecture design
• resource-allocation - Resource calculation
• async-pattern-optimization - Async code optimization
• algorithm-complexity-reduction - Algorithm improvement
• load-testing-strategy - Load test planning
• bottleneck-identification - Performance bottleneck analysis
• scalability-architecture - Scalability design

BENCHMARK METRICS:
• Quality Score - Completeness, accuracy, relevance, clarity
• Response Time - Total time to generate response
• Consistency - Variance across multiple rounds
• Success Rate - Percentage of successful completions

For more information, see: /benchmark/agent-benchmarking/README.md
`;

  console.log(helpText);

  return { success: true };
}
269
+
270
+ module.exports = {
271
+ command: 'benchmark-prompts',
272
+ description: 'Run automated benchmarks comparing agent prompt formats',
273
+ handler: benchmarkPrompts,
274
+ examples: [
275
+ '/benchmark-prompts run',
276
+ '/benchmark-prompts run --rounds 5 --parallel',
277
+ '/benchmark-prompts analyze',
278
+ '/benchmark-prompts report markdown',
279
+ '/benchmark-prompts compare'
280
+ ]
281
+ };