claude-flow-novice 1.5.2 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/architecture/system-architect.md +3 -44
- package/.claude/agents/benchmarking-tests/test-agent-code-heavy.md +747 -0
- package/.claude/agents/benchmarking-tests/test-agent-metadata.md +181 -0
- package/.claude/agents/benchmarking-tests/test-agent-minimal.md +67 -0
- package/.claude/agents/data/ml/data-ml-model.md +5 -119
- package/.claude/agents/development/backend/dev-backend-api.md +4 -115
- package/.claude/agents/devops/ci-cd/ops-cicd-github.md +4 -114
- package/.claude/agents/documentation/api-docs/docs-api-openapi.md +4 -113
- package/.claude/agents/github/multi-repo-swarm.md +1 -28
- package/.claude/agents/github/pr-manager.md +1 -29
- package/.claude/agents/github/project-board-sync.md +1 -32
- package/.claude/agents/github/release-manager.md +1 -32
- package/.claude/agents/github/release-swarm.md +1 -33
- package/.claude/agents/github/repo-architect.md +1 -34
- package/.claude/agents/github/swarm-issue.md +1 -26
- package/.claude/agents/github/swarm-pr.md +1 -30
- package/.claude/agents/github/sync-coordinator.md +1 -30
- package/.claude/agents/github/workflow-automation.md +1 -31
- package/.claude/agents/neural/neural-pattern-agent.md +2 -50
- package/.claude/agents/specialized/mobile/spec-mobile-react-native.md +6 -142
- package/.claude/agents/sublinear/consciousness-evolution-agent.md +2 -18
- package/.claude/agents/sublinear/matrix-solver-agent.md +2 -16
- package/.claude/agents/sublinear/nanosecond-scheduler-agent.md +2 -19
- package/.claude/agents/sublinear/pagerank-agent.md +2 -19
- package/.claude/agents/sublinear/phi-calculator-agent.md +2 -19
- package/.claude/agents/sublinear/psycho-symbolic-agent.md +2 -19
- package/.claude/agents/sublinear/sublinear.md +2 -1
- package/.claude/agents/sublinear/temporal-advantage-agent.md +2 -16
- package/.claude/agents/testing/e2e/playwright-agent.md +7 -0
- package/.claude-flow-novice/.claude/agents/architecture/system-architect.md +3 -44
- package/.claude-flow-novice/.claude/agents/benchmarking-tests/test-agent-code-heavy.md +747 -0
- package/.claude-flow-novice/.claude/agents/benchmarking-tests/test-agent-metadata.md +181 -0
- package/.claude-flow-novice/.claude/agents/benchmarking-tests/test-agent-minimal.md +67 -0
- package/.claude-flow-novice/.claude/agents/data/ml/data-ml-model.md +5 -119
- package/.claude-flow-novice/.claude/agents/development/backend/dev-backend-api.md +4 -115
- package/.claude-flow-novice/.claude/agents/devops/ci-cd/ops-cicd-github.md +4 -114
- package/.claude-flow-novice/.claude/agents/documentation/api-docs/docs-api-openapi.md +4 -113
- package/.claude-flow-novice/.claude/agents/github/multi-repo-swarm.md +1 -28
- package/.claude-flow-novice/.claude/agents/github/pr-manager.md +1 -29
- package/.claude-flow-novice/.claude/agents/github/project-board-sync.md +1 -32
- package/.claude-flow-novice/.claude/agents/github/release-manager.md +1 -32
- package/.claude-flow-novice/.claude/agents/github/release-swarm.md +1 -33
- package/.claude-flow-novice/.claude/agents/github/repo-architect.md +1 -34
- package/.claude-flow-novice/.claude/agents/github/swarm-issue.md +1 -26
- package/.claude-flow-novice/.claude/agents/github/swarm-pr.md +1 -30
- package/.claude-flow-novice/.claude/agents/github/sync-coordinator.md +1 -30
- package/.claude-flow-novice/.claude/agents/github/workflow-automation.md +1 -31
- package/.claude-flow-novice/.claude/agents/neural/neural-pattern-agent.md +2 -50
- package/.claude-flow-novice/.claude/agents/specialized/mobile/spec-mobile-react-native.md +6 -142
- package/.claude-flow-novice/.claude/agents/sublinear/consciousness-evolution-agent.md +2 -18
- package/.claude-flow-novice/.claude/agents/sublinear/matrix-solver-agent.md +2 -16
- package/.claude-flow-novice/.claude/agents/sublinear/nanosecond-scheduler-agent.md +2 -19
- package/.claude-flow-novice/.claude/agents/sublinear/pagerank-agent.md +2 -19
- package/.claude-flow-novice/.claude/agents/sublinear/phi-calculator-agent.md +2 -19
- package/.claude-flow-novice/.claude/agents/sublinear/psycho-symbolic-agent.md +2 -19
- package/.claude-flow-novice/.claude/agents/sublinear/sublinear.md +2 -1
- package/.claude-flow-novice/.claude/agents/sublinear/temporal-advantage-agent.md +2 -16
- package/.claude-flow-novice/.claude/agents/testing/e2e/playwright-agent.md +7 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/CLAUDE.md +188 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/claude-flow-universal +81 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/claude-flow.bat +18 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/claude-flow.ps1 +24 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/claude-md.js +982 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/analysis/bottleneck-detect.md +162 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/automation/auto-agent.md +122 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/coordination/swarm-init.md +85 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/github/github-swarm.md +121 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/helpers/standard-checkpoint-hooks.sh +179 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/notification.md +113 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/post-command.md +116 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/post-edit.md +117 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/post-task.md +112 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/pre-command.md +113 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/pre-edit.md +113 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/pre-search.md +112 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/pre-task.md +111 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/session-end.md +118 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/session-restore.md +118 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/commands/hooks/session-start.md +117 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/coordination-md.js +340 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/coordination.md +16 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/enhanced-templates.js +2347 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/github-safe-enhanced.js +331 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/github-safe.js +106 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/memory-bank-md.js +259 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/memory-bank.md +16 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/readme-files.js +72 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/safe-hook-patterns.js +430 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/settings.json +109 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/settings.json.enhanced +35 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/sparc-modes.js +1401 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/CLAUDE.md +188 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/claude-flow-universal +81 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/claude-flow.bat +18 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/claude-flow.ps1 +24 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/claude-md.js +982 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/analysis/bottleneck-detect.md +162 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/automation/auto-agent.md +122 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/coordination/swarm-init.md +85 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/github/github-swarm.md +121 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/helpers/standard-checkpoint-hooks.sh +179 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/notification.md +113 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/post-command.md +116 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/post-edit.md +117 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/post-task.md +112 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/pre-command.md +113 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/pre-edit.md +113 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/pre-search.md +112 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/pre-task.md +111 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/session-end.md +118 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/session-restore.md +118 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/commands/hooks/session-start.md +117 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/coordination-md.js +340 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/coordination.md +16 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/enhanced-templates.js +2347 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/github-safe-enhanced.js +331 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/github-safe.js +106 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/memory-bank-md.js +259 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/memory-bank.md +16 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/readme-files.js +72 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/safe-hook-patterns.js +430 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/settings.json +109 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/settings.json.enhanced +35 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/sparc-modes.js +1401 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/templates/verification-claude-md.js +432 -0
- package/.claude-flow-novice/dist/src/cli/simple-commands/init/verification-claude-md.js +432 -0
- package/.claude-flow-novice/dist/src/slash-commands/benchmark-prompts.js +281 -0
- package/CLAUDE.md +1927 -127
- package/package.json +3 -3
- package/src/cli/simple-commands/init/index.js +39 -4
- package/src/cli/simple-commands/init/templates/CLAUDE.md +8 -10
- package/src/slash-commands/benchmark-prompts.js +281 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-flow-novice",
|
|
3
|
-
"version": "1.5.2",
|
|
3
|
+
"version": "1.5.3",
|
|
4
4
|
"description": "Standalone Claude Flow for beginners - AI agent orchestration made easy with enhanced TDD testing pipeline. Enhanced init command creates complete agent system, MCP configuration with 30 essential tools, and automated hooks with single-file testing, real-time coverage analysis, and advanced validation. Fully standalone with zero external dependencies, complete project setup in one command.",
|
|
5
5
|
"mcpName": "io.github.ruvnet/claude-flow",
|
|
6
6
|
"main": ".claude-flow-novice/dist/index.js",
|
|
@@ -39,9 +39,9 @@
|
|
|
39
39
|
"optimize:validate:hardware": "node scripts/optimization/config-validator.js validate hardware",
|
|
40
40
|
"optimize:validate:monitoring": "node scripts/optimization/config-validator.js validate monitoring",
|
|
41
41
|
"build": "scripts/build/unified-builder.sh safe",
|
|
42
|
-
"build:swc": "swc src -d .claude-flow-novice/dist --only='**/*.ts' --config-file .swcrc && cp -r src/slash-commands .claude-flow-novice/dist/src/ && cp -r src/cli/simple-commands/hooks .claude-flow-novice/dist/src/cli/simple-commands/ && cp -r .claude/agents .claude-flow-novice/.claude/",
|
|
42
|
+
"build:swc": "swc src -d .claude-flow-novice/dist --only='**/*.ts' --config-file .swcrc && cp -r src/slash-commands .claude-flow-novice/dist/src/ && cp -r src/cli/simple-commands/hooks .claude-flow-novice/dist/src/cli/simple-commands/ && cp -r src/cli/simple-commands/init/templates .claude-flow-novice/dist/src/cli/simple-commands/init/ && cp -r .claude/agents .claude-flow-novice/.claude/",
|
|
43
43
|
"build:types": "tsc --project config/typescript/tsconfig.json --emitDeclarationOnly --outDir .claude-flow-novice/dist --skipLibCheck",
|
|
44
|
-
"build:watch": "swc src -d .claude-flow-novice/dist --watch --config-file .swcrc && cp -r src/slash-commands .claude-flow-novice/dist/src/ && cp -r src/cli/simple-commands/hooks .claude-flow-novice/dist/src/cli/simple-commands/ && npm run copy:agents",
|
|
44
|
+
"build:watch": "swc src -d .claude-flow-novice/dist --watch --config-file .swcrc && cp -r src/slash-commands .claude-flow-novice/dist/src/ && cp -r src/cli/simple-commands/hooks .claude-flow-novice/dist/src/cli/simple-commands/ && cp -r src/cli/simple-commands/init/templates .claude-flow-novice/dist/src/cli/simple-commands/init/ && npm run copy:agents",
|
|
45
45
|
"build:legacy": "scripts/build/unified-builder.sh migration",
|
|
46
46
|
"build:workaround": "scripts/build/unified-builder.sh workaround",
|
|
47
47
|
"build:force": "scripts/build/unified-builder.sh force",
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
// init/index.js - Initialize Claude Code integration files
|
|
2
2
|
import { printSuccess, printError, printWarning, exit } from '../../utils.js';
|
|
3
3
|
import { existsSync } from 'fs';
|
|
4
|
+
import { promises as fs } from 'fs';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
6
|
+
import { dirname, join } from 'path';
|
|
4
7
|
import process from 'process';
|
|
5
8
|
import { spawn, execSync } from 'child_process';
|
|
6
9
|
import { promisify } from 'util';
|
|
@@ -83,6 +86,37 @@ import {
|
|
|
83
86
|
import { createAgentsReadme, createSessionsReadme } from './templates/readme-files.js';
|
|
84
87
|
import { initializeHiveMind, getHiveMindStatus, rollbackHiveMindInit } from './hive-mind-init.js';
|
|
85
88
|
|
|
89
|
+
// Get the directory path of this module for resolving template files
|
|
90
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
91
|
+
const __dirname = dirname(__filename);
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Read the CLAUDE.md template file
|
|
95
|
+
* @returns {Promise<string>} The template content
|
|
96
|
+
*/
|
|
97
|
+
async function readClaudeMdTemplate() {
|
|
98
|
+
// In source: templates/CLAUDE.md
|
|
99
|
+
// In dist: the files are copied directly to init/ directory (not templates/)
|
|
100
|
+
const possiblePaths = [
|
|
101
|
+
join(__dirname, 'templates', 'CLAUDE.md'), // Source location
|
|
102
|
+
join(__dirname, 'CLAUDE.md'), // Dist location (files copied directly)
|
|
103
|
+
];
|
|
104
|
+
|
|
105
|
+
for (const templatePath of possiblePaths) {
|
|
106
|
+
try {
|
|
107
|
+
const content = await fs.readFile(templatePath, 'utf8');
|
|
108
|
+
return content;
|
|
109
|
+
} catch (error) {
|
|
110
|
+
// Try next path
|
|
111
|
+
continue;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Fallback to generating if template file is not found in any location
|
|
116
|
+
console.warn('Warning: Template file not found in any location, using generated content');
|
|
117
|
+
return createOptimizedSparcClaudeMd();
|
|
118
|
+
}
|
|
119
|
+
|
|
86
120
|
/**
|
|
87
121
|
* Check if Claude Code CLI is installed
|
|
88
122
|
*/
|
|
@@ -1313,12 +1347,13 @@ async function enhancedClaudeFlowInit(flags, subArgs = []) {
|
|
|
1313
1347
|
return;
|
|
1314
1348
|
}
|
|
1315
1349
|
|
|
1316
|
-
// Create CLAUDE.md
|
|
1350
|
+
// Create CLAUDE.md from template
|
|
1317
1351
|
if (!dryRun) {
|
|
1318
|
-
await
|
|
1319
|
-
|
|
1352
|
+
const claudeMdContent = await readClaudeMdTemplate();
|
|
1353
|
+
await fs.writeFile(`${workingDir}/CLAUDE.md`, claudeMdContent, 'utf8');
|
|
1354
|
+
printSuccess('✓ Created CLAUDE.md (Claude Flow v2.0.0 - Optimized Template)');
|
|
1320
1355
|
} else {
|
|
1321
|
-
console.log('[DRY RUN] Would create CLAUDE.md (Claude Flow v2.0.0 - Optimized)');
|
|
1356
|
+
console.log('[DRY RUN] Would create CLAUDE.md (Claude Flow v2.0.0 - Optimized Template)');
|
|
1322
1357
|
}
|
|
1323
1358
|
|
|
1324
1359
|
// Create .claude directory structure
|
|
@@ -177,14 +177,12 @@ claude mcp add claude-flow-novice npx claude-flow-novice mcp start
|
|
|
177
177
|
- `/fullstack "goal"` - Launch full-stack development team with consensus validation
|
|
178
178
|
- `/swarm`, `/sparc`, `/hooks` - Other slash commands (auto-discovered)
|
|
179
179
|
|
|
180
|
-
## DEVELOPMENT FLOW
|
|
181
|
-
1. Execute - Primary swarm (3-8 agents) produces deliverables with confidence score
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
180
|
+
## DEVELOPMENT FLOW LOOP
|
|
181
|
+
1. Execute - Primary swarm (3-8 agents) produces deliverables with confidence score. Each agent MUST be using the enhanced post edit pipeline after file edits.
|
|
182
|
+
2. When the swarm believes it's done with all tasks, move to step 4
|
|
183
|
+
3. If swarm does not believe it's done (confidence scores < 75%), relaunch agents for step 1
|
|
184
|
+
4. Verify - Consensus swarm (2-4 validators) runs comprehensive checks with Byzantine voting
|
|
185
|
+
5. Decision - PASS (≥90% agreement + critical criteria) OR FAIL
|
|
186
|
+
6. Action -
|
|
187
187
|
- PASS → Store results → Move to next task
|
|
188
|
-
- FAIL → Round++ → If <10: inject feedback → Relaunch swarm | If ≥10: Escalate to human
|
|
189
|
-
5. Repeat - Iterative improvement with accumulated context from all previous rounds
|
|
190
|
-
6. Escalate - Full history + feedback + recommendations after 10 rounds
|
|
188
|
+
- FAIL → Round++ → If <10: inject feedback → Relaunch swarm on step 1 | If ≥10: Escalate to human
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
const { AgentBenchmarkSystem } = require('../../benchmark/agent-benchmarking/index');
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Benchmark agent prompt formats
|
|
6
|
+
*
|
|
7
|
+
* @command /benchmark-prompts
|
|
8
|
+
* @description Run automated benchmarks comparing different agent prompt formatting styles
|
|
9
|
+
* @example /benchmark-prompts run
|
|
10
|
+
* @example /benchmark-prompts run --rounds 5 --parallel
|
|
11
|
+
* @example /benchmark-prompts analyze
|
|
12
|
+
* @example /benchmark-prompts report markdown
|
|
13
|
+
*/
|
|
14
|
+
async function benchmarkPrompts(args) {
|
|
15
|
+
const system = new AgentBenchmarkSystem();
|
|
16
|
+
|
|
17
|
+
const command = args[0] || 'help';
|
|
18
|
+
|
|
19
|
+
try {
|
|
20
|
+
switch (command) {
|
|
21
|
+
case 'run':
|
|
22
|
+
return await runBenchmark(system, args);
|
|
23
|
+
|
|
24
|
+
case 'test':
|
|
25
|
+
return await testFormat(system, args);
|
|
26
|
+
|
|
27
|
+
case 'analyze':
|
|
28
|
+
return await analyzeBenchmark(system);
|
|
29
|
+
|
|
30
|
+
case 'report':
|
|
31
|
+
return await generateReport(system, args);
|
|
32
|
+
|
|
33
|
+
case 'compare':
|
|
34
|
+
return await compareBenchmark(system);
|
|
35
|
+
|
|
36
|
+
case 'list':
|
|
37
|
+
return await listBenchmarks(system);
|
|
38
|
+
|
|
39
|
+
case 'reset':
|
|
40
|
+
return await resetBenchmarks(system);
|
|
41
|
+
|
|
42
|
+
case 'help':
|
|
43
|
+
default:
|
|
44
|
+
return showHelp();
|
|
45
|
+
}
|
|
46
|
+
} catch (error) {
|
|
47
|
+
console.error(`\n❌ Error: ${error.message}\n`);
|
|
48
|
+
if (args.includes('--verbose')) {
|
|
49
|
+
console.error(error.stack);
|
|
50
|
+
}
|
|
51
|
+
return { success: false, error: error.message };
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
async function runBenchmark(system, args) {
|
|
56
|
+
console.log('\n🎯 Running Agent Prompt Format Benchmark\n');
|
|
57
|
+
|
|
58
|
+
const options = {
|
|
59
|
+
rounds: 3,
|
|
60
|
+
parallel: false,
|
|
61
|
+
verbose: false,
|
|
62
|
+
scenarios: null
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
// Parse options
|
|
66
|
+
for (let i = 1; i < args.length; i++) {
|
|
67
|
+
const arg = args[i];
|
|
68
|
+
if (arg === '--rounds' && args[i + 1]) {
|
|
69
|
+
options.rounds = parseInt(args[i + 1]);
|
|
70
|
+
i++;
|
|
71
|
+
} else if (arg === '--parallel') {
|
|
72
|
+
options.parallel = true;
|
|
73
|
+
} else if (arg === '--verbose') {
|
|
74
|
+
options.verbose = true;
|
|
75
|
+
} else if (arg === '--scenarios' && args[i + 1]) {
|
|
76
|
+
options.scenarios = args[i + 1];
|
|
77
|
+
i++;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
console.log('Configuration:');
|
|
82
|
+
console.log(` Rounds: ${options.rounds}`);
|
|
83
|
+
console.log(` Parallel: ${options.parallel ? 'Yes' : 'No'}`);
|
|
84
|
+
console.log(` Scenarios: ${options.scenarios || 'All'}\n`);
|
|
85
|
+
|
|
86
|
+
const result = await system.run(options);
|
|
87
|
+
|
|
88
|
+
console.log('\n✅ Benchmark complete!');
|
|
89
|
+
console.log('\nNext steps:');
|
|
90
|
+
console.log(' /benchmark-prompts analyze - View statistical analysis');
|
|
91
|
+
console.log(' /benchmark-prompts report - Generate detailed reports');
|
|
92
|
+
console.log(' /benchmark-prompts compare - Interactive comparison\n');
|
|
93
|
+
|
|
94
|
+
return { success: true, result };
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
async function testFormat(system, args) {
|
|
98
|
+
const format = args[1];
|
|
99
|
+
if (!format || !['minimal', 'metadata', 'code-heavy'].includes(format)) {
|
|
100
|
+
console.error('\n❌ Invalid format. Use: minimal, metadata, or code-heavy\n');
|
|
101
|
+
return { success: false };
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
console.log(`\n🧪 Testing ${format} format\n`);
|
|
105
|
+
|
|
106
|
+
// Run benchmark with only specified format
|
|
107
|
+
const options = {
|
|
108
|
+
rounds: parseInt(args[2]) || 3,
|
|
109
|
+
parallel: false,
|
|
110
|
+
verbose: args.includes('--verbose')
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
// This would need modification to support single format testing
|
|
114
|
+
console.log('Note: Currently runs all formats. Single format testing coming soon.\n');
|
|
115
|
+
|
|
116
|
+
const result = await system.run(options);
|
|
117
|
+
return { success: true, result };
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
async function analyzeBenchmark(system) {
|
|
121
|
+
console.log('\n📊 Analyzing Benchmark Results\n');
|
|
122
|
+
|
|
123
|
+
const result = await system.analyze();
|
|
124
|
+
|
|
125
|
+
if (!result) {
|
|
126
|
+
return { success: false };
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
console.log('\n✅ Analysis complete\n');
|
|
130
|
+
return { success: true, result };
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
async function generateReport(system, args) {
|
|
134
|
+
const format = args[1] || 'all';
|
|
135
|
+
|
|
136
|
+
if (!['all', 'markdown', 'csv', 'json'].includes(format)) {
|
|
137
|
+
console.error('\n❌ Invalid format. Use: all, markdown, csv, or json\n');
|
|
138
|
+
return { success: false };
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
console.log(`\n📄 Generating ${format} report(s)\n`);
|
|
142
|
+
|
|
143
|
+
const result = await system.generateReport(format);
|
|
144
|
+
|
|
145
|
+
if (!result) {
|
|
146
|
+
return { success: false };
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
console.log('\n✅ Report generation complete\n');
|
|
150
|
+
return { success: true, result };
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
async function compareBenchmark(system) {
|
|
154
|
+
console.log('\n📊 Interactive Benchmark Comparison\n');
|
|
155
|
+
|
|
156
|
+
// Load and analyze latest results
|
|
157
|
+
const result = await system.analyze();
|
|
158
|
+
|
|
159
|
+
if (!result) {
|
|
160
|
+
return { success: false };
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
const { results, analysis } = result;
|
|
164
|
+
|
|
165
|
+
// Print comparison table
|
|
166
|
+
console.log('Format Comparison:');
|
|
167
|
+
console.log('─'.repeat(80));
|
|
168
|
+
console.log('Format'.padEnd(15) + 'Quality'.padEnd(15) + 'Speed'.padEnd(15) + 'Consistency'.padEnd(15) + 'Success');
|
|
169
|
+
console.log('─'.repeat(80));
|
|
170
|
+
|
|
171
|
+
for (const [formatName, formatData] of Object.entries(results.formats)) {
|
|
172
|
+
const agg = formatData.aggregated;
|
|
173
|
+
console.log(
|
|
174
|
+
formatName.padEnd(15) +
|
|
175
|
+
`${agg.overallQuality.toFixed(1)}%`.padEnd(15) +
|
|
176
|
+
`${agg.overallResponseTime.toFixed(0)}ms`.padEnd(15) +
|
|
177
|
+
`${agg.overallConsistency.toFixed(1)}%`.padEnd(15) +
|
|
178
|
+
`${agg.successRate.toFixed(1)}%`
|
|
179
|
+
);
|
|
180
|
+
}
|
|
181
|
+
console.log('─'.repeat(80));
|
|
182
|
+
|
|
183
|
+
console.log('\n🏆 Winner: ' + results.summary.winner.toUpperCase());
|
|
184
|
+
console.log('\nRecommendations:');
|
|
185
|
+
for (const rec of results.summary.recommendation) {
|
|
186
|
+
console.log(` • ${rec}`);
|
|
187
|
+
}
|
|
188
|
+
console.log('');
|
|
189
|
+
|
|
190
|
+
return { success: true, results, analysis };
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
async function listBenchmarks(system) {
|
|
194
|
+
console.log('\n📂 Benchmark Results\n');
|
|
195
|
+
await system.listResults();
|
|
196
|
+
console.log('');
|
|
197
|
+
return { success: true };
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
async function resetBenchmarks(system) {
|
|
201
|
+
console.log('\n⚠️ This will delete all benchmark results.\n');
|
|
202
|
+
|
|
203
|
+
// In interactive mode, would ask for confirmation
|
|
204
|
+
// For now, proceeding with reset
|
|
205
|
+
|
|
206
|
+
await system.reset();
|
|
207
|
+
console.log('');
|
|
208
|
+
return { success: true };
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
function showHelp() {
|
|
212
|
+
console.log(`
|
|
213
|
+
╔════════════════════════════════════════════════════════════════════════════╗
|
|
214
|
+
║ Agent Prompt Format Benchmark System ║
|
|
215
|
+
╚════════════════════════════════════════════════════════════════════════════╝
|
|
216
|
+
|
|
217
|
+
Compare different agent prompt formatting styles:
|
|
218
|
+
• Minimal Format - Simple frontmatter only
|
|
219
|
+
• Metadata Format - Full configuration with hooks and capabilities
|
|
220
|
+
• Code-Heavy Format - Extensive code examples
|
|
221
|
+
|
|
222
|
+
COMMANDS:
|
|
223
|
+
/benchmark-prompts run [options] Run full benchmark suite
|
|
224
|
+
/benchmark-prompts test <format> Test specific format
|
|
225
|
+
/benchmark-prompts analyze Analyze collected results
|
|
226
|
+
/benchmark-prompts report [format] Generate reports (all|markdown|csv|json)
|
|
227
|
+
/benchmark-prompts compare Interactive comparison view
|
|
228
|
+
/benchmark-prompts list List all benchmark results
|
|
229
|
+
/benchmark-prompts reset Clear all benchmark data
|
|
230
|
+
|
|
231
|
+
OPTIONS:
|
|
232
|
+
--rounds <n> Number of test rounds (default: 3)
|
|
233
|
+
--parallel Run tests in parallel
|
|
234
|
+
--scenarios <list> Comma-separated scenario IDs to test
|
|
235
|
+
--verbose Detailed output
|
|
236
|
+
--export Export results
|
|
237
|
+
|
|
238
|
+
EXAMPLES:
|
|
239
|
+
/benchmark-prompts run
|
|
240
|
+
/benchmark-prompts run --rounds 5 --parallel
|
|
241
|
+
/benchmark-prompts test minimal --verbose
|
|
242
|
+
/benchmark-prompts analyze
|
|
243
|
+
/benchmark-prompts report markdown
|
|
244
|
+
/benchmark-prompts compare
|
|
245
|
+
|
|
246
|
+
TEST SCENARIOS:
|
|
247
|
+
• simple-code-analysis - Basic algorithm optimization
|
|
248
|
+
• memory-leak-detection - Memory leak identification
|
|
249
|
+
• database-query-optimization - Query performance optimization
|
|
250
|
+
• caching-strategy - Cache architecture design
|
|
251
|
+
• resource-allocation - Resource calculation
|
|
252
|
+
• async-pattern-optimization - Async code optimization
|
|
253
|
+
• algorithm-complexity-reduction - Algorithm improvement
|
|
254
|
+
• load-testing-strategy - Load test planning
|
|
255
|
+
• bottleneck-identification - Performance bottleneck analysis
|
|
256
|
+
• scalability-architecture - Scalability design
|
|
257
|
+
|
|
258
|
+
BENCHMARK METRICS:
|
|
259
|
+
• Quality Score - Completeness, accuracy, relevance, clarity
|
|
260
|
+
• Response Time - Total time to generate response
|
|
261
|
+
• Consistency - Variance across multiple rounds
|
|
262
|
+
• Success Rate - Percentage of successful completions
|
|
263
|
+
|
|
264
|
+
For more information, see: /benchmark/agent-benchmarking/README.md
|
|
265
|
+
`);
|
|
266
|
+
|
|
267
|
+
return { success: true };
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
module.exports = {
|
|
271
|
+
command: 'benchmark-prompts',
|
|
272
|
+
description: 'Run automated benchmarks comparing agent prompt formats',
|
|
273
|
+
handler: benchmarkPrompts,
|
|
274
|
+
examples: [
|
|
275
|
+
'/benchmark-prompts run',
|
|
276
|
+
'/benchmark-prompts run --rounds 5 --parallel',
|
|
277
|
+
'/benchmark-prompts analyze',
|
|
278
|
+
'/benchmark-prompts report markdown',
|
|
279
|
+
'/benchmark-prompts compare'
|
|
280
|
+
]
|
|
281
|
+
};
|