forge-workflow 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/.claude/commands/dev.md +314 -0
  2. package/.claude/commands/plan.md +389 -0
  3. package/.claude/commands/premerge.md +179 -0
  4. package/.claude/commands/research.md +42 -0
  5. package/.claude/commands/review.md +442 -0
  6. package/.claude/commands/rollback.md +721 -0
  7. package/.claude/commands/ship.md +134 -0
  8. package/.claude/commands/sonarcloud.md +152 -0
  9. package/.claude/commands/status.md +77 -0
  10. package/.claude/commands/validate.md +237 -0
  11. package/.claude/commands/verify.md +221 -0
  12. package/.claude/rules/greptile-review-process.md +285 -0
  13. package/.claude/rules/workflow.md +105 -0
  14. package/.claude/scripts/greptile-resolve.sh +526 -0
  15. package/.claude/scripts/load-env.sh +32 -0
  16. package/.forge/hooks/check-tdd.js +240 -0
  17. package/.github/PLUGIN_TEMPLATE.json +32 -0
  18. package/.mcp.json.example +12 -0
  19. package/AGENTS.md +169 -0
  20. package/CLAUDE.md +99 -0
  21. package/LICENSE +21 -0
  22. package/README.md +414 -0
  23. package/bin/forge-cmd.js +313 -0
  24. package/bin/forge-validate.js +303 -0
  25. package/bin/forge.js +4228 -0
  26. package/docs/AGENT_INSTALL_PROMPT.md +342 -0
  27. package/docs/ENHANCED_ONBOARDING.md +602 -0
  28. package/docs/EXAMPLES.md +482 -0
  29. package/docs/GREPTILE_SETUP.md +400 -0
  30. package/docs/MANUAL_REVIEW_GUIDE.md +106 -0
  31. package/docs/ROADMAP.md +359 -0
  32. package/docs/SETUP.md +632 -0
  33. package/docs/TOOLCHAIN.md +849 -0
  34. package/docs/VALIDATION.md +363 -0
  35. package/docs/WORKFLOW.md +400 -0
  36. package/docs/planning/PROGRESS.md +396 -0
  37. package/docs/plans/.gitkeep +0 -0
  38. package/docs/plans/2026-02-27-forge-test-suite-v2-decisions.md +21 -0
  39. package/docs/plans/2026-02-27-forge-test-suite-v2-design.md +362 -0
  40. package/docs/plans/2026-02-27-forge-test-suite-v2-tasks.md +343 -0
  41. package/docs/plans/2026-03-02-superpowers-gaps-decisions.md +26 -0
  42. package/docs/plans/2026-03-02-superpowers-gaps-design.md +239 -0
  43. package/docs/plans/2026-03-02-superpowers-gaps-tasks.md +260 -0
  44. package/docs/plans/2026-03-04-agent-command-parity-design.md +163 -0
  45. package/docs/plans/2026-03-04-verify-worktree-cleanup-decisions.md +7 -0
  46. package/docs/plans/2026-03-04-verify-worktree-cleanup-design.md +165 -0
  47. package/docs/plans/2026-03-05-forge-uto-decisions.md +6 -0
  48. package/docs/plans/2026-03-05-forge-uto-design.md +116 -0
  49. package/docs/plans/2026-03-05-forge-uto-tasks.md +244 -0
  50. package/docs/plans/2026-03-10-command-creator-and-eval-decisions.md +52 -0
  51. package/docs/plans/2026-03-10-command-creator-and-eval-design.md +350 -0
  52. package/docs/plans/2026-03-10-command-creator-and-eval-tasks.md +426 -0
  53. package/docs/plans/2026-03-10-stale-workflow-refs-decisions.md +8 -0
  54. package/docs/plans/2026-03-10-stale-workflow-refs-design.md +80 -0
  55. package/docs/plans/2026-03-10-stale-workflow-refs-tasks.md +90 -0
  56. package/docs/plans/2026-03-14-beads-plan-context-decisions.md +9 -0
  57. package/docs/plans/2026-03-14-beads-plan-context-design.md +171 -0
  58. package/docs/plans/2026-03-14-beads-plan-context-tasks.md +160 -0
  59. package/docs/plans/2026-03-14-skill-eval-loop-decisions.md +33 -0
  60. package/docs/plans/2026-03-14-skill-eval-loop-design.md +118 -0
  61. package/docs/plans/2026-03-14-skill-eval-loop-results.md +78 -0
  62. package/docs/plans/2026-03-14-skill-eval-loop-tasks.md +160 -0
  63. package/docs/plans/2026-03-15-agent-command-parity-v2-decisions.md +11 -0
  64. package/docs/plans/2026-03-15-agent-command-parity-v2-design.md +145 -0
  65. package/docs/plans/2026-03-15-agent-command-parity-v2-tasks.md +211 -0
  66. package/docs/research/TEMPLATE.md +292 -0
  67. package/docs/research/advanced-testing.md +297 -0
  68. package/docs/research/agent-permissions.md +167 -0
  69. package/docs/research/dependency-chain.md +328 -0
  70. package/docs/research/forge-workflow-v2.md +550 -0
  71. package/docs/research/plugin-architecture.md +772 -0
  72. package/docs/research/pr4-cli-automation.md +326 -0
  73. package/docs/research/premerge-verify-restructure.md +205 -0
  74. package/docs/research/skills-restructure.md +508 -0
  75. package/docs/research/sonarcloud-perfection-plan.md +166 -0
  76. package/docs/research/sonarcloud-quality-gate.md +184 -0
  77. package/docs/research/superpowers-integration.md +403 -0
  78. package/docs/research/superpowers.md +319 -0
  79. package/docs/research/test-environment.md +519 -0
  80. package/install.sh +1062 -0
  81. package/lefthook.yml +39 -0
  82. package/lib/agents/README.md +198 -0
  83. package/lib/agents/claude.plugin.json +28 -0
  84. package/lib/agents/cline.plugin.json +22 -0
  85. package/lib/agents/codex.plugin.json +19 -0
  86. package/lib/agents/copilot.plugin.json +24 -0
  87. package/lib/agents/cursor.plugin.json +25 -0
  88. package/lib/agents/kilocode.plugin.json +22 -0
  89. package/lib/agents/opencode.plugin.json +20 -0
  90. package/lib/agents/roo.plugin.json +23 -0
  91. package/lib/agents-config.js +2112 -0
  92. package/lib/commands/dev.js +513 -0
  93. package/lib/commands/plan.js +696 -0
  94. package/lib/commands/recommend.js +119 -0
  95. package/lib/commands/ship.js +377 -0
  96. package/lib/commands/status.js +378 -0
  97. package/lib/commands/validate.js +602 -0
  98. package/lib/context-merge.js +359 -0
  99. package/lib/plugin-catalog.js +360 -0
  100. package/lib/plugin-manager.js +166 -0
  101. package/lib/plugin-recommender.js +141 -0
  102. package/lib/project-discovery.js +491 -0
  103. package/lib/setup.js +118 -0
  104. package/lib/workflow-profiles.js +203 -0
  105. package/package.json +115 -0
@@ -0,0 +1,519 @@
1
+ # Research: Comprehensive Test Environment for Forge
2
+
3
+ **Date**: 2026-02-03
4
+ **Researcher**: AI Assistant
5
+ **Epic**: forge-hql
6
+ **Status**: Complete
7
+
8
+ ## Executive Summary
9
+
10
+ Research into creating a comprehensive test environment for the Forge workflow project, covering installation flows, edge cases, onboarding processes, and multi-installation validation. The current test suite (9 files) needs expansion to 50+ tests with automated fixtures, security validation, and improvement recommendations.
11
+
12
+ ## Problem Statement
13
+
14
+ Forge is a universal AI agent workflow tool supporting 11 agent plugins with complex installation flows. Current testing is insufficient:
15
+
16
+ - **Test coverage gaps**: Only 9 test files, missing edge cases like permissions, network failures, unicode handling
17
+ - **No multi-installation validation**: Not tested across npm/yarn/pnpm/bun or different frameworks
18
+ - **Manual onboarding testing**: No automated validation across the 11 supported agents or their combinations
19
+ - **Security concerns**: Limited validation of user inputs, path traversal, injection attacks
20
+ - **No improvement tracking**: Need systematic identification of UX and reliability issues
21
+
22
+ ## Current State Analysis
23
+
24
+ ### Existing Test Infrastructure
25
+
26
+ **9 test files** (using Node.js `node:test`, no external dependencies):
27
+
28
+ 1. `test/agents-md/structure.test.js` - AGENTS.md structure validation
29
+ 2. `test/plugins/plugin-manager.test.js` - Plugin loading tests
30
+ 3. `test/plugins/plugin-schema.test.js` - Plugin JSON schema validation
31
+ 4. `test/validation/forge-validate.test.js` - CLI validation
32
+ 5. `test/validation/git-hooks.test.js` - Lefthook configuration
33
+ 6. `test/validation/project-tools.test.js` - Project tools detection
34
+ 7. `test/rollback-validation.test.js` - Rollback input validation
35
+ 8. `test/rollback-user-sections.test.js` - User section preservation
36
+ 9. `test/rollback-edge-cases.test.js` - Security validation (434 lines)
37
+
38
+ **Coverage**: ~30% of critical flows
39
+
40
+ ### Installation Entry Points
41
+
42
+ 1. **Postinstall** (automatic): Creates AGENTS.md + docs/ (minimal)
43
+ 2. **Interactive setup**: `bunx forge setup` (full agent selection)
44
+ 3. **Quick mode**: `bunx forge setup --quick` (all defaults)
45
+ 4. **Curl install**: `install.sh` (bash script, 1063 lines)
46
+
47
+ ### Critical Files
48
+
49
+ - `bin/forge.js` (3,771 lines) - Main CLI, all flows
50
+ - `bin/forge-validate.js` (303 lines) - Validation CLI
51
+ - `lib/plugin-manager.js` (115 lines) - Plugin loading
52
+ - `install.sh` (1,063 lines) - Curl installation
53
+ - 11 plugin files in `lib/agents/*.plugin.json`
54
+
55
+ ### Known Edge Cases (Discovered)
56
+
57
+ From code analysis, these edge cases exist but lack tests:
58
+
59
+ 1. **Prerequisites**: Missing git, gh, Node < 20, no package manager
60
+ 2. **Permissions**: Read-only directories, locked files
61
+ 3. **Git states**: Detached HEAD, uncommitted changes, merge conflicts
62
+ 4. **Partial install**: Some files exist, others missing
63
+ 5. **Conflicts**: Both AGENTS.md and CLAUDE.md present
64
+ 6. **File limits**: AGENTS.md > 200 lines (warning triggers)
65
+ 7. **Unicode/special chars**: Not validated in paths
66
+ 8. **Network failures**: No timeout handling visible
67
+ 9. **Invalid JSON**: Plugin validation exists but not comprehensive
68
+ 10. **Path traversal**: Some validation in rollback, needs expansion
69
+
70
+ ## Research Findings
71
+
72
+ ### 1. Security Validation Patterns
73
+
74
+ **Good pattern found**: `test/rollback-edge-cases.test.js:10-54`
75
+
76
+ ```javascript
77
+ function validateRollbackInput(method, target) {
78
+ // Validates:
79
+ // - Commit hash format (4-40 hex chars)
80
+ // - Shell injection characters (;|&$`()<>)
81
+ // - Path traversal (../, URL-encoded)
82
+ // - Non-ASCII characters
83
+ // - NULL bytes
84
+
85
+ // Returns: { valid: boolean, error?: string }
86
+ }
87
+ ```
88
+
89
+ **Tests cover**:
90
+ - Shell injection via semicolon, pipe, ampersand, dollar, backtick
91
+ - Path traversal attempts (simple, encoded, Windows-style)
92
+ - Unicode injection
93
+ - File path validation (within project root)
94
+
95
+ **Should apply to**:
96
+ - Installation target paths (`--path` flag)
97
+ - Agent selection names
98
+ - File paths in partial rollback
99
+ - API keys in .env.local
100
+ - Plugin JSON file paths
101
+
102
+ ### 2. Installation Flow Complexity
103
+
104
+ **Three CLI installation modes** (the curl-based `install.sh` entry point listed above is a fourth way in):
105
+
106
+ ```
107
+ Mode 1: Postinstall (automatic)
108
+ bun add forge-workflow
109
+
110
+ Creates: AGENTS.md + docs/ only
111
+ Duration: ~5 seconds
112
+ Files created: ~5
113
+
114
+ Mode 2: Interactive Setup
115
+ bunx forge setup
116
+
117
+ Prompts: Agent selection (11 options)
118
+ Prompts: File overwrites (if exists)
119
+ Prompts: Beads/OpenSpec installation
120
+ Prompts: External services (code review, quality, research)
121
+
122
+ Creates: Agent-specific dirs + configs
123
+ Duration: 2-5 minutes (interactive)
124
+ Files created: 5-50 depending on agents
125
+
126
+ Mode 3: Quick Mode
127
+ bunx forge setup --quick
128
+
129
+ Defaults: All agents, GitHub Code Quality, ESLint
130
+ No prompts (except file overwrites)
131
+ Duration: ~30 seconds
132
+ Files created: ~50
133
+ ```
134
+
135
+ **Complexity points**:
136
+ - File overwrite handling (backup, prompt, merge)
137
+ - USER section preservation in AGENTS.md
138
+ - .env.local preservation of existing vars
139
+ - Symlink fallback to copy on Windows
140
+ - Plugin JSON validation and loading
141
+ - Smart project type detection (Next.js, NestJS, etc.)
142
+
143
+ ### 3. Multi-Agent Combinations
144
+
145
+ **11 agents = 2,047 possible combinations** (2^11 - 1)
146
+
147
+ Realistic sampling strategy:
148
+ - Single agent: 11 tests
149
+ - Popular pairs: Claude+Cursor, Claude+Cline, Cursor+Kilo (3 tests)
150
+ - All agents: 1 test
151
+ - No agents: 1 test (error handling)
152
+
153
+ **Total: 16 representative tests** (covers ~80% of use cases)
154
+
155
+ ### 4. Package Manager Detection
156
+
157
+ **Code**: `install.sh:114-138` and `bin/forge.js:detectPackageManager()`
158
+
159
+ **Detection logic**:
160
+ 1. Check lock files (bun.lockb, pnpm-lock.yaml, yarn.lock, package-lock.json)
161
+ 2. Check commands available (bun, pnpm, yarn, npm)
162
+ 3. Priority: bun > pnpm > yarn > npm
163
+
164
+ **Edge cases**:
165
+ - Monorepo with mixed package managers
166
+ - Missing lock file but command available
167
+ - Multiple lock files (corrupted state)
168
+ - No package manager installed
169
+
170
+ ### 5. Framework Detection Accuracy
171
+
172
+ **Code**: `bin/forge.js:detectProjectType()`
173
+
174
+ **Detects**:
175
+ - Next.js (next.config.js, next.config.mjs)
176
+ - NestJS (@nestjs/core in dependencies)
177
+ - React (react in dependencies)
178
+ - Vue (vue in dependencies)
179
+ - Angular (angular.json)
180
+ - Remix (remix.config.js)
181
+ - SvelteKit (svelte.config.js)
182
+ - Astro (astro.config.mjs)
183
+
184
+ **Adds framework-specific tips to AGENTS.md**
185
+
186
+ **Edge cases**:
187
+ - Multiple frameworks in monorepo
188
+ - Framework migration in progress
189
+ - Custom build configurations
190
+
191
+ ### 6. External Services Configuration
192
+
193
+ **Four service categories**:
194
+
195
+ 1. **Code Review** (3 options + skip):
196
+ - GitHub Code Quality (free, default)
197
+ - CodeRabbit (free for OSS)
198
+ - Greptile (paid, requires API key)
199
+
200
+ 2. **Code Quality** (3 options + skip):
201
+ - ESLint only (free, default)
202
+ - SonarCloud (50k LoC free)
203
+ - SonarQube Community (self-hosted)
204
+
205
+ 3. **Research Tool** (2 options):
206
+ - Manual (default)
207
+ - Parallel AI (requires API key)
208
+
209
+ 4. **Context7 MCP**:
210
+ - Auto-installed for Claude Code (.mcp.json)
211
+ - Manual setup for others
212
+
213
+ **Edge cases**:
214
+ - API key validation (no network validation currently)
215
+ - .env.local already exists with custom vars
216
+ - Service requires additional setup (Docker for SonarQube)
217
+
218
+ ### 7. Backup and Recovery
219
+
220
+ **Current state**:
221
+ - AGENTS.md backed up before overwrite: `AGENTS.md.backup`
222
+ - No comprehensive backup system
223
+ - No rollback capability (except git)
224
+
225
+ **Needed**:
226
+ - Transaction-like installation (all-or-nothing)
227
+ - Backup directory with timestamp
228
+ - Rollback command: `bunx forge rollback --backup <timestamp>`
229
+ - Keep last 5 backups, auto-cleanup
230
+
231
+ ### 8. Performance Characteristics
232
+
233
+ **Observed from code analysis**:
234
+
235
+ - Plugin loading: O(n) where n=11 plugins (fast)
236
+ - File downloads: Serial, not parallel (slow)
237
+ - File writes: Serial (could parallelize)
238
+ - Git operations: Blocking (necessary)
239
+
240
+ **Targets**:
241
+ - Quick mode: < 30 seconds
242
+ - Interactive mode: 2-5 minutes (acceptable)
243
+ - Postinstall: < 10 seconds
244
+
245
+ **Bottlenecks**:
246
+ - Network requests (curl downloads in install.sh)
247
+ - Git operations (gh pr create, git push)
248
+ - User input waits (interactive prompts)
249
+
250
+ ## Key Decisions
251
+
252
+ ### 1. Test Framework Choice
253
+
254
+ **Decision**: Use Node.js built-in `node:test` (no external dependencies)
255
+
256
+ **Rationale**:
257
+ - Already used in 9 existing tests
258
+ - No npm install needed (zero dependencies)
259
+ - Fast, lightweight
260
+ - Standard assert library sufficient
261
+
262
+ **Alternatives considered**:
263
+ - Jest (too heavy, requires setup)
264
+ - Vitest (requires Vite)
265
+ - Mocha (requires install)
266
+
267
+ ### 2. Test Isolation Strategy
268
+
269
+ **Decision**: Use temp directories per test, cleanup after
270
+
271
+ **Rationale**:
272
+ - Prevents test pollution
273
+ - Allows parallel execution
274
+ - Safe to run repeatedly
275
+ - Matches current pattern
276
+
277
+ **Implementation**:
278
+ ```javascript
279
+ const { mkdtempSync } = require('fs');
280
+ const { tmpdir } = require('os');
281
+ const { join } = require('path');
282
+
283
+ const testDir = mkdtempSync(join(tmpdir(), 'forge-test-'));
284
+ // Run test in testDir
285
+ // Cleanup: fs.rmSync(testDir, { recursive: true })
286
+ ```
287
+
288
+ ### 3. Fixture Management
289
+
290
+ **Decision**: Create fixtures once via script, reuse across tests
291
+
292
+ **Rationale**:
293
+ - Faster test execution
294
+ - Consistent test environments
295
+ - Easy to add new fixtures
296
+
297
+ **Script**: `automation/setup-fixtures.sh`
298
+
299
+ ### 4. Security Test Approach
300
+
301
+ **Decision**: Follow pattern from `test/rollback-edge-cases.test.js`
302
+
303
+ **Rationale**:
304
+ - Already proven pattern
305
+ - Comprehensive coverage (43 security tests)
306
+ - Clear test naming
307
+ - Returns validation objects
308
+
309
+ **Expand to**:
310
+ - Installation paths
311
+ - Agent names
312
+ - API keys
313
+ - File paths
314
+
315
+ ### 5. Multi-Installation Testing
316
+
317
+ **Decision**: Use Docker containers for prerequisite testing
318
+
319
+ **Rationale**:
320
+ - Can simulate missing git, gh, Node versions
321
+ - Isolated environments
322
+ - Reproducible
323
+
324
+ **Alternatives**:
325
+ - Mock commands (doesn't test real behavior)
326
+ - Manual VMs (slow, not reproducible)
327
+
328
+ ### 6. Improvement Prioritization
329
+
330
+ **Decision**: Four-tier priority system (P1-P4)
331
+
332
+ - **P1 (Critical)**: Security, data loss prevention
333
+ - **P2 (High)**: User experience, error messages
334
+ - **P3 (Medium)**: Testing, reliability
335
+ - **P4 (Low)**: Nice-to-have features
336
+
337
+ **Rationale**:
338
+ - Focuses on impact
339
+ - Clear implementation order
340
+ - Balances short-term and long-term
341
+
342
+ ### 7. Reporting Format
343
+
344
+ **Decision**: HTML report with interactive sections
345
+
346
+ **Rationale**:
347
+ - Easy to share
348
+ - Visual representation
349
+ - Can drill down into failures
350
+ - Includes benchmarks
351
+
352
+ **Libraries**: None (generate raw HTML)
353
+
354
+ ### 8. CI/CD Integration
355
+
356
+ **Decision**: GitHub Actions with matrix testing
357
+
358
+ **Matrix**:
359
+ - OS: Ubuntu, macOS, Windows
360
+ - Node: 20.x, 22.x
361
+ - Package Manager: npm, yarn, pnpm, bun
362
+
363
+ **Rationale**:
364
+ - Covers 80% of users
365
+ - Catches platform-specific bugs
366
+ - Automated on every PR
367
+
368
+ ## Risks and Mitigations
369
+
370
+ ### Risk 1: Test execution time too long
371
+
372
+ - **Impact**: Medium
373
+ - **Probability**: High
374
+
375
+ **Mitigation**:
376
+ - Run edge case tests in parallel
377
+ - Use test fixtures (not fresh setup each time)
378
+ - Skip slow tests in pre-commit hook
379
+ - Full suite only on CI/CD
380
+
381
+ ### Risk 2: Docker dependency for prerequisite tests
382
+
383
+ - **Impact**: Medium
384
+ - **Probability**: Medium
385
+
386
+ **Mitigation**:
387
+ - Make Docker tests optional
388
+ - Provide mock alternative
389
+ - Document Docker setup clearly
390
+
391
+ ### Risk 3: Windows compatibility issues
392
+
393
+ - **Impact**: High
394
+ - **Probability**: Medium
395
+
396
+ **Mitigation**:
397
+ - Test on Windows in CI/CD
398
+ - Handle symlink failures (already done)
399
+ - Path normalization (use `path.join()`)
400
+
401
+ ### Risk 4: Breaking changes during improvement implementation
402
+
403
+ - **Impact**: High
404
+ - **Probability**: Low
405
+
406
+ **Mitigation**:
407
+ - Comprehensive tests before changes
408
+ - Feature flags for new features
409
+ - Gradual rollout (start with P1)
410
+ - Backup system (itself a P1 improvement, so it should land before the other changes)
411
+
412
+ ## Recommended Approach
413
+
414
+ ### Phase 1: Test Infrastructure (Immediate)
415
+
416
+ **Goal**: Create test environment and fixtures
417
+
418
+ 1. Create `test-env/` directory structure
419
+ 2. Build 15 test fixtures
420
+ 3. Create 4 validation helpers
421
+ 4. Write 4 automation scripts
422
+
423
+ - **Duration**: 2-3 hours
424
+ - **Deliverable**: Test infrastructure ready
425
+
426
+ ### Phase 2: Edge Case Tests (Immediate)
427
+
428
+ **Goal**: Expand test coverage to 50+ tests
429
+
430
+ 1. Create 8 edge case test files
431
+ 2. Create 6 integration test files
432
+ 3. Create 11 agent validation tests
433
+ 4. Create 4 package manager tests
434
+
435
+ - **Duration**: 12-15 hours
436
+ - **Deliverable**: Comprehensive test suite
437
+
438
+ ### Phase 3: Multi-Installation Testing (Immediate)
439
+
440
+ **Goal**: Validate across platforms and scenarios
441
+
442
+ 1. Create `run-multi-install.sh` script
443
+ 2. Test 13 installation scenarios
444
+ 3. Generate performance benchmarks
445
+ 4. Create HTML report generator
446
+
447
+ - **Duration**: 3-4 hours
448
+ - **Deliverable**: Automated validation across scenarios
449
+
450
+ ### Phase 4: Critical Improvements (Priority 1)
451
+
452
+ **Goal**: Security and data loss prevention
453
+
454
+ 1. Implement backup system
455
+ 2. Implement atomic installation
456
+ 3. Enhance security validation
457
+
458
+ - **Duration**: 10-13 hours
459
+ - **Deliverable**: Production-ready reliability
460
+
461
+ ### Phase 5: UX Improvements (Priority 2)
462
+
463
+ **Goal**: Better error handling and recovery
464
+
465
+ 1. Create `forge doctor` command
466
+ 2. Interactive recovery mode
467
+ 3. Progress indication
468
+
469
+ - **Duration**: 9-12 hours
470
+ - **Deliverable**: Better user experience
471
+
472
+ ## Success Metrics
473
+
474
+ 1. **Test coverage**: 50+ tests (up from the current 9 test files)
475
+ 2. **Edge case coverage**: 100% of identified edge cases tested
476
+ 3. **Security validation**: 100% of injection attempts blocked
477
+ 4. **Installation success rate**: 99%+ across 13 scenarios
478
+ 5. **Performance**: Quick mode < 30 seconds
479
+ 6. **User satisfaction**: Clear error messages, recovery options
480
+
481
+ ## Next Steps
482
+
483
+ 1. Create Beads epic: `bd create "Comprehensive test environment"`
484
+ 2. Create branch: `git checkout -b feat/test-environment`
485
+ 3. Implement Phase 1 (test infrastructure)
486
+ 4. Implement Phase 2 (edge case tests)
487
+ 5. Implement Phase 3 (multi-installation)
488
+ 6. Generate first report
489
+ 7. Review with team
490
+ 8. Implement Phases 4-5 based on priorities
491
+
492
+ ## References
493
+
494
+ - Forge codebase: `bin/forge.js`, `bin/forge-validate.js`, `lib/plugin-manager.js`
495
+ - Security test pattern: `test/rollback-edge-cases.test.js`
496
+ - Installation script: `install.sh`
497
+ - Plugin definitions: `lib/agents/*.plugin.json`
498
+
499
+ ## Appendix: Test Scenarios Matrix
500
+
501
+ | Category | Scenario | Files Affected | Priority |
502
+ |----------|----------|----------------|----------|
503
+ | Prerequisites | Missing git | bin/forge.js:146-200 | P1 |
504
+ | Prerequisites | Node < 20 | bin/forge.js:146-200 | P1 |
505
+ | Prerequisites | No package manager | install.sh:114-138 | P1 |
506
+ | Permissions | Read-only .claude/ | bin/forge.js (multiple) | P1 |
507
+ | Git States | Detached HEAD | bin/forge.js (git ops) | P2 |
508
+ | Git States | Uncommitted changes | bin/forge.js (git ops) | P2 |
509
+ | Git States | Merge conflict | bin/forge.js (git ops) | P2 |
510
+ | Partial Install | Missing commands | bin/forge.js:400-406 | P1 |
511
+ | Conflicts | Both AGENTS + CLAUDE | bin/forge.js:275-340 | P2 |
512
+ | File Limits | AGENTS.md > 200 lines | test/agents-md/structure.test.js | P3 |
513
+ | Security | Shell injection | All user inputs | P1 |
514
+ | Security | Path traversal | File operations | P1 |
515
+ | Security | Unicode injection | All user inputs | P1 |
516
+ | Network | npm install timeout | install.sh:280-290 | P2 |
517
+ | Network | API validation failure | bin/forge.js:2800-3000 | P3 |
518
+
519
+ **Total scenarios**: 15 fixtures + 40+ test cases = 55+ tests needed