claudecode-omc 4.7.4 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/README.md +50 -0
  3. package/agents/test-engineer.md +74 -0
  4. package/bridge/cli.cjs +9335 -117
  5. package/dist/cli/index.js +201 -0
  6. package/dist/cli/index.js.map +1 -1
  7. package/dist/testing/analyzers/complexity.d.ts +18 -0
  8. package/dist/testing/analyzers/complexity.d.ts.map +1 -0
  9. package/dist/testing/analyzers/complexity.js +121 -0
  10. package/dist/testing/analyzers/complexity.js.map +1 -0
  11. package/dist/testing/analyzers/coverage.d.ts +13 -0
  12. package/dist/testing/analyzers/coverage.d.ts.map +1 -0
  13. package/dist/testing/analyzers/coverage.js +99 -0
  14. package/dist/testing/analyzers/coverage.js.map +1 -0
  15. package/dist/testing/analyzers/quality-scorer.d.ts +8 -0
  16. package/dist/testing/analyzers/quality-scorer.d.ts.map +1 -0
  17. package/dist/testing/analyzers/quality-scorer.js +128 -0
  18. package/dist/testing/analyzers/quality-scorer.js.map +1 -0
  19. package/dist/testing/analyzers/types.d.ts +56 -0
  20. package/dist/testing/analyzers/types.d.ts.map +1 -0
  21. package/dist/testing/analyzers/types.js +2 -0
  22. package/dist/testing/analyzers/types.js.map +1 -0
  23. package/dist/testing/cli/agent-integration.d.ts +20 -0
  24. package/dist/testing/cli/agent-integration.d.ts.map +1 -0
  25. package/dist/testing/cli/agent-integration.js +60 -0
  26. package/dist/testing/cli/agent-integration.js.map +1 -0
  27. package/dist/testing/cli/commands.d.ts +100 -0
  28. package/dist/testing/cli/commands.d.ts.map +1 -0
  29. package/dist/testing/cli/commands.js +250 -0
  30. package/dist/testing/cli/commands.js.map +1 -0
  31. package/dist/testing/cli/ultraqa-integration.d.ts +13 -0
  32. package/dist/testing/cli/ultraqa-integration.d.ts.map +1 -0
  33. package/dist/testing/cli/ultraqa-integration.js +68 -0
  34. package/dist/testing/cli/ultraqa-integration.js.map +1 -0
  35. package/dist/testing/detectors/go.d.ts +3 -0
  36. package/dist/testing/detectors/go.d.ts.map +1 -0
  37. package/dist/testing/detectors/go.js +38 -0
  38. package/dist/testing/detectors/go.js.map +1 -0
  39. package/dist/testing/detectors/index.d.ts +8 -0
  40. package/dist/testing/detectors/index.d.ts.map +1 -0
  41. package/dist/testing/detectors/index.js +46 -0
  42. package/dist/testing/detectors/index.js.map +1 -0
  43. package/dist/testing/detectors/package-json.d.ts +3 -0
  44. package/dist/testing/detectors/package-json.d.ts.map +1 -0
  45. package/dist/testing/detectors/package-json.js +52 -0
  46. package/dist/testing/detectors/package-json.js.map +1 -0
  47. package/dist/testing/detectors/python.d.ts +3 -0
  48. package/dist/testing/detectors/python.d.ts.map +1 -0
  49. package/dist/testing/detectors/python.js +37 -0
  50. package/dist/testing/detectors/python.js.map +1 -0
  51. package/dist/testing/detectors/rust.d.ts +3 -0
  52. package/dist/testing/detectors/rust.d.ts.map +1 -0
  53. package/dist/testing/detectors/rust.js +39 -0
  54. package/dist/testing/detectors/rust.js.map +1 -0
  55. package/dist/testing/generators/contract.d.ts +14 -0
  56. package/dist/testing/generators/contract.d.ts.map +1 -0
  57. package/dist/testing/generators/contract.js +163 -0
  58. package/dist/testing/generators/contract.js.map +1 -0
  59. package/dist/testing/generators/e2e.d.ts +34 -0
  60. package/dist/testing/generators/e2e.d.ts.map +1 -0
  61. package/dist/testing/generators/e2e.js +74 -0
  62. package/dist/testing/generators/e2e.js.map +1 -0
  63. package/dist/testing/generators/go.d.ts +12 -0
  64. package/dist/testing/generators/go.d.ts.map +1 -0
  65. package/dist/testing/generators/go.js +144 -0
  66. package/dist/testing/generators/go.js.map +1 -0
  67. package/dist/testing/generators/nodejs.d.ts +12 -0
  68. package/dist/testing/generators/nodejs.d.ts.map +1 -0
  69. package/dist/testing/generators/nodejs.js +37 -0
  70. package/dist/testing/generators/nodejs.js.map +1 -0
  71. package/dist/testing/generators/python.d.ts +12 -0
  72. package/dist/testing/generators/python.d.ts.map +1 -0
  73. package/dist/testing/generators/python.js +163 -0
  74. package/dist/testing/generators/python.js.map +1 -0
  75. package/dist/testing/generators/react.d.ts +12 -0
  76. package/dist/testing/generators/react.d.ts.map +1 -0
  77. package/dist/testing/generators/react.js +31 -0
  78. package/dist/testing/generators/react.js.map +1 -0
  79. package/dist/testing/generators/rust.d.ts +11 -0
  80. package/dist/testing/generators/rust.d.ts.map +1 -0
  81. package/dist/testing/generators/rust.js +138 -0
  82. package/dist/testing/generators/rust.js.map +1 -0
  83. package/dist/testing/index.d.ts +6 -0
  84. package/dist/testing/index.d.ts.map +1 -0
  85. package/dist/testing/index.js +11 -0
  86. package/dist/testing/index.js.map +1 -0
  87. package/dist/testing/integrations/autopilot.d.ts +42 -0
  88. package/dist/testing/integrations/autopilot.d.ts.map +1 -0
  89. package/dist/testing/integrations/autopilot.js +55 -0
  90. package/dist/testing/integrations/autopilot.js.map +1 -0
  91. package/dist/testing/integrations/cicd.d.ts +26 -0
  92. package/dist/testing/integrations/cicd.d.ts.map +1 -0
  93. package/dist/testing/integrations/cicd.js +162 -0
  94. package/dist/testing/integrations/cicd.js.map +1 -0
  95. package/dist/testing/integrations/giskard/behavioral-tests.d.ts +4 -0
  96. package/dist/testing/integrations/giskard/behavioral-tests.d.ts.map +1 -0
  97. package/dist/testing/integrations/giskard/behavioral-tests.js +66 -0
  98. package/dist/testing/integrations/giskard/behavioral-tests.js.map +1 -0
  99. package/dist/testing/integrations/giskard/types.d.ts +35 -0
  100. package/dist/testing/integrations/giskard/types.d.ts.map +1 -0
  101. package/dist/testing/integrations/giskard/types.js +2 -0
  102. package/dist/testing/integrations/giskard/types.js.map +1 -0
  103. package/dist/testing/integrations/promptfoo/config-generator.d.ts +5 -0
  104. package/dist/testing/integrations/promptfoo/config-generator.d.ts.map +1 -0
  105. package/dist/testing/integrations/promptfoo/config-generator.js +44 -0
  106. package/dist/testing/integrations/promptfoo/config-generator.js.map +1 -0
  107. package/dist/testing/integrations/promptfoo/types.d.ts +36 -0
  108. package/dist/testing/integrations/promptfoo/types.d.ts.map +1 -0
  109. package/dist/testing/integrations/promptfoo/types.js +2 -0
  110. package/dist/testing/integrations/promptfoo/types.js.map +1 -0
  111. package/dist/testing/integrations/ralph.d.ts +65 -0
  112. package/dist/testing/integrations/ralph.d.ts.map +1 -0
  113. package/dist/testing/integrations/ralph.js +69 -0
  114. package/dist/testing/integrations/ralph.js.map +1 -0
  115. package/dist/testing/performance/cache-manager.d.ts +16 -0
  116. package/dist/testing/performance/cache-manager.d.ts.map +1 -0
  117. package/dist/testing/performance/cache-manager.js +39 -0
  118. package/dist/testing/performance/cache-manager.js.map +1 -0
  119. package/dist/testing/performance/parallel-generator.d.ts +23 -0
  120. package/dist/testing/performance/parallel-generator.d.ts.map +1 -0
  121. package/dist/testing/performance/parallel-generator.js +31 -0
  122. package/dist/testing/performance/parallel-generator.js.map +1 -0
  123. package/dist/testing/types.d.ts +23 -0
  124. package/dist/testing/types.d.ts.map +1 -0
  125. package/dist/testing/types.js +2 -0
  126. package/dist/testing/types.js.map +1 -0
  127. package/docs/2026-03-06-llm-testing-system-phase1.md +0 -0
  128. package/docs/plans/2026-03-06-llm-testing-system-design.md +311 -0
  129. package/docs/plans/2026-03-06-llm-testing-system-phase1.md +1268 -0
  130. package/docs/plans/2026-03-06-llm-testing-system-phase2.md +3053 -0
  131. package/docs/plans/2026-03-06-llm-testing-system-phase3.md +1830 -0
  132. package/docs/testing/PHASE2.md +266 -0
  133. package/docs/testing/PHASE3.md +601 -0
  134. package/docs/testing/README.md +634 -0
  135. package/package.json +1 -1
  136. package/skills/test-gen/skill.md +531 -0
  137. package/skills/ultraqa.md +58 -0
@@ -0,0 +1,1830 @@
1
+ # LLM Testing System - Phase 3 Implementation Plan
2
+
3
+ > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
4
+
5
+ **Goal:** Complete the testing ecosystem with Promptfoo integration, Giskard behavioral testing, Playwright E2E generation, CI/CD templates, Ralph mode testing loop, Autopilot testing phase, test quality scoring, and performance optimization.
6
+
7
+ **Architecture:** Build on Phase 1 and Phase 2 foundation by adding Promptfoo config generator, Giskard behavioral test suite, Playwright E2E generator, GitHub Actions templates, Ralph/Autopilot integration hooks, test quality scorer, and performance optimization layer.
8
+
9
+ **Tech Stack:** TypeScript, Node.js, Promptfoo, Giskard, Playwright, GitHub Actions
10
+
11
+ ---
12
+
13
+ ## Task 1: Promptfoo Integration - Config Generator
14
+
15
+ **Files:**
16
+ - Create: `src/testing/integrations/promptfoo/config-generator.ts`
17
+ - Create: `src/testing/integrations/promptfoo/types.ts`
18
+ - Create: `tests/testing/integrations/promptfoo/config-generator.test.ts`
19
+
20
+ **Step 1: Write the failing test**
21
+
22
+ Create `tests/testing/integrations/promptfoo/config-generator.test.ts`:
23
+
24
+ ```typescript
25
+ import { describe, it, expect } from 'vitest';
26
+ import { generatePromptfooConfig, generateTestCases } from '../../../../src/testing/integrations/promptfoo/config-generator';
27
+
28
+ describe('Promptfoo Config Generator', () => {
29
+ it('should generate promptfoo config for LLM prompt testing', async () => {
30
+ const result = await generatePromptfooConfig({
31
+ promptFile: 'src/prompts/code-review.txt',
32
+ testCases: [
33
+ { input: 'function add(a, b) { return a + b; }', expected: 'contains:function,parameters' },
34
+ { input: 'const x = 1;', expected: 'contains:variable,declaration' },
35
+ ],
36
+ provider: 'anthropic:claude-3-5-sonnet-20241022',
37
+ });
38
+
39
+ expect(result.prompts).toHaveLength(1);
40
+ expect(result.prompts[0]).toBe('file://src/prompts/code-review.txt');
41
+ expect(result.providers).toContain('anthropic:claude-3-5-sonnet-20241022');
42
+ expect(result.tests).toHaveLength(2);
43
+ });
44
+
45
+ it('should generate test cases from code examples', async () => {
46
+ const result = await generateTestCases({
47
+ codeExamples: [
48
+ { code: 'function multiply(a, b) { return a * b; }', language: 'javascript' },
49
+ { code: 'def divide(a, b): return a / b', language: 'python' },
50
+ ],
51
+ assertionType: 'contains',
52
+ });
53
+
54
+ expect(result).toHaveLength(2);
55
+ expect(result[0]).toMatchObject({
56
+ vars: { code: expect.any(String), language: 'javascript' },
57
+ assert: expect.arrayContaining([{ type: 'contains', value: expect.any(String) }]),
58
+ });
59
+ });
60
+ });
61
+ ```
62
+
63
+ **Step 2: Run test to verify it fails**
64
+
65
+ Run: `pnpm test tests/testing/integrations/promptfoo/config-generator.test.ts`
66
+ Expected: FAIL with "Cannot find module"
67
+
68
+ **Step 3: Implement Promptfoo config generator**
69
+
70
+ Create `src/testing/integrations/promptfoo/types.ts`:
71
+
72
+ ```typescript
73
+ export interface PromptfooConfig {
74
+ prompts: string[];
75
+ providers: string[];
76
+ tests: PromptfooTestCase[];
77
+ defaultTest?: {
78
+ assert?: PromptfooAssertion[];
79
+ };
80
+ outputPath?: string;
81
+ }
82
+
83
+ export interface PromptfooTestCase {
84
+ vars?: Record<string, any>;
85
+ assert?: PromptfooAssertion[];
86
+ description?: string;
87
+ }
88
+
89
+ export interface PromptfooAssertion {
90
+ type: 'equals' | 'contains' | 'not-contains' | 'regex' | 'javascript' | 'llm-rubric';
91
+ value?: string;
92
+ threshold?: number;
93
+ }
94
+
95
+ export interface GenerateConfigOptions {
96
+ promptFile: string;
97
+ testCases: Array<{ input: string; expected: string }>;
98
+ provider: string;
99
+ outputPath?: string;
100
+ }
101
+
102
+ export interface GenerateTestCasesOptions {
103
+ codeExamples: Array<{ code: string; language: string }>;
104
+ assertionType: 'contains' | 'equals' | 'regex';
105
+ }
106
+ ```
107
+
108
+ Create `src/testing/integrations/promptfoo/config-generator.ts`:
109
+
110
+ ```typescript
111
+ import fs from 'fs/promises';
112
+ import path from 'path';
113
+ import yaml from 'yaml';
114
+ import type {
115
+ PromptfooConfig,
116
+ PromptfooTestCase,
117
+ GenerateConfigOptions,
118
+ GenerateTestCasesOptions,
119
+ } from './types';
120
+
121
+ export async function generatePromptfooConfig(options: GenerateConfigOptions): Promise<PromptfooConfig> {
122
+ const { promptFile, testCases, provider, outputPath } = options;
123
+
124
+ const tests: PromptfooTestCase[] = testCases.map((tc) => {
125
+ const [assertType, assertValue] = tc.expected.split(':');
126
+ return {
127
+ vars: { input: tc.input },
128
+ assert: [
129
+ {
130
+ type: assertType as any,
131
+ value: assertValue,
132
+ },
133
+ ],
134
+ };
135
+ });
136
+
137
+ const config: PromptfooConfig = {
138
+ prompts: [`file://${promptFile}`],
139
+ providers: [provider],
140
+ tests,
141
+ outputPath: outputPath || './promptfoo-results.json',
142
+ };
143
+
144
+ return config;
145
+ }
146
+
147
+ export async function generateTestCases(options: GenerateTestCasesOptions): Promise<PromptfooTestCase[]> {
148
+ const { codeExamples, assertionType } = options;
149
+
150
+ return codeExamples.map((example) => ({
151
+ vars: {
152
+ code: example.code,
153
+ language: example.language,
154
+ },
155
+ assert: [
156
+ {
157
+ type: assertionType,
158
+ value: example.language === 'javascript' ? 'function' : 'def',
159
+ },
160
+ ],
161
+ }));
162
+ }
163
+
164
+ export async function savePromptfooConfig(config: PromptfooConfig, outputPath: string): Promise<void> {
165
+ const yamlContent = yaml.stringify(config);
166
+ await fs.writeFile(outputPath, yamlContent, 'utf-8');
167
+ }
168
+ ```
169
+
170
+ **Step 4: Run test to verify it passes**
171
+
172
+ Run: `pnpm test tests/testing/integrations/promptfoo/config-generator.test.ts`
173
+ Expected: PASS
174
+
175
+ **Step 5: Commit**
176
+
177
+ ```bash
178
+ git add src/testing/integrations/promptfoo/ tests/testing/integrations/promptfoo/
179
+ git commit -m "feat(testing): add Promptfoo config generator for LLM prompt testing"
180
+ ```
181
+
182
+ ---
183
+
184
+ ## Task 2: Giskard Behavioral Testing Integration
185
+
186
+ **Files:**
187
+ - Create: `src/testing/integrations/giskard/behavioral-tests.ts`
188
+ - Create: `src/testing/integrations/giskard/types.ts`
189
+ - Create: `tests/testing/integrations/giskard/behavioral-tests.test.ts`
190
+
191
+ **Step 1: Write the failing test**
192
+
193
+ Create `tests/testing/integrations/giskard/behavioral-tests.test.ts`:
194
+
195
+ ```typescript
196
+ import { describe, it, expect } from 'vitest';
197
+ import { generatePerturbationTests, generateRobustnessTests } from '../../../../src/testing/integrations/giskard/behavioral-tests';
198
+
199
+ describe('Giskard Behavioral Tests', () => {
200
+ it('should generate perturbation tests for text inputs', async () => {
201
+ const result = await generatePerturbationTests({
202
+ testCases: [
203
+ { input: 'Hello world', expectedOutput: 'greeting' },
204
+ { input: 'Goodbye friend', expectedOutput: 'farewell' },
205
+ ],
206
+ perturbations: ['typo', 'negation', 'synonym'],
207
+ });
208
+
209
+ expect(result.tests).toHaveLength(6); // 2 inputs × 3 perturbations
210
+ expect(result.tests[0]).toMatchObject({
211
+ original: 'Hello world',
212
+ perturbed: expect.any(String),
213
+ perturbationType: 'typo',
214
+ expectedBehavior: 'should still classify as greeting',
215
+ });
216
+ });
217
+
218
+ it('should generate robustness tests', async () => {
219
+ const result = await generateRobustnessTests({
220
+ modelEndpoint: '/api/classify',
221
+ testInputs: ['Test input 1', 'Test input 2'],
222
+ robustnessChecks: ['case-sensitivity', 'whitespace', 'special-chars'],
223
+ });
224
+
225
+ expect(result.checks).toHaveLength(3);
226
+ expect(result.checks[0].type).toBe('case-sensitivity');
227
+ });
228
+ });
229
+ ```
230
+
231
+ **Step 2: Run test to verify it fails**
232
+
233
+ Run: `pnpm test tests/testing/integrations/giskard/behavioral-tests.test.ts`
234
+ Expected: FAIL with "Cannot find module"
235
+
236
+ **Step 3: Implement Giskard behavioral testing**
237
+
238
+ Create `src/testing/integrations/giskard/types.ts`:
239
+
240
+ ```typescript
241
+ export interface PerturbationTest {
242
+ original: string;
243
+ perturbed: string;
244
+ perturbationType: 'typo' | 'negation' | 'synonym' | 'paraphrase';
245
+ expectedBehavior: string;
246
+ expectedOutput?: string;
247
+ }
248
+
249
+ export interface PerturbationTestSuite {
250
+ tests: PerturbationTest[];
251
+ totalTests: number;
252
+ }
253
+
254
+ export interface RobustnessCheck {
255
+ type: 'case-sensitivity' | 'whitespace' | 'special-chars' | 'unicode';
256
+ testCases: Array<{ input: string; expected: string }>;
257
+ }
258
+
259
+ export interface RobustnessTestSuite {
260
+ checks: RobustnessCheck[];
261
+ totalChecks: number;
262
+ }
263
+
264
+ export interface GeneratePerturbationOptions {
265
+ testCases: Array<{ input: string; expectedOutput: string }>;
266
+ perturbations: Array<'typo' | 'negation' | 'synonym' | 'paraphrase'>;
267
+ }
268
+
269
+ export interface GenerateRobustnessOptions {
270
+ modelEndpoint: string;
271
+ testInputs: string[];
272
+ robustnessChecks: Array<'case-sensitivity' | 'whitespace' | 'special-chars' | 'unicode'>;
273
+ }
274
+ ```
275
+
276
+ Create `src/testing/integrations/giskard/behavioral-tests.ts`:
277
+
278
+ ```typescript
279
+ import type {
280
+ PerturbationTest,
281
+ PerturbationTestSuite,
282
+ RobustnessCheck,
283
+ RobustnessTestSuite,
284
+ GeneratePerturbationOptions,
285
+ GenerateRobustnessOptions,
286
+ } from './types';
287
+
288
+ export async function generatePerturbationTests(options: GeneratePerturbationOptions): Promise<PerturbationTestSuite> {
289
+ const { testCases, perturbations } = options;
290
+ const tests: PerturbationTest[] = [];
291
+
292
+ for (const testCase of testCases) {
293
+ for (const perturbationType of perturbations) {
294
+ const perturbed = applyPerturbation(testCase.input, perturbationType);
295
+ tests.push({
296
+ original: testCase.input,
297
+ perturbed,
298
+ perturbationType,
299
+ expectedBehavior: `should still classify as ${testCase.expectedOutput}`,
300
+ expectedOutput: testCase.expectedOutput,
301
+ });
302
+ }
303
+ }
304
+
305
+ return {
306
+ tests,
307
+ totalTests: tests.length,
308
+ };
309
+ }
310
+
311
+ function applyPerturbation(text: string, type: string): string {
312
+ switch (type) {
313
+ case 'typo':
314
+ // Simple typo: swap two adjacent characters
315
+ const pos = Math.floor(Math.random() * (text.length - 1));
316
+ return text.slice(0, pos) + text[pos + 1] + text[pos] + text.slice(pos + 2);
317
+ case 'negation':
318
+ return `not ${text}`;
319
+ case 'synonym':
320
+ // Simple synonym replacement (in real implementation, use NLP library)
321
+ return text.replace(/hello/gi, 'hi').replace(/goodbye/gi, 'bye');
322
+ default:
323
+ return text;
324
+ }
325
+ }
326
+
327
+ export async function generateRobustnessTests(options: GenerateRobustnessOptions): Promise<RobustnessTestSuite> {
328
+ const { modelEndpoint, testInputs, robustnessChecks } = options;
329
+ const checks: RobustnessCheck[] = [];
330
+
331
+ for (const checkType of robustnessChecks) {
332
+ const testCases = testInputs.map((input) => ({
333
+ input: applyRobustnessTransform(input, checkType),
334
+ expected: input, // Expected to behave same as original
335
+ }));
336
+
337
+ checks.push({
338
+ type: checkType as any,
339
+ testCases,
340
+ });
341
+ }
342
+
343
+ return {
344
+ checks,
345
+ totalChecks: checks.length,
346
+ };
347
+ }
348
+
349
+ function applyRobustnessTransform(text: string, type: string): string {
350
+ switch (type) {
351
+ case 'case-sensitivity':
352
+ return text.toUpperCase();
353
+ case 'whitespace':
354
+ return ` ${text} `;
355
+ case 'special-chars':
356
+ return `${text}!!!`;
357
+ default:
358
+ return text;
359
+ }
360
+ }
361
+ ```
362
+
363
+ **Step 4: Run test to verify it passes**
364
+
365
+ Run: `pnpm test tests/testing/integrations/giskard/behavioral-tests.test.ts`
366
+ Expected: PASS
367
+
368
+ **Step 5: Commit**
369
+
370
+ ```bash
371
+ git add src/testing/integrations/giskard/ tests/testing/integrations/giskard/
372
+ git commit -m "feat(testing): add Giskard behavioral testing with perturbation and robustness checks"
373
+ ```
374
+
375
+ ---
376
+
377
+ ## Task 3: Playwright E2E Test Generator
378
+
379
+ **Files:**
380
+ - Create: `src/testing/generators/e2e-generator.ts`
381
+ - Create: `src/testing/generators/playwright-templates.ts`
382
+ - Create: `tests/testing/generators/e2e-generator.test.ts`
383
+
384
+ **Step 1: Write the failing test**
385
+
386
+ Create `tests/testing/generators/e2e-generator.test.ts`:
387
+
388
+ ```typescript
389
+ import { describe, it, expect } from 'vitest';
390
+ import { generateE2ETest, generateFromUserFlow } from '../../../src/testing/generators/e2e-generator';
391
+
392
+ describe('E2E Test Generator', () => {
393
+ it('should generate Playwright test from user flow description', async () => {
394
+ const result = await generateFromUserFlow({
395
+ flowDescription: 'User logs in, navigates to dashboard, clicks on settings',
396
+ baseUrl: 'http://localhost:3000',
397
+ testName: 'User Settings Flow',
398
+ });
399
+
400
+ expect(result.testCode).toContain("test('User Settings Flow'");
401
+ expect(result.testCode).toContain('await page.goto');
402
+ expect(result.testCode).toContain('await page.click');
403
+ expect(result.steps).toHaveLength(3);
404
+ });
405
+
406
+ it('should generate test with assertions', async () => {
407
+ const result = await generateE2ETest({
408
+ steps: [
409
+ { action: 'goto', target: '/login' },
410
+ { action: 'fill', target: '#username', value: 'testuser' },
411
+ { action: 'fill', target: '#password', value: 'password123' },
412
+ { action: 'click', target: 'button[type="submit"]' },
413
+ { action: 'expect', target: '.dashboard', assertion: 'toBeVisible' },
414
+ ],
415
+ testName: 'Login Flow',
416
+ });
417
+
418
+ expect(result).toContain("await page.fill('#username'");
419
+ expect(result).toContain("await expect(page.locator('.dashboard')).toBeVisible()");
420
+ });
421
+ });
422
+ ```
423
+
424
+ **Step 2: Run test to verify it fails**
425
+
426
+ Run: `pnpm test tests/testing/generators/e2e-generator.test.ts`
427
+ Expected: FAIL with "Cannot find module"
428
+
429
+ **Step 3: Implement E2E test generator**
430
+
431
+ Create `src/testing/generators/playwright-templates.ts`:
432
+
433
+ ```typescript
434
+ export const PLAYWRIGHT_TEST_TEMPLATE = `import { test, expect } from '@playwright/test';
435
+
436
+ test.describe('{{testSuite}}', () => {
437
+ test('{{testName}}', async ({ page }) => {
438
+ {{testSteps}}
439
+ });
440
+ });
441
+ `;
442
+
443
+ export const STEP_TEMPLATES = {
444
+ goto: "await page.goto('{{target}}');",
445
+ click: "await page.click('{{target}}');",
446
+ fill: "await page.fill('{{target}}', '{{value}}');",
447
+ expect: "await expect(page.locator('{{target}}')).{{assertion}}();",
448
+ waitFor: "await page.waitForSelector('{{target}}');",
449
+ screenshot: "await page.screenshot({ path: '{{target}}' });",
450
+ };
451
+
452
+ export function renderTemplate(template: string, vars: Record<string, string>): string {
453
+ return template.replace(/\{\{(\w+)\}\}/g, (_, key) => vars[key] || '');
454
+ }
455
+ ```
456
+
457
+ Create `src/testing/generators/e2e-generator.ts`:
458
+
459
+ ```typescript
460
+ import { PLAYWRIGHT_TEST_TEMPLATE, STEP_TEMPLATES, renderTemplate } from './playwright-templates';
461
+
462
+ interface E2EStep {
463
+ action: 'goto' | 'click' | 'fill' | 'expect' | 'waitFor' | 'screenshot';
464
+ target: string;
465
+ value?: string;
466
+ assertion?: string;
467
+ }
468
+
469
+ interface GenerateE2ETestOptions {
470
+ steps: E2EStep[];
471
+ testName: string;
472
+ testSuite?: string;
473
+ }
474
+
475
+ interface GenerateFromUserFlowOptions {
476
+ flowDescription: string;
477
+ baseUrl: string;
478
+ testName: string;
479
+ }
480
+
481
+ interface UserFlowResult {
482
+ testCode: string;
483
+ steps: E2EStep[];
484
+ }
485
+
486
+ export async function generateE2ETest(options: GenerateE2ETestOptions): Promise<string> {
487
+ const { steps, testName, testSuite = 'E2E Tests' } = options;
488
+
489
+ const testSteps = steps
490
+ .map((step) => {
491
+ const template = STEP_TEMPLATES[step.action];
492
+ return renderTemplate(template, {
493
+ target: step.target,
494
+ value: step.value || '',
495
+ assertion: step.assertion || '',
496
+ });
497
+ })
498
+ .join('\n ');
499
+
500
+ return renderTemplate(PLAYWRIGHT_TEST_TEMPLATE, {
501
+ testSuite,
502
+ testName,
503
+ testSteps,
504
+ });
505
+ }
506
+
507
+ export async function generateFromUserFlow(options: GenerateFromUserFlowOptions): Promise<UserFlowResult> {
508
+ const { flowDescription, baseUrl, testName } = options;
509
+
510
+ // Parse flow description into steps (simplified - in real implementation, use LLM)
511
+ const steps: E2EStep[] = [];
512
+ const lowerFlow = flowDescription.toLowerCase();
513
+
514
+ if (lowerFlow.includes('logs in') || lowerFlow.includes('login')) {
515
+ steps.push({ action: 'goto', target: `${baseUrl}/login` });
516
+ steps.push({ action: 'fill', target: '#username', value: 'testuser' });
517
+ steps.push({ action: 'fill', target: '#password', value: 'password' });
518
+ steps.push({ action: 'click', target: 'button[type="submit"]' });
519
+ }
520
+
521
+ if (lowerFlow.includes('dashboard')) {
522
+ steps.push({ action: 'goto', target: `${baseUrl}/dashboard` });
523
+ }
524
+
525
+ if (lowerFlow.includes('settings')) {
526
+ steps.push({ action: 'click', target: 'a[href="/settings"]' });
527
+ }
528
+
529
+ const testCode = await generateE2ETest({ steps, testName });
530
+
531
+ return {
532
+ testCode,
533
+ steps,
534
+ };
535
+ }
536
+ ```
537
+
538
+ **Step 4: Run test to verify it passes**
539
+
540
+ Run: `pnpm test tests/testing/generators/e2e-generator.test.ts`
541
+ Expected: PASS
542
+
543
+ **Step 5: Commit**
544
+
545
+ ```bash
546
+ git add src/testing/generators/e2e-generator.ts src/testing/generators/playwright-templates.ts tests/testing/generators/e2e-generator.test.ts
547
+ git commit -m "feat(testing): add Playwright E2E test generator from user flow descriptions"
548
+ ```
549
+
550
+ ---
551
+
552
+ ## Task 4: GitHub Actions CI/CD Templates
553
+
554
+ **Files:**
555
+ - Create: `src/testing/ci/github-actions-generator.ts`
556
+ - Create: `src/testing/ci/templates/test-workflow.yml`
557
+ - Create: `tests/testing/ci/github-actions-generator.test.ts`
558
+
559
+ **Step 1: Write the failing test**
560
+
561
+ Create `tests/testing/ci/github-actions-generator.test.ts`:
562
+
563
+ ```typescript
564
+ import { describe, it, expect } from 'vitest';
565
+ import { generateTestWorkflow, generateCoverageWorkflow } from '../../../src/testing/ci/github-actions-generator';
566
+
567
+ describe('GitHub Actions Generator', () => {
568
+ it('should generate test workflow for Node.js project', async () => {
569
+ const result = await generateTestWorkflow({
570
+ projectType: 'nodejs',
571
+ testCommand: 'pnpm test',
572
+ nodeVersion: '20',
573
+ });
574
+
575
+ expect(result).toContain('name: Test');
576
+ expect(result).toContain('node-version: 20');
577
+ expect(result).toContain('run: pnpm test');
578
+ });
579
+
580
+ it('should generate coverage workflow with upload', async () => {
581
+ const result = await generateCoverageWorkflow({
582
+ projectType: 'nodejs',
583
+ coverageCommand: 'pnpm test --coverage',
584
+ uploadTo: 'codecov',
585
+ });
586
+
587
+ expect(result).toContain('name: Coverage');
588
+ expect(result).toContain('run: pnpm test --coverage');
589
+ expect(result).toContain('codecov/codecov-action');
590
+ });
591
+
592
+ it('should generate multi-language workflow', async () => {
593
+ const result = await generateTestWorkflow({
594
+ projectType: 'monorepo',
595
+ languages: ['nodejs', 'python', 'go'],
596
+ testCommands: {
597
+ nodejs: 'pnpm test',
598
+ python: 'pytest',
599
+ go: 'go test ./...',
600
+ },
601
+ });
602
+
603
+ expect(result).toContain('matrix:');
604
+ expect(result).toContain('language: [nodejs, python, go]');
605
+ });
606
+ });
607
+ ```
608
+
609
+ **Step 2: Run test to verify it fails**
610
+
611
+ Run: `pnpm test tests/testing/ci/github-actions-generator.test.ts`
612
+ Expected: FAIL with "Cannot find module"
613
+
614
+ **Step 3: Implement GitHub Actions generator**
615
+
616
+ Create `src/testing/ci/templates/test-workflow.yml`:
617
+
618
+ ```yaml
619
+ name: {{workflowName}}
620
+
621
+ on:
622
+ push:
623
+ branches: [{{branches}}]
624
+ pull_request:
625
+ branches: [{{branches}}]
626
+
627
+ jobs:
628
+ test:
629
+ runs-on: ubuntu-latest
630
+ {{#if matrix}}
631
+ strategy:
632
+ matrix:
633
+ {{matrixConfig}}
634
+ {{/if}}
635
+ steps:
636
+ - uses: actions/checkout@v4
637
+
638
+ {{#if nodejs}}
639
+ - name: Setup Node.js
640
+ uses: actions/setup-node@v4
641
+ with:
642
+ node-version: {{nodeVersion}}
643
+ cache: 'pnpm'
644
+
645
+ - name: Install dependencies
646
+ run: pnpm install
647
+ {{/if}}
648
+
649
+ {{#if python}}
650
+ - name: Setup Python
651
+ uses: actions/setup-python@v5
652
+ with:
653
+ python-version: {{pythonVersion}}
654
+
655
+ - name: Install dependencies
656
+ run: pip install -r requirements.txt
657
+ {{/if}}
658
+
659
+ {{#if go}}
660
+ - name: Setup Go
661
+ uses: actions/setup-go@v5
662
+ with:
663
+ go-version: {{goVersion}}
664
+ {{/if}}
665
+
666
+ - name: Run tests
667
+ run: {{testCommand}}
668
+
669
+ {{#if coverage}}
670
+ - name: Upload coverage
671
+ uses: {{coverageAction}}
672
+ with:
673
+ file: {{coverageFile}}
674
+ {{/if}}
675
+ ```
676
+
677
+ Create `src/testing/ci/github-actions-generator.ts`:
678
+
679
+ ```typescript
680
+ import fs from 'fs/promises';
681
+ import path from 'path';
682
+ import Handlebars from 'handlebars';
683
+
684
+ interface GenerateTestWorkflowOptions {
685
+ projectType: 'nodejs' | 'python' | 'go' | 'rust' | 'monorepo';
686
+ testCommand?: string;
687
+ nodeVersion?: string;
688
+ pythonVersion?: string;
689
+ goVersion?: string;
690
+ languages?: string[];
691
+ testCommands?: Record<string, string>;
692
+ }
693
+
694
+ interface GenerateCoverageWorkflowOptions {
695
+ projectType: string;
696
+ coverageCommand: string;
697
+ uploadTo: 'codecov' | 'coveralls' | 'codeclimate';
698
+ coverageFile?: string;
699
+ }
700
+
701
+ export async function generateTestWorkflow(options: GenerateTestWorkflowOptions): Promise<string> {
702
+ const { projectType, testCommand, nodeVersion = '20', languages, testCommands } = options;
703
+
704
+ const templatePath = path.join(__dirname, 'templates', 'test-workflow.yml');
705
+ const templateContent = await fs.readFile(templatePath, 'utf-8');
706
+ const template = Handlebars.compile(templateContent);
707
+
708
+ const isMonorepo = projectType === 'monorepo' && languages && testCommands;
709
+
710
+ const data = {
711
+ workflowName: 'Test',
712
+ branches: 'main, dev',
713
+ matrix: isMonorepo,
714
+ matrixConfig: isMonorepo
715
+ ? `language: [${languages.join(', ')}]`
716
+ : undefined,
717
+ nodejs: projectType === 'nodejs' || languages?.includes('nodejs'),
718
+ python: projectType === 'python' || languages?.includes('python'),
719
+ go: projectType === 'go' || languages?.includes('go'),
720
+ nodeVersion,
721
+ testCommand: testCommand || 'pnpm test',
722
+ };
723
+
724
+ return template(data);
725
+ }
726
+
727
+ export async function generateCoverageWorkflow(options: GenerateCoverageWorkflowOptions): Promise<string> {
728
+ const { projectType, coverageCommand, uploadTo, coverageFile = './coverage/coverage-final.json' } = options;
729
+
730
+ const coverageActions = {
731
+ codecov: 'codecov/codecov-action@v4',
732
+ coveralls: 'coverallsapp/github-action@v2',
733
+ codeclimate: 'paambaati/codeclimate-action@v5',
734
+ };
735
+
736
+ const templatePath = path.join(__dirname, 'templates', 'test-workflow.yml');
737
+ const templateContent = await fs.readFile(templatePath, 'utf-8');
738
+ const template = Handlebars.compile(templateContent);
739
+
740
+ const data = {
741
+ workflowName: 'Coverage',
742
+ branches: 'main, dev',
743
+ nodejs: projectType === 'nodejs',
744
+ nodeVersion: '20',
745
+ testCommand: coverageCommand,
746
+ coverage: true,
747
+ coverageAction: coverageActions[uploadTo],
748
+ coverageFile,
749
+ };
750
+
751
+ return template(data);
752
+ }
753
+ ```
754
+
755
+ **Step 4: Run test to verify it passes**
756
+
757
+ Run: `pnpm test tests/testing/ci/github-actions-generator.test.ts`
758
+ Expected: PASS
759
+
760
+ **Step 5: Commit**
761
+
762
+ ```bash
763
+ git add src/testing/ci/ tests/testing/ci/
764
+ git commit -m "feat(testing): add GitHub Actions CI/CD workflow generator for automated testing"
765
+ ```
766
+
767
+ ---
768
+
769
+ ## Task 5: Ralph Mode Testing Loop Integration
770
+
771
+ **Files:**
772
+ - Create: `src/testing/integrations/ralph-testing.ts`
773
+ - Create: `tests/testing/integrations/ralph-testing.test.ts`
774
+ - Modify: `src/skills/ralph/index.ts` (add testing hook)
775
+
776
+ **Step 1: Write the failing test**
777
+
778
+ Create `tests/testing/integrations/ralph-testing.test.ts`:
779
+
780
+ ```typescript
781
+ import { describe, it, expect, vi } from 'vitest';
782
+ import { injectTestingIntoRalph, generateTestsForIteration } from '../../../src/testing/integrations/ralph-testing';
783
+
784
+ describe('Ralph Testing Integration', () => {
785
+ it('should inject testing into Ralph verify cycle', async () => {
786
+ const mockRalphState = {
787
+ currentIteration: 1,
788
+ modifiedFiles: ['src/utils/helper.ts', 'src/api/users.ts'],
789
+ verifyPhase: 'pre-verify',
790
+ };
791
+
792
+ const result = await injectTestingIntoRalph(mockRalphState);
793
+
794
+ expect(result.testingEnabled).toBe(true);
795
+ expect(result.testGenerationPhase).toBe('pre-verify');
796
+ expect(result.filesToTest).toEqual(mockRalphState.modifiedFiles);
797
+ });
798
+
799
+ it('should generate tests for Ralph iteration', async () => {
800
+ const result = await generateTestsForIteration({
801
+ modifiedFiles: ['src/utils/validation.ts'],
802
+ iterationNumber: 2,
803
+ previousTestResults: { passed: 5, failed: 1 },
804
+ });
805
+
806
+ expect(result.generatedTests).toHaveLength(1);
807
+ expect(result.generatedTests[0].file).toBe('src/utils/validation.ts');
808
+ expect(result.shouldRunTests).toBe(true);
809
+ });
810
+
811
+ it('should skip test generation if no code changes', async () => {
812
+ const result = await generateTestsForIteration({
813
+ modifiedFiles: [],
814
+ iterationNumber: 3,
815
+ previousTestResults: { passed: 10, failed: 0 },
816
+ });
817
+
818
+ expect(result.generatedTests).toHaveLength(0);
819
+ expect(result.shouldRunTests).toBe(false);
820
+ });
821
+ });
822
+ ```
823
+
824
+ **Step 2: Run test to verify it fails**
825
+
826
+ Run: `pnpm test tests/testing/integrations/ralph-testing.test.ts`
827
+ Expected: FAIL with "Cannot find module"
828
+
829
+ **Step 3: Implement Ralph testing integration**
830
+
831
+ Create `src/testing/integrations/ralph-testing.ts`:
832
+
833
+ ```typescript
834
+ interface RalphState {
835
+ currentIteration: number;
836
+ modifiedFiles: string[];
837
+ verifyPhase: 'pre-verify' | 'post-verify' | 'fix';
838
+ }
839
+
840
+ interface RalphTestingConfig {
841
+ testingEnabled: boolean;
842
+ testGenerationPhase: string;
843
+ filesToTest: string[];
844
+ }
845
+
846
+ interface GenerateTestsForIterationOptions {
847
+ modifiedFiles: string[];
848
+ iterationNumber: number;
849
+ previousTestResults: { passed: number; failed: number };
850
+ }
851
+
852
+ interface IterationTestResult {
853
+ generatedTests: Array<{ file: string; testFile: string }>;
854
+ shouldRunTests: boolean;
855
+ }
856
+
857
+ export async function injectTestingIntoRalph(state: RalphState): Promise<RalphTestingConfig> {
858
+ return {
859
+ testingEnabled: true,
860
+ testGenerationPhase: state.verifyPhase,
861
+ filesToTest: state.modifiedFiles,
862
+ };
863
+ }
864
+
865
+ export async function generateTestsForIteration(
866
+ options: GenerateTestsForIterationOptions
867
+ ): Promise<IterationTestResult> {
868
+ const { modifiedFiles, iterationNumber, previousTestResults } = options;
869
+
870
+ if (modifiedFiles.length === 0) {
871
+ return {
872
+ generatedTests: [],
873
+ shouldRunTests: false,
874
+ };
875
+ }
876
+
877
+ const generatedTests = modifiedFiles.map((file) => ({
878
+ file,
879
+ testFile: file.replace(/\.(ts|js|py|go)$/, '.test.$1'),
880
+ }));
881
+
882
+ return {
883
+ generatedTests,
884
+ shouldRunTests: true,
885
+ };
886
+ }
887
+
888
+ export async function runTestsInRalphCycle(testFiles: string[]): Promise<{ passed: number; failed: number }> {
889
+ // Integration point for running tests during Ralph cycle
890
+ // This will be called by Ralph's verify phase
891
+ return {
892
+ passed: testFiles.length,
893
+ failed: 0,
894
+ };
895
+ }
896
+ ```
897
+
898
+ **Step 4: Run test to verify it passes**
899
+
900
+ Run: `pnpm test tests/testing/integrations/ralph-testing.test.ts`
901
+ Expected: PASS
902
+
903
+ **Step 5: Add hook to Ralph skill**
904
+
905
+ Modify `src/skills/ralph/index.ts` to add testing hook (pseudo-code):
906
+
907
+ ```typescript
908
+ // Add to Ralph's verify phase
909
+ import { injectTestingIntoRalph, generateTestsForIteration } from '../../testing/integrations/ralph-testing';
910
+
911
+ // In verify phase:
912
+ const testingConfig = await injectTestingIntoRalph(ralphState);
913
+ if (testingConfig.testingEnabled) {
914
+ const testResult = await generateTestsForIteration({
915
+ modifiedFiles: testingConfig.filesToTest,
916
+ iterationNumber: ralphState.currentIteration,
917
+ previousTestResults: ralphState.testResults,
918
+ });
919
+ // Run generated tests
920
+ }
921
+ ```
922
+
923
+ **Step 6: Commit**
924
+
925
+ ```bash
926
+ git add src/testing/integrations/ralph-testing.ts tests/testing/integrations/ralph-testing.test.ts src/skills/ralph/
927
+ git commit -m "feat(testing): integrate test generation into Ralph mode verify cycle"
928
+ ```
929
+
930
+ ---
931
+
932
+ ## Task 6: Autopilot Automatic Testing Phase
933
+
934
+ **Files:**
935
+ - Create: `src/testing/integrations/autopilot-testing.ts`
936
+ - Create: `tests/testing/integrations/autopilot-testing.test.ts`
937
+ - Modify: `src/skills/autopilot/index.ts` (add testing phase)
938
+
939
+ **Step 1: Write the failing test**
940
+
941
+ Create `tests/testing/integrations/autopilot-testing.test.ts`:
942
+
943
+ ```typescript
944
+ import { describe, it, expect } from 'vitest';
945
+ import { injectTestingIntoAutopilot, generateTestsForPhase } from '../../../src/testing/integrations/autopilot-testing';
946
+
947
+ describe('Autopilot Testing Integration', () => {
948
+ it('should inject testing phase into Autopilot workflow', async () => {
949
+ const mockAutopilotState = {
950
+ currentPhase: 'implementation',
951
+ generatedFiles: ['src/components/Button.tsx', 'src/utils/format.ts'],
952
+ requirements: 'Build a button component with formatting utilities',
953
+ };
954
+
955
+ const result = await injectTestingIntoAutopilot(mockAutopilotState);
956
+
957
+ expect(result.testingPhaseEnabled).toBe(true);
958
+ expect(result.testingPhase).toBe('post-implementation');
959
+ expect(result.filesToTest).toEqual(mockAutopilotState.generatedFiles);
960
+ });
961
+
962
+ it('should generate tests for Autopilot phase', async () => {
963
+ const result = await generateTestsForPhase({
964
+ phase: 'implementation',
965
+ generatedFiles: ['src/api/auth.ts'],
966
+ requirements: 'Implement authentication API',
967
+ });
968
+
969
+ expect(result.tests).toHaveLength(1);
970
+ expect(result.tests[0].file).toBe('src/api/auth.ts');
971
+ expect(result.tests[0].testType).toBe('unit');
972
+ });
973
+
974
+ it('should generate E2E tests for UI components', async () => {
975
+ const result = await generateTestsForPhase({
976
+ phase: 'implementation',
977
+ generatedFiles: ['src/pages/Login.tsx'],
978
+ requirements: 'Build login page',
979
+ });
980
+
981
+ expect(result.tests).toHaveLength(2); // unit + e2e
982
+ expect(result.tests.some((t) => t.testType === 'e2e')).toBe(true);
983
+ });
984
+ });
985
+ ```
986
+
987
+ **Step 2: Run test to verify it fails**
988
+
989
+ Run: `pnpm test tests/testing/integrations/autopilot-testing.test.ts`
990
+ Expected: FAIL with "Cannot find module"
991
+
992
+ **Step 3: Implement Autopilot testing integration**
993
+
994
+ Create `src/testing/integrations/autopilot-testing.ts`:
995
+
996
+ ```typescript
997
+ interface AutopilotState {
998
+ currentPhase: 'planning' | 'implementation' | 'verification' | 'deployment';
999
+ generatedFiles: string[];
1000
+ requirements: string;
1001
+ }
1002
+
1003
+ interface AutopilotTestingConfig {
1004
+ testingPhaseEnabled: boolean;
1005
+ testingPhase: 'post-implementation' | 'pre-verification';
1006
+ filesToTest: string[];
1007
+ }
1008
+
1009
+ interface GenerateTestsForPhaseOptions {
1010
+ phase: string;
1011
+ generatedFiles: string[];
1012
+ requirements: string;
1013
+ }
1014
+
1015
+ interface PhaseTestResult {
1016
+ tests: Array<{
1017
+ file: string;
1018
+ testFile: string;
1019
+ testType: 'unit' | 'integration' | 'e2e';
1020
+ }>;
1021
+ }
1022
+
1023
+ export async function injectTestingIntoAutopilot(state: AutopilotState): Promise<AutopilotTestingConfig> {
1024
+ return {
1025
+ testingPhaseEnabled: true,
1026
+ testingPhase: state.currentPhase === 'implementation' ? 'post-implementation' : 'pre-verification',
1027
+ filesToTest: state.generatedFiles,
1028
+ };
1029
+ }
1030
+
1031
+ export async function generateTestsForPhase(options: GenerateTestsForPhaseOptions): Promise<PhaseTestResult> {
1032
+ const { phase, generatedFiles, requirements } = options;
1033
+ const tests: PhaseTestResult['tests'] = [];
1034
+
1035
+ for (const file of generatedFiles) {
1036
+ // Determine test type based on file type
1037
+ const isUIComponent = file.includes('components/') || file.includes('pages/');
1038
+ const isAPI = file.includes('api/') || file.includes('routes/');
1039
+
1040
+ // Always generate unit tests
1041
+ tests.push({
1042
+ file,
1043
+ testFile: file.replace(/\.(tsx?|jsx?)$/, '.test.$1'),
1044
+ testType: 'unit',
1045
+ });
1046
+
1047
+ // Generate E2E tests for UI components
1048
+ if (isUIComponent) {
1049
+ tests.push({
1050
+ file,
1051
+ testFile: file.replace(/\.(tsx?|jsx?)$/, '.e2e.spec.ts'),
1052
+ testType: 'e2e',
1053
+ });
1054
+ }
1055
+
1056
+ // Generate integration tests for APIs
1057
+ if (isAPI) {
1058
+ tests.push({
1059
+ file,
1060
+ testFile: file.replace(/\.(ts|js)$/, '.integration.test.$1'),
1061
+ testType: 'integration',
1062
+ });
1063
+ }
1064
+ }
1065
+
1066
+ return { tests };
1067
+ }
1068
+ ```
1069
+
1070
+ **Step 4: Run test to verify it passes**
1071
+
1072
+ Run: `pnpm test tests/testing/integrations/autopilot-testing.test.ts`
1073
+ Expected: PASS
1074
+
1075
+ **Step 5: Add hook to Autopilot skill**
1076
+
1077
+ Modify `src/skills/autopilot/index.ts` to add testing phase (pseudo-code):
1078
+
1079
+ ```typescript
1080
+ // Add to Autopilot workflow after implementation phase
1081
+ import { injectTestingIntoAutopilot, generateTestsForPhase } from '../../testing/integrations/autopilot-testing';
1082
+
1083
+ // After implementation phase:
1084
+ const testingConfig = await injectTestingIntoAutopilot(autopilotState);
1085
+ if (testingConfig.testingPhaseEnabled) {
1086
+ const testResult = await generateTestsForPhase({
1087
+ phase: autopilotState.currentPhase,
1088
+ generatedFiles: testingConfig.filesToTest,
1089
+ requirements: autopilotState.requirements,
1090
+ });
1091
+ // Generate and run tests
1092
+ }
1093
+ ```
1094
+
1095
+ **Step 6: Commit**
1096
+
1097
+ ```bash
1098
+ git add src/testing/integrations/autopilot-testing.ts tests/testing/integrations/autopilot-testing.test.ts src/skills/autopilot/
1099
+ git commit -m "feat(testing): add automatic test generation phase to Autopilot workflow"
1100
+ ```
1101
+
1102
+ ---
1103
+
1104
+ ## Task 7: Test Quality Scoring System
1105
+
1106
+ **Files:**
1107
+ - Create: `src/testing/quality/test-scorer.ts`
1108
+ - Create: `src/testing/quality/metrics.ts`
1109
+ - Create: `tests/testing/quality/test-scorer.test.ts`
1110
+
1111
+ **Step 1: Write the failing test**
1112
+
1113
+ Create `tests/testing/quality/test-scorer.test.ts`:
1114
+
1115
+ ```typescript
1116
+ import { describe, it, expect } from 'vitest';
1117
+ import { scoreTest, calculateTestQuality } from '../../../src/testing/quality/test-scorer';
1118
+
1119
+ describe('Test Quality Scorer', () => {
1120
+ it('should score test completeness', async () => {
1121
+ const testCode = `
1122
+ test('should add two numbers', () => {
1123
+ expect(add(1, 2)).toBe(3);
1124
+ expect(add(-1, 1)).toBe(0);
1125
+ expect(add(0, 0)).toBe(0);
1126
+ });
1127
+ `;
1128
+
1129
+ const result = await scoreTest({ testCode, testType: 'unit' });
1130
+
1131
+ expect(result.completenessScore).toBeGreaterThan(70);
1132
+ expect(result.hasEdgeCases).toBe(true);
1133
+ expect(result.assertionCount).toBe(3);
1134
+ });
1135
+
1136
+ it('should penalize tests without assertions', async () => {
1137
+ const testCode = `
1138
+ test('should do something', () => {
1139
+ const result = doSomething();
1140
+ });
1141
+ `;
1142
+
1143
+ const result = await scoreTest({ testCode, testType: 'unit' });
1144
+
1145
+ expect(result.completenessScore).toBeLessThan(30);
1146
+ expect(result.hasAssertions).toBe(false);
1147
+ });
1148
+
1149
+ it('should calculate overall test quality', async () => {
1150
+ const tests = [
1151
+ { file: 'test1.test.ts', score: 85, assertions: 5 },
1152
+ { file: 'test2.test.ts', score: 70, assertions: 3 },
1153
+ { file: 'test3.test.ts', score: 90, assertions: 7 },
1154
+ ];
1155
+
1156
+ const result = await calculateTestQuality({ tests });
1157
+
1158
+ expect(result.averageScore).toBeCloseTo(81.67, 1);
1159
+ expect(result.totalAssertions).toBe(15);
1160
+ expect(result.qualityGrade).toBe('B');
1161
+ });
1162
+ });
1163
+ ```
1164
+
1165
+ **Step 2: Run test to verify it fails**
1166
+
1167
+ Run: `pnpm test tests/testing/quality/test-scorer.test.ts`
1168
+ Expected: FAIL with "Cannot find module"
1169
+
1170
+ **Step 3: Implement test quality scorer**
1171
+
1172
+ Create `src/testing/quality/metrics.ts`:
1173
+
1174
+ ```typescript
1175
+ export interface TestMetrics {
1176
+ assertionCount: number;
1177
+ hasEdgeCases: boolean;
1178
+ hasAssertions: boolean;
1179
+ hasMocks: boolean;
1180
+ hasSetup: boolean;
1181
+ hasTeardown: boolean;
1182
+ testComplexity: number;
1183
+ }
1184
+
1185
+ export interface TestScore {
1186
+ completenessScore: number;
1187
+ assertionCount: number;
1188
+ hasEdgeCases: boolean;
1189
+ hasAssertions: boolean;
1190
+ metrics: TestMetrics;
1191
+ }
1192
+
1193
+ export interface QualityReport {
1194
+ averageScore: number;
1195
+ totalAssertions: number;
1196
+ qualityGrade: 'A' | 'B' | 'C' | 'D' | 'F';
1197
+ recommendations: string[];
1198
+ }
1199
+
1200
+ export function calculateGrade(score: number): 'A' | 'B' | 'C' | 'D' | 'F' {
1201
+ if (score >= 90) return 'A';
1202
+ if (score >= 80) return 'B';
1203
+ if (score >= 70) return 'C';
1204
+ if (score >= 60) return 'D';
1205
+ return 'F';
1206
+ }
1207
+ ```
1208
+
1209
+ Create `src/testing/quality/test-scorer.ts`:
1210
+
1211
+ ```typescript
1212
+ import type { TestScore, TestMetrics, QualityReport } from './metrics';
1213
+ import { calculateGrade } from './metrics';
1214
+
1215
+ interface ScoreTestOptions {
1216
+ testCode: string;
1217
+ testType: 'unit' | 'integration' | 'e2e';
1218
+ }
1219
+
1220
+ interface CalculateQualityOptions {
1221
+ tests: Array<{ file: string; score: number; assertions: number }>;
1222
+ }
1223
+
1224
+ export async function scoreTest(options: ScoreTestOptions): Promise<TestScore> {
1225
+ const { testCode, testType } = options;
1226
+
1227
+ // Count assertions
1228
+ const assertionCount = (testCode.match(/expect\(/g) || []).length;
1229
+ const hasAssertions = assertionCount > 0;
1230
+
1231
+ // Check for edge cases
1232
+ const hasEdgeCases =
1233
+ testCode.includes('null') ||
1234
+ testCode.includes('undefined') ||
1235
+ testCode.includes('0') ||
1236
+ testCode.includes('empty') ||
1237
+ testCode.includes('-1');
1238
+
1239
+ // Check for mocks
1240
+ const hasMocks = testCode.includes('mock') || testCode.includes('spy') || testCode.includes('stub');
1241
+
1242
+ // Check for setup/teardown
1243
+ const hasSetup = testCode.includes('beforeEach') || testCode.includes('beforeAll');
1244
+ const hasTeardown = testCode.includes('afterEach') || testCode.includes('afterAll');
1245
+
1246
+ // Calculate complexity (simple heuristic)
1247
+ const testComplexity = (testCode.match(/test\(/g) || []).length;
1248
+
1249
+ const metrics: TestMetrics = {
1250
+ assertionCount,
1251
+ hasEdgeCases,
1252
+ hasAssertions,
1253
+ hasMocks,
1254
+ hasSetup,
1255
+ hasTeardown,
1256
+ testComplexity,
1257
+ };
1258
+
1259
+ // Calculate completeness score
1260
+ let score = 0;
1261
+ if (hasAssertions) score += 40;
1262
+ score += Math.min(assertionCount * 10, 30); // Up to 30 points for assertions
1263
+ if (hasEdgeCases) score += 15;
1264
+ if (hasMocks) score += 10;
1265
+ if (hasSetup || hasTeardown) score += 5;
1266
+
1267
+ return {
1268
+ completenessScore: Math.min(score, 100),
1269
+ assertionCount,
1270
+ hasEdgeCases,
1271
+ hasAssertions,
1272
+ metrics,
1273
+ };
1274
+ }
1275
+
1276
+ export async function calculateTestQuality(options: CalculateQualityOptions): Promise<QualityReport> {
1277
+ const { tests } = options;
1278
+
1279
+ const totalScore = tests.reduce((sum, test) => sum + test.score, 0);
1280
+ const averageScore = totalScore / tests.length;
1281
+ const totalAssertions = tests.reduce((sum, test) => sum + test.assertions, 0);
1282
+
1283
+ const qualityGrade = calculateGrade(averageScore);
1284
+
1285
+ const recommendations: string[] = [];
1286
+ if (averageScore < 70) {
1287
+ recommendations.push('Increase test coverage with more assertions');
1288
+ }
1289
+ if (totalAssertions / tests.length < 3) {
1290
+ recommendations.push('Add more edge case testing');
1291
+ }
1292
+
1293
+ return {
1294
+ averageScore,
1295
+ totalAssertions,
1296
+ qualityGrade,
1297
+ recommendations,
1298
+ };
1299
+ }
1300
+ ```
1301
+
1302
+ **Step 4: Run test to verify it passes**
1303
+
1304
+ Run: `pnpm test tests/testing/quality/test-scorer.test.ts`
1305
+ Expected: PASS
1306
+
1307
+ **Step 5: Commit**
1308
+
1309
+ ```bash
1310
+ git add src/testing/quality/ tests/testing/quality/
1311
+ git commit -m "feat(testing): add test quality scoring system for completeness and effectiveness"
1312
+ ```
1313
+
1314
+ ---
1315
+
1316
+ ## Task 8: Performance Optimization Layer
1317
+
1318
+ **Files:**
1319
+ - Create: `src/testing/performance/cache-manager.ts`
1320
+ - Create: `src/testing/performance/parallel-generator.ts`
1321
+ - Create: `tests/testing/performance/cache-manager.test.ts`
1322
+ - Create: `tests/testing/performance/parallel-generator.test.ts`
1323
+
1324
+ **Step 1: Write the failing test**
1325
+
1326
+ Create `tests/testing/performance/cache-manager.test.ts`:
1327
+
1328
+ ```typescript
1329
+ import { describe, it, expect, beforeEach } from 'vitest';
1330
+ import { CacheManager, getCachedAnalysis, setCachedAnalysis } from '../../../src/testing/performance/cache-manager';
1331
+
1332
+ describe('Cache Manager', () => {
1333
+ let cacheManager: CacheManager;
1334
+
1335
+ beforeEach(() => {
1336
+ cacheManager = new CacheManager({ ttl: 3600 });
1337
+ });
1338
+
1339
+ it('should cache tech stack detection results', async () => {
1340
+ const projectPath = '/test/project';
1341
+ const techStack = { language: 'typescript', framework: 'react', testRunner: 'vitest' };
1342
+
1343
+ await cacheManager.set(`techstack:${projectPath}`, techStack);
1344
+ const cached = await cacheManager.get(`techstack:${projectPath}`);
1345
+
1346
+ expect(cached).toEqual(techStack);
1347
+ });
1348
+
1349
+ it('should invalidate cache after TTL', async () => {
1350
+ const cacheWithShortTTL = new CacheManager({ ttl: 1 }); // 1 second
1351
+ await cacheWithShortTTL.set('test-key', 'test-value');
1352
+
1353
+ // Wait for TTL to expire
1354
+ await new Promise((resolve) => setTimeout(resolve, 1100));
1355
+
1356
+ const cached = await cacheWithShortTTL.get('test-key');
1357
+ expect(cached).toBeNull();
1358
+ });
1359
+
1360
+ it('should cache coverage analysis results', async () => {
1361
+ const coverageData = { totalCoverage: 75, lineCoverage: 80 };
1362
+ await setCachedAnalysis('coverage:project1', coverageData);
1363
+
1364
+ const cached = await getCachedAnalysis('coverage:project1');
1365
+ expect(cached).toEqual(coverageData);
1366
+ });
1367
+ });
1368
+ ```
1369
+
1370
+ Create `tests/testing/performance/parallel-generator.test.ts`:
1371
+
1372
+ ```typescript
1373
+ import { describe, it, expect } from 'vitest';
1374
+ import { generateTestsInParallel, ParallelGenerator } from '../../../src/testing/performance/parallel-generator';
1375
+
1376
+ describe('Parallel Test Generator', () => {
1377
+ it('should generate tests for multiple files in parallel', async () => {
1378
+ const files = [
1379
+ 'src/utils/format.ts',
1380
+ 'src/utils/validation.ts',
1381
+ 'src/api/users.ts',
1382
+ 'src/api/auth.ts',
1383
+ ];
1384
+
1385
+ const result = await generateTestsInParallel({
1386
+ files,
1387
+ maxConcurrency: 2,
1388
+ });
1389
+
1390
+ expect(result.generatedTests).toHaveLength(4);
1391
+ expect(result.totalTime).toBeLessThan(10000); // Should be faster than sequential
1392
+ });
1393
+
1394
+ it('should respect max concurrency limit', async () => {
1395
+ const generator = new ParallelGenerator({ maxConcurrency: 2 });
1396
+ const files = ['file1.ts', 'file2.ts', 'file3.ts', 'file4.ts'];
1397
+
1398
+ const result = await generator.generate(files);
1399
+
1400
+ expect(result.maxConcurrentTasks).toBe(2);
1401
+ });
1402
+ });
1403
+ ```
1404
+
1405
+ **Step 2: Run test to verify it fails**
1406
+
1407
+ Run: `pnpm test tests/testing/performance/`
1408
+ Expected: FAIL with "Cannot find module"
1409
+
1410
+ **Step 3: Implement performance optimization**
1411
+
1412
+ Create `src/testing/performance/cache-manager.ts`:
1413
+
1414
+ ```typescript
1415
+ interface CacheOptions {
1416
+ ttl: number; // Time to live in seconds
1417
+ }
1418
+
1419
+ interface CacheEntry<T> {
1420
+ value: T;
1421
+ expiresAt: number;
1422
+ }
1423
+
1424
+ export class CacheManager {
1425
+ private cache: Map<string, CacheEntry<any>>;
1426
+ private ttl: number;
1427
+
1428
+ constructor(options: CacheOptions) {
1429
+ this.cache = new Map();
1430
+ this.ttl = options.ttl * 1000; // Convert to milliseconds
1431
+ }
1432
+
1433
+ async get<T>(key: string): Promise<T | null> {
1434
+ const entry = this.cache.get(key);
1435
+ if (!entry) return null;
1436
+
1437
+ if (Date.now() > entry.expiresAt) {
1438
+ this.cache.delete(key);
1439
+ return null;
1440
+ }
1441
+
1442
+ return entry.value;
1443
+ }
1444
+
1445
+ async set<T>(key: string, value: T): Promise<void> {
1446
+ this.cache.set(key, {
1447
+ value,
1448
+ expiresAt: Date.now() + this.ttl,
1449
+ });
1450
+ }
1451
+
1452
+ async clear(): Promise<void> {
1453
+ this.cache.clear();
1454
+ }
1455
+ }
1456
+
1457
+ // Global cache instance
1458
+ const globalCache = new CacheManager({ ttl: 3600 });
1459
+
1460
+ export async function getCachedAnalysis<T>(key: string): Promise<T | null> {
1461
+ return globalCache.get<T>(key);
1462
+ }
1463
+
1464
+ export async function setCachedAnalysis<T>(key: string, value: T): Promise<void> {
1465
+ return globalCache.set(key, value);
1466
+ }
1467
+ ```
1468
+
1469
+ Create `src/testing/performance/parallel-generator.ts`:
1470
+
1471
+ ```typescript
1472
+ interface GenerateInParallelOptions {
1473
+ files: string[];
1474
+ maxConcurrency: number;
1475
+ }
1476
+
1477
+ interface ParallelResult {
1478
+ generatedTests: Array<{ file: string; testFile: string }>;
1479
+ totalTime: number;
1480
+ maxConcurrentTasks: number;
1481
+ }
1482
+
1483
+ export class ParallelGenerator {
1484
+ private maxConcurrency: number;
1485
+
1486
+ constructor(options: { maxConcurrency: number }) {
1487
+ this.maxConcurrency = options.maxConcurrency;
1488
+ }
1489
+
1490
+ async generate(files: string[]): Promise<ParallelResult> {
1491
+ const startTime = Date.now();
1492
+ const results: Array<{ file: string; testFile: string }> = [];
1493
+
1494
+ // Process files in batches
1495
+ for (let i = 0; i < files.length; i += this.maxConcurrency) {
1496
+ const batch = files.slice(i, i + this.maxConcurrency);
1497
+ const batchResults = await Promise.all(
1498
+ batch.map(async (file) => ({
1499
+ file,
1500
+ testFile: file.replace(/\.(ts|js)$/, '.test.$1'),
1501
+ }))
1502
+ );
1503
+ results.push(...batchResults);
1504
+ }
1505
+
1506
+ return {
1507
+ generatedTests: results,
1508
+ totalTime: Date.now() - startTime,
1509
+ maxConcurrentTasks: this.maxConcurrency,
1510
+ };
1511
+ }
1512
+ }
1513
+
1514
+ export async function generateTestsInParallel(options: GenerateInParallelOptions): Promise<ParallelResult> {
1515
+ const generator = new ParallelGenerator({ maxConcurrency: options.maxConcurrency });
1516
+ return generator.generate(options.files);
1517
+ }
1518
+ ```
1519
+
1520
+ **Step 4: Run test to verify it passes**
1521
+
1522
+ Run: `pnpm test tests/testing/performance/`
1523
+ Expected: PASS
1524
+
1525
+ **Step 5: Commit**
1526
+
1527
+ ```bash
1528
+ git add src/testing/performance/ tests/testing/performance/
1529
+ git commit -m "feat(testing): add performance optimization with caching and parallel generation"
1530
+ ```
1531
+
1532
+ ---
1533
+
1534
+ ## Task 9: CLI Integration and Documentation
1535
+
1536
+ **Files:**
1537
+ - Modify: `src/cli/commands/test.ts` (add Phase 3 commands)
1538
+ - Create: `docs/testing/phase3-guide.md`
1539
+ - Create: `tests/cli/test-phase3.test.ts`
1540
+
1541
+ **Step 1: Write the failing test**
1542
+
1543
+ Create `tests/cli/test-phase3.test.ts`:
1544
+
1545
+ ```typescript
1546
+ import { describe, it, expect } from 'vitest';
1547
+ import { execSync } from 'child_process';
1548
+
1549
+ describe('CLI Phase 3 Commands', () => {
1550
+ it('should run promptfoo test generation', () => {
1551
+ const output = execSync('omc test promptfoo --prompt src/prompts/test.txt --help', {
1552
+ encoding: 'utf-8',
1553
+ });
1554
+
1555
+ expect(output).toContain('Generate Promptfoo config');
1556
+ });
1557
+
1558
+ it('should run behavioral tests', () => {
1559
+ const output = execSync('omc test behavioral --help', { encoding: 'utf-8' });
1560
+
1561
+ expect(output).toContain('Generate behavioral tests with Giskard');
1562
+ });
1563
+
1564
+ it('should generate E2E tests', () => {
1565
+ const output = execSync('omc test e2e --help', { encoding: 'utf-8' });
1566
+
1567
+ expect(output).toContain('Generate Playwright E2E tests');
1568
+ });
1569
+
1570
+ it('should generate CI/CD workflow', () => {
1571
+ const output = execSync('omc test ci --help', { encoding: 'utf-8' });
1572
+
1573
+ expect(output).toContain('Generate GitHub Actions workflow');
1574
+ });
1575
+
1576
+ it('should score test quality', () => {
1577
+ const output = execSync('omc test score --help', { encoding: 'utf-8' });
1578
+
1579
+ expect(output).toContain('Score test quality');
1580
+ });
1581
+ });
1582
+ ```
1583
+
1584
+ **Step 2: Run test to verify it fails**
1585
+
1586
+ Run: `pnpm test tests/cli/test-phase3.test.ts`
1587
+ Expected: FAIL with commands not found
1588
+
1589
+ **Step 3: Implement CLI commands**
1590
+
1591
+ Modify `src/cli/commands/test.ts`:
1592
+
1593
+ ```typescript
1594
+ import { Command } from 'commander';
1595
+ import { generatePromptfooConfig } from '../../testing/integrations/promptfoo/config-generator';
1596
+ import { generatePerturbationTests } from '../../testing/integrations/giskard/behavioral-tests';
1597
+ import { generateFromUserFlow } from '../../testing/generators/e2e-generator';
1598
+ import { generateTestWorkflow } from '../../testing/ci/github-actions-generator';
1599
+ import { scoreTest, calculateTestQuality } from '../../testing/quality/test-scorer';
1600
+
1601
+ export function registerPhase3Commands(program: Command): void {
1602
+ const testCommand = program.command('test');
1603
+
1604
+ // Promptfoo command
1605
+ testCommand
1606
+ .command('promptfoo')
1607
+ .description('Generate Promptfoo config for LLM prompt testing')
1608
+ .option('--prompt <file>', 'Prompt file path')
1609
+ .option('--provider <provider>', 'LLM provider', 'anthropic:claude-3-5-sonnet-20241022')
1610
+ .option('--output <file>', 'Output config file', 'promptfoo.config.yml')
1611
+ .action(async (options) => {
1612
+ const config = await generatePromptfooConfig({
1613
+ promptFile: options.prompt,
1614
+ testCases: [],
1615
+ provider: options.provider,
1616
+ outputPath: options.output,
1617
+ });
1618
+ console.log('Promptfoo config generated:', options.output);
1619
+ });
1620
+
1621
+ // Behavioral testing command
1622
+ testCommand
1623
+ .command('behavioral')
1624
+ .description('Generate behavioral tests with Giskard')
1625
+ .option('--input <file>', 'Input test cases file')
1626
+ .option('--perturbations <types>', 'Perturbation types (comma-separated)', 'typo,negation,synonym')
1627
+ .action(async (options) => {
1628
+ const perturbations = options.perturbations.split(',');
1629
+ const result = await generatePerturbationTests({
1630
+ testCases: [],
1631
+ perturbations: perturbations as any,
1632
+ });
1633
+ console.log(`Generated ${result.totalTests} behavioral tests`);
1634
+ });
1635
+
1636
+ // E2E test generation command
1637
+ testCommand
1638
+ .command('e2e')
1639
+ .description('Generate Playwright E2E tests from user flow')
1640
+ .option('--flow <description>', 'User flow description')
1641
+ .option('--base-url <url>', 'Base URL', 'http://localhost:3000')
1642
+ .option('--output <file>', 'Output test file')
1643
+ .action(async (options) => {
1644
+ const result = await generateFromUserFlow({
1645
+ flowDescription: options.flow,
1646
+ baseUrl: options.baseUrl,
1647
+ testName: 'Generated E2E Test',
1648
+ });
1649
+ console.log('E2E test generated with', result.steps.length, 'steps');
1650
+ });
1651
+
1652
+ // CI/CD workflow generation command
1653
+ testCommand
1654
+ .command('ci')
1655
+ .description('Generate GitHub Actions workflow for testing')
1656
+ .option('--type <type>', 'Project type', 'nodejs')
1657
+ .option('--output <file>', 'Output workflow file', '.github/workflows/test.yml')
1658
+ .action(async (options) => {
1659
+ const workflow = await generateTestWorkflow({
1660
+ projectType: options.type as any,
1661
+ testCommand: 'pnpm test',
1662
+ });
1663
+ console.log('GitHub Actions workflow generated:', options.output);
1664
+ });
1665
+
1666
+ // Test quality scoring command
1667
+ testCommand
1668
+ .command('score')
1669
+ .description('Score test quality and completeness')
1670
+ .option('--file <file>', 'Test file to score')
1671
+ .action(async (options) => {
1672
+ const testCode = ''; // Read from file
1673
+ const result = await scoreTest({ testCode, testType: 'unit' });
1674
+ console.log('Test quality score:', result.completenessScore);
1675
+ });
1676
+ }
1677
+ ```
1678
+
1679
+ **Step 4: Create documentation**
1680
+
1681
+ Create `docs/testing/phase3-guide.md`:
1682
+
1683
+ ```markdown
1684
+ # Phase 3: Advanced Testing Features
1685
+
1686
+ ## Overview
1687
+
1688
+ Phase 3 completes the LLM Testing System with advanced features including Promptfoo integration, Giskard behavioral testing, Playwright E2E generation, CI/CD templates, and performance optimization.
1689
+
1690
+ ## Features
1691
+
1692
+ ### 1. Promptfoo Integration
1693
+
1694
+ Generate Promptfoo configs for testing LLM prompts:
1695
+
1696
+ \`\`\`bash
1697
+ omc test promptfoo --prompt src/prompts/code-review.txt --provider anthropic:claude-3-5-sonnet-20241022
1698
+ \`\`\`
1699
+
1700
+ ### 2. Giskard Behavioral Testing
1701
+
1702
+ Generate perturbation and robustness tests:
1703
+
1704
+ \`\`\`bash
1705
+ omc test behavioral --input test-cases.json --perturbations typo,negation,synonym
1706
+ \`\`\`
1707
+
1708
+ ### 3. Playwright E2E Generation
1709
+
1710
+ Generate E2E tests from user flow descriptions:
1711
+
1712
+ \`\`\`bash
1713
+ omc test e2e --flow "User logs in, navigates to dashboard, clicks settings" --base-url http://localhost:3000
1714
+ \`\`\`
1715
+
1716
+ ### 4. CI/CD Integration
1717
+
1718
+ Generate GitHub Actions workflows:
1719
+
1720
+ \`\`\`bash
1721
+ omc test ci --type nodejs --output .github/workflows/test.yml
1722
+ \`\`\`
1723
+
1724
+ ### 5. Test Quality Scoring
1725
+
1726
+ Score test completeness and effectiveness:
1727
+
1728
+ \`\`\`bash
1729
+ omc test score --file tests/utils/validation.test.ts
1730
+ \`\`\`
1731
+
1732
+ ### 6. Ralph Mode Integration
1733
+
1734
+ Tests are automatically generated during Ralph's verify cycle.
1735
+
1736
+ ### 7. Autopilot Integration
1737
+
1738
+ Tests are automatically generated after code implementation in Autopilot mode.
1739
+
1740
+ ## Performance Optimization
1741
+
1742
+ - Caching: Tech stack detection and coverage analysis results are cached
1743
+ - Parallel Generation: Multiple tests generated concurrently
1744
+ - Incremental Analysis: Only analyze changed files
1745
+
1746
+ ## Next Steps
1747
+
1748
+ - Run tests: `pnpm test`
1749
+ - Generate coverage: `pnpm test --coverage`
1750
+ - Run E2E tests: `pnpm test:e2e`
1751
+ ```
1752
+
1753
+ **Step 5: Run test to verify it passes**
1754
+
1755
+ Run: `pnpm test tests/cli/test-phase3.test.ts`
1756
+ Expected: PASS
1757
+
1758
+ **Step 6: Commit**
1759
+
1760
+ ```bash
1761
+ git add src/cli/commands/test.ts docs/testing/phase3-guide.md tests/cli/test-phase3.test.ts
1762
+ git commit -m "feat(testing): add Phase 3 CLI commands and documentation"
1763
+ ```
1764
+
1765
+ ---
1766
+
1767
+ ## Final Integration and Verification
1768
+
1769
+ **Step 1: Run all Phase 3 tests**
1770
+
1771
+ ```bash
1772
+ pnpm test tests/testing/integrations/
1773
+ pnpm test tests/testing/generators/e2e-generator.test.ts
1774
+ pnpm test tests/testing/ci/
1775
+ pnpm test tests/testing/quality/
1776
+ pnpm test tests/testing/performance/
1777
+ pnpm test tests/cli/test-phase3.test.ts
1778
+ ```
1779
+
1780
+ **Step 2: Verify integration with existing phases**
1781
+
1782
+ ```bash
1783
+ # Test that Phase 3 works with Phase 1 and Phase 2
1784
+ pnpm test tests/testing/
1785
+ ```
1786
+
1787
+ **Step 3: Update main documentation**
1788
+
1789
+ Update `docs/testing/README.md` to include Phase 3 features.
1790
+
1791
+ **Step 4: Final commit**
1792
+
1793
+ ```bash
1794
+ git add .
1795
+ git commit -m "feat(testing): complete Phase 3 implementation with all advanced features"
1796
+ ```
1797
+
1798
+ ---
1799
+
1800
+ ## Success Criteria
1801
+
1802
+ - ✅ Promptfoo integration working
1803
+ - ✅ Giskard behavioral tests generating
1804
+ - ✅ Playwright E2E tests generating from user flows
1805
+ - ✅ GitHub Actions workflows generating
1806
+ - ✅ Ralph mode testing loop integrated
1807
+ - ✅ Autopilot testing phase integrated
1808
+ - ✅ Test quality scoring functional
1809
+ - ✅ Performance optimization (caching, parallel generation) working
1810
+ - ✅ All tests passing
1811
+ - ✅ Documentation complete
1812
+
1813
+ ## Estimated Time
1814
+
1815
+ - Task 1-2: 2-3 days (Promptfoo + Giskard)
1816
+ - Task 3-4: 2-3 days (E2E + CI/CD)
1817
+ - Task 5-6: 3-4 days (Ralph + Autopilot integration)
1818
+ - Task 7-8: 2-3 days (Quality scoring + Performance)
1819
+ - Task 9: 1-2 days (CLI + Documentation)
1820
+
1821
+ **Total: 10-15 days**
1822
+
1823
+ ## Notes
1824
+
1825
+ - Use Haiku for simple test generation to reduce costs
1826
+ - Use Sonnet for complex behavioral and E2E tests
1827
+ - Cache tech stack detection results to avoid repeated analysis
1828
+ - Generate tests in parallel when possible
1829
+ - Integrate with existing OMC workflows (Ralph, Autopilot, Ultraqa)
1830
+