claudecode-omc 4.7.4 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/README.md +50 -0
  3. package/agents/test-engineer.md +74 -0
  4. package/bridge/cli.cjs +9335 -117
  5. package/dist/cli/index.js +201 -0
  6. package/dist/cli/index.js.map +1 -1
  7. package/dist/testing/analyzers/complexity.d.ts +18 -0
  8. package/dist/testing/analyzers/complexity.d.ts.map +1 -0
  9. package/dist/testing/analyzers/complexity.js +121 -0
  10. package/dist/testing/analyzers/complexity.js.map +1 -0
  11. package/dist/testing/analyzers/coverage.d.ts +13 -0
  12. package/dist/testing/analyzers/coverage.d.ts.map +1 -0
  13. package/dist/testing/analyzers/coverage.js +99 -0
  14. package/dist/testing/analyzers/coverage.js.map +1 -0
  15. package/dist/testing/analyzers/quality-scorer.d.ts +8 -0
  16. package/dist/testing/analyzers/quality-scorer.d.ts.map +1 -0
  17. package/dist/testing/analyzers/quality-scorer.js +128 -0
  18. package/dist/testing/analyzers/quality-scorer.js.map +1 -0
  19. package/dist/testing/analyzers/types.d.ts +56 -0
  20. package/dist/testing/analyzers/types.d.ts.map +1 -0
  21. package/dist/testing/analyzers/types.js +2 -0
  22. package/dist/testing/analyzers/types.js.map +1 -0
  23. package/dist/testing/cli/agent-integration.d.ts +20 -0
  24. package/dist/testing/cli/agent-integration.d.ts.map +1 -0
  25. package/dist/testing/cli/agent-integration.js +60 -0
  26. package/dist/testing/cli/agent-integration.js.map +1 -0
  27. package/dist/testing/cli/commands.d.ts +100 -0
  28. package/dist/testing/cli/commands.d.ts.map +1 -0
  29. package/dist/testing/cli/commands.js +250 -0
  30. package/dist/testing/cli/commands.js.map +1 -0
  31. package/dist/testing/cli/ultraqa-integration.d.ts +13 -0
  32. package/dist/testing/cli/ultraqa-integration.d.ts.map +1 -0
  33. package/dist/testing/cli/ultraqa-integration.js +68 -0
  34. package/dist/testing/cli/ultraqa-integration.js.map +1 -0
  35. package/dist/testing/detectors/go.d.ts +3 -0
  36. package/dist/testing/detectors/go.d.ts.map +1 -0
  37. package/dist/testing/detectors/go.js +38 -0
  38. package/dist/testing/detectors/go.js.map +1 -0
  39. package/dist/testing/detectors/index.d.ts +8 -0
  40. package/dist/testing/detectors/index.d.ts.map +1 -0
  41. package/dist/testing/detectors/index.js +46 -0
  42. package/dist/testing/detectors/index.js.map +1 -0
  43. package/dist/testing/detectors/package-json.d.ts +3 -0
  44. package/dist/testing/detectors/package-json.d.ts.map +1 -0
  45. package/dist/testing/detectors/package-json.js +52 -0
  46. package/dist/testing/detectors/package-json.js.map +1 -0
  47. package/dist/testing/detectors/python.d.ts +3 -0
  48. package/dist/testing/detectors/python.d.ts.map +1 -0
  49. package/dist/testing/detectors/python.js +37 -0
  50. package/dist/testing/detectors/python.js.map +1 -0
  51. package/dist/testing/detectors/rust.d.ts +3 -0
  52. package/dist/testing/detectors/rust.d.ts.map +1 -0
  53. package/dist/testing/detectors/rust.js +39 -0
  54. package/dist/testing/detectors/rust.js.map +1 -0
  55. package/dist/testing/generators/contract.d.ts +14 -0
  56. package/dist/testing/generators/contract.d.ts.map +1 -0
  57. package/dist/testing/generators/contract.js +163 -0
  58. package/dist/testing/generators/contract.js.map +1 -0
  59. package/dist/testing/generators/e2e.d.ts +34 -0
  60. package/dist/testing/generators/e2e.d.ts.map +1 -0
  61. package/dist/testing/generators/e2e.js +74 -0
  62. package/dist/testing/generators/e2e.js.map +1 -0
  63. package/dist/testing/generators/go.d.ts +12 -0
  64. package/dist/testing/generators/go.d.ts.map +1 -0
  65. package/dist/testing/generators/go.js +144 -0
  66. package/dist/testing/generators/go.js.map +1 -0
  67. package/dist/testing/generators/nodejs.d.ts +12 -0
  68. package/dist/testing/generators/nodejs.d.ts.map +1 -0
  69. package/dist/testing/generators/nodejs.js +37 -0
  70. package/dist/testing/generators/nodejs.js.map +1 -0
  71. package/dist/testing/generators/python.d.ts +12 -0
  72. package/dist/testing/generators/python.d.ts.map +1 -0
  73. package/dist/testing/generators/python.js +163 -0
  74. package/dist/testing/generators/python.js.map +1 -0
  75. package/dist/testing/generators/react.d.ts +12 -0
  76. package/dist/testing/generators/react.d.ts.map +1 -0
  77. package/dist/testing/generators/react.js +31 -0
  78. package/dist/testing/generators/react.js.map +1 -0
  79. package/dist/testing/generators/rust.d.ts +11 -0
  80. package/dist/testing/generators/rust.d.ts.map +1 -0
  81. package/dist/testing/generators/rust.js +138 -0
  82. package/dist/testing/generators/rust.js.map +1 -0
  83. package/dist/testing/index.d.ts +6 -0
  84. package/dist/testing/index.d.ts.map +1 -0
  85. package/dist/testing/index.js +11 -0
  86. package/dist/testing/index.js.map +1 -0
  87. package/dist/testing/integrations/autopilot.d.ts +42 -0
  88. package/dist/testing/integrations/autopilot.d.ts.map +1 -0
  89. package/dist/testing/integrations/autopilot.js +55 -0
  90. package/dist/testing/integrations/autopilot.js.map +1 -0
  91. package/dist/testing/integrations/cicd.d.ts +26 -0
  92. package/dist/testing/integrations/cicd.d.ts.map +1 -0
  93. package/dist/testing/integrations/cicd.js +162 -0
  94. package/dist/testing/integrations/cicd.js.map +1 -0
  95. package/dist/testing/integrations/giskard/behavioral-tests.d.ts +4 -0
  96. package/dist/testing/integrations/giskard/behavioral-tests.d.ts.map +1 -0
  97. package/dist/testing/integrations/giskard/behavioral-tests.js +66 -0
  98. package/dist/testing/integrations/giskard/behavioral-tests.js.map +1 -0
  99. package/dist/testing/integrations/giskard/types.d.ts +35 -0
  100. package/dist/testing/integrations/giskard/types.d.ts.map +1 -0
  101. package/dist/testing/integrations/giskard/types.js +2 -0
  102. package/dist/testing/integrations/giskard/types.js.map +1 -0
  103. package/dist/testing/integrations/promptfoo/config-generator.d.ts +5 -0
  104. package/dist/testing/integrations/promptfoo/config-generator.d.ts.map +1 -0
  105. package/dist/testing/integrations/promptfoo/config-generator.js +44 -0
  106. package/dist/testing/integrations/promptfoo/config-generator.js.map +1 -0
  107. package/dist/testing/integrations/promptfoo/types.d.ts +36 -0
  108. package/dist/testing/integrations/promptfoo/types.d.ts.map +1 -0
  109. package/dist/testing/integrations/promptfoo/types.js +2 -0
  110. package/dist/testing/integrations/promptfoo/types.js.map +1 -0
  111. package/dist/testing/integrations/ralph.d.ts +65 -0
  112. package/dist/testing/integrations/ralph.d.ts.map +1 -0
  113. package/dist/testing/integrations/ralph.js +69 -0
  114. package/dist/testing/integrations/ralph.js.map +1 -0
  115. package/dist/testing/performance/cache-manager.d.ts +16 -0
  116. package/dist/testing/performance/cache-manager.d.ts.map +1 -0
  117. package/dist/testing/performance/cache-manager.js +39 -0
  118. package/dist/testing/performance/cache-manager.js.map +1 -0
  119. package/dist/testing/performance/parallel-generator.d.ts +23 -0
  120. package/dist/testing/performance/parallel-generator.d.ts.map +1 -0
  121. package/dist/testing/performance/parallel-generator.js +31 -0
  122. package/dist/testing/performance/parallel-generator.js.map +1 -0
  123. package/dist/testing/types.d.ts +23 -0
  124. package/dist/testing/types.d.ts.map +1 -0
  125. package/dist/testing/types.js +2 -0
  126. package/dist/testing/types.js.map +1 -0
  127. package/docs/2026-03-06-llm-testing-system-phase1.md +0 -0
  128. package/docs/plans/2026-03-06-llm-testing-system-design.md +311 -0
  129. package/docs/plans/2026-03-06-llm-testing-system-phase1.md +1268 -0
  130. package/docs/plans/2026-03-06-llm-testing-system-phase2.md +3053 -0
  131. package/docs/plans/2026-03-06-llm-testing-system-phase3.md +1830 -0
  132. package/docs/testing/PHASE2.md +266 -0
  133. package/docs/testing/PHASE3.md +601 -0
  134. package/docs/testing/README.md +634 -0
  135. package/package.json +1 -1
  136. package/skills/test-gen/skill.md +531 -0
  137. package/skills/ultraqa.md +58 -0
@@ -0,0 +1,601 @@
1
+ # Phase 3: Advanced Testing Features
2
+
3
+ Phase 3 completes the OMC testing ecosystem with advanced integrations for LLM prompt testing, behavioral testing, E2E test generation, CI/CD automation, and test quality scoring.
4
+
5
+ ## Overview
6
+
7
+ Phase 3 adds:
8
+ - **Promptfoo Integration**: LLM prompt testing and evaluation
9
+ - **Giskard Behavioral Tests**: Robustness and perturbation testing
10
+ - **Playwright E2E Generation**: Turn natural-language user flow descriptions into automated Playwright tests
11
+ - **CI/CD Templates**: GitHub Actions workflow generation
12
+ - **Test Quality Scoring**: Automated test quality assessment
13
+ - **Ralph/Autopilot Integration**: Automated testing loops
14
+
15
+ ## Features
16
+
17
+ ### 1. Promptfoo Integration
18
+
19
+ Generate Promptfoo configurations for testing LLM prompts with multiple providers and test cases.
20
+
21
+ **Command:**
22
+ ```bash
23
+ omc test promptfoo <prompt-file> [options]
24
+ ```
25
+
26
+ **Options:**
27
+ - `-p, --provider <provider>`: LLM provider (default: `anthropic:claude-3-5-sonnet-20241022`)
28
+ - `-o, --output <path>`: Output config file path (default: `./promptfoo.config.yaml`)
29
+
30
+ **Example:**
31
+ ```bash
32
+ # Generate Promptfoo config for a code review prompt
33
+ omc test promptfoo src/prompts/code-review.txt
34
+
35
+ # Use a different provider
36
+ omc test promptfoo src/prompts/summarize.txt -p openai:gpt-4
37
+
38
+ # Custom output path
39
+ omc test promptfoo src/prompts/analyze.txt -o config/promptfoo.yaml
40
+ ```
41
+
42
+ **Generated Config:**
43
+ ```yaml
44
+ prompts:
45
+ - file://src/prompts/code-review.txt
46
+ providers:
47
+ - anthropic:claude-3-5-sonnet-20241022
48
+ tests: []
49
+ outputPath: ./promptfoo-results.json
50
+ ```
51
+
52
+ **Usage:**
53
+ 1. Generate config: `omc test promptfoo <prompt-file>`
54
+ 2. Add test cases to the generated YAML
55
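+ 3. Run tests: `npx promptfoo eval -c ./promptfoo.config.yaml` (promptfoo only auto-detects `promptfooconfig.yaml`, so pass the generated file explicitly)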
56
+ 4. View results: `npx promptfoo view`
57
+
58
+ ### 2. E2E Test Generation
59
+
60
+ Generate Playwright E2E tests from natural language user flow descriptions.
61
+
62
+ **Command:**
63
+ ```bash
64
+ omc test e2e <flow-description> [options]
65
+ ```
66
+
67
+ **Options:**
68
+ - `-b, --base-url <url>`: Base URL (default: `http://localhost:3000`)
69
+ - `-n, --test-name <name>`: Test name (default: `User flow test`)
70
+ - `-o, --output <path>`: Output file (default: `./tests/e2e/user-flow.spec.ts`)
71
+
72
+ **Example:**
73
+ ```bash
74
+ # Generate E2E test from flow description
75
+ omc test e2e "User logs in, navigates to dashboard, clicks on settings"
76
+
77
+ # Custom base URL and test name
78
+ omc test e2e "Admin creates new user" \
79
+ -b https://app.example.com \
80
+ -n "Admin user creation flow"
81
+
82
+ # Custom output path
83
+ omc test e2e "Checkout flow" -o tests/e2e/checkout.spec.ts
84
+ ```
85
+
86
+ **Generated Test:**
87
+ ```typescript
88
+ import { test, expect } from '@playwright/test';
89
+
90
+ test.describe('E2E Tests', () => {
91
+ test('User flow test', async ({ page }) => {
92
+ await page.goto('http://localhost:3000/login');
93
+ await page.goto('http://localhost:3000/dashboard');
94
+ await page.click('a[href="/settings"]');
95
+ });
96
+ });
97
+ ```
98
+
99
+ ### 3. Giskard Behavioral Tests
100
+
101
+ Generate behavioral tests for robustness and perturbation testing.
102
+
103
+ **Command:**
104
+ ```bash
105
+ omc test giskard <file> [options]
106
+ ```
107
+
108
+ **Options:**
109
+ - `-t, --test-type <type>`: Test type - `perturbation` or `robustness` (default: `perturbation`)
110
+ - `-o, --output <path>`: Output file (default: `./tests/behavioral/perturbation.test.ts`)
111
+
112
+ **Example:**
113
+ ```bash
114
+ # Generate perturbation tests
115
+ omc test giskard src/models/classifier.ts
116
+
117
+ # Generate robustness tests
118
+ omc test giskard src/models/sentiment.ts -t robustness
119
+
120
+ # Custom output
121
+ omc test giskard src/llm/prompt.ts -o tests/behavioral/prompt-robustness.test.ts
122
+ ```
123
+
124
+ **Test Types:**
125
+
126
+ **Perturbation Tests**: Test model behavior under input variations
127
+ - Typos
128
+ - Negations
129
+ - Synonyms
130
+ - Case changes
131
+
132
+ **Robustness Tests**: Test model stability
133
+ - Case sensitivity
134
+ - Whitespace handling
135
+ - Special characters
136
+ - Input length variations
137
+
138
+ **Generated Test:**
139
+ ```typescript
140
+ // Generated Giskard behavioral tests
141
+ import { describe, it, expect } from 'vitest';
142
+
143
+ describe('Behavioral Tests', () => {
144
+ it('should still classify as expected', async () => {
145
+ // Original: sample input
146
+ // Perturbed (typo): smaple input
147
+ // TODO: Add test implementation
148
+ });
149
+
150
+ it('should still classify as expected', async () => {
151
+ // Original: sample input
152
+ // Perturbed (negation): not sample input
153
+ // TODO: Add test implementation
154
+ });
155
+ });
156
+ ```
157
+
158
+ ### 4. CI/CD Workflow Generation
159
+
160
+ Generate GitHub Actions workflows for automated testing.
161
+
162
+ **Command:**
163
+ ```bash
164
+ omc test cicd [options]
165
+ ```
166
+
167
+ **Options:**
168
+ - `-l, --language <lang>`: Primary language - `nodejs`, `python`, `go`, `rust` (default: `nodejs`)
169
+ - `-o, --output <path>`: Output file (default: `./.github/workflows/test.yml`)
170
+
171
+ **Example:**
172
+ ```bash
173
+ # Generate Node.js workflow
174
+ omc test cicd
175
+
176
+ # Generate Python workflow
177
+ omc test cicd -l python
178
+
179
+ # Generate Go workflow with custom path
180
+ omc test cicd -l go -o .github/workflows/go-test.yml
181
+ ```
182
+
183
+ **Generated Workflow:**
184
+ ```yaml
185
+ name: Test
186
+
187
+ on:
188
+ push:
189
+ branches: [main, dev]
190
+ pull_request:
191
+ branches: [main, dev]
192
+
193
+ jobs:
194
+ test:
195
+ runs-on: ubuntu-latest
196
+ steps:
197
+ - uses: actions/checkout@v4
198
+
199
+ - name: Setup Node.js
200
+ uses: actions/setup-node@v4
201
+ with:
202
+ node-version: 20
203
+ cache: 'pnpm'
204
+
205
+ - name: Install dependencies
206
+ run: pnpm install
207
+
208
+ - name: Run tests
209
+ run: pnpm test
210
+
211
+ - name: Upload coverage
212
+ uses: codecov/codecov-action@v4
213
+ with:
214
+ token: ${{ secrets.CODECOV_TOKEN }}
215
+
216
+ - name: Upload test results
217
+ if: always()
218
+ uses: actions/upload-artifact@v4
219
+ with:
220
+ name: test-results
221
+ path: test-results/
222
+ ```
223
+
224
+ ### 5. Test Quality Scoring
225
+
226
+ Analyze test quality and get actionable recommendations.
227
+
228
+ **Command:**
229
+ ```bash
230
+ omc test quality <test-file> [options]
231
+ ```
232
+
233
+ **Options:**
234
+ - `-t, --test-type <type>`: Test type - `unit`, `integration`, `e2e` (default: `unit`)
235
+
236
+ **Example:**
237
+ ```bash
238
+ # Score a unit test
239
+ omc test quality tests/utils/parser.test.ts
240
+
241
+ # Score an integration test
242
+ omc test quality tests/api/users.test.ts -t integration
243
+
244
+ # Score an E2E test
245
+ omc test quality tests/e2e/checkout.spec.ts -t e2e
246
+ ```
247
+
248
+ **Output:**
249
+ ```
250
+ 📊 Test Quality Score:
251
+ Overall: 85/100
252
+ Completeness: 90/100
253
+ Assertions: 80/100
254
+ Independence: 95/100
255
+ Naming: 75/100
256
+ Assertion Count: 12
257
+
258
+ 💡 Recommendations:
259
+ - Test edge cases like null, undefined, empty values, and boundary conditions
260
+ - Use descriptive test names that explain what is being tested
261
+ - Improve assertion quality with more specific matchers
262
+ ```
263
+
264
+ **Scoring Metrics:**
265
+
266
+ 1. **Completeness (35% weight)**
267
+ - Has assertions
268
+ - Assertion count
269
+ - Tests edge cases
270
+ - Uses mocks
271
+ - Has setup/teardown
272
+
273
+ 2. **Assertion Quality (25% weight)**
274
+ - Uses specific assertions (toBe, toEqual)
275
+ - Avoids generic truthy checks
276
+ - Assertion-to-test ratio
277
+
278
+ 3. **Independence (20% weight)**
279
+ - No shared state
280
+ - Proper test isolation
281
+ - Uses beforeEach/afterEach
282
+
283
+ 4. **Naming (20% weight)**
284
+ - Descriptive test names
285
+ - Uses "should" pattern
286
+ - Clear intent
287
+
288
+ ## Integration with Ralph/Autopilot
289
+
290
+ Phase 3 features integrate seamlessly with OMC's execution modes:
291
+
292
+ ### Ralph Mode Testing Loop
293
+
294
+ ```bash
295
+ # Ralph automatically runs test quality checks
296
+ omc ralph "implement user authentication with tests"
297
+ ```
298
+
299
+ Ralph will:
300
+ 1. Generate implementation
301
+ 2. Generate tests
302
+ 3. Run quality scoring
303
+ 4. Fix issues based on recommendations
304
+ 5. Loop until the quality threshold is met
305
+
306
+ ### Autopilot Testing Phase
307
+
308
+ ```bash
309
+ # Autopilot includes comprehensive testing
310
+ omc autopilot "build a REST API"
311
+ ```
312
+
313
+ Autopilot will:
314
+ 1. Generate code
315
+ 2. Generate unit tests
316
+ 3. Generate integration tests
317
+ 4. Generate E2E tests
318
+ 5. Generate CI/CD workflow
319
+ 6. Run quality checks
320
+ 7. Generate behavioral tests
321
+
322
+ ## Workflows
323
+
324
+ ### Complete Testing Workflow
325
+
326
+ ```bash
327
+ # 1. Generate implementation tests
328
+ omc test gen src/utils/parser.ts
329
+
330
+ # 2. Score test quality
331
+ omc test quality tests/utils/parser.test.ts
332
+
333
+ # 3. Generate E2E tests
334
+ omc test e2e "User parses configuration file"
335
+
336
+ # 4. Generate behavioral tests
337
+ omc test giskard src/utils/parser.ts
338
+
339
+ # 5. Generate CI/CD workflow
340
+ omc test cicd
341
+
342
+ # 6. Generate Promptfoo config (if using LLM)
343
+ omc test promptfoo src/prompts/parse-config.txt
344
+ ```
345
+
346
+ ### LLM Prompt Testing Workflow
347
+
348
+ ```bash
349
+ # 1. Create prompt file
350
+ echo "Analyze this code and suggest improvements" > prompts/code-review.txt
351
+
352
+ # 2. Generate Promptfoo config
353
+ omc test promptfoo prompts/code-review.txt
354
+
355
+ # 3. Edit config to add test cases
356
+ # Edit promptfoo.config.yaml
357
+
358
+ # 4. Run Promptfoo tests
359
+ npx promptfoo eval -c promptfoo.config.yaml
360
+
361
+ # 5. View results
362
+ npx promptfoo view
363
+ ```
364
+
365
+ ### Behavioral Testing Workflow
366
+
367
+ ```bash
368
+ # 1. Generate perturbation tests
369
+ omc test giskard src/models/classifier.ts
370
+
371
+ # 2. Implement test logic
372
+ # Edit tests/behavioral/perturbation.test.ts
373
+
374
+ # 3. Run tests
375
+ pnpm test tests/behavioral/
376
+
377
+ # 4. Generate robustness tests
378
+ omc test giskard src/models/classifier.ts -t robustness -o tests/behavioral/robustness.test.ts
379
+
380
+ # 5. Run all behavioral tests
381
+ pnpm test tests/behavioral/
382
+ ```
383
+
384
+ ## Best Practices
385
+
386
+ ### Promptfoo Testing
387
+
388
+ 1. **Start with basic test cases**: Add simple inputs first
389
+ 2. **Test edge cases**: Include boundary conditions
390
+ 3. **Use multiple providers**: Compare outputs across models
391
+ 4. **Version your prompts**: Track prompt changes over time
392
+ 5. **Automate in CI**: Run Promptfoo tests in GitHub Actions
393
+
394
+ ### E2E Testing
395
+
396
+ 1. **Keep flows focused**: One user journey per test
397
+ 2. **Use descriptive names**: Clear test intent
398
+ 3. **Handle async properly**: Wait for elements
399
+ 4. **Test critical paths**: Focus on core functionality
400
+ 5. **Run in CI**: Automate E2E tests
401
+
402
+ ### Behavioral Testing
403
+
404
+ 1. **Test systematically**: Cover all perturbation types
405
+ 2. **Set thresholds**: Define acceptable behavior ranges
406
+ 3. **Monitor over time**: Track robustness metrics
407
+ 4. **Fix regressions**: Address behavioral issues promptly
408
+ 5. **Document expectations**: Clear behavior specifications
409
+
410
+ ### Test Quality
411
+
412
+ 1. **Aim for an overall score of 80 or higher**: Good quality baseline
413
+ 2. **Address recommendations**: Fix issues systematically
414
+ 3. **Review regularly**: Check quality on new tests
415
+ 4. **Enforce in CI**: Fail builds on low quality
416
+ 5. **Improve iteratively**: Gradual quality improvements
417
+
418
+ ## Configuration
419
+
420
+ ### Promptfoo Config
421
+
422
+ ```yaml
423
+ # promptfoo.config.yaml
424
+ prompts:
425
+ - file://prompts/code-review.txt
426
+ - file://prompts/summarize.txt
427
+
428
+ providers:
429
+ - anthropic:claude-3-5-sonnet-20241022
430
+ - openai:gpt-4
431
+ - openai:gpt-3.5-turbo
432
+
433
+ tests:
434
+ - vars:
435
+ code: "function add(a, b) { return a + b; }"
436
+ assert:
437
+ - type: contains
438
+ value: "function"
439
+ - type: contains
440
+ value: "parameters"
441
+
442
+ - vars:
443
+ code: "const x = 1;"
444
+ assert:
445
+ - type: contains
446
+ value: "variable"
447
+
448
+ defaultTest:
449
+ assert:
450
+ - type: llm-rubric
451
+ value: "Output should be helpful and accurate"
452
+
453
+ outputPath: ./promptfoo-results.json
454
+ ```
455
+
456
+ ### Playwright Config
457
+
458
+ ```typescript
459
+ // playwright.config.ts
460
+ import { defineConfig } from '@playwright/test';
461
+
462
+ export default defineConfig({
463
+ testDir: './tests/e2e',
464
+ fullyParallel: true,
465
+ forbidOnly: !!process.env.CI,
466
+ retries: process.env.CI ? 2 : 0,
467
+ workers: process.env.CI ? 1 : undefined,
468
+ reporter: 'html',
469
+ use: {
470
+ baseURL: 'http://localhost:3000',
471
+ trace: 'on-first-retry',
472
+ },
473
+ });
474
+ ```
475
+
476
+ ## Troubleshooting
477
+
478
+ ### Promptfoo Issues
479
+
480
+ **Problem**: Config not found
481
+ ```bash
482
+ # Solution: Check the file path, then pass it with `-c` (promptfoo looks for promptfooconfig.yaml by default)
483
+ ls -la promptfoo.config.yaml
484
+ ```
485
+
486
+ **Problem**: Provider authentication
487
+ ```bash
488
+ # Solution: Set API keys
489
+ export ANTHROPIC_API_KEY=your-key
490
+ export OPENAI_API_KEY=your-key
491
+ ```
492
+
493
+ ### E2E Test Issues
494
+
495
+ **Problem**: Selector not found
496
+ ```typescript
497
+ // Solution: Use better selectors
498
+ await page.waitForSelector('[data-testid="login-button"]');
499
+ await page.click('[data-testid="login-button"]');
500
+ ```
501
+
502
+ **Problem**: Flaky tests
503
+ ```typescript
504
+ // Solution: Add explicit waits
505
+ await page.waitForLoadState('networkidle');
506
+ await expect(page.locator('.result')).toBeVisible();
507
+ ```
508
+
509
+ ### Quality Scoring Issues
510
+
511
+ **Problem**: Low scores
512
+ ```bash
513
+ # Solution: Follow recommendations
514
+ omc test quality tests/file.test.ts
515
+ # Read and implement recommendations
516
+ ```
517
+
518
+ **Problem**: False positives
519
+ ```bash
520
+ # Solution: Review metrics manually
521
+ # Some patterns may not apply to your test style
522
+ ```
523
+
524
+ ## Next Steps
525
+
526
+ 1. **Explore Phase 1**: Basic test generation - [Phase 1 Guide](./PHASE1.md)
527
+ 2. **Explore Phase 2**: Advanced features - [Phase 2 Guide](./PHASE2.md)
528
+ 3. **Read Testing Guide**: Complete overview - [Testing README](./README.md)
529
+ 4. **Check Examples**: Sample tests - [Examples](../../examples/testing/)
530
+
531
+ ## API Reference
532
+
533
+ ### Promptfoo Config Generator
534
+
535
+ ```typescript
536
+ import { generatePromptfooConfig } from '@/testing/integrations/promptfoo/config-generator';
537
+
538
+ const config = await generatePromptfooConfig({
539
+ promptFile: 'prompts/code-review.txt',
540
+ testCases: [
541
+ { input: 'code sample', expected: 'contains:function' }
542
+ ],
543
+ provider: 'anthropic:claude-3-5-sonnet-20241022',
544
+ });
545
+ ```
546
+
547
+ ### E2E Test Generator
548
+
549
+ ```typescript
550
+ import { generateFromUserFlow } from '@/testing/generators/e2e';
551
+
552
+ const result = await generateFromUserFlow({
553
+ flowDescription: 'User logs in and views dashboard',
554
+ baseUrl: 'http://localhost:3000',
555
+ testName: 'Login flow',
556
+ });
557
+ ```
558
+
559
+ ### Behavioral Test Generator
560
+
561
+ ```typescript
562
+ import { generatePerturbationTests } from '@/testing/integrations/giskard/behavioral-tests';
563
+
564
+ const suite = await generatePerturbationTests({
565
+ testCases: [
566
+ { input: 'sample', expectedOutput: 'result' }
567
+ ],
568
+ perturbations: ['typo', 'negation', 'synonym'],
569
+ });
570
+ ```
571
+
572
+ ### CI/CD Workflow Generator
573
+
574
+ ```typescript
575
+ import { generateGitHubActionsWorkflow } from '@/testing/integrations/cicd';
576
+
577
+ const workflow = await generateGitHubActionsWorkflow({
578
+ language: 'nodejs',
579
+ coverage: true,
580
+ artifacts: true,
581
+ });
582
+ ```
583
+
584
+ ### Test Quality Scorer
585
+
586
+ ```typescript
587
+ import { scoreTestQuality } from '@/testing/analyzers/quality-scorer';
588
+
589
+ const score = await scoreTestQuality({
590
+ testCode: testFileContent,
591
+ testType: 'unit',
592
+ });
593
+ ```
594
+
595
+ ## Resources
596
+
597
+ - [Promptfoo Documentation](https://promptfoo.dev/)
598
+ - [Playwright Documentation](https://playwright.dev/)
599
+ - [Giskard Documentation](https://docs.giskard.ai/)
600
+ - [GitHub Actions Documentation](https://docs.github.com/en/actions)
601
+ - [OMC Testing Guide](./README.md)