compact-agent 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/README.md +394 -0
  2. package/bin/anycode.js +2 -0
  3. package/bin/crowcoder.js +19 -0
  4. package/bin/ecc-hooks.cjs +138 -0
  5. package/dist/agents.d.ts +17 -0
  6. package/dist/agents.js +1603 -0
  7. package/dist/agents.js.map +1 -0
  8. package/dist/api.d.ts +16 -0
  9. package/dist/api.js +115 -0
  10. package/dist/api.js.map +1 -0
  11. package/dist/autonomous-loops.d.ts +108 -0
  12. package/dist/autonomous-loops.js +526 -0
  13. package/dist/autonomous-loops.js.map +1 -0
  14. package/dist/codemaps.d.ts +53 -0
  15. package/dist/codemaps.js +325 -0
  16. package/dist/codemaps.js.map +1 -0
  17. package/dist/compaction.d.ts +30 -0
  18. package/dist/compaction.js +125 -0
  19. package/dist/compaction.js.map +1 -0
  20. package/dist/config.d.ts +5 -0
  21. package/dist/config.js +79 -0
  22. package/dist/config.js.map +1 -0
  23. package/dist/content-engine.d.ts +97 -0
  24. package/dist/content-engine.js +721 -0
  25. package/dist/content-engine.js.map +1 -0
  26. package/dist/cost-tracker.d.ts +49 -0
  27. package/dist/cost-tracker.js +150 -0
  28. package/dist/cost-tracker.js.map +1 -0
  29. package/dist/counter-button.d.ts +35 -0
  30. package/dist/counter-button.js +48 -0
  31. package/dist/counter-button.js.map +1 -0
  32. package/dist/counter.d.ts +21 -0
  33. package/dist/counter.js +31 -0
  34. package/dist/counter.js.map +1 -0
  35. package/dist/coverage.d.ts +23 -0
  36. package/dist/coverage.js +215 -0
  37. package/dist/coverage.js.map +1 -0
  38. package/dist/docs-sync.d.ts +23 -0
  39. package/dist/docs-sync.js +266 -0
  40. package/dist/docs-sync.js.map +1 -0
  41. package/dist/ecc.d.ts +41 -0
  42. package/dist/ecc.js +644 -0
  43. package/dist/ecc.js.map +1 -0
  44. package/dist/evaluation.d.ts +24 -0
  45. package/dist/evaluation.js +412 -0
  46. package/dist/evaluation.js.map +1 -0
  47. package/dist/export.d.ts +22 -0
  48. package/dist/export.js +109 -0
  49. package/dist/export.js.map +1 -0
  50. package/dist/git-workflow.d.ts +22 -0
  51. package/dist/git-workflow.js +197 -0
  52. package/dist/git-workflow.js.map +1 -0
  53. package/dist/hook-controls.d.ts +34 -0
  54. package/dist/hook-controls.js +90 -0
  55. package/dist/hook-controls.js.map +1 -0
  56. package/dist/hooks.d.ts +30 -0
  57. package/dist/hooks.js +130 -0
  58. package/dist/hooks.js.map +1 -0
  59. package/dist/html-parser.d.ts +18 -0
  60. package/dist/html-parser.js +101 -0
  61. package/dist/html-parser.js.map +1 -0
  62. package/dist/index.d.ts +12 -0
  63. package/dist/index.js +1230 -0
  64. package/dist/index.js.map +1 -0
  65. package/dist/learning.d.ts +35 -0
  66. package/dist/learning.js +238 -0
  67. package/dist/learning.js.map +1 -0
  68. package/dist/login.d.ts +37 -0
  69. package/dist/login.js +191 -0
  70. package/dist/login.js.map +1 -0
  71. package/dist/memory.d.ts +39 -0
  72. package/dist/memory.js +183 -0
  73. package/dist/memory.js.map +1 -0
  74. package/dist/model-router.d.ts +23 -0
  75. package/dist/model-router.js +145 -0
  76. package/dist/model-router.js.map +1 -0
  77. package/dist/modes.d.ts +17 -0
  78. package/dist/modes.js +217 -0
  79. package/dist/modes.js.map +1 -0
  80. package/dist/orchestration.d.ts +37 -0
  81. package/dist/orchestration.js +139 -0
  82. package/dist/orchestration.js.map +1 -0
  83. package/dist/package-detect.d.ts +36 -0
  84. package/dist/package-detect.js +529 -0
  85. package/dist/package-detect.js.map +1 -0
  86. package/dist/permissions.d.ts +25 -0
  87. package/dist/permissions.js +50 -0
  88. package/dist/permissions.js.map +1 -0
  89. package/dist/pm2-manager.d.ts +40 -0
  90. package/dist/pm2-manager.js +127 -0
  91. package/dist/pm2-manager.js.map +1 -0
  92. package/dist/query.d.ts +15 -0
  93. package/dist/query.js +278 -0
  94. package/dist/query.js.map +1 -0
  95. package/dist/refactor.d.ts +22 -0
  96. package/dist/refactor.js +226 -0
  97. package/dist/refactor.js.map +1 -0
  98. package/dist/retry.d.ts +20 -0
  99. package/dist/retry.js +88 -0
  100. package/dist/retry.js.map +1 -0
  101. package/dist/rules.d.ts +34 -0
  102. package/dist/rules.js +942 -0
  103. package/dist/rules.js.map +1 -0
  104. package/dist/schema.d.ts +23 -0
  105. package/dist/schema.js +12 -0
  106. package/dist/schema.js.map +1 -0
  107. package/dist/search-first.d.ts +17 -0
  108. package/dist/search-first.js +301 -0
  109. package/dist/search-first.js.map +1 -0
  110. package/dist/security.d.ts +10 -0
  111. package/dist/security.js +145 -0
  112. package/dist/security.js.map +1 -0
  113. package/dist/sessions.d.ts +21 -0
  114. package/dist/sessions.js +112 -0
  115. package/dist/sessions.js.map +1 -0
  116. package/dist/skill-create.d.ts +38 -0
  117. package/dist/skill-create.js +389 -0
  118. package/dist/skill-create.js.map +1 -0
  119. package/dist/skills.d.ts +34 -0
  120. package/dist/skills.js +161 -0
  121. package/dist/skills.js.map +1 -0
  122. package/dist/strategic-compaction.d.ts +24 -0
  123. package/dist/strategic-compaction.js +144 -0
  124. package/dist/strategic-compaction.js.map +1 -0
  125. package/dist/system-prompt.d.ts +3 -0
  126. package/dist/system-prompt.js +101 -0
  127. package/dist/system-prompt.js.map +1 -0
  128. package/dist/theme.d.ts +60 -0
  129. package/dist/theme.js +220 -0
  130. package/dist/theme.js.map +1 -0
  131. package/dist/tools/bash.d.ts +2 -0
  132. package/dist/tools/bash.js +49 -0
  133. package/dist/tools/bash.js.map +1 -0
  134. package/dist/tools/edit.d.ts +2 -0
  135. package/dist/tools/edit.js +76 -0
  136. package/dist/tools/edit.js.map +1 -0
  137. package/dist/tools/glob.d.ts +2 -0
  138. package/dist/tools/glob.js +54 -0
  139. package/dist/tools/glob.js.map +1 -0
  140. package/dist/tools/grep.d.ts +2 -0
  141. package/dist/tools/grep.js +64 -0
  142. package/dist/tools/grep.js.map +1 -0
  143. package/dist/tools/index.d.ts +5 -0
  144. package/dist/tools/index.js +27 -0
  145. package/dist/tools/index.js.map +1 -0
  146. package/dist/tools/list-dir.d.ts +2 -0
  147. package/dist/tools/list-dir.js +51 -0
  148. package/dist/tools/list-dir.js.map +1 -0
  149. package/dist/tools/read.d.ts +2 -0
  150. package/dist/tools/read.js +56 -0
  151. package/dist/tools/read.js.map +1 -0
  152. package/dist/tools/types.d.ts +45 -0
  153. package/dist/tools/types.js +2 -0
  154. package/dist/tools/types.js.map +1 -0
  155. package/dist/tools/web-fetch.d.ts +2 -0
  156. package/dist/tools/web-fetch.js +41 -0
  157. package/dist/tools/web-fetch.js.map +1 -0
  158. package/dist/tools/web-search.d.ts +27 -0
  159. package/dist/tools/web-search.js +139 -0
  160. package/dist/tools/web-search.js.map +1 -0
  161. package/dist/tools/write.d.ts +2 -0
  162. package/dist/tools/write.js +36 -0
  163. package/dist/tools/write.js.map +1 -0
  164. package/dist/types.d.ts +28 -0
  165. package/dist/types.js +57 -0
  166. package/dist/types.js.map +1 -0
  167. package/dist/users.d.ts +51 -0
  168. package/dist/users.js +193 -0
  169. package/dist/users.js.map +1 -0
  170. package/dist/verification.d.ts +73 -0
  171. package/dist/verification.js +269 -0
  172. package/dist/verification.js.map +1 -0
  173. package/dist/walkthrough.d.ts +10 -0
  174. package/dist/walkthrough.js +121 -0
  175. package/dist/walkthrough.js.map +1 -0
  176. package/package.json +58 -0
  177. package/resources/ecc/agents/architect.json +16 -0
  178. package/resources/ecc/agents/architect.md +212 -0
  179. package/resources/ecc/agents/build-error-resolver.json +17 -0
  180. package/resources/ecc/agents/build-error-resolver.md +116 -0
  181. package/resources/ecc/agents/chief-of-staff.json +17 -0
  182. package/resources/ecc/agents/chief-of-staff.md +153 -0
  183. package/resources/ecc/agents/code-reviewer.json +16 -0
  184. package/resources/ecc/agents/code-reviewer.md +238 -0
  185. package/resources/ecc/agents/database-reviewer.json +16 -0
  186. package/resources/ecc/agents/database-reviewer.md +92 -0
  187. package/resources/ecc/agents/doc-updater.json +16 -0
  188. package/resources/ecc/agents/doc-updater.md +108 -0
  189. package/resources/ecc/agents/e2e-runner.json +17 -0
  190. package/resources/ecc/agents/e2e-runner.md +109 -0
  191. package/resources/ecc/agents/go-build-resolver.json +17 -0
  192. package/resources/ecc/agents/go-build-resolver.md +96 -0
  193. package/resources/ecc/agents/go-reviewer.json +16 -0
  194. package/resources/ecc/agents/go-reviewer.md +77 -0
  195. package/resources/ecc/agents/harness-optimizer.json +15 -0
  196. package/resources/ecc/agents/harness-optimizer.md +34 -0
  197. package/resources/ecc/agents/loop-operator.json +16 -0
  198. package/resources/ecc/agents/loop-operator.md +36 -0
  199. package/resources/ecc/agents/planner.json +15 -0
  200. package/resources/ecc/agents/planner.md +212 -0
  201. package/resources/ecc/agents/python-reviewer.json +16 -0
  202. package/resources/ecc/agents/python-reviewer.md +99 -0
  203. package/resources/ecc/agents/refactor-cleaner.json +17 -0
  204. package/resources/ecc/agents/refactor-cleaner.md +87 -0
  205. package/resources/ecc/agents/security-reviewer.json +16 -0
  206. package/resources/ecc/agents/security-reviewer.md +109 -0
  207. package/resources/ecc/agents/tdd-guide.json +17 -0
  208. package/resources/ecc/agents/tdd-guide.md +93 -0
  209. package/resources/ecc/commands/add-language-rules.md +39 -0
  210. package/resources/ecc/commands/database-migration.md +36 -0
  211. package/resources/ecc/commands/feature-development.md +38 -0
  212. package/resources/ecc/prompts/build-fix.prompt.md +47 -0
  213. package/resources/ecc/prompts/code-review.prompt.md +56 -0
  214. package/resources/ecc/prompts/plan.prompt.md +52 -0
  215. package/resources/ecc/prompts/refactor.prompt.md +50 -0
  216. package/resources/ecc/prompts/security-review.prompt.md +70 -0
  217. package/resources/ecc/prompts/tdd.prompt.md +47 -0
  218. package/resources/ecc/rules/common-agents.md +53 -0
  219. package/resources/ecc/rules/common-coding-style.md +52 -0
  220. package/resources/ecc/rules/common-development-workflow.md +33 -0
  221. package/resources/ecc/rules/common-git-workflow.md +28 -0
  222. package/resources/ecc/rules/common-hooks.md +34 -0
  223. package/resources/ecc/rules/common-patterns.md +35 -0
  224. package/resources/ecc/rules/common-performance.md +59 -0
  225. package/resources/ecc/rules/common-security.md +33 -0
  226. package/resources/ecc/rules/common-testing.md +33 -0
  227. package/resources/ecc/rules/golang-coding-style.md +31 -0
  228. package/resources/ecc/rules/golang-hooks.md +16 -0
  229. package/resources/ecc/rules/golang-patterns.md +44 -0
  230. package/resources/ecc/rules/golang-security.md +33 -0
  231. package/resources/ecc/rules/golang-testing.md +30 -0
  232. package/resources/ecc/rules/kotlin-coding-style.md +39 -0
  233. package/resources/ecc/rules/kotlin-hooks.md +16 -0
  234. package/resources/ecc/rules/kotlin-patterns.md +50 -0
  235. package/resources/ecc/rules/kotlin-security.md +58 -0
  236. package/resources/ecc/rules/kotlin-testing.md +38 -0
  237. package/resources/ecc/rules/php-coding-style.md +25 -0
  238. package/resources/ecc/rules/php-hooks.md +21 -0
  239. package/resources/ecc/rules/php-patterns.md +23 -0
  240. package/resources/ecc/rules/php-security.md +24 -0
  241. package/resources/ecc/rules/php-testing.md +26 -0
  242. package/resources/ecc/rules/python-coding-style.md +42 -0
  243. package/resources/ecc/rules/python-hooks.md +19 -0
  244. package/resources/ecc/rules/python-patterns.md +39 -0
  245. package/resources/ecc/rules/python-security.md +30 -0
  246. package/resources/ecc/rules/python-testing.md +38 -0
  247. package/resources/ecc/rules/swift-coding-style.md +47 -0
  248. package/resources/ecc/rules/swift-hooks.md +20 -0
  249. package/resources/ecc/rules/swift-patterns.md +66 -0
  250. package/resources/ecc/rules/swift-security.md +33 -0
  251. package/resources/ecc/rules/swift-testing.md +45 -0
  252. package/resources/ecc/rules/typescript-coding-style.md +63 -0
  253. package/resources/ecc/rules/typescript-hooks.md +20 -0
  254. package/resources/ecc/rules/typescript-patterns.md +50 -0
  255. package/resources/ecc/rules/typescript-security.md +26 -0
  256. package/resources/ecc/rules/typescript-testing.md +16 -0
  257. package/resources/ecc/skills/agent-introspection-debugging/SKILL.md +152 -0
  258. package/resources/ecc/skills/agent-introspection-debugging/agents/openai.yaml +7 -0
  259. package/resources/ecc/skills/agent-sort/SKILL.md +214 -0
  260. package/resources/ecc/skills/agent-sort/agents/openai.yaml +7 -0
  261. package/resources/ecc/skills/api-design/SKILL.md +522 -0
  262. package/resources/ecc/skills/api-design/agents/openai.yaml +7 -0
  263. package/resources/ecc/skills/article-writing/SKILL.md +78 -0
  264. package/resources/ecc/skills/article-writing/agents/openai.yaml +7 -0
  265. package/resources/ecc/skills/backend-patterns/SKILL.md +597 -0
  266. package/resources/ecc/skills/backend-patterns/agents/openai.yaml +7 -0
  267. package/resources/ecc/skills/brand-voice/SKILL.md +96 -0
  268. package/resources/ecc/skills/brand-voice/agents/openai.yaml +7 -0
  269. package/resources/ecc/skills/brand-voice/references/voice-profile-schema.md +55 -0
  270. package/resources/ecc/skills/bun-runtime/SKILL.md +83 -0
  271. package/resources/ecc/skills/bun-runtime/agents/openai.yaml +7 -0
  272. package/resources/ecc/skills/coding-standards/SKILL.md +548 -0
  273. package/resources/ecc/skills/coding-standards/agents/openai.yaml +7 -0
  274. package/resources/ecc/skills/content-engine/SKILL.md +130 -0
  275. package/resources/ecc/skills/content-engine/agents/openai.yaml +7 -0
  276. package/resources/ecc/skills/crosspost/SKILL.md +110 -0
  277. package/resources/ecc/skills/crosspost/agents/openai.yaml +7 -0
  278. package/resources/ecc/skills/deep-research/SKILL.md +154 -0
  279. package/resources/ecc/skills/deep-research/agents/openai.yaml +7 -0
  280. package/resources/ecc/skills/dmux-workflows/SKILL.md +143 -0
  281. package/resources/ecc/skills/dmux-workflows/agents/openai.yaml +7 -0
  282. package/resources/ecc/skills/documentation-lookup/SKILL.md +89 -0
  283. package/resources/ecc/skills/documentation-lookup/agents/openai.yaml +7 -0
  284. package/resources/ecc/skills/e2e-testing/SKILL.md +325 -0
  285. package/resources/ecc/skills/e2e-testing/agents/openai.yaml +7 -0
  286. package/resources/ecc/skills/eval-harness/SKILL.md +235 -0
  287. package/resources/ecc/skills/eval-harness/agents/openai.yaml +7 -0
  288. package/resources/ecc/skills/everything-claude-code/SKILL.md +442 -0
  289. package/resources/ecc/skills/everything-claude-code/agents/openai.yaml +7 -0
  290. package/resources/ecc/skills/exa-search/SKILL.md +169 -0
  291. package/resources/ecc/skills/exa-search/agents/openai.yaml +7 -0
  292. package/resources/ecc/skills/fal-ai-media/SKILL.md +276 -0
  293. package/resources/ecc/skills/fal-ai-media/agents/openai.yaml +7 -0
  294. package/resources/ecc/skills/frontend-patterns/SKILL.md +647 -0
  295. package/resources/ecc/skills/frontend-patterns/agents/openai.yaml +7 -0
  296. package/resources/ecc/skills/frontend-slides/SKILL.md +183 -0
  297. package/resources/ecc/skills/frontend-slides/STYLE_PRESETS.md +330 -0
  298. package/resources/ecc/skills/frontend-slides/agents/openai.yaml +7 -0
  299. package/resources/ecc/skills/investor-materials/SKILL.md +95 -0
  300. package/resources/ecc/skills/investor-materials/agents/openai.yaml +7 -0
  301. package/resources/ecc/skills/investor-outreach/SKILL.md +90 -0
  302. package/resources/ecc/skills/investor-outreach/agents/openai.yaml +7 -0
  303. package/resources/ecc/skills/market-research/SKILL.md +74 -0
  304. package/resources/ecc/skills/market-research/agents/openai.yaml +7 -0
  305. package/resources/ecc/skills/mcp-server-patterns/SKILL.md +66 -0
  306. package/resources/ecc/skills/mcp-server-patterns/agents/openai.yaml +7 -0
  307. package/resources/ecc/skills/mle-workflow/SKILL.md +346 -0
  308. package/resources/ecc/skills/mle-workflow/agents/openai.yaml +7 -0
  309. package/resources/ecc/skills/nextjs-turbopack/SKILL.md +43 -0
  310. package/resources/ecc/skills/nextjs-turbopack/agents/openai.yaml +7 -0
  311. package/resources/ecc/skills/product-capability/SKILL.md +140 -0
  312. package/resources/ecc/skills/product-capability/agents/openai.yaml +7 -0
  313. package/resources/ecc/skills/security-review/SKILL.md +494 -0
  314. package/resources/ecc/skills/security-review/agents/openai.yaml +7 -0
  315. package/resources/ecc/skills/strategic-compact/SKILL.md +102 -0
  316. package/resources/ecc/skills/strategic-compact/agents/openai.yaml +7 -0
  317. package/resources/ecc/skills/tdd-workflow/SKILL.md +409 -0
  318. package/resources/ecc/skills/tdd-workflow/agents/openai.yaml +7 -0
  319. package/resources/ecc/skills/verification-loop/SKILL.md +125 -0
  320. package/resources/ecc/skills/verification-loop/agents/openai.yaml +7 -0
  321. package/resources/ecc/skills/video-editing/SKILL.md +307 -0
  322. package/resources/ecc/skills/video-editing/agents/openai.yaml +7 -0
  323. package/resources/ecc/skills/x-api/SKILL.md +229 -0
  324. package/resources/ecc/skills/x-api/agents/openai.yaml +7 -0
@@ -0,0 +1,325 @@
1
+ ---
2
+ name: e2e-testing
3
+ description: Playwright E2E testing patterns, Page Object Model, configuration, CI/CD integration, artifact management, and flaky test strategies.
4
+ ---
5
+
6
+ # E2E Testing Patterns
7
+
8
+ Comprehensive Playwright patterns for building stable, fast, and maintainable E2E test suites.
9
+
10
+ ## Test File Organization
11
+
12
+ ```
13
+ tests/
14
+ ├── e2e/
15
+ │ ├── auth/
16
+ │ │ ├── login.spec.ts
17
+ │ │ ├── logout.spec.ts
18
+ │ │ └── register.spec.ts
19
+ │ ├── features/
20
+ │ │ ├── browse.spec.ts
21
+ │ │ ├── search.spec.ts
22
+ │ │ └── create.spec.ts
23
+ │ └── api/
24
+ │ └── endpoints.spec.ts
25
+ ├── fixtures/
26
+ │ ├── auth.ts
27
+ │ └── data.ts
28
+ └── playwright.config.ts
29
+ ```
30
+
31
+ ## Page Object Model (POM)
32
+
33
+ ```typescript
34
+ import { Page, Locator } from '@playwright/test'
35
+
36
+ export class ItemsPage {
37
+ readonly page: Page
38
+ readonly searchInput: Locator
39
+ readonly itemCards: Locator
40
+ readonly createButton: Locator
41
+
42
+ constructor(page: Page) {
43
+ this.page = page
44
+ this.searchInput = page.locator('[data-testid="search-input"]')
45
+ this.itemCards = page.locator('[data-testid="item-card"]')
46
+ this.createButton = page.locator('[data-testid="create-btn"]')
47
+ }
48
+
49
+ async goto() {
50
+ await this.page.goto('/items')
51
+ await this.page.waitForLoadState('networkidle')
52
+ }
53
+
54
+ async search(query: string) {
55
+ await this.searchInput.fill(query)
56
+ await this.page.waitForResponse(resp => resp.url().includes('/api/search'))
57
+ await this.page.waitForLoadState('networkidle')
58
+ }
59
+
60
+ async getItemCount() {
61
+ return await this.itemCards.count()
62
+ }
63
+ }
64
+ ```
65
+
66
+ ## Test Structure
67
+
68
+ ```typescript
69
+ import { test, expect } from '@playwright/test'
70
+ import { ItemsPage } from '../../pages/ItemsPage'
71
+
72
+ test.describe('Item Search', () => {
73
+ let itemsPage: ItemsPage
74
+
75
+ test.beforeEach(async ({ page }) => {
76
+ itemsPage = new ItemsPage(page)
77
+ await itemsPage.goto()
78
+ })
79
+
80
+ test('should search by keyword', async ({ page }) => {
81
+ await itemsPage.search('test')
82
+
83
+ const count = await itemsPage.getItemCount()
84
+ expect(count).toBeGreaterThan(0)
85
+
86
+ await expect(itemsPage.itemCards.first()).toContainText(/test/i)
87
+ await page.screenshot({ path: 'artifacts/search-results.png' })
88
+ })
89
+
90
+ test('should handle no results', async ({ page }) => {
91
+ await itemsPage.search('xyznonexistent123')
92
+
93
+ await expect(page.locator('[data-testid="no-results"]')).toBeVisible()
94
+ expect(await itemsPage.getItemCount()).toBe(0)
95
+ })
96
+ })
97
+ ```
98
+
99
+ ## Playwright Configuration
100
+
101
+ ```typescript
102
+ import { defineConfig, devices } from '@playwright/test'
103
+
104
+ export default defineConfig({
105
+ testDir: './tests/e2e',
106
+ fullyParallel: true,
107
+ forbidOnly: !!process.env.CI,
108
+ retries: process.env.CI ? 2 : 0,
109
+ workers: process.env.CI ? 1 : undefined,
110
+ reporter: [
111
+ ['html', { outputFolder: 'playwright-report' }],
112
+ ['junit', { outputFile: 'playwright-results.xml' }],
113
+ ['json', { outputFile: 'playwright-results.json' }]
114
+ ],
115
+ use: {
116
+ baseURL: process.env.BASE_URL || 'http://localhost:3000',
117
+ trace: 'on-first-retry',
118
+ screenshot: 'only-on-failure',
119
+ video: 'retain-on-failure',
120
+ actionTimeout: 10000,
121
+ navigationTimeout: 30000,
122
+ },
123
+ projects: [
124
+ { name: 'chromium', use: { ...devices['Desktop Chrome'] } },
125
+ { name: 'firefox', use: { ...devices['Desktop Firefox'] } },
126
+ { name: 'webkit', use: { ...devices['Desktop Safari'] } },
127
+ { name: 'mobile-chrome', use: { ...devices['Pixel 5'] } },
128
+ ],
129
+ webServer: {
130
+ command: 'npm run dev',
131
+ url: 'http://localhost:3000',
132
+ reuseExistingServer: !process.env.CI,
133
+ timeout: 120000,
134
+ },
135
+ })
136
+ ```
137
+
138
+ ## Flaky Test Patterns
139
+
140
+ ### Quarantine
141
+
142
+ ```typescript
143
+ test('flaky: complex search', async ({ page }) => {
144
+ test.fixme(true, 'Flaky - Issue #123')
145
+ // test code...
146
+ })
147
+
148
+ test('conditional skip', async ({ page }) => {
149
+ test.skip(!!process.env.CI, 'Flaky in CI - Issue #123')
150
+ // test code...
151
+ })
152
+ ```
153
+
154
+ ### Identify Flakiness
155
+
156
+ ```bash
157
+ npx playwright test tests/e2e/features/search.spec.ts --repeat-each=10
158
+ npx playwright test tests/e2e/features/search.spec.ts --retries=3
159
+ ```
160
+
161
+ ### Common Causes & Fixes
162
+
163
+ **Race conditions:**
164
+ ```typescript
165
+ // Bad: assumes element is ready
166
+ await page.click('[data-testid="button"]')
167
+
168
+ // Good: auto-wait locator
169
+ await page.locator('[data-testid="button"]').click()
170
+ ```
171
+
172
+ **Network timing:**
173
+ ```typescript
174
+ // Bad: arbitrary timeout
175
+ await page.waitForTimeout(5000)
176
+
177
+ // Good: wait for specific condition
178
+ await page.waitForResponse(resp => resp.url().includes('/api/data'))
179
+ ```
180
+
181
+ **Animation timing:**
182
+ ```typescript
183
+ // Bad: click during animation
184
+ await page.click('[data-testid="menu-item"]')
185
+
186
+ // Good: wait for stability
187
+ await page.locator('[data-testid="menu-item"]').waitFor({ state: 'visible' })
188
+ await page.waitForLoadState('networkidle')
189
+ await page.locator('[data-testid="menu-item"]').click()
190
+ ```
191
+
192
+ ## Artifact Management
193
+
194
+ ### Screenshots
195
+
196
+ ```typescript
197
+ await page.screenshot({ path: 'artifacts/after-login.png' })
198
+ await page.screenshot({ path: 'artifacts/full-page.png', fullPage: true })
199
+ await page.locator('[data-testid="chart"]').screenshot({ path: 'artifacts/chart.png' })
200
+ ```
201
+
202
+ ### Traces
203
+
204
+ ```typescript
205
+ await context.tracing.start({
206
+ screenshots: true,
207
+ snapshots: true,
208
+ sources: true,
209
+ })
210
+ // ... test actions ...
211
+ await context.tracing.stop({ path: 'artifacts/trace.zip' })
212
+ ```
213
+
214
+ ### Video
215
+
216
+ ```typescript
217
+ // In playwright.config.ts
218
+ use: {
219
+ video: 'retain-on-failure',
220
+ videosPath: 'artifacts/videos/'
221
+ }
222
+ ```
223
+
224
+ ## CI/CD Integration
225
+
226
+ ```yaml
227
+ # .github/workflows/e2e.yml
228
+ name: E2E Tests
229
+ on: [push, pull_request]
230
+
231
+ jobs:
232
+ test:
233
+ runs-on: ubuntu-latest
234
+ steps:
235
+ - uses: actions/checkout@v4
236
+ - uses: actions/setup-node@v4
237
+ with:
238
+ node-version: 20
239
+ - run: npm ci
240
+ - run: npx playwright install --with-deps
241
+ - run: npx playwright test
242
+ env:
243
+ BASE_URL: ${{ vars.STAGING_URL }}
244
+ - uses: actions/upload-artifact@v4
245
+ if: always()
246
+ with:
247
+ name: playwright-report
248
+ path: playwright-report/
249
+ retention-days: 30
250
+ ```
251
+
252
+ ## Test Report Template
253
+
254
+ ```markdown
255
+ # E2E Test Report
256
+
257
+ **Date:** YYYY-MM-DD HH:MM
258
+ **Duration:** Xm Ys
259
+ **Status:** PASSING / FAILING
260
+
261
+ ## Summary
262
+ - Total: X | Passed: Y (Z%) | Failed: A | Flaky: B | Skipped: C
263
+
264
+ ## Failed Tests
265
+
266
+ ### test-name
267
+ **File:** `tests/e2e/feature.spec.ts:45`
268
+ **Error:** Expected element to be visible
269
+ **Screenshot:** artifacts/failed.png
270
+ **Recommended Fix:** [description]
271
+
272
+ ## Artifacts
273
+ - HTML Report: playwright-report/index.html
274
+ - Screenshots: artifacts/*.png
275
+ - Videos: artifacts/videos/*.webm
276
+ - Traces: artifacts/*.zip
277
+ ```
278
+
279
+ ## Wallet / Web3 Testing
280
+
281
+ ```typescript
282
+ test('wallet connection', async ({ page, context }) => {
283
+ // Mock wallet provider
284
+ await context.addInitScript(() => {
285
+ window.ethereum = {
286
+ isMetaMask: true,
287
+ request: async ({ method }) => {
288
+ if (method === 'eth_requestAccounts')
289
+ return ['0x1234567890123456789012345678901234567890']
290
+ if (method === 'eth_chainId') return '0x1'
291
+ }
292
+ }
293
+ })
294
+
295
+ await page.goto('/')
296
+ await page.locator('[data-testid="connect-wallet"]').click()
297
+ await expect(page.locator('[data-testid="wallet-address"]')).toContainText('0x1234')
298
+ })
299
+ ```
300
+
301
+ ## Financial / Critical Flow Testing
302
+
303
+ ```typescript
304
+ test('trade execution', async ({ page }) => {
305
+ // Skip on production — real money
306
+ test.skip(process.env.NODE_ENV === 'production', 'Skip on production')
307
+
308
+ await page.goto('/markets/test-market')
309
+ await page.locator('[data-testid="position-yes"]').click()
310
+ await page.locator('[data-testid="trade-amount"]').fill('1.0')
311
+
312
+ // Verify preview
313
+ const preview = page.locator('[data-testid="trade-preview"]')
314
+ await expect(preview).toContainText('1.0')
315
+
316
+ // Confirm and wait for blockchain
317
+ await page.locator('[data-testid="confirm-trade"]').click()
318
+ await page.waitForResponse(
319
+ resp => resp.url().includes('/api/trade') && resp.status() === 200,
320
+ { timeout: 30000 }
321
+ )
322
+
323
+ await expect(page.locator('[data-testid="trade-success"]')).toBeVisible()
324
+ })
325
+ ```
@@ -0,0 +1,7 @@
1
+ interface:
2
+ display_name: "E2E Testing"
3
+ short_description: "Playwright E2E testing patterns"
4
+ brand_color: "#06B6D4"
5
+ default_prompt: "Use $e2e-testing to design Playwright end-to-end test coverage."
6
+ policy:
7
+ allow_implicit_invocation: true
@@ -0,0 +1,235 @@
1
+ ---
2
+ name: eval-harness
3
+ description: Formal evaluation framework for Claude Code sessions, implementing eval-driven development (EDD) principles
4
+ allowed-tools: Read, Write, Edit, Bash, Grep, Glob
5
+ ---
6
+
7
+ # Eval Harness Skill
8
+
9
+ A formal evaluation framework for Claude Code sessions, implementing eval-driven development (EDD) principles.
10
+
11
+ ## When to Activate
12
+
13
+ - Setting up eval-driven development (EDD) for AI-assisted workflows
14
+ - Defining pass/fail criteria for Claude Code task completion
15
+ - Measuring agent reliability with pass@k metrics
16
+ - Creating regression test suites for prompt or agent changes
17
+ - Benchmarking agent performance across model versions
18
+
19
+ ## Philosophy
20
+
21
+ Eval-Driven Development treats evals as the "unit tests of AI development":
22
+ - Define expected behavior BEFORE implementation
23
+ - Run evals continuously during development
24
+ - Track regressions with each change
25
+ - Use pass@k metrics for reliability measurement
26
+
27
+ ## Eval Types
28
+
29
+ ### Capability Evals
30
+ Test if Claude can do something it couldn't before:
31
+ ```markdown
32
+ [CAPABILITY EVAL: feature-name]
33
+ Task: Description of what Claude should accomplish
34
+ Success Criteria:
35
+ - [ ] Criterion 1
36
+ - [ ] Criterion 2
37
+ - [ ] Criterion 3
38
+ Expected Output: Description of expected result
39
+ ```
40
+
41
+ ### Regression Evals
42
+ Ensure changes don't break existing functionality:
43
+ ```markdown
44
+ [REGRESSION EVAL: feature-name]
45
+ Baseline: SHA or checkpoint name
46
+ Tests:
47
+ - existing-test-1: PASS/FAIL
48
+ - existing-test-2: PASS/FAIL
49
+ - existing-test-3: PASS/FAIL
50
+ Result: X/Y passed (previously Y/Y)
51
+ ```
52
+
53
+ ## Grader Types
54
+
55
+ ### 1. Code-Based Grader
56
+ Deterministic checks using code:
57
+ ```bash
58
+ # Check if file contains expected pattern
59
+ grep -q "export function handleAuth" src/auth.ts && echo "PASS" || echo "FAIL"
60
+
61
+ # Check if tests pass
62
+ npm test -- --testPathPattern="auth" && echo "PASS" || echo "FAIL"
63
+
64
+ # Check if build succeeds
65
+ npm run build && echo "PASS" || echo "FAIL"
66
+ ```
67
+
68
+ ### 2. Model-Based Grader
69
+ Use Claude to evaluate open-ended outputs:
70
+ ```markdown
71
+ [MODEL GRADER PROMPT]
72
+ Evaluate the following code change:
73
+ 1. Does it solve the stated problem?
74
+ 2. Is it well-structured?
75
+ 3. Are edge cases handled?
76
+ 4. Is error handling appropriate?
77
+
78
+ Score: 1-5 (1=poor, 5=excellent)
79
+ Reasoning: [explanation]
80
+ ```
81
+
82
+ ### 3. Human Grader
83
+ Flag for manual review:
84
+ ```markdown
85
+ [HUMAN REVIEW REQUIRED]
86
+ Change: Description of what changed
87
+ Reason: Why human review is needed
88
+ Risk Level: LOW/MEDIUM/HIGH
89
+ ```
90
+
91
+ ## Metrics
92
+
93
+ ### pass@k
94
+ "At least one success in k attempts"
95
+ - pass@1: First attempt success rate
96
+ - pass@3: Success within 3 attempts
97
+ - Typical target: pass@3 > 90%
98
+
99
+ ### pass^k
100
+ "All k trials succeed"
101
+ - Higher bar for reliability
102
+ - pass^3: 3 consecutive successes
103
+ - Use for critical paths
104
+
105
+ ## Eval Workflow
106
+
107
+ ### 1. Define (Before Coding)
108
+ ```markdown
109
+ ## EVAL DEFINITION: feature-xyz
110
+
111
+ ### Capability Evals
112
+ 1. Can create new user account
113
+ 2. Can validate email format
114
+ 3. Can hash password securely
115
+
116
+ ### Regression Evals
117
+ 1. Existing login still works
118
+ 2. Session management unchanged
119
+ 3. Logout flow intact
120
+
121
+ ### Success Metrics
122
+ - pass@3 > 90% for capability evals
123
+ - pass^3 = 100% for regression evals
124
+ ```
125
+
126
+ ### 2. Implement
127
+ Write code to pass the defined evals.
128
+
129
+ ### 3. Evaluate
130
+ ```bash
131
+ # Run capability evals
132
+ [Run each capability eval, record PASS/FAIL]
133
+
134
+ # Run regression evals
135
+ npm test -- --testPathPattern="existing"
136
+
137
+ # Generate report
138
+ ```
139
+
140
+ ### 4. Report
141
+ ```markdown
142
+ EVAL REPORT: feature-xyz
143
+ ========================
144
+
145
+ Capability Evals:
146
+ create-user: PASS (pass@1)
147
+ validate-email: PASS (pass@2)
148
+ hash-password: PASS (pass@1)
149
+ Overall: 3/3 passed
150
+
151
+ Regression Evals:
152
+ login-flow: PASS
153
+ session-mgmt: PASS
154
+ logout-flow: PASS
155
+ Overall: 3/3 passed
156
+
157
+ Metrics:
158
+ pass@1: 67% (2/3)
159
+ pass@3: 100% (3/3)
160
+
161
+ Status: READY FOR REVIEW
162
+ ```
163
+
164
+ ## Integration Patterns
165
+
166
+ ### Pre-Implementation
167
+ ```
168
+ /eval define feature-name
169
+ ```
170
+ Creates eval definition file at `.claude/evals/feature-name.md`
171
+
172
+ ### During Implementation
173
+ ```
174
+ /eval check feature-name
175
+ ```
176
+ Runs current evals and reports status
177
+
178
+ ### Post-Implementation
179
+ ```
180
+ /eval report feature-name
181
+ ```
182
+ Generates full eval report
183
+
184
+ ## Eval Storage
185
+
186
+ Store evals in project:
187
+ ```
188
+ .claude/
189
+ evals/
190
+ feature-xyz.md # Eval definition
191
+ feature-xyz.log # Eval run history
192
+ baseline.json # Regression baselines
193
+ ```
194
+
195
+ ## Best Practices
196
+
197
+ 1. **Define evals BEFORE coding** - Forces clear thinking about success criteria
198
+ 2. **Run evals frequently** - Catch regressions early
199
+ 3. **Track pass@k over time** - Monitor reliability trends
200
+ 4. **Use code graders when possible** - Deterministic > probabilistic
201
+ 5. **Human review for security** - Never fully automate security checks
202
+ 6. **Keep evals fast** - Slow evals don't get run
203
+ 7. **Version evals with code** - Evals are first-class artifacts
204
+
205
+ ## Example: Adding Authentication
206
+
207
+ ```markdown
208
+ ## EVAL: add-authentication
209
+
210
+ ### Phase 1: Define (10 min)
211
+ Capability Evals:
212
+ - [ ] User can register with email/password
213
+ - [ ] User can login with valid credentials
214
+ - [ ] Invalid credentials rejected with proper error
215
+ - [ ] Sessions persist across page reloads
216
+ - [ ] Logout clears session
217
+
218
+ Regression Evals:
219
+ - [ ] Public routes still accessible
220
+ - [ ] API responses unchanged
221
+ - [ ] Database schema compatible
222
+
223
+ ### Phase 2: Implement (varies)
224
+ [Write code]
225
+
226
+ ### Phase 3: Evaluate
227
+ Run: /eval check add-authentication
228
+
229
+ ### Phase 4: Report
230
+ EVAL REPORT: add-authentication
231
+ ==============================
232
+ Capability: 5/5 passed (pass@3: 100%)
233
+ Regression: 3/3 passed (pass^3: 100%)
234
+ Status: SHIP IT
235
+ ```
@@ -0,0 +1,7 @@
1
+ interface:
2
+ display_name: "Eval Harness"
3
+ short_description: "Eval-driven development harnesses"
4
+ brand_color: "#EC4899"
5
+ default_prompt: "Use $eval-harness to define eval-driven development checks."
6
+ policy:
7
+ allow_implicit_invocation: true