@runchr/gstack-antigravity 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @runchr/gstack-antigravity might be problematic. Click here for more details.

Files changed (231) hide show
  1. package/.agents/skills/gstack/.agents/skills/gstack/SKILL.md +651 -0
  2. package/.agents/skills/gstack/.agents/skills/gstack-autoplan/SKILL.md +678 -0
  3. package/.agents/skills/gstack/.agents/skills/gstack-benchmark/SKILL.md +482 -0
  4. package/.agents/skills/gstack/.agents/skills/gstack-browse/SKILL.md +511 -0
  5. package/.agents/skills/gstack/.agents/skills/gstack-canary/SKILL.md +486 -0
  6. package/.agents/skills/gstack/.agents/skills/gstack-careful/SKILL.md +50 -0
  7. package/.agents/skills/gstack/.agents/skills/gstack-cso/SKILL.md +607 -0
  8. package/.agents/skills/gstack/.agents/skills/gstack-design-consultation/SKILL.md +615 -0
  9. package/.agents/skills/gstack/.agents/skills/gstack-design-review/SKILL.md +988 -0
  10. package/.agents/skills/gstack/.agents/skills/gstack-document-release/SKILL.md +604 -0
  11. package/.agents/skills/gstack/.agents/skills/gstack-freeze/SKILL.md +67 -0
  12. package/.agents/skills/gstack/.agents/skills/gstack-guard/SKILL.md +62 -0
  13. package/.agents/skills/gstack/.agents/skills/gstack-investigate/SKILL.md +415 -0
  14. package/.agents/skills/gstack/.agents/skills/gstack-land-and-deploy/SKILL.md +873 -0
  15. package/.agents/skills/gstack/.agents/skills/gstack-office-hours/SKILL.md +986 -0
  16. package/.agents/skills/gstack/.agents/skills/gstack-plan-ceo-review/SKILL.md +1268 -0
  17. package/.agents/skills/gstack/.agents/skills/gstack-plan-design-review/SKILL.md +668 -0
  18. package/.agents/skills/gstack/.agents/skills/gstack-plan-eng-review/SKILL.md +826 -0
  19. package/.agents/skills/gstack/.agents/skills/gstack-qa/SKILL.md +1006 -0
  20. package/.agents/skills/gstack/.agents/skills/gstack-qa-only/SKILL.md +626 -0
  21. package/.agents/skills/gstack/.agents/skills/gstack-retro/SKILL.md +1065 -0
  22. package/.agents/skills/gstack/.agents/skills/gstack-review/SKILL.md +704 -0
  23. package/.agents/skills/gstack/.agents/skills/gstack-setup-browser-cookies/SKILL.md +325 -0
  24. package/.agents/skills/gstack/.agents/skills/gstack-setup-deploy/SKILL.md +450 -0
  25. package/.agents/skills/gstack/.agents/skills/gstack-ship/SKILL.md +1312 -0
  26. package/.agents/skills/gstack/.agents/skills/gstack-unfreeze/SKILL.md +36 -0
  27. package/.agents/skills/gstack/.agents/skills/gstack-upgrade/SKILL.md +220 -0
  28. package/.agents/skills/gstack/.env.example +5 -0
  29. package/.agents/skills/gstack/.github/workflows/skill-docs.yml +17 -0
  30. package/.agents/skills/gstack/AGENTS.md +49 -0
  31. package/.agents/skills/gstack/ARCHITECTURE.md +359 -0
  32. package/.agents/skills/gstack/BROWSER.md +271 -0
  33. package/.agents/skills/gstack/CHANGELOG.md +800 -0
  34. package/.agents/skills/gstack/CLAUDE.md +284 -0
  35. package/.agents/skills/gstack/CONTRIBUTING.md +370 -0
  36. package/.agents/skills/gstack/ETHOS.md +129 -0
  37. package/.agents/skills/gstack/LICENSE +21 -0
  38. package/.agents/skills/gstack/README.md +228 -0
  39. package/.agents/skills/gstack/SKILL.md +657 -0
  40. package/.agents/skills/gstack/SKILL.md.tmpl +281 -0
  41. package/.agents/skills/gstack/TODOS.md +564 -0
  42. package/.agents/skills/gstack/VERSION +1 -0
  43. package/.agents/skills/gstack/autoplan/SKILL.md +689 -0
  44. package/.agents/skills/gstack/autoplan/SKILL.md.tmpl +416 -0
  45. package/.agents/skills/gstack/benchmark/SKILL.md +489 -0
  46. package/.agents/skills/gstack/benchmark/SKILL.md.tmpl +233 -0
  47. package/.agents/skills/gstack/bin/dev-setup +68 -0
  48. package/.agents/skills/gstack/bin/dev-teardown +56 -0
  49. package/.agents/skills/gstack/bin/gstack-analytics +191 -0
  50. package/.agents/skills/gstack/bin/gstack-community-dashboard +113 -0
  51. package/.agents/skills/gstack/bin/gstack-config +38 -0
  52. package/.agents/skills/gstack/bin/gstack-diff-scope +71 -0
  53. package/.agents/skills/gstack/bin/gstack-global-discover.ts +591 -0
  54. package/.agents/skills/gstack/bin/gstack-repo-mode +93 -0
  55. package/.agents/skills/gstack/bin/gstack-review-log +9 -0
  56. package/.agents/skills/gstack/bin/gstack-review-read +12 -0
  57. package/.agents/skills/gstack/bin/gstack-slug +15 -0
  58. package/.agents/skills/gstack/bin/gstack-telemetry-log +158 -0
  59. package/.agents/skills/gstack/bin/gstack-telemetry-sync +127 -0
  60. package/.agents/skills/gstack/bin/gstack-update-check +196 -0
  61. package/.agents/skills/gstack/browse/SKILL.md +517 -0
  62. package/.agents/skills/gstack/browse/SKILL.md.tmpl +141 -0
  63. package/.agents/skills/gstack/browse/bin/find-browse +21 -0
  64. package/.agents/skills/gstack/browse/bin/remote-slug +14 -0
  65. package/.agents/skills/gstack/browse/scripts/build-node-server.sh +48 -0
  66. package/.agents/skills/gstack/browse/src/browser-manager.ts +634 -0
  67. package/.agents/skills/gstack/browse/src/buffers.ts +137 -0
  68. package/.agents/skills/gstack/browse/src/bun-polyfill.cjs +109 -0
  69. package/.agents/skills/gstack/browse/src/cli.ts +420 -0
  70. package/.agents/skills/gstack/browse/src/commands.ts +111 -0
  71. package/.agents/skills/gstack/browse/src/config.ts +150 -0
  72. package/.agents/skills/gstack/browse/src/cookie-import-browser.ts +417 -0
  73. package/.agents/skills/gstack/browse/src/cookie-picker-routes.ts +207 -0
  74. package/.agents/skills/gstack/browse/src/cookie-picker-ui.ts +541 -0
  75. package/.agents/skills/gstack/browse/src/find-browse.ts +61 -0
  76. package/.agents/skills/gstack/browse/src/meta-commands.ts +269 -0
  77. package/.agents/skills/gstack/browse/src/platform.ts +17 -0
  78. package/.agents/skills/gstack/browse/src/read-commands.ts +335 -0
  79. package/.agents/skills/gstack/browse/src/server.ts +369 -0
  80. package/.agents/skills/gstack/browse/src/snapshot.ts +398 -0
  81. package/.agents/skills/gstack/browse/src/url-validation.ts +91 -0
  82. package/.agents/skills/gstack/browse/src/write-commands.ts +352 -0
  83. package/.agents/skills/gstack/browse/test/bun-polyfill.test.ts +72 -0
  84. package/.agents/skills/gstack/browse/test/commands.test.ts +1836 -0
  85. package/.agents/skills/gstack/browse/test/config.test.ts +250 -0
  86. package/.agents/skills/gstack/browse/test/cookie-import-browser.test.ts +397 -0
  87. package/.agents/skills/gstack/browse/test/cookie-picker-routes.test.ts +205 -0
  88. package/.agents/skills/gstack/browse/test/find-browse.test.ts +50 -0
  89. package/.agents/skills/gstack/browse/test/fixtures/basic.html +33 -0
  90. package/.agents/skills/gstack/browse/test/fixtures/cursor-interactive.html +22 -0
  91. package/.agents/skills/gstack/browse/test/fixtures/dialog.html +15 -0
  92. package/.agents/skills/gstack/browse/test/fixtures/empty.html +2 -0
  93. package/.agents/skills/gstack/browse/test/fixtures/forms.html +55 -0
  94. package/.agents/skills/gstack/browse/test/fixtures/qa-eval-checkout.html +108 -0
  95. package/.agents/skills/gstack/browse/test/fixtures/qa-eval-spa.html +98 -0
  96. package/.agents/skills/gstack/browse/test/fixtures/qa-eval.html +51 -0
  97. package/.agents/skills/gstack/browse/test/fixtures/responsive.html +49 -0
  98. package/.agents/skills/gstack/browse/test/fixtures/snapshot.html +55 -0
  99. package/.agents/skills/gstack/browse/test/fixtures/spa.html +24 -0
  100. package/.agents/skills/gstack/browse/test/fixtures/states.html +17 -0
  101. package/.agents/skills/gstack/browse/test/fixtures/upload.html +25 -0
  102. package/.agents/skills/gstack/browse/test/gstack-config.test.ts +125 -0
  103. package/.agents/skills/gstack/browse/test/gstack-update-check.test.ts +467 -0
  104. package/.agents/skills/gstack/browse/test/handoff.test.ts +235 -0
  105. package/.agents/skills/gstack/browse/test/path-validation.test.ts +63 -0
  106. package/.agents/skills/gstack/browse/test/platform.test.ts +37 -0
  107. package/.agents/skills/gstack/browse/test/snapshot.test.ts +467 -0
  108. package/.agents/skills/gstack/browse/test/test-server.ts +57 -0
  109. package/.agents/skills/gstack/browse/test/url-validation.test.ts +72 -0
  110. package/.agents/skills/gstack/canary/SKILL.md +493 -0
  111. package/.agents/skills/gstack/canary/SKILL.md.tmpl +220 -0
  112. package/.agents/skills/gstack/careful/SKILL.md +59 -0
  113. package/.agents/skills/gstack/careful/SKILL.md.tmpl +57 -0
  114. package/.agents/skills/gstack/careful/bin/check-careful.sh +112 -0
  115. package/.agents/skills/gstack/codex/SKILL.md +677 -0
  116. package/.agents/skills/gstack/codex/SKILL.md.tmpl +356 -0
  117. package/.agents/skills/gstack/conductor.json +6 -0
  118. package/.agents/skills/gstack/cso/SKILL.md +615 -0
  119. package/.agents/skills/gstack/cso/SKILL.md.tmpl +376 -0
  120. package/.agents/skills/gstack/design-consultation/SKILL.md +625 -0
  121. package/.agents/skills/gstack/design-consultation/SKILL.md.tmpl +369 -0
  122. package/.agents/skills/gstack/design-review/SKILL.md +998 -0
  123. package/.agents/skills/gstack/design-review/SKILL.md.tmpl +262 -0
  124. package/.agents/skills/gstack/docs/images/github-2013.png +0 -0
  125. package/.agents/skills/gstack/docs/images/github-2026.png +0 -0
  126. package/.agents/skills/gstack/docs/skills.md +877 -0
  127. package/.agents/skills/gstack/document-release/SKILL.md +613 -0
  128. package/.agents/skills/gstack/document-release/SKILL.md.tmpl +357 -0
  129. package/.agents/skills/gstack/freeze/SKILL.md +82 -0
  130. package/.agents/skills/gstack/freeze/SKILL.md.tmpl +80 -0
  131. package/.agents/skills/gstack/freeze/bin/check-freeze.sh +68 -0
  132. package/.agents/skills/gstack/gstack-upgrade/SKILL.md +226 -0
  133. package/.agents/skills/gstack/gstack-upgrade/SKILL.md.tmpl +224 -0
  134. package/.agents/skills/gstack/guard/SKILL.md +82 -0
  135. package/.agents/skills/gstack/guard/SKILL.md.tmpl +80 -0
  136. package/.agents/skills/gstack/investigate/SKILL.md +435 -0
  137. package/.agents/skills/gstack/investigate/SKILL.md.tmpl +196 -0
  138. package/.agents/skills/gstack/land-and-deploy/SKILL.md +880 -0
  139. package/.agents/skills/gstack/land-and-deploy/SKILL.md.tmpl +575 -0
  140. package/.agents/skills/gstack/office-hours/SKILL.md +996 -0
  141. package/.agents/skills/gstack/office-hours/SKILL.md.tmpl +624 -0
  142. package/.agents/skills/gstack/package.json +55 -0
  143. package/.agents/skills/gstack/plan-ceo-review/SKILL.md +1277 -0
  144. package/.agents/skills/gstack/plan-ceo-review/SKILL.md.tmpl +838 -0
  145. package/.agents/skills/gstack/plan-design-review/SKILL.md +676 -0
  146. package/.agents/skills/gstack/plan-design-review/SKILL.md.tmpl +314 -0
  147. package/.agents/skills/gstack/plan-eng-review/SKILL.md +836 -0
  148. package/.agents/skills/gstack/plan-eng-review/SKILL.md.tmpl +279 -0
  149. package/.agents/skills/gstack/qa/SKILL.md +1016 -0
  150. package/.agents/skills/gstack/qa/SKILL.md.tmpl +316 -0
  151. package/.agents/skills/gstack/qa/references/issue-taxonomy.md +85 -0
  152. package/.agents/skills/gstack/qa/templates/qa-report-template.md +126 -0
  153. package/.agents/skills/gstack/qa-only/SKILL.md +633 -0
  154. package/.agents/skills/gstack/qa-only/SKILL.md.tmpl +101 -0
  155. package/.agents/skills/gstack/retro/SKILL.md +1072 -0
  156. package/.agents/skills/gstack/retro/SKILL.md.tmpl +833 -0
  157. package/.agents/skills/gstack/review/SKILL.md +849 -0
  158. package/.agents/skills/gstack/review/SKILL.md.tmpl +259 -0
  159. package/.agents/skills/gstack/review/TODOS-format.md +62 -0
  160. package/.agents/skills/gstack/review/checklist.md +190 -0
  161. package/.agents/skills/gstack/review/design-checklist.md +132 -0
  162. package/.agents/skills/gstack/review/greptile-triage.md +220 -0
  163. package/.agents/skills/gstack/scripts/analytics.ts +190 -0
  164. package/.agents/skills/gstack/scripts/dev-skill.ts +82 -0
  165. package/.agents/skills/gstack/scripts/eval-compare.ts +96 -0
  166. package/.agents/skills/gstack/scripts/eval-list.ts +116 -0
  167. package/.agents/skills/gstack/scripts/eval-select.ts +86 -0
  168. package/.agents/skills/gstack/scripts/eval-summary.ts +187 -0
  169. package/.agents/skills/gstack/scripts/eval-watch.ts +172 -0
  170. package/.agents/skills/gstack/scripts/gen-skill-docs.ts +2414 -0
  171. package/.agents/skills/gstack/scripts/skill-check.ts +167 -0
  172. package/.agents/skills/gstack/setup +269 -0
  173. package/.agents/skills/gstack/setup-browser-cookies/SKILL.md +330 -0
  174. package/.agents/skills/gstack/setup-browser-cookies/SKILL.md.tmpl +74 -0
  175. package/.agents/skills/gstack/setup-deploy/SKILL.md +459 -0
  176. package/.agents/skills/gstack/setup-deploy/SKILL.md.tmpl +220 -0
  177. package/.agents/skills/gstack/ship/SKILL.md +1457 -0
  178. package/.agents/skills/gstack/ship/SKILL.md.tmpl +528 -0
  179. package/.agents/skills/gstack/supabase/config.sh +10 -0
  180. package/.agents/skills/gstack/supabase/functions/community-pulse/index.ts +59 -0
  181. package/.agents/skills/gstack/supabase/functions/telemetry-ingest/index.ts +135 -0
  182. package/.agents/skills/gstack/supabase/functions/update-check/index.ts +37 -0
  183. package/.agents/skills/gstack/supabase/migrations/001_telemetry.sql +89 -0
  184. package/.agents/skills/gstack/test/analytics.test.ts +277 -0
  185. package/.agents/skills/gstack/test/codex-e2e.test.ts +197 -0
  186. package/.agents/skills/gstack/test/fixtures/coverage-audit-fixture.ts +76 -0
  187. package/.agents/skills/gstack/test/fixtures/eval-baselines.json +7 -0
  188. package/.agents/skills/gstack/test/fixtures/qa-eval-checkout-ground-truth.json +43 -0
  189. package/.agents/skills/gstack/test/fixtures/qa-eval-ground-truth.json +43 -0
  190. package/.agents/skills/gstack/test/fixtures/qa-eval-spa-ground-truth.json +43 -0
  191. package/.agents/skills/gstack/test/fixtures/review-eval-design-slop.css +86 -0
  192. package/.agents/skills/gstack/test/fixtures/review-eval-design-slop.html +41 -0
  193. package/.agents/skills/gstack/test/fixtures/review-eval-enum-diff.rb +30 -0
  194. package/.agents/skills/gstack/test/fixtures/review-eval-enum.rb +27 -0
  195. package/.agents/skills/gstack/test/fixtures/review-eval-vuln.rb +14 -0
  196. package/.agents/skills/gstack/test/gemini-e2e.test.ts +173 -0
  197. package/.agents/skills/gstack/test/gen-skill-docs.test.ts +1049 -0
  198. package/.agents/skills/gstack/test/global-discover.test.ts +187 -0
  199. package/.agents/skills/gstack/test/helpers/codex-session-runner.ts +282 -0
  200. package/.agents/skills/gstack/test/helpers/e2e-helpers.ts +239 -0
  201. package/.agents/skills/gstack/test/helpers/eval-store.test.ts +548 -0
  202. package/.agents/skills/gstack/test/helpers/eval-store.ts +689 -0
  203. package/.agents/skills/gstack/test/helpers/gemini-session-runner.test.ts +104 -0
  204. package/.agents/skills/gstack/test/helpers/gemini-session-runner.ts +201 -0
  205. package/.agents/skills/gstack/test/helpers/llm-judge.ts +130 -0
  206. package/.agents/skills/gstack/test/helpers/observability.test.ts +283 -0
  207. package/.agents/skills/gstack/test/helpers/session-runner.test.ts +96 -0
  208. package/.agents/skills/gstack/test/helpers/session-runner.ts +357 -0
  209. package/.agents/skills/gstack/test/helpers/skill-parser.ts +206 -0
  210. package/.agents/skills/gstack/test/helpers/touchfiles.ts +260 -0
  211. package/.agents/skills/gstack/test/hook-scripts.test.ts +373 -0
  212. package/.agents/skills/gstack/test/skill-e2e-browse.test.ts +293 -0
  213. package/.agents/skills/gstack/test/skill-e2e-deploy.test.ts +279 -0
  214. package/.agents/skills/gstack/test/skill-e2e-design.test.ts +614 -0
  215. package/.agents/skills/gstack/test/skill-e2e-plan.test.ts +538 -0
  216. package/.agents/skills/gstack/test/skill-e2e-qa-bugs.test.ts +194 -0
  217. package/.agents/skills/gstack/test/skill-e2e-qa-workflow.test.ts +412 -0
  218. package/.agents/skills/gstack/test/skill-e2e-review.test.ts +535 -0
  219. package/.agents/skills/gstack/test/skill-e2e-workflow.test.ts +586 -0
  220. package/.agents/skills/gstack/test/skill-e2e.test.ts +3325 -0
  221. package/.agents/skills/gstack/test/skill-llm-eval.test.ts +787 -0
  222. package/.agents/skills/gstack/test/skill-parser.test.ts +179 -0
  223. package/.agents/skills/gstack/test/skill-routing-e2e.test.ts +605 -0
  224. package/.agents/skills/gstack/test/skill-validation.test.ts +1520 -0
  225. package/.agents/skills/gstack/test/telemetry.test.ts +278 -0
  226. package/.agents/skills/gstack/test/touchfiles.test.ts +262 -0
  227. package/.agents/skills/gstack/unfreeze/SKILL.md +40 -0
  228. package/.agents/skills/gstack/unfreeze/SKILL.md.tmpl +38 -0
  229. package/README.md +12 -7
  230. package/README_KO.md +12 -6
  231. package/package.json +3 -2
@@ -0,0 +1,197 @@
1
+ /**
2
+ * Codex CLI E2E tests — verify skills work when invoked by Codex.
3
+ *
4
+ * Spawns `codex exec` with skills installed in a temp HOME, parses JSONL
5
+ * output, and validates structured results. Follows the same pattern as
6
+ * skill-e2e.test.ts but adapted for Codex CLI.
7
+ *
8
+ * Prerequisites:
9
+ * - `codex` binary installed (npm install -g @openai/codex)
10
+ * - Codex authenticated via ~/.codex/ config (no OPENAI_API_KEY env var needed)
11
+ * - EVALS=1 env var set (same gate as Claude E2E tests)
12
+ *
13
+ * Skips gracefully when prerequisites are not met.
14
+ */
15
+
16
+ import { describe, test, expect, afterAll } from 'bun:test';
17
+ import { runCodexSkill, parseCodexJSONL, installSkillToTempHome } from './helpers/codex-session-runner';
18
+ import type { CodexResult } from './helpers/codex-session-runner';
19
+ import { EvalCollector } from './helpers/eval-store';
20
+ import type { EvalTestEntry } from './helpers/eval-store';
21
+ import { selectTests, detectBaseBranch, getChangedFiles, E2E_TOUCHFILES, GLOBAL_TOUCHFILES } from './helpers/touchfiles';
22
+ import * as fs from 'fs';
23
+ import * as path from 'path';
24
+ import * as os from 'os';
25
+
26
+ const ROOT = path.resolve(import.meta.dir, '..');
27
+
28
+ // --- Prerequisites check ---
29
+
30
+ const CODEX_AVAILABLE = ((): boolean => {
31
+   try {
32
+     // A zero exit status from `which codex` is taken to mean the binary is installed
33
+     return Bun.spawnSync(['which', 'codex']).exitCode === 0;
34
+   } catch { return false; }
35
+ })();
36
+
37
+ const evalsEnabled = !!process.env.EVALS; // truthy for any non-empty EVALS value, not only "1"
38
+
39
+ // Skip all tests if codex is not available or EVALS is not set.
40
+ // Note: Codex uses its own auth from ~/.codex/ config — no OPENAI_API_KEY env var needed.
41
+ const SKIP = !CODEX_AVAILABLE || !evalsEnabled;
42
+
43
+ const describeCodex = SKIP ? describe.skip : describe; // the suite is registered as skipped when SKIP is true
44
+
45
+ // Log why we're skipping (helpful for debugging CI); only the missing-binary case prints anything
46
+ if (!evalsEnabled) {
47
+ // Intentionally silent — same as Claude E2E tests, EVALS=1 required
48
+ } else if (!CODEX_AVAILABLE) {
49
+ process.stderr.write('\nCodex E2E: SKIPPED — codex binary not found (install: npm i -g @openai/codex)\n');
50
+ }
51
+
52
+ // --- Diff-based test selection ---
53
+
54
+ // Codex E2E touchfiles — keyed by test name, same pattern as E2E_TOUCHFILES; each value is the path-pattern list handed to selectTests for that test
55
+ const CODEX_E2E_TOUCHFILES: Record<string, string[]> = {
56
+ 'codex-discover-skill': ['codex/**', '.agents/skills/**', 'test/helpers/codex-session-runner.ts'],
57
+ 'codex-review-findings': ['review/**', '.agents/skills/gstack-review/**', 'codex/**', 'test/helpers/codex-session-runner.ts'],
58
+ };
59
+
60
+ let selectedTests: string[] | null = null; // null = run all
61
+
62
+ if (evalsEnabled && !process.env.EVALS_ALL) { // EVALS_ALL set -> selection is bypassed and selectedTests stays null
63
+ const baseBranch = process.env.EVALS_BASE // precedence: EVALS_BASE env, then detectBaseBranch(ROOT), then 'main'
64
+ || detectBaseBranch(ROOT)
65
+ || 'main';
66
+ const changedFiles = getChangedFiles(baseBranch, ROOT);
67
+
68
+ if (changedFiles.length > 0) {
69
+ const selection = selectTests(changedFiles, CODEX_E2E_TOUCHFILES, GLOBAL_TOUCHFILES);
70
+ selectedTests = selection.selected;
71
+ process.stderr.write(`\nCodex E2E selection (${selection.reason}): ${selection.selected.length}/${Object.keys(CODEX_E2E_TOUCHFILES).length} tests\n`);
72
+ if (selection.skipped.length > 0) {
73
+ process.stderr.write(` Skipped: ${selection.skipped.join(', ')}\n`);
74
+ }
75
+ process.stderr.write('\n');
76
+ }
77
+ // If changedFiles is empty (e.g., on main branch), selectedTests stays null -> run all
78
+ }
79
+
80
+ /** Register `testName` as a concurrent test, or as a skipped one when diff-based selection excluded it. */
81
+ function testIfSelected(testName: string, fn: () => Promise<void>, timeout: number) {
82
+   const excluded = selectedTests !== null && !selectedTests.includes(testName);
83
+   (excluded ? test.skip : test.concurrent)(testName, fn, timeout);
84
+ }
85
+
86
+ // --- Eval result collector ---
87
+
88
+ const evalCollector = evalsEnabled && !SKIP ? new EvalCollector('e2e-codex') : null;
89
+
90
+ /** Forward one Codex E2E outcome to the eval collector; a no-op when no collector was created. */
91
+ function recordCodexE2E(name: string, result: CodexResult, passed: boolean) {
92
+   if (!evalCollector) return; // null whenever the suite is skipped
93
+   const { durationMs, exitCode, output, toolCalls } = result;
94
+   evalCollector.addTest({
95
+     name, suite: 'codex-e2e',
96
+     tier: 'e2e', passed,
97
+     duration_ms: durationMs,
98
+     cost_usd: 0, // Codex doesn't report cost in the same way; tokens are tracked
99
+     output: output?.slice(0, 2000), // keep at most the first 2000 characters
100
+     turns_used: toolCalls.length, // approximate: tool calls as turns
101
+     exit_reason: exitCode === 0 ? 'success' : `exit_code_${exitCode}`,
102
+   });
103
+ }
104
+
105
+ /** Log token count, tool-call count and rounded wall-clock seconds for one Codex E2E run. */
106
+ function logCodexCost(label: string, result: CodexResult) {
107
+   const { tokens, toolCalls, durationMs } = result;
108
+   console.log(`${label}: ${tokens} tokens, ${toolCalls.length} tool calls, ${Math.round(durationMs / 1000)}s`);
109
+ }
110
+
111
+ // Finalize eval results once the tests are done.
112
+ // Nothing to finalize when the suite is skipped (no collector was created).
113
+ afterAll(async () => {
114
+   if (!evalCollector) return;
115
+   await evalCollector.finalize();
116
+ });
117
+
118
+ // --- Tests ---
119
+
120
+ describeCodex('Codex E2E', () => {
121
+
122
+   testIfSelected('codex-discover-skill', async () => {
123
+     // Install gstack-review skill to a temp HOME and ask Codex to list skills
124
+     const skillDir = path.join(ROOT, '.agents', 'skills', 'gstack-review');
125
+
126
+     const result = await runCodexSkill({
127
+       skillDir,
128
+       prompt: 'List any skills or instructions you have available. Just list the names.',
129
+       timeoutMs: 60_000,
130
+       cwd: ROOT,
131
+       skillName: 'gstack-review',
132
+     });
133
+
134
+     logCodexCost('codex-discover-skill', result);
135
+
136
+     // Evaluate every check first so the recorded verdict can never be "passed" for a run the assertions below reject
137
+     const outputLower = result.output.toLowerCase();
138
+     const mentionsSkill =
139
+       outputLower.includes('review') || outputLower.includes('gstack') || outputLower.includes('skill');
140
+     const passed = result.exitCode === 0 && result.output.length > 0 && mentionsSkill;
141
+     recordCodexE2E('codex-discover-skill', result, passed);
142
+
143
+     expect(result.exitCode).toBe(0);
144
+     expect(result.output.length).toBeGreaterThan(0);
145
+     // The output should reference the skill name in some form
146
+     expect(mentionsSkill).toBe(true);
147
+   }, 120_000);
148
+
149
+   // Validates that Codex can invoke the gstack-review skill, run a diff-based
150
+   // code review, and produce structured review output with findings/issues.
151
+   // Accepts Codex timeout (exit 124/137) as non-failure since that's a CLI perf issue.
152
+   testIfSelected('codex-review-findings', async () => {
153
+     // Install gstack-review skill and ask Codex to review the current repo
154
+     const skillDir = path.join(ROOT, '.agents', 'skills', 'gstack-review');
155
+
156
+     const result = await runCodexSkill({
157
+       skillDir,
158
+       prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.',
159
+       timeoutMs: 540_000,
160
+       cwd: ROOT,
161
+       skillName: 'gstack-review',
162
+     });
163
+
164
+     logCodexCost('codex-review-findings', result);
165
+
166
+     // Should produce structured review-like output
167
+     const output = result.output;
168
+
169
+     // Codex may time out on large diffs — accept timeout as "not our fault"
170
+     // exit 124 (timeout) and 137 (128 + SIGKILL by shell convention) are both treated as a Codex CLI performance issue
171
+     if (result.exitCode === 124 || result.exitCode === 137) {
172
+       console.warn(`codex-review-findings: Codex timed out (exit ${result.exitCode}) — skipping assertions`);
173
+       recordCodexE2E('codex-review-findings', result, true); // don't fail the suite
174
+       return;
175
+     }
176
+
177
+     // Review output should contain some review-like content
178
+     const outputLower = output.toLowerCase();
179
+     const hasReviewContent =
180
+       outputLower.includes('finding') ||
181
+       outputLower.includes('issue') ||
182
+       outputLower.includes('review') ||
183
+       outputLower.includes('change') ||
184
+       outputLower.includes('diff') ||
185
+       outputLower.includes('clean') ||
186
+       outputLower.includes('no issues') ||
187
+       outputLower.includes('p1') ||
188
+       outputLower.includes('p2');
189
+     // Record only after every check is known, so the stored verdict matches the assertions below
190
+     const passed = result.exitCode === 0 && output.length > 50 && hasReviewContent;
191
+     recordCodexE2E('codex-review-findings', result, passed);
192
+
193
+     expect(result.exitCode).toBe(0);
194
+     expect(output.length).toBeGreaterThan(50);
195
+     expect(hasReviewContent).toBe(true);
196
+   }, 600_000);
197
+ });
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Shared fixture for test coverage audit E2E tests.
3
+ *
4
+ * Creates a Node.js project with billing source code that has intentional
5
+ * test coverage gaps: processPayment has happy-path-only tests,
6
+ * refundPayment has no tests at all.
7
+ *
8
+ * Used by: ship-coverage-audit E2E, review-coverage-audit E2E
9
+ */
10
+
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import { spawnSync } from 'child_process';
14
+
15
+ /**
16
+  * Build the coverage-audit fixture project inside `dir`.
17
+  *
18
+  * Writes the project files (package.json, vitest config, VERSION, CHANGELOG,
19
+  * src/billing.ts, test/billing.test.ts), commits them on `main`, then checks
20
+  * out `feature/billing`.
21
+  *
22
+  * @param dir Directory that receives the files; it must already exist.
23
+  * @throws Error if any git step cannot be spawned, times out, or exits non-zero.
24
+  */
25
+ export function createCoverageAuditFixture(dir: string): void {
26
+   // Create a Node.js project WITH test framework but coverage gaps
27
+   fs.writeFileSync(path.join(dir, 'package.json'), JSON.stringify({
28
+     name: 'test-coverage-app',
29
+     version: '1.0.0',
30
+     type: 'module',
31
+     scripts: { test: 'echo "no tests yet"' },
32
+     devDependencies: { vitest: '^1.0.0' },
33
+   }, null, 2));
34
+
35
+   // Create vitest config
36
+   fs.writeFileSync(path.join(dir, 'vitest.config.ts'),
37
+     `import { defineConfig } from 'vitest/config';\nexport default defineConfig({ test: {} });\n`);
38
+
39
+   fs.writeFileSync(path.join(dir, 'VERSION'), '0.1.0.0\n');
40
+   fs.writeFileSync(path.join(dir, 'CHANGELOG.md'), '# Changelog\n');
41
+
42
+   // Create source file with multiple code paths
43
+   fs.mkdirSync(path.join(dir, 'src'), { recursive: true });
44
+   fs.writeFileSync(path.join(dir, 'src', 'billing.ts'), `
45
+ export function processPayment(amount: number, currency: string) {
46
+ if (amount <= 0) throw new Error('Invalid amount');
47
+ if (currency !== 'USD' && currency !== 'EUR') throw new Error('Unsupported currency');
48
+ return { status: 'success', amount, currency };
49
+ }
50
+
51
+ export function refundPayment(paymentId: string, reason: string) {
52
+ if (!paymentId) throw new Error('Payment ID required');
53
+ if (!reason) throw new Error('Reason required');
54
+ return { status: 'refunded', paymentId, reason };
55
+ }
56
+ `);
57
+
58
+   // Create a test directory with ONE test (partial coverage)
59
+   fs.mkdirSync(path.join(dir, 'test'), { recursive: true });
60
+   fs.writeFileSync(path.join(dir, 'test', 'billing.test.ts'), `
61
+ import { describe, test, expect } from 'vitest';
62
+ import { processPayment } from '../src/billing';
63
+
64
+ describe('processPayment', () => {
65
+ test('processes valid payment', () => {
66
+ const result = processPayment(100, 'USD');
67
+ expect(result.status).toBe('success');
68
+ });
69
+ // GAP: no test for invalid amount
70
+ // GAP: no test for unsupported currency
71
+ // GAP: refundPayment not tested at all
72
+ });
73
+ `);
74
+
75
+   // Init git repo with main branch. Each step is checked: a silently failed
76
+   // init/commit/checkout would leave a broken fixture for the tests that consume it.
77
+   const run = (cmd: string, args: string[]) => {
78
+     const res = spawnSync(cmd, args, { cwd: dir, stdio: 'pipe', timeout: 5000 });
79
+     // `error` covers spawn failures and timeouts; `status` covers non-zero exits
80
+     if (res.error || res.status !== 0) {
81
+       const detail = res.error?.message ?? res.stderr?.toString().trim() ?? '';
82
+       throw new Error(`createCoverageAuditFixture: ${cmd} ${args.join(' ')} failed (status ${res.status}): ${detail}`);
83
+     }
84
+     return res;
85
+   };
86
+   run('git', ['init', '-b', 'main']);
87
+   run('git', ['config', 'user.email', 'test@test.com']);
88
+   run('git', ['config', 'user.name', 'Test']);
89
+   run('git', ['add', '.']);
90
+   run('git', ['commit', '-m', 'initial commit']);
91
+
92
+   // Create feature branch
93
+   run('git', ['checkout', '-b', 'feature/billing']);
94
+ }
@@ -0,0 +1,7 @@
1
+ {
2
+ "command_reference": { "clarity": 4, "completeness": 3, "actionability": 4 },
3
+ "snapshot_flags": { "clarity": 4, "completeness": 4, "actionability": 4 },
4
+ "browse_skill": { "clarity": 4, "completeness": 4, "actionability": 4 },
5
+ "qa_workflow": { "clarity": 4, "completeness": 4, "actionability": 4 },
6
+ "qa_health_rubric": { "clarity": 4, "completeness": 3, "actionability": 4 }
7
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "fixture": "qa-eval-checkout.html",
3
+ "bugs": [
4
+ {
5
+ "id": "broken-email-regex",
6
+ "category": "functional",
7
+ "severity": "high",
8
+ "description": "Email validation accepts 'user@' as valid — regex pattern [^@]+@[^@] is missing domain requirement",
9
+ "detection_hint": "email|regex|validation|accepts|invalid|user@|pattern"
10
+ },
11
+ {
12
+ "id": "nan-total",
13
+ "category": "functional",
14
+ "severity": "high",
15
+ "description": "Clearing the quantity field shows 'Total: $NaN' — parseInt on empty string returns NaN with no fallback",
16
+ "detection_hint": "NaN|total|quantity|empty|price|calculation|clear"
17
+ },
18
+ {
19
+ "id": "cc-field-overflow",
20
+ "category": "visual",
21
+ "severity": "medium",
22
+ "description": "Credit card input has no maxlength attribute — entering >20 characters causes text to overflow the container",
23
+ "detection_hint": "credit card|maxlength|overflow|cc|input|long|container"
24
+ },
25
+ {
26
+ "id": "missing-required-zip",
27
+ "category": "functional",
28
+ "severity": "medium",
29
+ "description": "Zip code field has no 'required' attribute — form can be submitted without a zip code",
30
+ "detection_hint": "zip|required|missing|form|submit|shipping|postal"
31
+ },
32
+ {
33
+ "id": "stripe-not-defined",
34
+ "category": "console",
35
+ "severity": "high",
36
+ "description": "Form submit triggers 'Uncaught ReferenceError: stripe is not defined' — payment SDK not loaded",
37
+ "detection_hint": "stripe|ReferenceError|not defined|console|error|submit|payment"
38
+ }
39
+ ],
40
+ "total_bugs": 5,
41
+ "minimum_detection": 2,
42
+ "max_false_positives": 5
43
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "fixture": "qa-eval.html",
3
+ "bugs": [
4
+ {
5
+ "id": "broken-link",
6
+ "category": "functional",
7
+ "severity": "medium",
8
+ "description": "Navigation link 'Resources' points to /nonexistent-404-page which returns 404",
9
+ "detection_hint": "link|404|broken|dead|nonexistent|Resources"
10
+ },
11
+ {
12
+ "id": "disabled-submit",
13
+ "category": "functional",
14
+ "severity": "high",
15
+ "description": "Contact form submit button has 'disabled' attribute permanently — form can never be submitted",
16
+ "detection_hint": "disabled|submit|button|form|cannot submit|contact"
17
+ },
18
+ {
19
+ "id": "content-overflow",
20
+ "category": "visual",
21
+ "severity": "medium",
22
+ "description": "Statistics text is clipped by overflow:hidden container — content wider than 400px container",
23
+ "detection_hint": "overflow|clipped|truncated|hidden|text cut|statistics"
24
+ },
25
+ {
26
+ "id": "missing-alt",
27
+ "category": "accessibility",
28
+ "severity": "medium",
29
+ "description": "Logo image (<img src='/logo.png'>) has no alt attribute",
30
+ "detection_hint": "alt|accessibility|image|a11y|missing alt|logo"
31
+ },
32
+ {
33
+ "id": "console-error",
34
+ "category": "console",
35
+ "severity": "high",
36
+ "description": "TypeError on page load: Cannot read properties of undefined (reading 'map')",
37
+ "detection_hint": "console|error|TypeError|undefined|map"
38
+ }
39
+ ],
40
+ "total_bugs": 5,
41
+ "minimum_detection": 2,
42
+ "max_false_positives": 5
43
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "fixture": "qa-eval-spa.html",
3
+ "bugs": [
4
+ {
5
+ "id": "broken-route",
6
+ "category": "functional",
7
+ "severity": "high",
8
+ "description": "Products nav link points to #/prodcts (typo) instead of #/products — shows 'Page not found'",
9
+ "detection_hint": "route|prodcts|typo|products|not found|broken link|navigation"
10
+ },
11
+ {
12
+ "id": "stale-cart-state",
13
+ "category": "functional",
14
+ "severity": "medium",
15
+ "description": "Cart count persists across route changes — never resets when navigating away from products",
16
+ "detection_hint": "cart|count|state|persist|reset|stale|navigation"
17
+ },
18
+ {
19
+ "id": "async-fetch-error",
20
+ "category": "functional",
21
+ "severity": "high",
22
+ "description": "Product list briefly loads then shows 'Error: Failed to fetch products from API' after 1 second",
23
+ "detection_hint": "error|fetch|products|API|loading|failed|async"
24
+ },
25
+ {
26
+ "id": "missing-aria-current",
27
+ "category": "accessibility",
28
+ "severity": "medium",
29
+ "description": "Navigation links have no aria-current attribute to indicate the active route",
30
+ "detection_hint": "aria|current|active|navigation|accessibility|a11y"
31
+ },
32
+ {
33
+ "id": "console-warn-leak",
34
+ "category": "console",
35
+ "severity": "medium",
36
+ "description": "console.warn fires on every route change: 'Possible memory leak detected: 11 event listeners'",
37
+ "detection_hint": "console|warn|memory leak|listener|event|warning"
38
+ }
39
+ ],
40
+ "total_bugs": 5,
41
+ "minimum_detection": 2,
42
+ "max_false_positives": 5
43
+ }
@@ -0,0 +1,86 @@
1
+ /* Planted design anti-patterns for E2E eval — 7 issues */
2
+
3
+ /* Issue 1: [HIGH] Blacklisted font (Papyrus) */
4
+ /* Issue 2: [HIGH] Body text < 16px (14px) */
5
+ body {
6
+ font-family: 'Papyrus', sans-serif;
7
+ font-size: 14px;
8
+ margin: 0;
9
+ padding: 0;
10
+ }
11
+
12
+ /* Issue 5: [MEDIUM] Purple/violet gradient background */
13
+ .hero {
14
+ background: linear-gradient(135deg, #6366f1, #8b5cf6);
15
+ text-align: center;
16
+ padding: 80px 20px;
17
+ color: white;
18
+ }
19
+
20
+ .hero h1 {
21
+ text-align: center;
22
+ font-size: 48px;
23
+ }
24
+
25
+ .hero p {
26
+ text-align: center;
27
+ font-size: 20px;
28
+ }
29
+
30
+ /* Issue 7: [LOW] 3-column feature grid with icon circles */
31
+ .features {
32
+ display: grid;
33
+ grid-template-columns: repeat(3, 1fr);
34
+ gap: 24px;
35
+ padding: 60px 40px;
36
+ text-align: center;
37
+ }
38
+
39
+ .feature-card {
40
+ border-radius: 24px;
41
+ padding: 32px;
42
+ text-align: center;
43
+ background: #f9fafb;
44
+ }
45
+
46
+ /* Icon in colored circle — AI slop pattern */
47
+ .icon-circle {
48
+ width: 60px;
49
+ height: 60px;
50
+ border-radius: 50%;
51
+ background: #ede9fe;
52
+ display: flex;
53
+ align-items: center;
54
+ justify-content: center;
55
+ margin: 0 auto 16px;
56
+ font-size: 24px;
57
+ }
58
+
59
+ /* Issue 3: [HIGH] outline: none without replacement */
60
+ button {
61
+ outline: none;
62
+ background: #6366f1;
63
+ color: white;
64
+ border: none;
65
+ padding: 12px 24px;
66
+ border-radius: 24px;
67
+ cursor: pointer;
68
+ }
69
+
70
+ .small-link {
71
+ font-size: 11px;
72
+ padding: 4px 8px;
73
+ }
74
+
75
+ /* Issue 4: [HIGH] !important usage */
76
+ .override {
77
+ color: red !important;
78
+ margin-left: 10px !important;
79
+ }
80
+
81
+ .footer {
82
+ text-align: center;
83
+ padding: 40px;
84
+ background: #1e1b4b;
85
+ color: white;
86
+ }
@@ -0,0 +1,41 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <link rel="stylesheet" href="styles.css">
7
+ <title>Our Platform</title>
8
+ </head>
9
+ <body>
10
+ <!-- Issue 6: [MEDIUM] Generic hero copy ("Welcome to...", "all-in-one solution") -->
11
+ <div class="hero">
12
+ <h1>Welcome to Our Platform</h1>
13
+ <p>Your all-in-one solution for everything you need</p>
14
+ <button>Get Started</button>
15
+ </div>
16
+
17
+ <!-- Issue 7: [LOW] 3-column feature grid with icon-in-circle + title + description -->
18
+ <div class="features">
19
+ <div class="feature-card">
20
+ <div class="icon-circle">&#9733;</div>
21
+ <h3>Feature One</h3>
22
+ <p>A short description of this amazing feature that will change your life.</p>
23
+ </div>
24
+ <div class="feature-card">
25
+ <div class="icon-circle">&#9889;</div>
26
+ <h3>Feature Two</h3>
27
+ <p>Another incredible capability that sets us apart from the competition.</p>
28
+ </div>
29
+ <div class="feature-card">
30
+ <div class="icon-circle">&#9881;</div>
31
+ <h3>Feature Three</h3>
32
+ <p>Yet another powerful tool to streamline your workflow effortlessly.</p>
33
+ </div>
34
+ </div>
35
+
36
+ <div class="footer">
37
+ <p class="override">Unlock the power of our platform today</p>
38
+ <a href="#" class="small-link">Terms of Service</a>
39
+ </div>
40
+ </body>
41
+ </html>
@@ -0,0 +1,30 @@
1
+ # Feature branch version: adds "returned" status but misses consumers
2
+ class Order < ApplicationRecord
3
+ STATUSES = %w[pending processing shipped delivered returned].freeze
4
+
5
+ validates :status, inclusion: { in: STATUSES }
6
+
7
+ def display_status
8
+ case status
9
+ when 'pending' then 'Awaiting processing'
10
+ when 'processing' then 'Being prepared'
11
+ when 'shipped' then 'On the way'
12
+ when 'delivered' then 'Delivered'
13
+ # BUG: 'returned' not handled — falls through to nil
14
+ end
15
+ end
16
+
17
+ def can_cancel?
18
+ # BUG: should 'returned' be cancellable? Not considered.
19
+ %w[pending processing].include?(status)
20
+ end
21
+
22
+ def notify_customer
23
+ case status
24
+ when 'pending' then OrderMailer.confirmation(self).deliver_later
25
+ when 'shipped' then OrderMailer.shipped(self).deliver_later
26
+ when 'delivered' then OrderMailer.delivered(self).deliver_later
27
+ # BUG: 'returned' has no notification — customer won't know return was received
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,27 @@
1
+ # Existing file on main: order model with status handling
2
+ class Order < ApplicationRecord
3
+ STATUSES = %w[pending processing shipped delivered].freeze
4
+
5
+ validates :status, inclusion: { in: STATUSES }
6
+
7
+ def display_status
8
+ case status
9
+ when 'pending' then 'Awaiting processing'
10
+ when 'processing' then 'Being prepared'
11
+ when 'shipped' then 'On the way'
12
+ when 'delivered' then 'Delivered'
13
+ end
14
+ end
15
+
16
+ def can_cancel?
17
+ %w[pending processing].include?(status)
18
+ end
19
+
20
+ def notify_customer
21
+ case status
22
+ when 'pending' then OrderMailer.confirmation(self).deliver_later
23
+ when 'shipped' then OrderMailer.shipped(self).deliver_later
24
+ when 'delivered' then OrderMailer.delivered(self).deliver_later
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,14 @@
1
+ class UserController < ApplicationController
2
+ def show
3
+ # SQL injection — interpolating user input directly into query
4
+ @user = User.where("id = #{params[:id]}").first
5
+ render json: @user
6
+ end
7
+
8
+ def promote
9
+ # Bypasses ActiveRecord validations — update_column skips callbacks + validation
10
+ @user = User.find(params[:id])
11
+ @user.update_column(:role, 'admin')
12
+ head :ok
13
+ end
14
+ end