@runchr/gstack-antigravity 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297):
  1. package/.agents/rules/ETHOS.md +129 -0
  2. package/.agents/rules/global-gstack.md +117 -0
  3. package/.agents/rules/persona-gstack-autoplan.md +14 -0
  4. package/.agents/rules/persona-gstack-benchmark.md +14 -0
  5. package/.agents/rules/persona-gstack-browse.md +14 -0
  6. package/.agents/rules/persona-gstack-canary.md +14 -0
  7. package/.agents/rules/persona-gstack-careful.md +14 -0
  8. package/.agents/rules/persona-gstack-codex.md +14 -0
  9. package/.agents/rules/persona-gstack-cso.md +14 -0
  10. package/.agents/rules/persona-gstack-design-consultation.md +14 -0
  11. package/.agents/rules/persona-gstack-design-review.md +14 -0
  12. package/.agents/rules/persona-gstack-document-release.md +14 -0
  13. package/.agents/rules/persona-gstack-freeze.md +14 -0
  14. package/.agents/rules/persona-gstack-gstack-upgrade.md +14 -0
  15. package/.agents/rules/persona-gstack-guard.md +14 -0
  16. package/.agents/rules/persona-gstack-investigate.md +14 -0
  17. package/.agents/rules/persona-gstack-land-and-deploy.md +14 -0
  18. package/.agents/rules/persona-gstack-office-hours.md +14 -0
  19. package/.agents/rules/persona-gstack-plan-ceo-review.md +14 -0
  20. package/.agents/rules/persona-gstack-plan-design-review.md +14 -0
  21. package/.agents/rules/persona-gstack-plan-eng-review.md +14 -0
  22. package/.agents/rules/persona-gstack-qa-only.md +14 -0
  23. package/.agents/rules/persona-gstack-qa.md +14 -0
  24. package/.agents/rules/persona-gstack-retro.md +14 -0
  25. package/.agents/rules/persona-gstack-review.md +14 -0
  26. package/.agents/rules/persona-gstack-setup-browser-cookies.md +14 -0
  27. package/.agents/rules/persona-gstack-setup-deploy.md +14 -0
  28. package/.agents/rules/persona-gstack-ship.md +14 -0
  29. package/.agents/rules/persona-gstack-unfreeze.md +14 -0
  30. package/.agents/rules/persona-gstack.md +40 -0
  31. package/.agents/rules/recursive-identities.md +22 -0
  32. package/.agents/workflows/autoplan.md +30 -0
  33. package/.agents/workflows/benchmark.md +31 -0
  34. package/.agents/workflows/browse.md +26 -0
  35. package/.agents/workflows/canary.md +33 -0
  36. package/.agents/workflows/careful.md +22 -0
  37. package/.agents/workflows/codex.md +36 -0
  38. package/.agents/workflows/cso.md +29 -0
  39. package/.agents/workflows/design-consultation.md +28 -0
  40. package/.agents/workflows/design-review.md +28 -0
  41. package/.agents/workflows/document-release.md +32 -0
  42. package/.agents/workflows/freeze.md +17 -0
  43. package/.agents/workflows/gstack-upgrade.md +54 -0
  44. package/.agents/workflows/gstack.md +56 -0
  45. package/.agents/workflows/guard.md +18 -0
  46. package/.agents/workflows/investigate.md +37 -0
  47. package/.agents/workflows/land-and-deploy.md +35 -0
  48. package/.agents/workflows/office-hours.md +27 -0
  49. package/.agents/workflows/plan-ceo-review.md +34 -0
  50. package/.agents/workflows/plan-design-review.md +31 -0
  51. package/.agents/workflows/plan-eng-review.md +28 -0
  52. package/.agents/workflows/qa-only.md +28 -0
  53. package/.agents/workflows/qa.md +73 -0
  54. package/.agents/workflows/retro.md +34 -0
  55. package/.agents/workflows/review.md +30 -0
  56. package/.agents/workflows/setup-browser-cookies.md +15 -0
  57. package/.agents/workflows/setup-cookies.md +8 -0
  58. package/.agents/workflows/setup-deploy.md +21 -0
  59. package/.agents/workflows/ship.md +93 -0
  60. package/.agents/workflows/unfreeze.md +12 -0
  61. package/LICENSE +22 -0
  62. package/README.md +189 -0
  63. package/README_KO.md +191 -0
  64. package/bin/install.js +105 -0
  65. package/gstack-origin/.agents/skills/gstack/SKILL.md +651 -0
  66. package/gstack-origin/.agents/skills/gstack-autoplan/SKILL.md +678 -0
  67. package/gstack-origin/.agents/skills/gstack-benchmark/SKILL.md +482 -0
  68. package/gstack-origin/.agents/skills/gstack-browse/SKILL.md +511 -0
  69. package/gstack-origin/.agents/skills/gstack-canary/SKILL.md +486 -0
  70. package/gstack-origin/.agents/skills/gstack-careful/SKILL.md +50 -0
  71. package/gstack-origin/.agents/skills/gstack-cso/SKILL.md +607 -0
  72. package/gstack-origin/.agents/skills/gstack-design-consultation/SKILL.md +615 -0
  73. package/gstack-origin/.agents/skills/gstack-design-review/SKILL.md +988 -0
  74. package/gstack-origin/.agents/skills/gstack-document-release/SKILL.md +604 -0
  75. package/gstack-origin/.agents/skills/gstack-freeze/SKILL.md +67 -0
  76. package/gstack-origin/.agents/skills/gstack-guard/SKILL.md +62 -0
  77. package/gstack-origin/.agents/skills/gstack-investigate/SKILL.md +415 -0
  78. package/gstack-origin/.agents/skills/gstack-land-and-deploy/SKILL.md +873 -0
  79. package/gstack-origin/.agents/skills/gstack-office-hours/SKILL.md +986 -0
  80. package/gstack-origin/.agents/skills/gstack-plan-ceo-review/SKILL.md +1268 -0
  81. package/gstack-origin/.agents/skills/gstack-plan-design-review/SKILL.md +668 -0
  82. package/gstack-origin/.agents/skills/gstack-plan-eng-review/SKILL.md +826 -0
  83. package/gstack-origin/.agents/skills/gstack-qa/SKILL.md +1006 -0
  84. package/gstack-origin/.agents/skills/gstack-qa-only/SKILL.md +626 -0
  85. package/gstack-origin/.agents/skills/gstack-retro/SKILL.md +1065 -0
  86. package/gstack-origin/.agents/skills/gstack-review/SKILL.md +704 -0
  87. package/gstack-origin/.agents/skills/gstack-setup-browser-cookies/SKILL.md +325 -0
  88. package/gstack-origin/.agents/skills/gstack-setup-deploy/SKILL.md +450 -0
  89. package/gstack-origin/.agents/skills/gstack-ship/SKILL.md +1312 -0
  90. package/gstack-origin/.agents/skills/gstack-unfreeze/SKILL.md +36 -0
  91. package/gstack-origin/.agents/skills/gstack-upgrade/SKILL.md +220 -0
  92. package/gstack-origin/.env.example +5 -0
  93. package/gstack-origin/.github/workflows/skill-docs.yml +17 -0
  94. package/gstack-origin/AGENTS.md +49 -0
  95. package/gstack-origin/ARCHITECTURE.md +359 -0
  96. package/gstack-origin/BROWSER.md +271 -0
  97. package/gstack-origin/CHANGELOG.md +800 -0
  98. package/gstack-origin/CLAUDE.md +284 -0
  99. package/gstack-origin/CONTRIBUTING.md +370 -0
  100. package/gstack-origin/ETHOS.md +129 -0
  101. package/gstack-origin/LICENSE +21 -0
  102. package/gstack-origin/README.md +228 -0
  103. package/gstack-origin/SKILL.md +657 -0
  104. package/gstack-origin/SKILL.md.tmpl +281 -0
  105. package/gstack-origin/TODOS.md +564 -0
  106. package/gstack-origin/VERSION +1 -0
  107. package/gstack-origin/autoplan/SKILL.md +689 -0
  108. package/gstack-origin/autoplan/SKILL.md.tmpl +416 -0
  109. package/gstack-origin/benchmark/SKILL.md +489 -0
  110. package/gstack-origin/benchmark/SKILL.md.tmpl +233 -0
  111. package/gstack-origin/bin/dev-setup +68 -0
  112. package/gstack-origin/bin/dev-teardown +56 -0
  113. package/gstack-origin/bin/gstack-analytics +191 -0
  114. package/gstack-origin/bin/gstack-community-dashboard +113 -0
  115. package/gstack-origin/bin/gstack-config +38 -0
  116. package/gstack-origin/bin/gstack-diff-scope +71 -0
  117. package/gstack-origin/bin/gstack-global-discover.ts +591 -0
  118. package/gstack-origin/bin/gstack-repo-mode +93 -0
  119. package/gstack-origin/bin/gstack-review-log +9 -0
  120. package/gstack-origin/bin/gstack-review-read +12 -0
  121. package/gstack-origin/bin/gstack-slug +15 -0
  122. package/gstack-origin/bin/gstack-telemetry-log +158 -0
  123. package/gstack-origin/bin/gstack-telemetry-sync +127 -0
  124. package/gstack-origin/bin/gstack-update-check +196 -0
  125. package/gstack-origin/browse/SKILL.md +517 -0
  126. package/gstack-origin/browse/SKILL.md.tmpl +141 -0
  127. package/gstack-origin/browse/bin/find-browse +21 -0
  128. package/gstack-origin/browse/bin/remote-slug +14 -0
  129. package/gstack-origin/browse/scripts/build-node-server.sh +48 -0
  130. package/gstack-origin/browse/src/browser-manager.ts +634 -0
  131. package/gstack-origin/browse/src/buffers.ts +137 -0
  132. package/gstack-origin/browse/src/bun-polyfill.cjs +109 -0
  133. package/gstack-origin/browse/src/cli.ts +420 -0
  134. package/gstack-origin/browse/src/commands.ts +111 -0
  135. package/gstack-origin/browse/src/config.ts +150 -0
  136. package/gstack-origin/browse/src/cookie-import-browser.ts +417 -0
  137. package/gstack-origin/browse/src/cookie-picker-routes.ts +207 -0
  138. package/gstack-origin/browse/src/cookie-picker-ui.ts +541 -0
  139. package/gstack-origin/browse/src/find-browse.ts +61 -0
  140. package/gstack-origin/browse/src/meta-commands.ts +269 -0
  141. package/gstack-origin/browse/src/platform.ts +17 -0
  142. package/gstack-origin/browse/src/read-commands.ts +335 -0
  143. package/gstack-origin/browse/src/server.ts +369 -0
  144. package/gstack-origin/browse/src/snapshot.ts +398 -0
  145. package/gstack-origin/browse/src/url-validation.ts +91 -0
  146. package/gstack-origin/browse/src/write-commands.ts +352 -0
  147. package/gstack-origin/browse/test/bun-polyfill.test.ts +72 -0
  148. package/gstack-origin/browse/test/commands.test.ts +1836 -0
  149. package/gstack-origin/browse/test/config.test.ts +250 -0
  150. package/gstack-origin/browse/test/cookie-import-browser.test.ts +397 -0
  151. package/gstack-origin/browse/test/cookie-picker-routes.test.ts +205 -0
  152. package/gstack-origin/browse/test/find-browse.test.ts +50 -0
  153. package/gstack-origin/browse/test/fixtures/basic.html +33 -0
  154. package/gstack-origin/browse/test/fixtures/cursor-interactive.html +22 -0
  155. package/gstack-origin/browse/test/fixtures/dialog.html +15 -0
  156. package/gstack-origin/browse/test/fixtures/empty.html +2 -0
  157. package/gstack-origin/browse/test/fixtures/forms.html +55 -0
  158. package/gstack-origin/browse/test/fixtures/qa-eval-checkout.html +108 -0
  159. package/gstack-origin/browse/test/fixtures/qa-eval-spa.html +98 -0
  160. package/gstack-origin/browse/test/fixtures/qa-eval.html +51 -0
  161. package/gstack-origin/browse/test/fixtures/responsive.html +49 -0
  162. package/gstack-origin/browse/test/fixtures/snapshot.html +55 -0
  163. package/gstack-origin/browse/test/fixtures/spa.html +24 -0
  164. package/gstack-origin/browse/test/fixtures/states.html +17 -0
  165. package/gstack-origin/browse/test/fixtures/upload.html +25 -0
  166. package/gstack-origin/browse/test/gstack-config.test.ts +125 -0
  167. package/gstack-origin/browse/test/gstack-update-check.test.ts +467 -0
  168. package/gstack-origin/browse/test/handoff.test.ts +235 -0
  169. package/gstack-origin/browse/test/path-validation.test.ts +63 -0
  170. package/gstack-origin/browse/test/platform.test.ts +37 -0
  171. package/gstack-origin/browse/test/snapshot.test.ts +467 -0
  172. package/gstack-origin/browse/test/test-server.ts +57 -0
  173. package/gstack-origin/browse/test/url-validation.test.ts +72 -0
  174. package/gstack-origin/canary/SKILL.md +493 -0
  175. package/gstack-origin/canary/SKILL.md.tmpl +220 -0
  176. package/gstack-origin/careful/SKILL.md +59 -0
  177. package/gstack-origin/careful/SKILL.md.tmpl +57 -0
  178. package/gstack-origin/careful/bin/check-careful.sh +112 -0
  179. package/gstack-origin/codex/SKILL.md +677 -0
  180. package/gstack-origin/codex/SKILL.md.tmpl +356 -0
  181. package/gstack-origin/conductor.json +6 -0
  182. package/gstack-origin/cso/SKILL.md +615 -0
  183. package/gstack-origin/cso/SKILL.md.tmpl +376 -0
  184. package/gstack-origin/design-consultation/SKILL.md +625 -0
  185. package/gstack-origin/design-consultation/SKILL.md.tmpl +369 -0
  186. package/gstack-origin/design-review/SKILL.md +998 -0
  187. package/gstack-origin/design-review/SKILL.md.tmpl +262 -0
  188. package/gstack-origin/docs/images/github-2013.png +0 -0
  189. package/gstack-origin/docs/images/github-2026.png +0 -0
  190. package/gstack-origin/docs/skills.md +877 -0
  191. package/gstack-origin/document-release/SKILL.md +613 -0
  192. package/gstack-origin/document-release/SKILL.md.tmpl +357 -0
  193. package/gstack-origin/freeze/SKILL.md +82 -0
  194. package/gstack-origin/freeze/SKILL.md.tmpl +80 -0
  195. package/gstack-origin/freeze/bin/check-freeze.sh +68 -0
  196. package/gstack-origin/gstack-upgrade/SKILL.md +226 -0
  197. package/gstack-origin/gstack-upgrade/SKILL.md.tmpl +224 -0
  198. package/gstack-origin/guard/SKILL.md +82 -0
  199. package/gstack-origin/guard/SKILL.md.tmpl +80 -0
  200. package/gstack-origin/investigate/SKILL.md +435 -0
  201. package/gstack-origin/investigate/SKILL.md.tmpl +196 -0
  202. package/gstack-origin/land-and-deploy/SKILL.md +880 -0
  203. package/gstack-origin/land-and-deploy/SKILL.md.tmpl +575 -0
  204. package/gstack-origin/office-hours/SKILL.md +996 -0
  205. package/gstack-origin/office-hours/SKILL.md.tmpl +624 -0
  206. package/gstack-origin/package.json +55 -0
  207. package/gstack-origin/plan-ceo-review/SKILL.md +1277 -0
  208. package/gstack-origin/plan-ceo-review/SKILL.md.tmpl +838 -0
  209. package/gstack-origin/plan-design-review/SKILL.md +676 -0
  210. package/gstack-origin/plan-design-review/SKILL.md.tmpl +314 -0
  211. package/gstack-origin/plan-eng-review/SKILL.md +836 -0
  212. package/gstack-origin/plan-eng-review/SKILL.md.tmpl +279 -0
  213. package/gstack-origin/qa/SKILL.md +1016 -0
  214. package/gstack-origin/qa/SKILL.md.tmpl +316 -0
  215. package/gstack-origin/qa/references/issue-taxonomy.md +85 -0
  216. package/gstack-origin/qa/templates/qa-report-template.md +126 -0
  217. package/gstack-origin/qa-only/SKILL.md +633 -0
  218. package/gstack-origin/qa-only/SKILL.md.tmpl +101 -0
  219. package/gstack-origin/retro/SKILL.md +1072 -0
  220. package/gstack-origin/retro/SKILL.md.tmpl +833 -0
  221. package/gstack-origin/review/SKILL.md +849 -0
  222. package/gstack-origin/review/SKILL.md.tmpl +259 -0
  223. package/gstack-origin/review/TODOS-format.md +62 -0
  224. package/gstack-origin/review/checklist.md +190 -0
  225. package/gstack-origin/review/design-checklist.md +132 -0
  226. package/gstack-origin/review/greptile-triage.md +220 -0
  227. package/gstack-origin/scripts/analytics.ts +190 -0
  228. package/gstack-origin/scripts/dev-skill.ts +82 -0
  229. package/gstack-origin/scripts/eval-compare.ts +96 -0
  230. package/gstack-origin/scripts/eval-list.ts +116 -0
  231. package/gstack-origin/scripts/eval-select.ts +86 -0
  232. package/gstack-origin/scripts/eval-summary.ts +187 -0
  233. package/gstack-origin/scripts/eval-watch.ts +172 -0
  234. package/gstack-origin/scripts/gen-skill-docs.ts +2414 -0
  235. package/gstack-origin/scripts/skill-check.ts +167 -0
  236. package/gstack-origin/setup +269 -0
  237. package/gstack-origin/setup-browser-cookies/SKILL.md +330 -0
  238. package/gstack-origin/setup-browser-cookies/SKILL.md.tmpl +74 -0
  239. package/gstack-origin/setup-deploy/SKILL.md +459 -0
  240. package/gstack-origin/setup-deploy/SKILL.md.tmpl +220 -0
  241. package/gstack-origin/ship/SKILL.md +1457 -0
  242. package/gstack-origin/ship/SKILL.md.tmpl +528 -0
  243. package/gstack-origin/supabase/config.sh +10 -0
  244. package/gstack-origin/supabase/functions/community-pulse/index.ts +59 -0
  245. package/gstack-origin/supabase/functions/telemetry-ingest/index.ts +135 -0
  246. package/gstack-origin/supabase/functions/update-check/index.ts +37 -0
  247. package/gstack-origin/supabase/migrations/001_telemetry.sql +89 -0
  248. package/gstack-origin/test/analytics.test.ts +277 -0
  249. package/gstack-origin/test/codex-e2e.test.ts +197 -0
  250. package/gstack-origin/test/fixtures/coverage-audit-fixture.ts +76 -0
  251. package/gstack-origin/test/fixtures/eval-baselines.json +7 -0
  252. package/gstack-origin/test/fixtures/qa-eval-checkout-ground-truth.json +43 -0
  253. package/gstack-origin/test/fixtures/qa-eval-ground-truth.json +43 -0
  254. package/gstack-origin/test/fixtures/qa-eval-spa-ground-truth.json +43 -0
  255. package/gstack-origin/test/fixtures/review-eval-design-slop.css +86 -0
  256. package/gstack-origin/test/fixtures/review-eval-design-slop.html +41 -0
  257. package/gstack-origin/test/fixtures/review-eval-enum-diff.rb +30 -0
  258. package/gstack-origin/test/fixtures/review-eval-enum.rb +27 -0
  259. package/gstack-origin/test/fixtures/review-eval-vuln.rb +14 -0
  260. package/gstack-origin/test/gemini-e2e.test.ts +173 -0
  261. package/gstack-origin/test/gen-skill-docs.test.ts +1049 -0
  262. package/gstack-origin/test/global-discover.test.ts +187 -0
  263. package/gstack-origin/test/helpers/codex-session-runner.ts +282 -0
  264. package/gstack-origin/test/helpers/e2e-helpers.ts +239 -0
  265. package/gstack-origin/test/helpers/eval-store.test.ts +548 -0
  266. package/gstack-origin/test/helpers/eval-store.ts +689 -0
  267. package/gstack-origin/test/helpers/gemini-session-runner.test.ts +104 -0
  268. package/gstack-origin/test/helpers/gemini-session-runner.ts +201 -0
  269. package/gstack-origin/test/helpers/llm-judge.ts +130 -0
  270. package/gstack-origin/test/helpers/observability.test.ts +283 -0
  271. package/gstack-origin/test/helpers/session-runner.test.ts +96 -0
  272. package/gstack-origin/test/helpers/session-runner.ts +357 -0
  273. package/gstack-origin/test/helpers/skill-parser.ts +206 -0
  274. package/gstack-origin/test/helpers/touchfiles.ts +260 -0
  275. package/gstack-origin/test/hook-scripts.test.ts +373 -0
  276. package/gstack-origin/test/skill-e2e-browse.test.ts +293 -0
  277. package/gstack-origin/test/skill-e2e-deploy.test.ts +279 -0
  278. package/gstack-origin/test/skill-e2e-design.test.ts +614 -0
  279. package/gstack-origin/test/skill-e2e-plan.test.ts +538 -0
  280. package/gstack-origin/test/skill-e2e-qa-bugs.test.ts +194 -0
  281. package/gstack-origin/test/skill-e2e-qa-workflow.test.ts +412 -0
  282. package/gstack-origin/test/skill-e2e-review.test.ts +535 -0
  283. package/gstack-origin/test/skill-e2e-workflow.test.ts +586 -0
  284. package/gstack-origin/test/skill-e2e.test.ts +3325 -0
  285. package/gstack-origin/test/skill-llm-eval.test.ts +787 -0
  286. package/gstack-origin/test/skill-parser.test.ts +179 -0
  287. package/gstack-origin/test/skill-routing-e2e.test.ts +605 -0
  288. package/gstack-origin/test/skill-validation.test.ts +1520 -0
  289. package/gstack-origin/test/telemetry.test.ts +278 -0
  290. package/gstack-origin/test/touchfiles.test.ts +262 -0
  291. package/gstack-origin/unfreeze/SKILL.md +40 -0
  292. package/gstack-origin/unfreeze/SKILL.md.tmpl +38 -0
  293. package/package.json +38 -0
  294. package/scripts/install-antigravity-skill.ps1 +33 -0
  295. package/scripts/install-antigravity-skill.sh +41 -0
  296. package/scripts/sync-gstack-origin.ps1 +37 -0
  297. package/scripts/sync-gstack-origin.sh +35 -0
@@ -0,0 +1,239 @@
1
+ /**
2
+ * Shared helpers for E2E test files.
3
+ *
4
+ * Extracted from the monolithic skill-e2e.test.ts to support splitting
5
+ * tests across multiple files by category.
6
+ */
7
+
8
+ import { describe, test, afterAll } from 'bun:test';
9
+ import type { SkillTestResult } from './session-runner';
10
+ import { EvalCollector, judgePassed } from './eval-store';
11
+ import type { EvalTestEntry } from './eval-store';
12
+ import { selectTests, detectBaseBranch, getChangedFiles, E2E_TOUCHFILES, GLOBAL_TOUCHFILES } from './touchfiles';
13
+ import { spawnSync } from 'child_process';
14
+ import * as fs from 'fs';
15
+ import * as path from 'path';
16
+ import * as os from 'os';
17
+
18
/**
 * Absolute path two directory levels above this file's directory
 * (`import.meta.dir`); used below as the base for repo-relative paths.
 */
export const ROOT = path.resolve(import.meta.dir, '../..');
19
+
20
// E2E evals are opt-in: everything is skipped unless the EVALS env var is set
// to a non-empty value (conventionally EVALS=1). Session runner strips CLAUDE*
// env vars to avoid nested session issues.
//
// BLAME PROTOCOL: When an eval fails, do NOT claim "pre-existing" or "not related
// to our changes" without proof. Run the same eval on main to verify. These tests
// have invisible couplings — preamble text, SKILL.md content, and timing all affect
// agent behavior. See CLAUDE.md "E2E eval failure blame protocol" for details.
export const evalsEnabled = Boolean(process.env.EVALS);
27
+
28
// --- Diff-based test selection ---
// Names of the E2E tests chosen for this run; `null` means "no filter, run all".
// When EVALS_ALL is not set, only tests whose touchfiles were modified are run.
// Set EVALS_ALL=1 to force all tests. Set EVALS_BASE to override the base branch.
export let selectedTests: string[] | null = null;

// EVALS_FAST: the 8 slowest tests (all Opus quality tests), skipped for quick feedback.
const FAST_EXCLUDED_TESTS: readonly string[] = [
  'plan-ceo-review-selective',
  'plan-ceo-review',
  'retro',
  'retro-base-branch',
  'design-consultation-core',
  'design-consultation-existing',
  'qa-fix-loop',
  'design-review-fix',
];
39
+
40
if (evalsEnabled && !process.env.EVALS_ALL) {
  // Base branch precedence: explicit EVALS_BASE, then auto-detection, then 'main'.
  const diffBase = process.env.EVALS_BASE || detectBaseBranch(ROOT) || 'main';
  const touchedFiles = getChangedFiles(diffBase, ROOT);

  // An empty diff (e.g., on main branch) leaves selectedTests as null → run all.
  if (touchedFiles.length > 0) {
    const { selected, skipped, reason } = selectTests(touchedFiles, E2E_TOUCHFILES, GLOBAL_TOUCHFILES);
    selectedTests = selected;

    const totalTests = Object.keys(E2E_TOUCHFILES).length;
    process.stderr.write(`\nE2E selection (${reason}): ${selected.length}/${totalTests} tests\n`);
    if (skipped.length > 0) {
      process.stderr.write(` Skipped: ${skipped.join(', ')}\n`);
    }
    process.stderr.write('\n');
  }
}
57
+
58
// EVALS_FAST runs after diff-based selection and drops the slow tests from
// whatever set is currently selected.
if (evalsEnabled && process.env.EVALS_FAST) {
  const isFastEligible = (testName: string) => !FAST_EXCLUDED_TESTS.includes(testName);
  // No diff-based selection (null) means the starting set is every known test.
  const candidates: string[] = selectedTests ?? Object.keys(E2E_TOUCHFILES);
  selectedTests = candidates.filter(isFastEligible);
  process.stderr.write(`EVALS_FAST: excluded ${FAST_EXCLUDED_TESTS.length} slow tests, running ${selectedTests.length}\n\n`);
}
68
+
69
/** `describe` when evals are enabled, `describe.skip` otherwise. */
export const describeE2E = !evalsEnabled ? describe.skip : describe;
70
+
71
/**
 * Register a describe block, skipping it entirely when none of its tests are selected.
 *
 * @param name - Title of the describe block.
 * @param testNames - Names of the tests the block contains.
 * @param fn - Body of the describe block.
 */
export function describeIfSelected(name: string, testNames: string[], fn: () => void) {
  const selection = selectedTests;
  // A null selection means "run all", so every block counts as selected.
  const hasSelectedTest =
    selection === null ? true : testNames.some(candidate => selection.includes(candidate));
  const register = hasSelectedTest ? describeE2E : describe.skip;
  register(name, fn);
}
76
+
77
// Unique run ID for this E2E session — used for heartbeat + per-run log directory.
// Format: YYYYMMDD-HHMMSS (UTC). The date dashes are stripped along with ':' and
// '.', so the 15-character cut keeps the seconds; previously the ID stopped at
// the minute and two runs started within the same minute shared an ID.
export const runId = new Date()
  .toISOString() // 2026-03-14T09:26:53.589Z
  .replace(/[-:.]/g, '') // 20260314T092653589Z
  .replace('T', '-') // 20260314-092653589Z
  .slice(0, 15); // 20260314-092653
79
+
80
/** Path of the browse binary: `<ROOT>/browse/dist/browse`. */
const BROWSE_BIN_SEGMENTS = ['browse', 'dist', 'browse'];
export const browseBin = path.resolve(ROOT, ...BROWSE_BIN_SEGMENTS);
81
+
82
// True when ANTHROPIC_API_KEY is set to a non-empty value (needed for outcome evals).
export const hasApiKey = Boolean(process.env.ANTHROPIC_API_KEY);
84
+
85
/**
 * Recursively mirror the directory tree at `src` into `dest`.
 *
 * `dest` (and any missing parents) is created first. Sub-directories are
 * recreated by recursion; every other entry is copied with `fs.copyFileSync`.
 *
 * @param src - Existing directory to copy from.
 * @param dest - Directory to copy into; created if missing.
 */
export function copyDirSync(src: string, dest: string) {
  fs.mkdirSync(dest, { recursive: true });
  const entries = fs.readdirSync(src, { withFileTypes: true });
  entries.forEach((dirent) => {
    const from = path.join(src, dirent.name);
    const to = path.join(dest, dirent.name);
    if (!dirent.isDirectory()) {
      fs.copyFileSync(from, to);
      return;
    }
    copyDirSync(from, to);
  });
}
100
+
101
/**
 * Populate `dir` with the browse shims the E2E tests expect:
 *
 * - `browse/dist/browse`     symlink to the real browse binary (only if it exists)
 * - `browse/bin/find-browse` script that echoes the real binary's path
 * - `browse/bin/remote-slug` script that echoes "test-project"
 *
 * @param dir - Temporary directory to set the shims up in.
 */
export function setupBrowseShims(dir: string) {
  const browseRoot = path.join(dir, 'browse');

  // Binary symlink
  const distDir = path.join(browseRoot, 'dist');
  fs.mkdirSync(distDir, { recursive: true });
  if (fs.existsSync(browseBin)) {
    fs.symlinkSync(browseBin, path.join(distDir, 'browse'));
  }

  // Both script shims live in browse/bin and are written with mode 0o755.
  const shimDir = path.join(browseRoot, 'bin');
  fs.mkdirSync(shimDir, { recursive: true });
  const writeShim = (shimName: string, script: string) => {
    fs.writeFileSync(path.join(shimDir, shimName), script, { mode: 0o755 });
  };

  writeShim('find-browse', `#!/bin/bash\necho "${browseBin}"\n`);
  writeShim('remote-slug', `#!/bin/bash\necho "test-project"\n`);
}
128
+
129
/**
 * Log a one-line cost summary for an E2E test to the console.
 *
 * Output shape: `<label>: $<cost> (<turns> turns, <tokens>k tokens, <seconds>s)`,
 * with cost to two decimals, tokens in thousands to one decimal, and the
 * duration converted from milliseconds to whole seconds.
 *
 * @param label - Prefix identifying the test.
 * @param result - Object carrying the cost estimate and the duration in milliseconds.
 */
export function logCost(label: string, result: { costEstimate: { turnsUsed: number; estimatedTokens: number; estimatedCost: number }; duration: number }) {
  const estimate = result.costEstimate;
  const dollars = estimate.estimatedCost.toFixed(2);
  const kiloTokens = (estimate.estimatedTokens / 1000).toFixed(1);
  const seconds = Math.round(result.duration / 1000);
  console.log(`${label}: $${dollars} (${estimate.turnsUsed} turns, ${kiloTokens}k tokens, ${seconds}s)`);
}
137
+
138
/**
 * Dump diagnostic info on planted-bug outcome failure (decision 1C).
 *
 * Writes `{ label, report, judgeResult }` as pretty-printed JSON to
 * `<dir>/.gstack/test-transcripts/<label>-outcome-<timestamp>.json`.
 * Best-effort: any filesystem or serialization error is swallowed so a
 * failed dump never fails the calling test.
 *
 * @param dir - Directory under which `.gstack/test-transcripts` is created.
 * @param label - Test label; path separators are replaced with `_` in the
 *   file name only (the JSON keeps the label unchanged).
 * @param report - Report text to record.
 * @param judgeResult - Judge verdict to record; it is serialized with `JSON.stringify`.
 */
export function dumpOutcomeDiagnostic(dir: string, label: string, report: string, judgeResult: unknown) {
  try {
    const transcriptDir = path.join(dir, '.gstack', 'test-transcripts');
    fs.mkdirSync(transcriptDir, { recursive: true });
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    // A label such as "qa/checkout" would otherwise point into a sub-directory
    // that does not exist, and the write would be lost in the catch below.
    const fileLabel = label.replace(/[\\/]/g, '_');
    fs.writeFileSync(
      path.join(transcriptDir, `${fileLabel}-outcome-${timestamp}.json`),
      JSON.stringify({ label, report, judgeResult }, null, 2),
    );
  } catch { /* non-fatal */ }
}
152
+
153
/**
 * Build the EvalCollector for one test suite.
 *
 * @param suite - Suite name passed to the collector.
 * @returns A new collector, or `null` when evals are not enabled.
 */
export function createEvalCollector(suite: string): EvalCollector | null {
  if (!evalsEnabled) {
    return null;
  }
  return new EvalCollector(suite);
}
159
+
160
/**
 * Record one E2E test result into the eval collector (no-op without a collector).
 *
 * The entry counts as passed only when the run exited with 'success' and
 * reported no browse errors. Fields in `extra` are spread last, so they
 * override the values derived from `result`.
 *
 * @param evalCollector - Collector to add the entry to, or `null` to record nothing.
 * @param name - Test name.
 * @param suite - Suite name.
 * @param result - Result of the test session.
 * @param extra - Additional or overriding entry fields.
 */
export function recordE2E(
  evalCollector: EvalCollector | null,
  name: string,
  suite: string,
  result: SkillTestResult,
  extra?: Partial<EvalTestEntry>,
) {
  // Machine-readable summary of the final tool call:
  // `<tool>(<first 60 characters of the JSON-encoded input>)`
  let lastTool: string | undefined;
  const calls = result.toolCalls;
  if (calls.length > 0) {
    const finalCall = calls[calls.length - 1];
    lastTool = `${finalCall.tool}(${JSON.stringify(finalCall.input).slice(0, 60)})`;
  }

  if (evalCollector == null) {
    return;
  }

  evalCollector.addTest({
    name,
    suite,
    tier: 'e2e',
    passed: result.exitReason === 'success' && result.browseErrors.length === 0,
    duration_ms: result.duration,
    cost_usd: result.costEstimate.estimatedCost,
    transcript: result.transcript,
    output: result.output?.slice(0, 2000),
    turns_used: result.costEstimate.turnsUsed,
    browse_errors: result.browseErrors,
    exit_reason: result.exitReason,
    // Only meaningful for timeouts: the turn count at which the run was cut off.
    timeout_at_turn: result.exitReason === 'timeout' ? result.costEstimate.turnsUsed : undefined,
    last_tool_call: lastTool,
    model: result.model,
    first_response_ms: result.firstResponseMs,
    max_inter_turn_ms: result.maxInterTurnMs,
    ...extra,
  });
}
191
+
192
/**
 * Finalize an eval collector (write results).
 *
 * Does nothing for a `null` collector. A failure inside `finalize()` is
 * logged with `console.error` and not rethrown.
 */
export async function finalizeEvalCollector(evalCollector: EvalCollector | null) {
  if (!evalCollector) {
    return;
  }
  try {
    await evalCollector.finalize();
  } catch (saveError) {
    console.error('Failed to save eval results:', saveError);
  }
}
202
+
203
// Pre-seed preamble state files so E2E tests don't waste turns on lake intro + telemetry prompts.
// These are one-time interactive prompts that burn 3-7 turns per test if not pre-seeded.
// Each marker is created as an empty file in ~/.gstack; existing markers are left untouched.
if (evalsEnabled) {
  const stateDir = path.join(os.homedir(), '.gstack');
  fs.mkdirSync(stateDir, { recursive: true });

  const markerNames = ['.completeness-intro-seen', '.telemetry-prompted'];
  markerNames.forEach((markerName) => {
    const markerPath = path.join(stateDir, markerName);
    if (fs.existsSync(markerPath)) {
      return;
    }
    fs.writeFileSync(markerPath, '');
  });
}
213
+
214
// Fail fast if Anthropic API is unreachable — don't burn through tests getting ConnectionRefused.
// Sends a single "ping" prompt through the claude CLI (30s timeout) and aborts module
// loading when its stdout contains a known connectivity error marker. Only stdout is
// inspected; spawn errors and timeouts are not treated as failures here.
if (evalsEnabled) {
  const pingCommand = 'echo "ping" | claude -p --max-turns 1 --output-format stream-json --verbose --dangerously-skip-permissions';
  const probe = spawnSync('sh', ['-c', pingCommand], { stdio: 'pipe', timeout: 30_000 });
  const stdoutText = probe.stdout?.toString() || '';

  const unreachableMarkers = ['ConnectionRefused', 'Unable to connect'];
  if (unreachableMarkers.some(marker => stdoutText.includes(marker))) {
    throw new Error('Anthropic API unreachable — aborting E2E suite. Fix connectivity and retry.');
  }
}
224
+
225
/**
 * Register a single test, skipping it when it is not selected
 * (for describe blocks that contain several tests).
 *
 * @param testName - Test name; also the key looked up in the selection.
 * @param fn - Async test body.
 * @param timeout - Timeout value passed through to the test runner.
 */
export function testIfSelected(testName: string, fn: () => Promise<void>, timeout: number) {
  // A null selection means "run all".
  const isSelected = selectedTests === null ? true : selectedTests.includes(testName);
  const register = isSelected ? test : test.skip;
  register(testName, fn, timeout);
}
230
+
231
/**
 * Concurrent variant of the per-test selection helper: a selected test is
 * registered with `test.concurrent` (runs in parallel with other concurrent
 * tests within the same describe block); an unselected one with `test.skip`.
 *
 * @param testName - Test name; also the key looked up in the selection.
 * @param fn - Async test body.
 * @param timeout - Timeout value passed through to the test runner.
 */
export function testConcurrentIfSelected(testName: string, fn: () => Promise<void>, timeout: number) {
  // A null selection means "run all".
  const isSelected = selectedTests === null ? true : selectedTests.includes(testName);
  const register = isSelected ? test.concurrent : test.skip;
  register(testName, fn, timeout);
}
236
+
237
// Re-export the eval-store API so test files can import it from this helper module.
export { EvalCollector, judgePassed } from './eval-store';
export type { EvalTestEntry } from './eval-store';