@runchr/gstack-antigravity 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. package/.agents/rules/ETHOS.md +129 -0
  2. package/.agents/rules/global-gstack.md +117 -0
  3. package/.agents/rules/persona-gstack-autoplan.md +14 -0
  4. package/.agents/rules/persona-gstack-benchmark.md +14 -0
  5. package/.agents/rules/persona-gstack-browse.md +14 -0
  6. package/.agents/rules/persona-gstack-canary.md +14 -0
  7. package/.agents/rules/persona-gstack-careful.md +14 -0
  8. package/.agents/rules/persona-gstack-codex.md +14 -0
  9. package/.agents/rules/persona-gstack-cso.md +14 -0
  10. package/.agents/rules/persona-gstack-design-consultation.md +14 -0
  11. package/.agents/rules/persona-gstack-design-review.md +14 -0
  12. package/.agents/rules/persona-gstack-document-release.md +14 -0
  13. package/.agents/rules/persona-gstack-freeze.md +14 -0
  14. package/.agents/rules/persona-gstack-gstack-upgrade.md +14 -0
  15. package/.agents/rules/persona-gstack-guard.md +14 -0
  16. package/.agents/rules/persona-gstack-investigate.md +14 -0
  17. package/.agents/rules/persona-gstack-land-and-deploy.md +14 -0
  18. package/.agents/rules/persona-gstack-office-hours.md +14 -0
  19. package/.agents/rules/persona-gstack-plan-ceo-review.md +14 -0
  20. package/.agents/rules/persona-gstack-plan-design-review.md +14 -0
  21. package/.agents/rules/persona-gstack-plan-eng-review.md +14 -0
  22. package/.agents/rules/persona-gstack-qa-only.md +14 -0
  23. package/.agents/rules/persona-gstack-qa.md +14 -0
  24. package/.agents/rules/persona-gstack-retro.md +14 -0
  25. package/.agents/rules/persona-gstack-review.md +14 -0
  26. package/.agents/rules/persona-gstack-setup-browser-cookies.md +14 -0
  27. package/.agents/rules/persona-gstack-setup-deploy.md +14 -0
  28. package/.agents/rules/persona-gstack-ship.md +14 -0
  29. package/.agents/rules/persona-gstack-unfreeze.md +14 -0
  30. package/.agents/rules/persona-gstack.md +40 -0
  31. package/.agents/rules/recursive-identities.md +22 -0
  32. package/.agents/workflows/autoplan.md +30 -0
  33. package/.agents/workflows/benchmark.md +31 -0
  34. package/.agents/workflows/browse.md +26 -0
  35. package/.agents/workflows/canary.md +33 -0
  36. package/.agents/workflows/careful.md +22 -0
  37. package/.agents/workflows/codex.md +36 -0
  38. package/.agents/workflows/cso.md +29 -0
  39. package/.agents/workflows/design-consultation.md +28 -0
  40. package/.agents/workflows/design-review.md +28 -0
  41. package/.agents/workflows/document-release.md +32 -0
  42. package/.agents/workflows/freeze.md +17 -0
  43. package/.agents/workflows/gstack-upgrade.md +54 -0
  44. package/.agents/workflows/gstack.md +56 -0
  45. package/.agents/workflows/guard.md +18 -0
  46. package/.agents/workflows/investigate.md +37 -0
  47. package/.agents/workflows/land-and-deploy.md +35 -0
  48. package/.agents/workflows/office-hours.md +27 -0
  49. package/.agents/workflows/plan-ceo-review.md +34 -0
  50. package/.agents/workflows/plan-design-review.md +31 -0
  51. package/.agents/workflows/plan-eng-review.md +28 -0
  52. package/.agents/workflows/qa-only.md +28 -0
  53. package/.agents/workflows/qa.md +73 -0
  54. package/.agents/workflows/retro.md +34 -0
  55. package/.agents/workflows/review.md +30 -0
  56. package/.agents/workflows/setup-browser-cookies.md +15 -0
  57. package/.agents/workflows/setup-cookies.md +8 -0
  58. package/.agents/workflows/setup-deploy.md +21 -0
  59. package/.agents/workflows/ship.md +93 -0
  60. package/.agents/workflows/unfreeze.md +12 -0
  61. package/LICENSE +22 -0
  62. package/README.md +189 -0
  63. package/README_KO.md +191 -0
  64. package/bin/install.js +105 -0
  65. package/gstack-origin/.agents/skills/gstack/SKILL.md +651 -0
  66. package/gstack-origin/.agents/skills/gstack-autoplan/SKILL.md +678 -0
  67. package/gstack-origin/.agents/skills/gstack-benchmark/SKILL.md +482 -0
  68. package/gstack-origin/.agents/skills/gstack-browse/SKILL.md +511 -0
  69. package/gstack-origin/.agents/skills/gstack-canary/SKILL.md +486 -0
  70. package/gstack-origin/.agents/skills/gstack-careful/SKILL.md +50 -0
  71. package/gstack-origin/.agents/skills/gstack-cso/SKILL.md +607 -0
  72. package/gstack-origin/.agents/skills/gstack-design-consultation/SKILL.md +615 -0
  73. package/gstack-origin/.agents/skills/gstack-design-review/SKILL.md +988 -0
  74. package/gstack-origin/.agents/skills/gstack-document-release/SKILL.md +604 -0
  75. package/gstack-origin/.agents/skills/gstack-freeze/SKILL.md +67 -0
  76. package/gstack-origin/.agents/skills/gstack-guard/SKILL.md +62 -0
  77. package/gstack-origin/.agents/skills/gstack-investigate/SKILL.md +415 -0
  78. package/gstack-origin/.agents/skills/gstack-land-and-deploy/SKILL.md +873 -0
  79. package/gstack-origin/.agents/skills/gstack-office-hours/SKILL.md +986 -0
  80. package/gstack-origin/.agents/skills/gstack-plan-ceo-review/SKILL.md +1268 -0
  81. package/gstack-origin/.agents/skills/gstack-plan-design-review/SKILL.md +668 -0
  82. package/gstack-origin/.agents/skills/gstack-plan-eng-review/SKILL.md +826 -0
  83. package/gstack-origin/.agents/skills/gstack-qa/SKILL.md +1006 -0
  84. package/gstack-origin/.agents/skills/gstack-qa-only/SKILL.md +626 -0
  85. package/gstack-origin/.agents/skills/gstack-retro/SKILL.md +1065 -0
  86. package/gstack-origin/.agents/skills/gstack-review/SKILL.md +704 -0
  87. package/gstack-origin/.agents/skills/gstack-setup-browser-cookies/SKILL.md +325 -0
  88. package/gstack-origin/.agents/skills/gstack-setup-deploy/SKILL.md +450 -0
  89. package/gstack-origin/.agents/skills/gstack-ship/SKILL.md +1312 -0
  90. package/gstack-origin/.agents/skills/gstack-unfreeze/SKILL.md +36 -0
  91. package/gstack-origin/.agents/skills/gstack-upgrade/SKILL.md +220 -0
  92. package/gstack-origin/.env.example +5 -0
  93. package/gstack-origin/.github/workflows/skill-docs.yml +17 -0
  94. package/gstack-origin/AGENTS.md +49 -0
  95. package/gstack-origin/ARCHITECTURE.md +359 -0
  96. package/gstack-origin/BROWSER.md +271 -0
  97. package/gstack-origin/CHANGELOG.md +800 -0
  98. package/gstack-origin/CLAUDE.md +284 -0
  99. package/gstack-origin/CONTRIBUTING.md +370 -0
  100. package/gstack-origin/ETHOS.md +129 -0
  101. package/gstack-origin/LICENSE +21 -0
  102. package/gstack-origin/README.md +228 -0
  103. package/gstack-origin/SKILL.md +657 -0
  104. package/gstack-origin/SKILL.md.tmpl +281 -0
  105. package/gstack-origin/TODOS.md +564 -0
  106. package/gstack-origin/VERSION +1 -0
  107. package/gstack-origin/autoplan/SKILL.md +689 -0
  108. package/gstack-origin/autoplan/SKILL.md.tmpl +416 -0
  109. package/gstack-origin/benchmark/SKILL.md +489 -0
  110. package/gstack-origin/benchmark/SKILL.md.tmpl +233 -0
  111. package/gstack-origin/bin/dev-setup +68 -0
  112. package/gstack-origin/bin/dev-teardown +56 -0
  113. package/gstack-origin/bin/gstack-analytics +191 -0
  114. package/gstack-origin/bin/gstack-community-dashboard +113 -0
  115. package/gstack-origin/bin/gstack-config +38 -0
  116. package/gstack-origin/bin/gstack-diff-scope +71 -0
  117. package/gstack-origin/bin/gstack-global-discover.ts +591 -0
  118. package/gstack-origin/bin/gstack-repo-mode +93 -0
  119. package/gstack-origin/bin/gstack-review-log +9 -0
  120. package/gstack-origin/bin/gstack-review-read +12 -0
  121. package/gstack-origin/bin/gstack-slug +15 -0
  122. package/gstack-origin/bin/gstack-telemetry-log +158 -0
  123. package/gstack-origin/bin/gstack-telemetry-sync +127 -0
  124. package/gstack-origin/bin/gstack-update-check +196 -0
  125. package/gstack-origin/browse/SKILL.md +517 -0
  126. package/gstack-origin/browse/SKILL.md.tmpl +141 -0
  127. package/gstack-origin/browse/bin/find-browse +21 -0
  128. package/gstack-origin/browse/bin/remote-slug +14 -0
  129. package/gstack-origin/browse/scripts/build-node-server.sh +48 -0
  130. package/gstack-origin/browse/src/browser-manager.ts +634 -0
  131. package/gstack-origin/browse/src/buffers.ts +137 -0
  132. package/gstack-origin/browse/src/bun-polyfill.cjs +109 -0
  133. package/gstack-origin/browse/src/cli.ts +420 -0
  134. package/gstack-origin/browse/src/commands.ts +111 -0
  135. package/gstack-origin/browse/src/config.ts +150 -0
  136. package/gstack-origin/browse/src/cookie-import-browser.ts +417 -0
  137. package/gstack-origin/browse/src/cookie-picker-routes.ts +207 -0
  138. package/gstack-origin/browse/src/cookie-picker-ui.ts +541 -0
  139. package/gstack-origin/browse/src/find-browse.ts +61 -0
  140. package/gstack-origin/browse/src/meta-commands.ts +269 -0
  141. package/gstack-origin/browse/src/platform.ts +17 -0
  142. package/gstack-origin/browse/src/read-commands.ts +335 -0
  143. package/gstack-origin/browse/src/server.ts +369 -0
  144. package/gstack-origin/browse/src/snapshot.ts +398 -0
  145. package/gstack-origin/browse/src/url-validation.ts +91 -0
  146. package/gstack-origin/browse/src/write-commands.ts +352 -0
  147. package/gstack-origin/browse/test/bun-polyfill.test.ts +72 -0
  148. package/gstack-origin/browse/test/commands.test.ts +1836 -0
  149. package/gstack-origin/browse/test/config.test.ts +250 -0
  150. package/gstack-origin/browse/test/cookie-import-browser.test.ts +397 -0
  151. package/gstack-origin/browse/test/cookie-picker-routes.test.ts +205 -0
  152. package/gstack-origin/browse/test/find-browse.test.ts +50 -0
  153. package/gstack-origin/browse/test/fixtures/basic.html +33 -0
  154. package/gstack-origin/browse/test/fixtures/cursor-interactive.html +22 -0
  155. package/gstack-origin/browse/test/fixtures/dialog.html +15 -0
  156. package/gstack-origin/browse/test/fixtures/empty.html +2 -0
  157. package/gstack-origin/browse/test/fixtures/forms.html +55 -0
  158. package/gstack-origin/browse/test/fixtures/qa-eval-checkout.html +108 -0
  159. package/gstack-origin/browse/test/fixtures/qa-eval-spa.html +98 -0
  160. package/gstack-origin/browse/test/fixtures/qa-eval.html +51 -0
  161. package/gstack-origin/browse/test/fixtures/responsive.html +49 -0
  162. package/gstack-origin/browse/test/fixtures/snapshot.html +55 -0
  163. package/gstack-origin/browse/test/fixtures/spa.html +24 -0
  164. package/gstack-origin/browse/test/fixtures/states.html +17 -0
  165. package/gstack-origin/browse/test/fixtures/upload.html +25 -0
  166. package/gstack-origin/browse/test/gstack-config.test.ts +125 -0
  167. package/gstack-origin/browse/test/gstack-update-check.test.ts +467 -0
  168. package/gstack-origin/browse/test/handoff.test.ts +235 -0
  169. package/gstack-origin/browse/test/path-validation.test.ts +63 -0
  170. package/gstack-origin/browse/test/platform.test.ts +37 -0
  171. package/gstack-origin/browse/test/snapshot.test.ts +467 -0
  172. package/gstack-origin/browse/test/test-server.ts +57 -0
  173. package/gstack-origin/browse/test/url-validation.test.ts +72 -0
  174. package/gstack-origin/canary/SKILL.md +493 -0
  175. package/gstack-origin/canary/SKILL.md.tmpl +220 -0
  176. package/gstack-origin/careful/SKILL.md +59 -0
  177. package/gstack-origin/careful/SKILL.md.tmpl +57 -0
  178. package/gstack-origin/careful/bin/check-careful.sh +112 -0
  179. package/gstack-origin/codex/SKILL.md +677 -0
  180. package/gstack-origin/codex/SKILL.md.tmpl +356 -0
  181. package/gstack-origin/conductor.json +6 -0
  182. package/gstack-origin/cso/SKILL.md +615 -0
  183. package/gstack-origin/cso/SKILL.md.tmpl +376 -0
  184. package/gstack-origin/design-consultation/SKILL.md +625 -0
  185. package/gstack-origin/design-consultation/SKILL.md.tmpl +369 -0
  186. package/gstack-origin/design-review/SKILL.md +998 -0
  187. package/gstack-origin/design-review/SKILL.md.tmpl +262 -0
  188. package/gstack-origin/docs/images/github-2013.png +0 -0
  189. package/gstack-origin/docs/images/github-2026.png +0 -0
  190. package/gstack-origin/docs/skills.md +877 -0
  191. package/gstack-origin/document-release/SKILL.md +613 -0
  192. package/gstack-origin/document-release/SKILL.md.tmpl +357 -0
  193. package/gstack-origin/freeze/SKILL.md +82 -0
  194. package/gstack-origin/freeze/SKILL.md.tmpl +80 -0
  195. package/gstack-origin/freeze/bin/check-freeze.sh +68 -0
  196. package/gstack-origin/gstack-upgrade/SKILL.md +226 -0
  197. package/gstack-origin/gstack-upgrade/SKILL.md.tmpl +224 -0
  198. package/gstack-origin/guard/SKILL.md +82 -0
  199. package/gstack-origin/guard/SKILL.md.tmpl +80 -0
  200. package/gstack-origin/investigate/SKILL.md +435 -0
  201. package/gstack-origin/investigate/SKILL.md.tmpl +196 -0
  202. package/gstack-origin/land-and-deploy/SKILL.md +880 -0
  203. package/gstack-origin/land-and-deploy/SKILL.md.tmpl +575 -0
  204. package/gstack-origin/office-hours/SKILL.md +996 -0
  205. package/gstack-origin/office-hours/SKILL.md.tmpl +624 -0
  206. package/gstack-origin/package.json +55 -0
  207. package/gstack-origin/plan-ceo-review/SKILL.md +1277 -0
  208. package/gstack-origin/plan-ceo-review/SKILL.md.tmpl +838 -0
  209. package/gstack-origin/plan-design-review/SKILL.md +676 -0
  210. package/gstack-origin/plan-design-review/SKILL.md.tmpl +314 -0
  211. package/gstack-origin/plan-eng-review/SKILL.md +836 -0
  212. package/gstack-origin/plan-eng-review/SKILL.md.tmpl +279 -0
  213. package/gstack-origin/qa/SKILL.md +1016 -0
  214. package/gstack-origin/qa/SKILL.md.tmpl +316 -0
  215. package/gstack-origin/qa/references/issue-taxonomy.md +85 -0
  216. package/gstack-origin/qa/templates/qa-report-template.md +126 -0
  217. package/gstack-origin/qa-only/SKILL.md +633 -0
  218. package/gstack-origin/qa-only/SKILL.md.tmpl +101 -0
  219. package/gstack-origin/retro/SKILL.md +1072 -0
  220. package/gstack-origin/retro/SKILL.md.tmpl +833 -0
  221. package/gstack-origin/review/SKILL.md +849 -0
  222. package/gstack-origin/review/SKILL.md.tmpl +259 -0
  223. package/gstack-origin/review/TODOS-format.md +62 -0
  224. package/gstack-origin/review/checklist.md +190 -0
  225. package/gstack-origin/review/design-checklist.md +132 -0
  226. package/gstack-origin/review/greptile-triage.md +220 -0
  227. package/gstack-origin/scripts/analytics.ts +190 -0
  228. package/gstack-origin/scripts/dev-skill.ts +82 -0
  229. package/gstack-origin/scripts/eval-compare.ts +96 -0
  230. package/gstack-origin/scripts/eval-list.ts +116 -0
  231. package/gstack-origin/scripts/eval-select.ts +86 -0
  232. package/gstack-origin/scripts/eval-summary.ts +187 -0
  233. package/gstack-origin/scripts/eval-watch.ts +172 -0
  234. package/gstack-origin/scripts/gen-skill-docs.ts +2414 -0
  235. package/gstack-origin/scripts/skill-check.ts +167 -0
  236. package/gstack-origin/setup +269 -0
  237. package/gstack-origin/setup-browser-cookies/SKILL.md +330 -0
  238. package/gstack-origin/setup-browser-cookies/SKILL.md.tmpl +74 -0
  239. package/gstack-origin/setup-deploy/SKILL.md +459 -0
  240. package/gstack-origin/setup-deploy/SKILL.md.tmpl +220 -0
  241. package/gstack-origin/ship/SKILL.md +1457 -0
  242. package/gstack-origin/ship/SKILL.md.tmpl +528 -0
  243. package/gstack-origin/supabase/config.sh +10 -0
  244. package/gstack-origin/supabase/functions/community-pulse/index.ts +59 -0
  245. package/gstack-origin/supabase/functions/telemetry-ingest/index.ts +135 -0
  246. package/gstack-origin/supabase/functions/update-check/index.ts +37 -0
  247. package/gstack-origin/supabase/migrations/001_telemetry.sql +89 -0
  248. package/gstack-origin/test/analytics.test.ts +277 -0
  249. package/gstack-origin/test/codex-e2e.test.ts +197 -0
  250. package/gstack-origin/test/fixtures/coverage-audit-fixture.ts +76 -0
  251. package/gstack-origin/test/fixtures/eval-baselines.json +7 -0
  252. package/gstack-origin/test/fixtures/qa-eval-checkout-ground-truth.json +43 -0
  253. package/gstack-origin/test/fixtures/qa-eval-ground-truth.json +43 -0
  254. package/gstack-origin/test/fixtures/qa-eval-spa-ground-truth.json +43 -0
  255. package/gstack-origin/test/fixtures/review-eval-design-slop.css +86 -0
  256. package/gstack-origin/test/fixtures/review-eval-design-slop.html +41 -0
  257. package/gstack-origin/test/fixtures/review-eval-enum-diff.rb +30 -0
  258. package/gstack-origin/test/fixtures/review-eval-enum.rb +27 -0
  259. package/gstack-origin/test/fixtures/review-eval-vuln.rb +14 -0
  260. package/gstack-origin/test/gemini-e2e.test.ts +173 -0
  261. package/gstack-origin/test/gen-skill-docs.test.ts +1049 -0
  262. package/gstack-origin/test/global-discover.test.ts +187 -0
  263. package/gstack-origin/test/helpers/codex-session-runner.ts +282 -0
  264. package/gstack-origin/test/helpers/e2e-helpers.ts +239 -0
  265. package/gstack-origin/test/helpers/eval-store.test.ts +548 -0
  266. package/gstack-origin/test/helpers/eval-store.ts +689 -0
  267. package/gstack-origin/test/helpers/gemini-session-runner.test.ts +104 -0
  268. package/gstack-origin/test/helpers/gemini-session-runner.ts +201 -0
  269. package/gstack-origin/test/helpers/llm-judge.ts +130 -0
  270. package/gstack-origin/test/helpers/observability.test.ts +283 -0
  271. package/gstack-origin/test/helpers/session-runner.test.ts +96 -0
  272. package/gstack-origin/test/helpers/session-runner.ts +357 -0
  273. package/gstack-origin/test/helpers/skill-parser.ts +206 -0
  274. package/gstack-origin/test/helpers/touchfiles.ts +260 -0
  275. package/gstack-origin/test/hook-scripts.test.ts +373 -0
  276. package/gstack-origin/test/skill-e2e-browse.test.ts +293 -0
  277. package/gstack-origin/test/skill-e2e-deploy.test.ts +279 -0
  278. package/gstack-origin/test/skill-e2e-design.test.ts +614 -0
  279. package/gstack-origin/test/skill-e2e-plan.test.ts +538 -0
  280. package/gstack-origin/test/skill-e2e-qa-bugs.test.ts +194 -0
  281. package/gstack-origin/test/skill-e2e-qa-workflow.test.ts +412 -0
  282. package/gstack-origin/test/skill-e2e-review.test.ts +535 -0
  283. package/gstack-origin/test/skill-e2e-workflow.test.ts +586 -0
  284. package/gstack-origin/test/skill-e2e.test.ts +3325 -0
  285. package/gstack-origin/test/skill-llm-eval.test.ts +787 -0
  286. package/gstack-origin/test/skill-parser.test.ts +179 -0
  287. package/gstack-origin/test/skill-routing-e2e.test.ts +605 -0
  288. package/gstack-origin/test/skill-validation.test.ts +1520 -0
  289. package/gstack-origin/test/telemetry.test.ts +278 -0
  290. package/gstack-origin/test/touchfiles.test.ts +262 -0
  291. package/gstack-origin/unfreeze/SKILL.md +40 -0
  292. package/gstack-origin/unfreeze/SKILL.md.tmpl +38 -0
  293. package/package.json +38 -0
  294. package/scripts/install-antigravity-skill.ps1 +33 -0
  295. package/scripts/install-antigravity-skill.sh +41 -0
  296. package/scripts/sync-gstack-origin.ps1 +37 -0
  297. package/scripts/sync-gstack-origin.sh +35 -0
@@ -0,0 +1,538 @@
1
+ import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
2
+ import { runSkillTest } from './helpers/session-runner';
3
+ import {
4
+ ROOT, browseBin, runId, evalsEnabled,
5
+ describeIfSelected, testConcurrentIfSelected,
6
+ copyDirSync, setupBrowseShims, logCost, recordE2E,
7
+ createEvalCollector, finalizeEvalCollector,
8
+ } from './helpers/e2e-helpers';
9
+ import { spawnSync } from 'child_process';
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import * as os from 'os';
13
+
14
// File-level eval collector labelled 'e2e-plan'; the tests below hand it to recordE2E
// together with each run's result. NOTE(review): what the collector stores and when it
// is flushed is defined in ./helpers/e2e-helpers — confirm there.
+ const evalCollector = createEvalCollector('e2e-plan');
15
+
16
+ // --- Plan CEO Review E2E ---
17
+
18
describeIfSelected('Plan CEO Review E2E', ['plan-ceo-review'], () => {
  // Temp working directory holding the git repo, plan.md and the copied skill.
  let planDir: string;

  beforeAll(() => {
    planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-ceo-'));

    /**
     * Run a fixture-setup command inside planDir.
     *
     * Throws when the process cannot be spawned, hits the 5s timeout, or exits
     * non-zero, so a broken fixture is reported here instead of surfacing later
     * as a confusing agent failure.
     */
    const run = (cmd: string, args: string[]) => {
      const result = spawnSync(cmd, args, { cwd: planDir, stdio: 'pipe', timeout: 5000 });
      if (result.error || result.status !== 0) {
        const detail = result.error
          ? result.error.message
          : String(result.stderr || '').trim();
        throw new Error(
          `Fixture setup failed: ${cmd} ${args.join(' ')} (status ${result.status}): ${detail}`,
        );
      }
      return result;
    };

    // Init git repo (CEO review SKILL.md has a "System Audit" step that runs git)
    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // Create a simple plan document for the agent to review
    fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Add User Dashboard

## Context
We're building a new user dashboard that shows recent activity, notifications, and quick actions.

## Changes
1. New React component \`UserDashboard\` in \`src/components/\`
2. REST API endpoint \`GET /api/dashboard\` returning user stats
3. PostgreSQL query for activity aggregation
4. Redis cache layer for dashboard data (5min TTL)

## Architecture
- Frontend: React + TailwindCSS
- Backend: Express.js REST API
- Database: PostgreSQL with existing user/activity tables
- Cache: Redis for dashboard aggregates

## Open questions
- Should we use WebSocket for real-time updates?
- How do we handle users with 100k+ activity records?
`);

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'add plan']);

    // Copy plan-ceo-review skill so the prompt can reference it by relative path
    fs.mkdirSync(path.join(planDir, 'plan-ceo-review'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
      path.join(planDir, 'plan-ceo-review', 'SKILL.md'),
    );
  });

  afterAll(() => {
    // Best-effort cleanup: a leftover temp dir must not fail the suite.
    try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
  });

  test('/plan-ceo-review produces structured review output', async () => {
    // Accept error_max_turns — the CEO review is very thorough and may exceed turns
    const acceptedExitReasons = ['success', 'error_max_turns'];

    const result = await runSkillTest({
      prompt: `Read plan-ceo-review/SKILL.md for the review workflow.

Read plan.md — that's the plan to review. This is a standalone plan document, not a codebase — skip any codebase exploration or system audit steps.

Choose HOLD SCOPE mode. Skip any AskUserQuestion calls — this is non-interactive.
Write your complete review directly to ${planDir}/review-output.md

Focus on reviewing the plan content: architecture, error handling, security, and performance.`,
      workingDirectory: planDir,
      maxTurns: 15,
      timeout: 360_000,
      testName: 'plan-ceo-review',
      runId,
      model: 'claude-opus-4-6',
    });

    logCost('/plan-ceo-review', result);
    recordE2E(evalCollector, '/plan-ceo-review', 'Plan CEO Review E2E', result, {
      passed: acceptedExitReasons.includes(result.exitReason),
    });
    expect(acceptedExitReasons).toContain(result.exitReason);

    // Check the review's size only when the file exists: a run that stopped at
    // error_max_turns may not have written it, and that is still a pass here.
    const reviewPath = path.join(planDir, 'review-output.md');
    if (fs.existsSync(reviewPath)) {
      const review = fs.readFileSync(reviewPath, 'utf-8');
      expect(review.length).toBeGreaterThan(200);
    }
  }, 420_000); // test-level timeout is larger than the 360_000 passed to runSkillTest
});
102
+
103
+ // --- Plan CEO Review (SELECTIVE EXPANSION) E2E ---
104
+
105
describeIfSelected('Plan CEO Review SELECTIVE EXPANSION E2E', ['plan-ceo-review-selective'], () => {
  // Temp working directory holding the git repo, plan.md and the copied skill.
  let planDir: string;

  beforeAll(() => {
    planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-ceo-sel-'));

    /**
     * Run a fixture-setup command inside planDir.
     *
     * Throws when the process cannot be spawned, hits the 5s timeout, or exits
     * non-zero, so a broken fixture is reported here instead of surfacing later
     * as a confusing agent failure.
     */
    const run = (cmd: string, args: string[]) => {
      const result = spawnSync(cmd, args, { cwd: planDir, stdio: 'pipe', timeout: 5000 });
      if (result.error || result.status !== 0) {
        const detail = result.error
          ? result.error.message
          : String(result.stderr || '').trim();
        throw new Error(
          `Fixture setup failed: ${cmd} ${args.join(' ')} (status ${result.status}): ${detail}`,
        );
      }
      return result;
    };

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // Plan document the agent is asked to review
    fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Add User Dashboard

## Context
We're building a new user dashboard that shows recent activity, notifications, and quick actions.

## Changes
1. New React component \`UserDashboard\` in \`src/components/\`
2. REST API endpoint \`GET /api/dashboard\` returning user stats
3. PostgreSQL query for activity aggregation
4. Redis cache layer for dashboard data (5min TTL)

## Architecture
- Frontend: React + TailwindCSS
- Backend: Express.js REST API
- Database: PostgreSQL with existing user/activity tables
- Cache: Redis for dashboard aggregates

## Open questions
- Should we use WebSocket for real-time updates?
- How do we handle users with 100k+ activity records?
`);

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'add plan']);

    // Copy plan-ceo-review skill so the prompt can reference it by relative path
    fs.mkdirSync(path.join(planDir, 'plan-ceo-review'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
      path.join(planDir, 'plan-ceo-review', 'SKILL.md'),
    );
  });

  afterAll(() => {
    // Best-effort cleanup: a leftover temp dir must not fail the suite.
    try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
  });

  test('/plan-ceo-review SELECTIVE EXPANSION produces structured review output', async () => {
    // Both a clean finish and running out of turns count as a pass.
    const acceptedExitReasons = ['success', 'error_max_turns'];

    const result = await runSkillTest({
      prompt: `Read plan-ceo-review/SKILL.md for the review workflow.

Read plan.md — that's the plan to review. This is a standalone plan document, not a codebase — skip any codebase exploration or system audit steps.

Choose SELECTIVE EXPANSION mode. Skip any AskUserQuestion calls — this is non-interactive.
For the cherry-pick ceremony, accept all expansion proposals automatically.
Write your complete review directly to ${planDir}/review-output-selective.md

Focus on reviewing the plan content: architecture, error handling, security, and performance.`,
      workingDirectory: planDir,
      maxTurns: 15,
      timeout: 360_000,
      testName: 'plan-ceo-review-selective',
      runId,
      model: 'claude-opus-4-6',
    });

    logCost('/plan-ceo-review (SELECTIVE)', result);
    recordE2E(evalCollector, '/plan-ceo-review-selective', 'Plan CEO Review SELECTIVE EXPANSION E2E', result, {
      passed: acceptedExitReasons.includes(result.exitReason),
    });
    expect(acceptedExitReasons).toContain(result.exitReason);

    // Check the review's size only when the file exists: a run that stopped at
    // error_max_turns may not have written it, and that is still a pass here.
    const reviewPath = path.join(planDir, 'review-output-selective.md');
    if (fs.existsSync(reviewPath)) {
      const review = fs.readFileSync(reviewPath, 'utf-8');
      expect(review.length).toBeGreaterThan(200);
    }
  }, 420_000); // test-level timeout is larger than the 360_000 passed to runSkillTest
});
185
+
186
+ // --- Plan Eng Review E2E ---
187
+
188
describeIfSelected('Plan Eng Review E2E', ['plan-eng-review'], () => {
  // Temp working directory holding the git repo, plan.md and the copied skill.
  let planDir: string;

  beforeAll(() => {
    planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-eng-'));

    /**
     * Run a fixture-setup command inside planDir.
     *
     * Throws when the process cannot be spawned, hits the 5s timeout, or exits
     * non-zero, so a broken fixture is reported here instead of surfacing later
     * as a confusing agent failure.
     */
    const run = (cmd: string, args: string[]) => {
      const result = spawnSync(cmd, args, { cwd: planDir, stdio: 'pipe', timeout: 5000 });
      if (result.error || result.status !== 0) {
        const detail = result.error
          ? result.error.message
          : String(result.stderr || '').trim();
        throw new Error(
          `Fixture setup failed: ${cmd} ${args.join(' ')} (status ${result.status}): ${detail}`,
        );
      }
      return result;
    };

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // Create a plan with more engineering detail
    fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Migrate Auth to JWT

## Context
Replace session-cookie auth with JWT tokens. Currently using express-session + Redis store.

## Changes
1. Add \`jsonwebtoken\` package
2. New middleware \`auth/jwt-verify.ts\` replacing \`auth/session-check.ts\`
3. Login endpoint returns { accessToken, refreshToken }
4. Refresh endpoint rotates tokens
5. Migration script to invalidate existing sessions

## Files Modified
| File | Change |
|------|--------|
| auth/jwt-verify.ts | NEW: JWT verification middleware |
| auth/session-check.ts | DELETED |
| routes/login.ts | Return JWT instead of setting cookie |
| routes/refresh.ts | NEW: Token refresh endpoint |
| middleware/index.ts | Swap session-check for jwt-verify |

## Error handling
- Expired token: 401 with \`token_expired\` code
- Invalid token: 401 with \`invalid_token\` code
- Refresh with revoked token: 403

## Not in scope
- OAuth/OIDC integration
- Rate limiting on refresh endpoint
`);

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'add plan']);

    // Copy plan-eng-review skill so the prompt can reference it by relative path
    fs.mkdirSync(path.join(planDir, 'plan-eng-review'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
      path.join(planDir, 'plan-eng-review', 'SKILL.md'),
    );
  });

  afterAll(() => {
    // Best-effort cleanup: a leftover temp dir must not fail the suite.
    try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
  });

  test('/plan-eng-review produces structured review output', async () => {
    // Both a clean finish and running out of turns count as a pass.
    const acceptedExitReasons = ['success', 'error_max_turns'];

    const result = await runSkillTest({
      prompt: `Read plan-eng-review/SKILL.md for the review workflow.

Read plan.md — that's the plan to review. This is a standalone plan document, not a codebase — skip any codebase exploration steps.

Proceed directly to the full review. Skip any AskUserQuestion calls — this is non-interactive.
Write your complete review directly to ${planDir}/review-output.md

Focus on architecture, code quality, tests, and performance sections.`,
      workingDirectory: planDir,
      maxTurns: 15,
      timeout: 360_000,
      testName: 'plan-eng-review',
      runId,
      model: 'claude-opus-4-6',
    });

    logCost('/plan-eng-review', result);
    recordE2E(evalCollector, '/plan-eng-review', 'Plan Eng Review E2E', result, {
      passed: acceptedExitReasons.includes(result.exitReason),
    });
    expect(acceptedExitReasons).toContain(result.exitReason);

    // Check the review's size only when the file exists: a run that stopped at
    // error_max_turns may not have written it, and that is still a pass here.
    const reviewPath = path.join(planDir, 'review-output.md');
    if (fs.existsSync(reviewPath)) {
      const review = fs.readFileSync(reviewPath, 'utf-8');
      expect(review.length).toBeGreaterThan(200);
    }
  }, 420_000); // test-level timeout is larger than the 360_000 passed to runSkillTest
});
279
+
280
+ // --- Plan-Eng-Review Test-Plan Artifact E2E ---
281
+
282
/**
 * E2E suite: runs the plan-eng-review skill against a throwaway git repo and
 * checks that a new "test-plan" file shows up under
 * ~/.gstack/projects/test-project.
 *
 * NOTE(review): the artifact directory lives in the real home directory, not
 * in the temp repo, so only files whose name contains "test-plan" are ever
 * deleted there; the directory itself is left in place.
 */
describeIfSelected('Plan-Eng-Review Test-Plan Artifact E2E', ['plan-eng-review-artifact'], () => {
  let planDir: string;
  let projectDir: string;

  // Names of the entries in projectDir whose filename contains "test-plan".
  const listTestPlanFiles = (): string[] =>
    fs.readdirSync(projectDir).filter((name) => name.includes('test-plan'));

  // Best-effort cleanup: any fs error (missing dir, unset projectDir, ...) is ignored.
  const purgeTestPlanFiles = (): void => {
    try {
      for (const name of listTestPlanFiles()) {
        fs.unlinkSync(path.join(projectDir, name));
      }
    } catch {}
  };

  beforeAll(() => {
    planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-artifact-'));

    // Every git call runs inside the temp repo; its result is not inspected.
    const git = (...args: string[]) =>
      spawnSync('git', args, { cwd: planDir, stdio: 'pipe', timeout: 5000 });
    const writeFixture = (name: string, body: string) =>
      fs.writeFileSync(path.join(planDir, name), body);

    git('init', '-b', 'main');
    git('config', 'user.email', 'test@test.com');
    git('config', 'user.name', 'Test');

    // Base commit on main.
    writeFixture('app.ts', 'export function greet() { return "hello"; }\n');
    git('add', '.');
    git('commit', '-m', 'initial');

    // Feature branch that adds the dashboard and wires it into app.ts.
    git('checkout', '-b', 'feature/add-dashboard');
    writeFixture('dashboard.ts', `export function Dashboard() {
const data = fetchStats();
return { users: data.users, revenue: data.revenue };
}
function fetchStats() {
return fetch('/api/stats').then(r => r.json());
}
`);
    writeFixture('app.ts', `import { Dashboard } from "./dashboard";
export function greet() { return "hello"; }
export function main() { return Dashboard(); }
`);
    git('add', '.');
    git('commit', '-m', 'feat: add dashboard');

    // The plan document the agent is asked to review.
    writeFixture('plan.md', `# Plan: Add Dashboard

## Changes
1. New \`dashboard.ts\` with Dashboard component and fetchStats API call
2. Updated \`app.ts\` to import and use Dashboard

## Architecture
- Dashboard fetches from \`/api/stats\` endpoint
- Returns user count and revenue metrics
`);
    git('add', 'plan.md');
    git('commit', '-m', 'add plan');

    // Make the skill definition available inside the temp repo.
    const skillDir = path.join(planDir, 'plan-eng-review');
    fs.mkdirSync(skillDir, { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
      path.join(skillDir, 'SKILL.md'),
    );

    // Remote-slug and browse shims (plan-eng-review uses remote-slug for the artifact path).
    setupBrowseShims(planDir);

    // Directory the artifact is expected to land in.
    projectDir = path.join(os.homedir(), '.gstack', 'projects', 'test-project');
    fs.mkdirSync(projectDir, { recursive: true });

    // Drop leftovers from earlier runs so they cannot be mistaken for new artifacts.
    purgeTestPlanFiles();
  });

  afterAll(() => {
    try {
      fs.rmSync(planDir, { recursive: true, force: true });
    } catch {}
    // Remove the test-plan artifacts but keep the project dir itself.
    purgeTestPlanFiles();
  });

  test('/plan-eng-review writes test-plan artifact to ~/.gstack/projects/', async () => {
    // Snapshot of the test-plan files present before the agent runs.
    const beforeFiles = listTestPlanFiles();

    const result = await runSkillTest({
      prompt: `Read plan-eng-review/SKILL.md for the review workflow.
Skip the preamble bash block, lake intro, telemetry, and contributor mode sections — go straight to the review.

Read plan.md — that's the plan to review. This is a standalone plan with source code in app.ts and dashboard.ts.

Proceed directly to the full review. Skip any AskUserQuestion calls — this is non-interactive.

IMPORTANT: After your review, you MUST write the test-plan artifact as described in the "Test Plan Artifact" section of SKILL.md. The remote-slug shim is at ${planDir}/browse/bin/remote-slug.

Write your review to ${planDir}/review-output.md`,
      workingDirectory: planDir,
      maxTurns: 25,
      allowedTools: ['Bash', 'Read', 'Write', 'Glob', 'Grep'],
      timeout: 360_000,
      testName: 'plan-eng-review-artifact',
      runId,
      model: 'claude-opus-4-6',
    });

    logCost('/plan-eng-review artifact', result);

    // The recorded eval outcome is based on the exit reason only; hitting max turns counts as a pass.
    const exitOk = ['success', 'error_max_turns'].includes(result.exitReason);
    recordE2E(evalCollector, '/plan-eng-review test-plan artifact', 'Plan-Eng-Review Test-Plan Artifact E2E', result, {
      passed: exitOk,
    });

    expect(['success', 'error_max_turns']).toContain(result.exitReason);

    // Work out which test-plan files appeared during the run.
    const afterFiles = listTestPlanFiles();
    const newFiles = afterFiles.filter((name) => !beforeFiles.includes(name));
    console.log(`Test-plan artifacts: ${beforeFiles.length} before, ${afterFiles.length} after, ${newFiles.length} new`);

    if (newFiles.length === 0) {
      console.warn('No test-plan artifact found — agent may not have followed artifact instructions');
    } else {
      // Only the first new file is inspected; it has to be more than a stub.
      const content = fs.readFileSync(path.join(projectDir, newFiles[0]), 'utf-8');
      console.log(`Test-plan artifact (${newFiles[0]}): ${content.length} chars`);
      expect(content.length).toBeGreaterThan(50);
    }

    // Hard requirement: the test fails when the run produced no new artifact
    // (the warning above is only there to explain the failure in the log).
    expect(newFiles.length).toBeGreaterThanOrEqual(1);
  }, 420_000);
});
415
+
416
+ // --- Office Hours Spec Review E2E ---
417
+
418
/**
 * E2E suite: asks the agent to read office-hours/SKILL.md and write a summary
 * of its "Spec Review Loop" section, then checks that summary for the expected
 * facts (review dimensions, dispatch tool, iteration cap).
 */
describeIfSelected('Office Hours Spec Review E2E', ['office-hours-spec-review'], () => {
  let ohDir: string;

  beforeAll(() => {
    ohDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-oh-spec-'));
    // Git commands run inside the temp repo; their results are not inspected.
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: ohDir, stdio: 'pipe', timeout: 5000 });

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
    fs.writeFileSync(path.join(ohDir, 'README.md'), '# Test Project\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'init']);

    // Copy office-hours skill
    fs.mkdirSync(path.join(ohDir, 'office-hours'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'office-hours', 'SKILL.md'),
      path.join(ohDir, 'office-hours', 'SKILL.md'),
    );
  });

  afterAll(() => {
    // Best-effort removal of the temp repo.
    try { fs.rmSync(ohDir, { recursive: true, force: true }); } catch {}
  });

  test('/office-hours SKILL.md contains spec review loop', async () => {
    const result = await runSkillTest({
      prompt: `Read office-hours/SKILL.md. I want to understand the spec review loop.

Summarize what the "Spec Review Loop" section does — specifically:
1. How many dimensions does the reviewer check?
2. What tool is used to dispatch the reviewer?
3. What's the maximum number of iterations?
4. What metrics are tracked?

Write your summary to ${ohDir}/spec-review-summary.md`,
      workingDirectory: ohDir,
      maxTurns: 8,
      timeout: 120_000,
      testName: 'office-hours-spec-review',
      runId,
    });

    logCost('/office-hours spec review', result);
    recordE2E(evalCollector, '/office-hours-spec-review', 'Office Hours Spec Review E2E', result);
    expect(result.exitReason).toBe('success');

    const summaryPath = path.join(ohDir, 'spec-review-summary.md');
    // The summary file is the only evidence this test inspects. Require it
    // explicitly, otherwise a run that never wrote the file would pass without
    // checking anything.
    expect(fs.existsSync(summaryPath)).toBe(true);

    // Lower-cased once so the patterns below can stay lower-case.
    const summary = fs.readFileSync(summaryPath, 'utf-8').toLowerCase();
    expect(summary).toMatch(/5.*dimension|dimension.*5|completeness|consistency|clarity|scope|feasibility/);
    expect(summary).toMatch(/agent|subagent/);
    expect(summary).toMatch(/3.*iteration|iteration.*3|maximum.*3/);
  }, 180_000);
});
476
+
477
+ // --- Plan CEO Review Benefits-From E2E ---
478
+
479
/**
 * E2E suite: asks the agent to read plan-ceo-review/SKILL.md and write a
 * summary of what happens when no design doc is found, then checks that the
 * summary mentions the /office-hours offer and the design-doc condition.
 */
describeIfSelected('Plan CEO Review Benefits-From E2E', ['plan-ceo-review-benefits'], () => {
  let benefitsDir: string;

  beforeAll(() => {
    benefitsDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-benefits-'));
    // Git commands run inside the temp repo; their results are not inspected.
    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: benefitsDir, stdio: 'pipe', timeout: 5000 });

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);
    fs.writeFileSync(path.join(benefitsDir, 'README.md'), '# Test Project\n');
    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'init']);

    // Make the skill definition available inside the temp repo.
    fs.mkdirSync(path.join(benefitsDir, 'plan-ceo-review'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
      path.join(benefitsDir, 'plan-ceo-review', 'SKILL.md'),
    );
  });

  afterAll(() => {
    // Best-effort removal of the temp repo.
    try { fs.rmSync(benefitsDir, { recursive: true, force: true }); } catch {}
  });

  test('/plan-ceo-review SKILL.md contains prerequisite skill offer', async () => {
    const result = await runSkillTest({
      prompt: `Read plan-ceo-review/SKILL.md. Search for sections about "Prerequisite" or "office-hours" or "design doc found".

Summarize what happens when no design doc is found — specifically:
1. Is /office-hours offered as a prerequisite?
2. What options does the user get?
3. Is there a mid-session detection for when the user seems lost?

Write your summary to ${benefitsDir}/benefits-summary.md`,
      workingDirectory: benefitsDir,
      maxTurns: 8,
      timeout: 120_000,
      testName: 'plan-ceo-review-benefits',
      runId,
    });

    logCost('/plan-ceo-review benefits-from', result);
    recordE2E(evalCollector, '/plan-ceo-review-benefits', 'Plan CEO Review Benefits-From E2E', result);
    expect(result.exitReason).toBe('success');

    const summaryPath = path.join(benefitsDir, 'benefits-summary.md');
    // The summary file is the only evidence this test inspects. Require it
    // explicitly, otherwise a run that never wrote the file would pass without
    // checking anything.
    expect(fs.existsSync(summaryPath)).toBe(true);

    // Lower-cased once, so the patterns below need no case-insensitive flag.
    const summary = fs.readFileSync(summaryPath, 'utf-8').toLowerCase();
    expect(summary).toMatch(/office.hours/);
    expect(summary).toMatch(/design doc|no design/);
  }, 180_000);
});
534
+
535
// Module-level hook: once every suite in this file has finished, hand the
// shared evalCollector to finalizeEvalCollector and wait for it to settle.
afterAll(async () => {
  const pendingFinalize = finalizeEvalCollector(evalCollector);
  await pendingFinalize;
});