@runchr/gstack-antigravity 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. package/.agents/rules/ETHOS.md +129 -0
  2. package/.agents/rules/global-gstack.md +117 -0
  3. package/.agents/rules/persona-gstack-autoplan.md +14 -0
  4. package/.agents/rules/persona-gstack-benchmark.md +14 -0
  5. package/.agents/rules/persona-gstack-browse.md +14 -0
  6. package/.agents/rules/persona-gstack-canary.md +14 -0
  7. package/.agents/rules/persona-gstack-careful.md +14 -0
  8. package/.agents/rules/persona-gstack-codex.md +14 -0
  9. package/.agents/rules/persona-gstack-cso.md +14 -0
  10. package/.agents/rules/persona-gstack-design-consultation.md +14 -0
  11. package/.agents/rules/persona-gstack-design-review.md +14 -0
  12. package/.agents/rules/persona-gstack-document-release.md +14 -0
  13. package/.agents/rules/persona-gstack-freeze.md +14 -0
  14. package/.agents/rules/persona-gstack-gstack-upgrade.md +14 -0
  15. package/.agents/rules/persona-gstack-guard.md +14 -0
  16. package/.agents/rules/persona-gstack-investigate.md +14 -0
  17. package/.agents/rules/persona-gstack-land-and-deploy.md +14 -0
  18. package/.agents/rules/persona-gstack-office-hours.md +14 -0
  19. package/.agents/rules/persona-gstack-plan-ceo-review.md +14 -0
  20. package/.agents/rules/persona-gstack-plan-design-review.md +14 -0
  21. package/.agents/rules/persona-gstack-plan-eng-review.md +14 -0
  22. package/.agents/rules/persona-gstack-qa-only.md +14 -0
  23. package/.agents/rules/persona-gstack-qa.md +14 -0
  24. package/.agents/rules/persona-gstack-retro.md +14 -0
  25. package/.agents/rules/persona-gstack-review.md +14 -0
  26. package/.agents/rules/persona-gstack-setup-browser-cookies.md +14 -0
  27. package/.agents/rules/persona-gstack-setup-deploy.md +14 -0
  28. package/.agents/rules/persona-gstack-ship.md +14 -0
  29. package/.agents/rules/persona-gstack-unfreeze.md +14 -0
  30. package/.agents/rules/persona-gstack.md +40 -0
  31. package/.agents/rules/recursive-identities.md +22 -0
  32. package/.agents/workflows/autoplan.md +30 -0
  33. package/.agents/workflows/benchmark.md +31 -0
  34. package/.agents/workflows/browse.md +26 -0
  35. package/.agents/workflows/canary.md +33 -0
  36. package/.agents/workflows/careful.md +22 -0
  37. package/.agents/workflows/codex.md +36 -0
  38. package/.agents/workflows/cso.md +29 -0
  39. package/.agents/workflows/design-consultation.md +28 -0
  40. package/.agents/workflows/design-review.md +28 -0
  41. package/.agents/workflows/document-release.md +32 -0
  42. package/.agents/workflows/freeze.md +17 -0
  43. package/.agents/workflows/gstack-upgrade.md +54 -0
  44. package/.agents/workflows/gstack.md +56 -0
  45. package/.agents/workflows/guard.md +18 -0
  46. package/.agents/workflows/investigate.md +37 -0
  47. package/.agents/workflows/land-and-deploy.md +35 -0
  48. package/.agents/workflows/office-hours.md +27 -0
  49. package/.agents/workflows/plan-ceo-review.md +34 -0
  50. package/.agents/workflows/plan-design-review.md +31 -0
  51. package/.agents/workflows/plan-eng-review.md +28 -0
  52. package/.agents/workflows/qa-only.md +28 -0
  53. package/.agents/workflows/qa.md +73 -0
  54. package/.agents/workflows/retro.md +34 -0
  55. package/.agents/workflows/review.md +30 -0
  56. package/.agents/workflows/setup-browser-cookies.md +15 -0
  57. package/.agents/workflows/setup-cookies.md +8 -0
  58. package/.agents/workflows/setup-deploy.md +21 -0
  59. package/.agents/workflows/ship.md +93 -0
  60. package/.agents/workflows/unfreeze.md +12 -0
  61. package/LICENSE +22 -0
  62. package/README.md +189 -0
  63. package/README_KO.md +191 -0
  64. package/bin/install.js +105 -0
  65. package/gstack-origin/.agents/skills/gstack/SKILL.md +651 -0
  66. package/gstack-origin/.agents/skills/gstack-autoplan/SKILL.md +678 -0
  67. package/gstack-origin/.agents/skills/gstack-benchmark/SKILL.md +482 -0
  68. package/gstack-origin/.agents/skills/gstack-browse/SKILL.md +511 -0
  69. package/gstack-origin/.agents/skills/gstack-canary/SKILL.md +486 -0
  70. package/gstack-origin/.agents/skills/gstack-careful/SKILL.md +50 -0
  71. package/gstack-origin/.agents/skills/gstack-cso/SKILL.md +607 -0
  72. package/gstack-origin/.agents/skills/gstack-design-consultation/SKILL.md +615 -0
  73. package/gstack-origin/.agents/skills/gstack-design-review/SKILL.md +988 -0
  74. package/gstack-origin/.agents/skills/gstack-document-release/SKILL.md +604 -0
  75. package/gstack-origin/.agents/skills/gstack-freeze/SKILL.md +67 -0
  76. package/gstack-origin/.agents/skills/gstack-guard/SKILL.md +62 -0
  77. package/gstack-origin/.agents/skills/gstack-investigate/SKILL.md +415 -0
  78. package/gstack-origin/.agents/skills/gstack-land-and-deploy/SKILL.md +873 -0
  79. package/gstack-origin/.agents/skills/gstack-office-hours/SKILL.md +986 -0
  80. package/gstack-origin/.agents/skills/gstack-plan-ceo-review/SKILL.md +1268 -0
  81. package/gstack-origin/.agents/skills/gstack-plan-design-review/SKILL.md +668 -0
  82. package/gstack-origin/.agents/skills/gstack-plan-eng-review/SKILL.md +826 -0
  83. package/gstack-origin/.agents/skills/gstack-qa/SKILL.md +1006 -0
  84. package/gstack-origin/.agents/skills/gstack-qa-only/SKILL.md +626 -0
  85. package/gstack-origin/.agents/skills/gstack-retro/SKILL.md +1065 -0
  86. package/gstack-origin/.agents/skills/gstack-review/SKILL.md +704 -0
  87. package/gstack-origin/.agents/skills/gstack-setup-browser-cookies/SKILL.md +325 -0
  88. package/gstack-origin/.agents/skills/gstack-setup-deploy/SKILL.md +450 -0
  89. package/gstack-origin/.agents/skills/gstack-ship/SKILL.md +1312 -0
  90. package/gstack-origin/.agents/skills/gstack-unfreeze/SKILL.md +36 -0
  91. package/gstack-origin/.agents/skills/gstack-upgrade/SKILL.md +220 -0
  92. package/gstack-origin/.env.example +5 -0
  93. package/gstack-origin/.github/workflows/skill-docs.yml +17 -0
  94. package/gstack-origin/AGENTS.md +49 -0
  95. package/gstack-origin/ARCHITECTURE.md +359 -0
  96. package/gstack-origin/BROWSER.md +271 -0
  97. package/gstack-origin/CHANGELOG.md +800 -0
  98. package/gstack-origin/CLAUDE.md +284 -0
  99. package/gstack-origin/CONTRIBUTING.md +370 -0
  100. package/gstack-origin/ETHOS.md +129 -0
  101. package/gstack-origin/LICENSE +21 -0
  102. package/gstack-origin/README.md +228 -0
  103. package/gstack-origin/SKILL.md +657 -0
  104. package/gstack-origin/SKILL.md.tmpl +281 -0
  105. package/gstack-origin/TODOS.md +564 -0
  106. package/gstack-origin/VERSION +1 -0
  107. package/gstack-origin/autoplan/SKILL.md +689 -0
  108. package/gstack-origin/autoplan/SKILL.md.tmpl +416 -0
  109. package/gstack-origin/benchmark/SKILL.md +489 -0
  110. package/gstack-origin/benchmark/SKILL.md.tmpl +233 -0
  111. package/gstack-origin/bin/dev-setup +68 -0
  112. package/gstack-origin/bin/dev-teardown +56 -0
  113. package/gstack-origin/bin/gstack-analytics +191 -0
  114. package/gstack-origin/bin/gstack-community-dashboard +113 -0
  115. package/gstack-origin/bin/gstack-config +38 -0
  116. package/gstack-origin/bin/gstack-diff-scope +71 -0
  117. package/gstack-origin/bin/gstack-global-discover.ts +591 -0
  118. package/gstack-origin/bin/gstack-repo-mode +93 -0
  119. package/gstack-origin/bin/gstack-review-log +9 -0
  120. package/gstack-origin/bin/gstack-review-read +12 -0
  121. package/gstack-origin/bin/gstack-slug +15 -0
  122. package/gstack-origin/bin/gstack-telemetry-log +158 -0
  123. package/gstack-origin/bin/gstack-telemetry-sync +127 -0
  124. package/gstack-origin/bin/gstack-update-check +196 -0
  125. package/gstack-origin/browse/SKILL.md +517 -0
  126. package/gstack-origin/browse/SKILL.md.tmpl +141 -0
  127. package/gstack-origin/browse/bin/find-browse +21 -0
  128. package/gstack-origin/browse/bin/remote-slug +14 -0
  129. package/gstack-origin/browse/scripts/build-node-server.sh +48 -0
  130. package/gstack-origin/browse/src/browser-manager.ts +634 -0
  131. package/gstack-origin/browse/src/buffers.ts +137 -0
  132. package/gstack-origin/browse/src/bun-polyfill.cjs +109 -0
  133. package/gstack-origin/browse/src/cli.ts +420 -0
  134. package/gstack-origin/browse/src/commands.ts +111 -0
  135. package/gstack-origin/browse/src/config.ts +150 -0
  136. package/gstack-origin/browse/src/cookie-import-browser.ts +417 -0
  137. package/gstack-origin/browse/src/cookie-picker-routes.ts +207 -0
  138. package/gstack-origin/browse/src/cookie-picker-ui.ts +541 -0
  139. package/gstack-origin/browse/src/find-browse.ts +61 -0
  140. package/gstack-origin/browse/src/meta-commands.ts +269 -0
  141. package/gstack-origin/browse/src/platform.ts +17 -0
  142. package/gstack-origin/browse/src/read-commands.ts +335 -0
  143. package/gstack-origin/browse/src/server.ts +369 -0
  144. package/gstack-origin/browse/src/snapshot.ts +398 -0
  145. package/gstack-origin/browse/src/url-validation.ts +91 -0
  146. package/gstack-origin/browse/src/write-commands.ts +352 -0
  147. package/gstack-origin/browse/test/bun-polyfill.test.ts +72 -0
  148. package/gstack-origin/browse/test/commands.test.ts +1836 -0
  149. package/gstack-origin/browse/test/config.test.ts +250 -0
  150. package/gstack-origin/browse/test/cookie-import-browser.test.ts +397 -0
  151. package/gstack-origin/browse/test/cookie-picker-routes.test.ts +205 -0
  152. package/gstack-origin/browse/test/find-browse.test.ts +50 -0
  153. package/gstack-origin/browse/test/fixtures/basic.html +33 -0
  154. package/gstack-origin/browse/test/fixtures/cursor-interactive.html +22 -0
  155. package/gstack-origin/browse/test/fixtures/dialog.html +15 -0
  156. package/gstack-origin/browse/test/fixtures/empty.html +2 -0
  157. package/gstack-origin/browse/test/fixtures/forms.html +55 -0
  158. package/gstack-origin/browse/test/fixtures/qa-eval-checkout.html +108 -0
  159. package/gstack-origin/browse/test/fixtures/qa-eval-spa.html +98 -0
  160. package/gstack-origin/browse/test/fixtures/qa-eval.html +51 -0
  161. package/gstack-origin/browse/test/fixtures/responsive.html +49 -0
  162. package/gstack-origin/browse/test/fixtures/snapshot.html +55 -0
  163. package/gstack-origin/browse/test/fixtures/spa.html +24 -0
  164. package/gstack-origin/browse/test/fixtures/states.html +17 -0
  165. package/gstack-origin/browse/test/fixtures/upload.html +25 -0
  166. package/gstack-origin/browse/test/gstack-config.test.ts +125 -0
  167. package/gstack-origin/browse/test/gstack-update-check.test.ts +467 -0
  168. package/gstack-origin/browse/test/handoff.test.ts +235 -0
  169. package/gstack-origin/browse/test/path-validation.test.ts +63 -0
  170. package/gstack-origin/browse/test/platform.test.ts +37 -0
  171. package/gstack-origin/browse/test/snapshot.test.ts +467 -0
  172. package/gstack-origin/browse/test/test-server.ts +57 -0
  173. package/gstack-origin/browse/test/url-validation.test.ts +72 -0
  174. package/gstack-origin/canary/SKILL.md +493 -0
  175. package/gstack-origin/canary/SKILL.md.tmpl +220 -0
  176. package/gstack-origin/careful/SKILL.md +59 -0
  177. package/gstack-origin/careful/SKILL.md.tmpl +57 -0
  178. package/gstack-origin/careful/bin/check-careful.sh +112 -0
  179. package/gstack-origin/codex/SKILL.md +677 -0
  180. package/gstack-origin/codex/SKILL.md.tmpl +356 -0
  181. package/gstack-origin/conductor.json +6 -0
  182. package/gstack-origin/cso/SKILL.md +615 -0
  183. package/gstack-origin/cso/SKILL.md.tmpl +376 -0
  184. package/gstack-origin/design-consultation/SKILL.md +625 -0
  185. package/gstack-origin/design-consultation/SKILL.md.tmpl +369 -0
  186. package/gstack-origin/design-review/SKILL.md +998 -0
  187. package/gstack-origin/design-review/SKILL.md.tmpl +262 -0
  188. package/gstack-origin/docs/images/github-2013.png +0 -0
  189. package/gstack-origin/docs/images/github-2026.png +0 -0
  190. package/gstack-origin/docs/skills.md +877 -0
  191. package/gstack-origin/document-release/SKILL.md +613 -0
  192. package/gstack-origin/document-release/SKILL.md.tmpl +357 -0
  193. package/gstack-origin/freeze/SKILL.md +82 -0
  194. package/gstack-origin/freeze/SKILL.md.tmpl +80 -0
  195. package/gstack-origin/freeze/bin/check-freeze.sh +68 -0
  196. package/gstack-origin/gstack-upgrade/SKILL.md +226 -0
  197. package/gstack-origin/gstack-upgrade/SKILL.md.tmpl +224 -0
  198. package/gstack-origin/guard/SKILL.md +82 -0
  199. package/gstack-origin/guard/SKILL.md.tmpl +80 -0
  200. package/gstack-origin/investigate/SKILL.md +435 -0
  201. package/gstack-origin/investigate/SKILL.md.tmpl +196 -0
  202. package/gstack-origin/land-and-deploy/SKILL.md +880 -0
  203. package/gstack-origin/land-and-deploy/SKILL.md.tmpl +575 -0
  204. package/gstack-origin/office-hours/SKILL.md +996 -0
  205. package/gstack-origin/office-hours/SKILL.md.tmpl +624 -0
  206. package/gstack-origin/package.json +55 -0
  207. package/gstack-origin/plan-ceo-review/SKILL.md +1277 -0
  208. package/gstack-origin/plan-ceo-review/SKILL.md.tmpl +838 -0
  209. package/gstack-origin/plan-design-review/SKILL.md +676 -0
  210. package/gstack-origin/plan-design-review/SKILL.md.tmpl +314 -0
  211. package/gstack-origin/plan-eng-review/SKILL.md +836 -0
  212. package/gstack-origin/plan-eng-review/SKILL.md.tmpl +279 -0
  213. package/gstack-origin/qa/SKILL.md +1016 -0
  214. package/gstack-origin/qa/SKILL.md.tmpl +316 -0
  215. package/gstack-origin/qa/references/issue-taxonomy.md +85 -0
  216. package/gstack-origin/qa/templates/qa-report-template.md +126 -0
  217. package/gstack-origin/qa-only/SKILL.md +633 -0
  218. package/gstack-origin/qa-only/SKILL.md.tmpl +101 -0
  219. package/gstack-origin/retro/SKILL.md +1072 -0
  220. package/gstack-origin/retro/SKILL.md.tmpl +833 -0
  221. package/gstack-origin/review/SKILL.md +849 -0
  222. package/gstack-origin/review/SKILL.md.tmpl +259 -0
  223. package/gstack-origin/review/TODOS-format.md +62 -0
  224. package/gstack-origin/review/checklist.md +190 -0
  225. package/gstack-origin/review/design-checklist.md +132 -0
  226. package/gstack-origin/review/greptile-triage.md +220 -0
  227. package/gstack-origin/scripts/analytics.ts +190 -0
  228. package/gstack-origin/scripts/dev-skill.ts +82 -0
  229. package/gstack-origin/scripts/eval-compare.ts +96 -0
  230. package/gstack-origin/scripts/eval-list.ts +116 -0
  231. package/gstack-origin/scripts/eval-select.ts +86 -0
  232. package/gstack-origin/scripts/eval-summary.ts +187 -0
  233. package/gstack-origin/scripts/eval-watch.ts +172 -0
  234. package/gstack-origin/scripts/gen-skill-docs.ts +2414 -0
  235. package/gstack-origin/scripts/skill-check.ts +167 -0
  236. package/gstack-origin/setup +269 -0
  237. package/gstack-origin/setup-browser-cookies/SKILL.md +330 -0
  238. package/gstack-origin/setup-browser-cookies/SKILL.md.tmpl +74 -0
  239. package/gstack-origin/setup-deploy/SKILL.md +459 -0
  240. package/gstack-origin/setup-deploy/SKILL.md.tmpl +220 -0
  241. package/gstack-origin/ship/SKILL.md +1457 -0
  242. package/gstack-origin/ship/SKILL.md.tmpl +528 -0
  243. package/gstack-origin/supabase/config.sh +10 -0
  244. package/gstack-origin/supabase/functions/community-pulse/index.ts +59 -0
  245. package/gstack-origin/supabase/functions/telemetry-ingest/index.ts +135 -0
  246. package/gstack-origin/supabase/functions/update-check/index.ts +37 -0
  247. package/gstack-origin/supabase/migrations/001_telemetry.sql +89 -0
  248. package/gstack-origin/test/analytics.test.ts +277 -0
  249. package/gstack-origin/test/codex-e2e.test.ts +197 -0
  250. package/gstack-origin/test/fixtures/coverage-audit-fixture.ts +76 -0
  251. package/gstack-origin/test/fixtures/eval-baselines.json +7 -0
  252. package/gstack-origin/test/fixtures/qa-eval-checkout-ground-truth.json +43 -0
  253. package/gstack-origin/test/fixtures/qa-eval-ground-truth.json +43 -0
  254. package/gstack-origin/test/fixtures/qa-eval-spa-ground-truth.json +43 -0
  255. package/gstack-origin/test/fixtures/review-eval-design-slop.css +86 -0
  256. package/gstack-origin/test/fixtures/review-eval-design-slop.html +41 -0
  257. package/gstack-origin/test/fixtures/review-eval-enum-diff.rb +30 -0
  258. package/gstack-origin/test/fixtures/review-eval-enum.rb +27 -0
  259. package/gstack-origin/test/fixtures/review-eval-vuln.rb +14 -0
  260. package/gstack-origin/test/gemini-e2e.test.ts +173 -0
  261. package/gstack-origin/test/gen-skill-docs.test.ts +1049 -0
  262. package/gstack-origin/test/global-discover.test.ts +187 -0
  263. package/gstack-origin/test/helpers/codex-session-runner.ts +282 -0
  264. package/gstack-origin/test/helpers/e2e-helpers.ts +239 -0
  265. package/gstack-origin/test/helpers/eval-store.test.ts +548 -0
  266. package/gstack-origin/test/helpers/eval-store.ts +689 -0
  267. package/gstack-origin/test/helpers/gemini-session-runner.test.ts +104 -0
  268. package/gstack-origin/test/helpers/gemini-session-runner.ts +201 -0
  269. package/gstack-origin/test/helpers/llm-judge.ts +130 -0
  270. package/gstack-origin/test/helpers/observability.test.ts +283 -0
  271. package/gstack-origin/test/helpers/session-runner.test.ts +96 -0
  272. package/gstack-origin/test/helpers/session-runner.ts +357 -0
  273. package/gstack-origin/test/helpers/skill-parser.ts +206 -0
  274. package/gstack-origin/test/helpers/touchfiles.ts +260 -0
  275. package/gstack-origin/test/hook-scripts.test.ts +373 -0
  276. package/gstack-origin/test/skill-e2e-browse.test.ts +293 -0
  277. package/gstack-origin/test/skill-e2e-deploy.test.ts +279 -0
  278. package/gstack-origin/test/skill-e2e-design.test.ts +614 -0
  279. package/gstack-origin/test/skill-e2e-plan.test.ts +538 -0
  280. package/gstack-origin/test/skill-e2e-qa-bugs.test.ts +194 -0
  281. package/gstack-origin/test/skill-e2e-qa-workflow.test.ts +412 -0
  282. package/gstack-origin/test/skill-e2e-review.test.ts +535 -0
  283. package/gstack-origin/test/skill-e2e-workflow.test.ts +586 -0
  284. package/gstack-origin/test/skill-e2e.test.ts +3325 -0
  285. package/gstack-origin/test/skill-llm-eval.test.ts +787 -0
  286. package/gstack-origin/test/skill-parser.test.ts +179 -0
  287. package/gstack-origin/test/skill-routing-e2e.test.ts +605 -0
  288. package/gstack-origin/test/skill-validation.test.ts +1520 -0
  289. package/gstack-origin/test/telemetry.test.ts +278 -0
  290. package/gstack-origin/test/touchfiles.test.ts +262 -0
  291. package/gstack-origin/unfreeze/SKILL.md +40 -0
  292. package/gstack-origin/unfreeze/SKILL.md.tmpl +38 -0
  293. package/package.json +38 -0
  294. package/scripts/install-antigravity-skill.ps1 +33 -0
  295. package/scripts/install-antigravity-skill.sh +41 -0
  296. package/scripts/sync-gstack-origin.ps1 +37 -0
  297. package/scripts/sync-gstack-origin.sh +35 -0
@@ -0,0 +1,283 @@
1
+ /**
2
+ * Unit tests for E2E observability infrastructure.
3
+ *
4
+ * Tests heartbeat, progress.log, NDJSON persistence, savePartial(),
5
+ * finalize() cleanup, failure transcript paths, watcher rendering,
6
+ * and non-fatal I/O guarantees.
7
+ */
8
+
9
+ import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import * as os from 'os';
13
+ import { sanitizeTestName } from './session-runner';
14
+ import { EvalCollector } from './eval-store';
15
+ import { renderDashboard } from '../../scripts/eval-watch';
16
+ import type { HeartbeatData, PartialData } from '../../scripts/eval-watch';
17
+
18
// Scratch directory for the test currently running: created fresh before each
// test and removed again afterwards.
let tmpDir: string;

beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'obs-test-');
  tmpDir = fs.mkdtempSync(prefix);
});

afterEach(() => {
  try {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  } catch {
    // Best-effort cleanup: a leftover temp directory must not fail a test.
  }
});
27
+
28
// --- Tests 1-5, 8, 11: session-runner observability ---

describe('session-runner observability', () => {
  /**
   * Returns the text of session-runner.ts, resolved relative to this test
   * file's directory.
   *
   * Invoked inside each test (rather than once at describe time) so that a
   * read failure is reported against the individual test that needed it.
   */
  const readSessionRunnerSource = (): string =>
    fs.readFileSync(path.resolve(__dirname, 'session-runner.ts'), 'utf-8');

  test('1: sanitizeTestName strips slashes and leading dashes', () => {
    expect(sanitizeTestName('/plan-ceo-review')).toBe('plan-ceo-review');
    expect(sanitizeTestName('browse-basic')).toBe('browse-basic');
    expect(sanitizeTestName('/qa/deep/test')).toBe('qa-deep-test');
    expect(sanitizeTestName('///leading')).toBe('leading');
  });

  test('2: heartbeat file path uses ~/.gstack-dev/e2e-live.json', () => {
    // Static source check only — the actual write is tested by E2E.
    // NOTE: only the file-name literal and the atomic writer are asserted
    // here; the directory portion of the path is not checked.
    const src = readSessionRunnerSource();
    expect(src).toContain("'e2e-live.json'");
    expect(src).toContain('atomicWriteSync');
  });

  test('3: heartbeat JSON schema has expected fields', () => {
    // Verify the heartbeat write code mentions every required field name.
    const src = readSessionRunnerSource();
    const requiredFields = [
      'runId',
      'startedAt',
      'currentTest',
      'status',
      'turn',
      'toolCount',
      'lastTool',
      'lastToolAt',
      'elapsedSec',
    ];
    for (const field of requiredFields) {
      expect(src).toContain(field);
    }
    // Should NOT contain completedTests (removed per plan)
    expect(src).not.toContain('completedTests');
  });

  test('4: progress.log format matches expected pattern', () => {
    // The progress line format is: " [Ns] turn T tool #C: Name(...)"
    const src = readSessionRunnerSource();
    // Both stderr and progress.log use the same progressLine variable
    expect(src).toContain('progressLine');
    expect(src).toContain("'progress.log'");
    expect(src).toContain('appendFileSync');
  });

  test('5: NDJSON file uses sanitized test name', () => {
    const src = readSessionRunnerSource();
    expect(src).toContain('safeName');
    expect(src).toContain('.ndjson');
  });

  test('8: failure transcript goes to runDir when available', () => {
    const src = readSessionRunnerSource();
    // Should use runDir as primary, workingDirectory as fallback
    expect(src).toContain('runDir || path.join(workingDirectory');
    expect(src).toContain('-failure.json');
  });

  test('11: all new I/O is wrapped in try/catch (non-fatal)', () => {
    const src = readSessionRunnerSource();
    // Count non-fatal comments — should be present for each new I/O path
    const nonFatalCount = (src.match(/\/\* non-fatal \*\//g) || []).length;
    // Original had 2 (promptFile unlink + failure transcript), we added 4 more
    // (runDir creation, progress.log, heartbeat, NDJSON append)
    expect(nonFatalCount).toBeGreaterThanOrEqual(6);
  });
});
100
+
101
// --- Tests 6, 7: eval-store savePartial() and finalize() ---

describe('eval-store observability', () => {
  /** Directory passed to EvalCollector; resolved lazily because tmpDir is reassigned before every test. */
  const evalDirPath = (): string => path.join(tmpDir, 'evals');

  /** Path at which these tests look for the partial results file. */
  const partialFileIn = (dir: string): string => path.join(dir, '_partial-e2e.json');

  /** Reads a file from disk and parses it as JSON. */
  const readJson = (file: string) => JSON.parse(fs.readFileSync(file, 'utf-8'));

  test('6: savePartial() writes valid JSON with _partial: true', () => {
    const dir = evalDirPath();
    const collector = new EvalCollector('e2e', dir);

    collector.addTest({
      name: 'test-one',
      suite: 'test',
      tier: 'e2e',
      passed: true,
      duration_ms: 1000,
      cost_usd: 0.05,
      exit_reason: 'success',
    });

    const partialFile = partialFileIn(dir);
    expect(fs.existsSync(partialFile)).toBe(true);

    const snapshot = readJson(partialFile);
    expect(snapshot._partial).toBe(true);
    expect(snapshot.tests).toHaveLength(1);
    expect(snapshot.tests[0].name).toBe('test-one');
    expect(snapshot.tests[0].exit_reason).toBe('success');
    expect(snapshot.schema_version).toBe(1);
    expect(snapshot.total_tests).toBe(1);
    expect(snapshot.passed).toBe(1);
  });

  test('6b: savePartial() accumulates multiple tests', () => {
    const dir = evalDirPath();
    const collector = new EvalCollector('e2e', dir);

    collector.addTest({
      name: 'test-one',
      suite: 'test',
      tier: 'e2e',
      passed: true,
      duration_ms: 1000,
      cost_usd: 0.05,
    });
    collector.addTest({
      name: 'test-two',
      suite: 'test',
      tier: 'e2e',
      passed: false,
      duration_ms: 2000,
      cost_usd: 0.10,
      exit_reason: 'timeout',
      timeout_at_turn: 5,
      last_tool_call: 'Bash(ls)',
    });

    const snapshot = readJson(partialFileIn(dir));
    expect(snapshot.tests).toHaveLength(2);
    expect(snapshot.total_tests).toBe(2);
    expect(snapshot.passed).toBe(1);
    expect(snapshot.failed).toBe(1);
    expect(snapshot.tests[1].exit_reason).toBe('timeout');
    expect(snapshot.tests[1].timeout_at_turn).toBe(5);
    expect(snapshot.tests[1].last_tool_call).toBe('Bash(ls)');
  });

  test('7: finalize() preserves partial file alongside final', async () => {
    const dir = evalDirPath();
    const collector = new EvalCollector('e2e', dir);

    collector.addTest({
      name: 'test-one',
      suite: 'test',
      tier: 'e2e',
      passed: true,
      duration_ms: 1000,
      cost_usd: 0.05,
    });

    const partialFile = partialFileIn(dir);
    expect(fs.existsSync(partialFile)).toBe(true);

    await collector.finalize();

    // Partial file preserved for observability — never cleaned up
    expect(fs.existsSync(partialFile)).toBe(true);

    // Final eval file should also exist: any .json whose name does not start with '_'
    const isFinalEvalFile = (f: string): boolean => f.endsWith('.json') && !f.startsWith('_');
    const finalFiles = fs.readdirSync(dir).filter(isFinalEvalFile);
    expect(finalFiles.length).toBeGreaterThanOrEqual(1);
  });

  test('EvalTestEntry includes diagnostic fields', () => {
    const dir = evalDirPath();
    const collector = new EvalCollector('e2e', dir);

    collector.addTest({
      name: 'diagnostic-test',
      suite: 'test',
      tier: 'e2e',
      passed: false,
      duration_ms: 5000,
      cost_usd: 0.20,
      exit_reason: 'error_max_turns',
      timeout_at_turn: undefined,
      last_tool_call: 'Write(review-output.md)',
    });

    const snapshot = readJson(partialFileIn(dir));
    const entry = snapshot.tests[0];
    expect(entry.exit_reason).toBe('error_max_turns');
    expect(entry.last_tool_call).toBe('Write(review-output.md)');
  });
});
197
+
198
// --- Tests 9, 10: watcher dashboard rendering ---

describe('eval-watch dashboard', () => {
  /**
   * Builds the heartbeat used by the rendering tests: a running
   * 'plan-ceo-review' test at turn 4. Only the last-tool timestamp and the
   * elapsed seconds differ between tests, so those are the parameters.
   */
  function heartbeatAt(lastToolAt: string, elapsedSec: number): HeartbeatData {
    return {
      runId: '20260314-143022',
      startedAt: '2026-03-14T14:30:22Z',
      currentTest: 'plan-ceo-review',
      status: 'running',
      turn: 4,
      toolCount: 3,
      lastTool: 'Write(review-output.md)',
      lastToolAt,
      elapsedSec,
    };
  }

  test('9: renderDashboard shows completed tests and current test', () => {
    // lastToolAt is "now", so the heartbeat is recent — not stale
    const heartbeat = heartbeatAt(new Date().toISOString(), 285);

    const partial: PartialData = {
      tests: [
        { name: 'browse basic', passed: true, cost_usd: 0.07, duration_ms: 24000, turns_used: 6 },
        { name: '/review', passed: true, cost_usd: 0.17, duration_ms: 63000, turns_used: 13 },
      ],
      total_cost_usd: 0.24,
      _partial: true,
    };

    const output = renderDashboard(heartbeat, partial);

    const expectedFragments = [
      // Run ID
      '20260314-143022',
      // Completed tests and their costs
      'browse basic',
      '/review',
      '$0.07',
      '$0.17',
      // Current test
      'plan-ceo-review',
      'turn 4',
      'Write(review-output.md)',
    ];
    for (const fragment of expectedFragments) {
      expect(output).toContain(fragment);
    }

    // Should NOT show stale warning (lastToolAt is recent)
    expect(output).not.toContain('STALE');
  });

  test('10: renderDashboard warns on stale heartbeat', () => {
    const fifteenMinutesMs = 15 * 60 * 1000;
    const staleTime = new Date(Date.now() - fifteenMinutesMs).toISOString(); // 15 min ago

    const output = renderDashboard(heartbeatAt(staleTime, 900), null);

    expect(output).toContain('STALE');
    expect(output).toContain('may have crashed');
  });

  test('renderDashboard handles no active run', () => {
    const output = renderDashboard(null, null);
    expect(output).toContain('No active run');
    expect(output).toContain('bun test');
  });

  test('renderDashboard handles partial-only (heartbeat gone)', () => {
    const partial: PartialData = {
      tests: [
        { name: 'browse basic', passed: true, cost_usd: 0.07, duration_ms: 24000 },
      ],
      total_cost_usd: 0.07,
      _partial: true,
    };

    const output = renderDashboard(null, partial);
    for (const fragment of ['browse basic', '$0.07']) {
      expect(output).toContain(fragment);
    }
  });
});
@@ -0,0 +1,96 @@
1
+ import { describe, test, expect } from 'bun:test';
2
+ import { parseNDJSON } from './session-runner';
3
+
4
// Fixture: a minimal NDJSON session, one JSON event per line, in stream order.

// System init event.
const SYSTEM_INIT_LINE =
  '{"type":"system","subtype":"init","session_id":"test-123"}';

// Assistant event containing a single tool_use block (Bash).
const ASSISTANT_BASH_LINE =
  '{"type":"assistant","message":{"content":[{"type":"tool_use","id":"tu1","name":"Bash","input":{"command":"echo hello"}}]}}';

// User event carrying the tool result for tool_use id "tu1".
const TOOL_RESULT_LINE =
  '{"type":"user","tool_use_result":{"tool_use_id":"tu1","stdout":"hello\\n","stderr":""}}';

// Assistant event containing only a text block.
const ASSISTANT_TEXT_LINE =
  '{"type":"assistant","message":{"content":[{"type":"text","text":"The command printed hello."}]}}';

// Assistant event containing a text block followed by a tool_use block (Read).
const ASSISTANT_TEXT_AND_READ_LINE =
  '{"type":"assistant","message":{"content":[{"type":"text","text":"Let me also read a file."},{"type":"tool_use","id":"tu2","name":"Read","input":{"file_path":"/tmp/test"}}]}}';

// Final result event with cost, turn count and usage.
const RESULT_LINE =
  '{"type":"result","subtype":"success","total_cost_usd":0.05,"num_turns":3,"usage":{"input_tokens":100,"output_tokens":50},"result":"Done."}';

const FIXTURE_LINES = [
  SYSTEM_INIT_LINE,
  ASSISTANT_BASH_LINE,
  TOOL_RESULT_LINE,
  ASSISTANT_TEXT_LINE,
  ASSISTANT_TEXT_AND_READ_LINE,
  RESULT_LINE,
];
13
+
14
// Unit tests for parseNDJSON: transcript assembly, tool-call extraction,
// result-line detection, turn counting, and tolerance of bad input lines.
describe('parseNDJSON', () => {
  test('parses valid NDJSON with system + assistant + result events', () => {
    const { transcript } = parseNDJSON(FIXTURE_LINES);
    expect(transcript).toHaveLength(6);
    expect(transcript[0].type).toBe('system');
    expect(transcript[5].type).toBe('result');
  });

  test('extracts tool calls from assistant.message.content[].type === tool_use', () => {
    const result = parseNDJSON(FIXTURE_LINES);
    const bashCall = {
      tool: 'Bash',
      input: { command: 'echo hello' },
      output: '',
    };
    const readCall = {
      tool: 'Read',
      input: { file_path: '/tmp/test' },
      output: '',
    };
    expect(result.toolCalls).toHaveLength(2);
    expect(result.toolCalls[0]).toEqual(bashCall);
    expect(result.toolCalls[1]).toEqual(readCall);
    expect(result.toolCallCount).toBe(2);
  });

  test('skips malformed lines without throwing', () => {
    // Two of the five lines are not valid JSON and must be dropped.
    const mixedInput = [
      '{"type":"system"}',
      'this is not json',
      '{"type":"assistant","message":{"content":[{"type":"text","text":"ok"}]}}',
      '{incomplete json',
      '{"type":"result","subtype":"success","result":"done"}',
    ];
    const result = parseNDJSON(mixedInput);
    expect(result.transcript).toHaveLength(3); // system, assistant, result
    expect(result.resultLine?.subtype).toBe('success');
  });

  test('skips empty and whitespace-only lines', () => {
    const paddedInput = [
      '',
      ' ',
      '{"type":"system"}',
      '\t',
      '{"type":"result","subtype":"success","result":"ok"}',
    ];
    const { transcript } = parseNDJSON(paddedInput);
    expect(transcript).toHaveLength(2);
  });

  test('extracts resultLine from type: "result" event', () => {
    const { resultLine } = parseNDJSON(FIXTURE_LINES);
    expect(resultLine).not.toBeNull();
    expect(resultLine.subtype).toBe('success');
    expect(resultLine.total_cost_usd).toBe(0.05);
    expect(resultLine.num_turns).toBe(3);
    expect(resultLine.result).toBe('Done.');
  });

  test('counts turns correctly — one per assistant event, not per text block', () => {
    // 3 assistant events in fixture (tool_use, text, text+tool_use)
    const { turnCount } = parseNDJSON(FIXTURE_LINES);
    expect(turnCount).toBe(3);
  });

  test('handles empty input', () => {
    const result = parseNDJSON([]);
    expect(result.transcript).toHaveLength(0);
    expect(result.resultLine).toBeNull();
    expect(result.turnCount).toBe(0);
    expect(result.toolCallCount).toBe(0);
    expect(result.toolCalls).toHaveLength(0);
  });

  test('handles assistant event with no content array', () => {
    // Assistant events without message.content still count as turns.
    const contentlessInput = [
      '{"type":"assistant","message":{}}',
      '{"type":"assistant"}',
    ];
    const result = parseNDJSON(contentlessInput);
    expect(result.turnCount).toBe(2);
    expect(result.toolCalls).toHaveLength(0);
  });
});