@runchr/gstack-antigravity 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @runchr/gstack-antigravity might be problematic. Click here for more details.
- package/.agents/skills/gstack/.agents/skills/gstack/SKILL.md +651 -0
- package/.agents/skills/gstack/.agents/skills/gstack-autoplan/SKILL.md +678 -0
- package/.agents/skills/gstack/.agents/skills/gstack-benchmark/SKILL.md +482 -0
- package/.agents/skills/gstack/.agents/skills/gstack-browse/SKILL.md +511 -0
- package/.agents/skills/gstack/.agents/skills/gstack-canary/SKILL.md +486 -0
- package/.agents/skills/gstack/.agents/skills/gstack-careful/SKILL.md +50 -0
- package/.agents/skills/gstack/.agents/skills/gstack-cso/SKILL.md +607 -0
- package/.agents/skills/gstack/.agents/skills/gstack-design-consultation/SKILL.md +615 -0
- package/.agents/skills/gstack/.agents/skills/gstack-design-review/SKILL.md +988 -0
- package/.agents/skills/gstack/.agents/skills/gstack-document-release/SKILL.md +604 -0
- package/.agents/skills/gstack/.agents/skills/gstack-freeze/SKILL.md +67 -0
- package/.agents/skills/gstack/.agents/skills/gstack-guard/SKILL.md +62 -0
- package/.agents/skills/gstack/.agents/skills/gstack-investigate/SKILL.md +415 -0
- package/.agents/skills/gstack/.agents/skills/gstack-land-and-deploy/SKILL.md +873 -0
- package/.agents/skills/gstack/.agents/skills/gstack-office-hours/SKILL.md +986 -0
- package/.agents/skills/gstack/.agents/skills/gstack-plan-ceo-review/SKILL.md +1268 -0
- package/.agents/skills/gstack/.agents/skills/gstack-plan-design-review/SKILL.md +668 -0
- package/.agents/skills/gstack/.agents/skills/gstack-plan-eng-review/SKILL.md +826 -0
- package/.agents/skills/gstack/.agents/skills/gstack-qa/SKILL.md +1006 -0
- package/.agents/skills/gstack/.agents/skills/gstack-qa-only/SKILL.md +626 -0
- package/.agents/skills/gstack/.agents/skills/gstack-retro/SKILL.md +1065 -0
- package/.agents/skills/gstack/.agents/skills/gstack-review/SKILL.md +704 -0
- package/.agents/skills/gstack/.agents/skills/gstack-setup-browser-cookies/SKILL.md +325 -0
- package/.agents/skills/gstack/.agents/skills/gstack-setup-deploy/SKILL.md +450 -0
- package/.agents/skills/gstack/.agents/skills/gstack-ship/SKILL.md +1312 -0
- package/.agents/skills/gstack/.agents/skills/gstack-unfreeze/SKILL.md +36 -0
- package/.agents/skills/gstack/.agents/skills/gstack-upgrade/SKILL.md +220 -0
- package/.agents/skills/gstack/.env.example +5 -0
- package/.agents/skills/gstack/.github/workflows/skill-docs.yml +17 -0
- package/.agents/skills/gstack/AGENTS.md +49 -0
- package/.agents/skills/gstack/ARCHITECTURE.md +359 -0
- package/.agents/skills/gstack/BROWSER.md +271 -0
- package/.agents/skills/gstack/CHANGELOG.md +800 -0
- package/.agents/skills/gstack/CLAUDE.md +284 -0
- package/.agents/skills/gstack/CONTRIBUTING.md +370 -0
- package/.agents/skills/gstack/ETHOS.md +129 -0
- package/.agents/skills/gstack/LICENSE +21 -0
- package/.agents/skills/gstack/README.md +228 -0
- package/.agents/skills/gstack/SKILL.md +657 -0
- package/.agents/skills/gstack/SKILL.md.tmpl +281 -0
- package/.agents/skills/gstack/TODOS.md +564 -0
- package/.agents/skills/gstack/VERSION +1 -0
- package/.agents/skills/gstack/autoplan/SKILL.md +689 -0
- package/.agents/skills/gstack/autoplan/SKILL.md.tmpl +416 -0
- package/.agents/skills/gstack/benchmark/SKILL.md +489 -0
- package/.agents/skills/gstack/benchmark/SKILL.md.tmpl +233 -0
- package/.agents/skills/gstack/bin/dev-setup +68 -0
- package/.agents/skills/gstack/bin/dev-teardown +56 -0
- package/.agents/skills/gstack/bin/gstack-analytics +191 -0
- package/.agents/skills/gstack/bin/gstack-community-dashboard +113 -0
- package/.agents/skills/gstack/bin/gstack-config +38 -0
- package/.agents/skills/gstack/bin/gstack-diff-scope +71 -0
- package/.agents/skills/gstack/bin/gstack-global-discover.ts +591 -0
- package/.agents/skills/gstack/bin/gstack-repo-mode +93 -0
- package/.agents/skills/gstack/bin/gstack-review-log +9 -0
- package/.agents/skills/gstack/bin/gstack-review-read +12 -0
- package/.agents/skills/gstack/bin/gstack-slug +15 -0
- package/.agents/skills/gstack/bin/gstack-telemetry-log +158 -0
- package/.agents/skills/gstack/bin/gstack-telemetry-sync +127 -0
- package/.agents/skills/gstack/bin/gstack-update-check +196 -0
- package/.agents/skills/gstack/browse/SKILL.md +517 -0
- package/.agents/skills/gstack/browse/SKILL.md.tmpl +141 -0
- package/.agents/skills/gstack/browse/bin/find-browse +21 -0
- package/.agents/skills/gstack/browse/bin/remote-slug +14 -0
- package/.agents/skills/gstack/browse/scripts/build-node-server.sh +48 -0
- package/.agents/skills/gstack/browse/src/browser-manager.ts +634 -0
- package/.agents/skills/gstack/browse/src/buffers.ts +137 -0
- package/.agents/skills/gstack/browse/src/bun-polyfill.cjs +109 -0
- package/.agents/skills/gstack/browse/src/cli.ts +420 -0
- package/.agents/skills/gstack/browse/src/commands.ts +111 -0
- package/.agents/skills/gstack/browse/src/config.ts +150 -0
- package/.agents/skills/gstack/browse/src/cookie-import-browser.ts +417 -0
- package/.agents/skills/gstack/browse/src/cookie-picker-routes.ts +207 -0
- package/.agents/skills/gstack/browse/src/cookie-picker-ui.ts +541 -0
- package/.agents/skills/gstack/browse/src/find-browse.ts +61 -0
- package/.agents/skills/gstack/browse/src/meta-commands.ts +269 -0
- package/.agents/skills/gstack/browse/src/platform.ts +17 -0
- package/.agents/skills/gstack/browse/src/read-commands.ts +335 -0
- package/.agents/skills/gstack/browse/src/server.ts +369 -0
- package/.agents/skills/gstack/browse/src/snapshot.ts +398 -0
- package/.agents/skills/gstack/browse/src/url-validation.ts +91 -0
- package/.agents/skills/gstack/browse/src/write-commands.ts +352 -0
- package/.agents/skills/gstack/browse/test/bun-polyfill.test.ts +72 -0
- package/.agents/skills/gstack/browse/test/commands.test.ts +1836 -0
- package/.agents/skills/gstack/browse/test/config.test.ts +250 -0
- package/.agents/skills/gstack/browse/test/cookie-import-browser.test.ts +397 -0
- package/.agents/skills/gstack/browse/test/cookie-picker-routes.test.ts +205 -0
- package/.agents/skills/gstack/browse/test/find-browse.test.ts +50 -0
- package/.agents/skills/gstack/browse/test/fixtures/basic.html +33 -0
- package/.agents/skills/gstack/browse/test/fixtures/cursor-interactive.html +22 -0
- package/.agents/skills/gstack/browse/test/fixtures/dialog.html +15 -0
- package/.agents/skills/gstack/browse/test/fixtures/empty.html +2 -0
- package/.agents/skills/gstack/browse/test/fixtures/forms.html +55 -0
- package/.agents/skills/gstack/browse/test/fixtures/qa-eval-checkout.html +108 -0
- package/.agents/skills/gstack/browse/test/fixtures/qa-eval-spa.html +98 -0
- package/.agents/skills/gstack/browse/test/fixtures/qa-eval.html +51 -0
- package/.agents/skills/gstack/browse/test/fixtures/responsive.html +49 -0
- package/.agents/skills/gstack/browse/test/fixtures/snapshot.html +55 -0
- package/.agents/skills/gstack/browse/test/fixtures/spa.html +24 -0
- package/.agents/skills/gstack/browse/test/fixtures/states.html +17 -0
- package/.agents/skills/gstack/browse/test/fixtures/upload.html +25 -0
- package/.agents/skills/gstack/browse/test/gstack-config.test.ts +125 -0
- package/.agents/skills/gstack/browse/test/gstack-update-check.test.ts +467 -0
- package/.agents/skills/gstack/browse/test/handoff.test.ts +235 -0
- package/.agents/skills/gstack/browse/test/path-validation.test.ts +63 -0
- package/.agents/skills/gstack/browse/test/platform.test.ts +37 -0
- package/.agents/skills/gstack/browse/test/snapshot.test.ts +467 -0
- package/.agents/skills/gstack/browse/test/test-server.ts +57 -0
- package/.agents/skills/gstack/browse/test/url-validation.test.ts +72 -0
- package/.agents/skills/gstack/canary/SKILL.md +493 -0
- package/.agents/skills/gstack/canary/SKILL.md.tmpl +220 -0
- package/.agents/skills/gstack/careful/SKILL.md +59 -0
- package/.agents/skills/gstack/careful/SKILL.md.tmpl +57 -0
- package/.agents/skills/gstack/careful/bin/check-careful.sh +112 -0
- package/.agents/skills/gstack/codex/SKILL.md +677 -0
- package/.agents/skills/gstack/codex/SKILL.md.tmpl +356 -0
- package/.agents/skills/gstack/conductor.json +6 -0
- package/.agents/skills/gstack/cso/SKILL.md +615 -0
- package/.agents/skills/gstack/cso/SKILL.md.tmpl +376 -0
- package/.agents/skills/gstack/design-consultation/SKILL.md +625 -0
- package/.agents/skills/gstack/design-consultation/SKILL.md.tmpl +369 -0
- package/.agents/skills/gstack/design-review/SKILL.md +998 -0
- package/.agents/skills/gstack/design-review/SKILL.md.tmpl +262 -0
- package/.agents/skills/gstack/docs/images/github-2013.png +0 -0
- package/.agents/skills/gstack/docs/images/github-2026.png +0 -0
- package/.agents/skills/gstack/docs/skills.md +877 -0
- package/.agents/skills/gstack/document-release/SKILL.md +613 -0
- package/.agents/skills/gstack/document-release/SKILL.md.tmpl +357 -0
- package/.agents/skills/gstack/freeze/SKILL.md +82 -0
- package/.agents/skills/gstack/freeze/SKILL.md.tmpl +80 -0
- package/.agents/skills/gstack/freeze/bin/check-freeze.sh +68 -0
- package/.agents/skills/gstack/gstack-upgrade/SKILL.md +226 -0
- package/.agents/skills/gstack/gstack-upgrade/SKILL.md.tmpl +224 -0
- package/.agents/skills/gstack/guard/SKILL.md +82 -0
- package/.agents/skills/gstack/guard/SKILL.md.tmpl +80 -0
- package/.agents/skills/gstack/investigate/SKILL.md +435 -0
- package/.agents/skills/gstack/investigate/SKILL.md.tmpl +196 -0
- package/.agents/skills/gstack/land-and-deploy/SKILL.md +880 -0
- package/.agents/skills/gstack/land-and-deploy/SKILL.md.tmpl +575 -0
- package/.agents/skills/gstack/office-hours/SKILL.md +996 -0
- package/.agents/skills/gstack/office-hours/SKILL.md.tmpl +624 -0
- package/.agents/skills/gstack/package.json +55 -0
- package/.agents/skills/gstack/plan-ceo-review/SKILL.md +1277 -0
- package/.agents/skills/gstack/plan-ceo-review/SKILL.md.tmpl +838 -0
- package/.agents/skills/gstack/plan-design-review/SKILL.md +676 -0
- package/.agents/skills/gstack/plan-design-review/SKILL.md.tmpl +314 -0
- package/.agents/skills/gstack/plan-eng-review/SKILL.md +836 -0
- package/.agents/skills/gstack/plan-eng-review/SKILL.md.tmpl +279 -0
- package/.agents/skills/gstack/qa/SKILL.md +1016 -0
- package/.agents/skills/gstack/qa/SKILL.md.tmpl +316 -0
- package/.agents/skills/gstack/qa/references/issue-taxonomy.md +85 -0
- package/.agents/skills/gstack/qa/templates/qa-report-template.md +126 -0
- package/.agents/skills/gstack/qa-only/SKILL.md +633 -0
- package/.agents/skills/gstack/qa-only/SKILL.md.tmpl +101 -0
- package/.agents/skills/gstack/retro/SKILL.md +1072 -0
- package/.agents/skills/gstack/retro/SKILL.md.tmpl +833 -0
- package/.agents/skills/gstack/review/SKILL.md +849 -0
- package/.agents/skills/gstack/review/SKILL.md.tmpl +259 -0
- package/.agents/skills/gstack/review/TODOS-format.md +62 -0
- package/.agents/skills/gstack/review/checklist.md +190 -0
- package/.agents/skills/gstack/review/design-checklist.md +132 -0
- package/.agents/skills/gstack/review/greptile-triage.md +220 -0
- package/.agents/skills/gstack/scripts/analytics.ts +190 -0
- package/.agents/skills/gstack/scripts/dev-skill.ts +82 -0
- package/.agents/skills/gstack/scripts/eval-compare.ts +96 -0
- package/.agents/skills/gstack/scripts/eval-list.ts +116 -0
- package/.agents/skills/gstack/scripts/eval-select.ts +86 -0
- package/.agents/skills/gstack/scripts/eval-summary.ts +187 -0
- package/.agents/skills/gstack/scripts/eval-watch.ts +172 -0
- package/.agents/skills/gstack/scripts/gen-skill-docs.ts +2414 -0
- package/.agents/skills/gstack/scripts/skill-check.ts +167 -0
- package/.agents/skills/gstack/setup +269 -0
- package/.agents/skills/gstack/setup-browser-cookies/SKILL.md +330 -0
- package/.agents/skills/gstack/setup-browser-cookies/SKILL.md.tmpl +74 -0
- package/.agents/skills/gstack/setup-deploy/SKILL.md +459 -0
- package/.agents/skills/gstack/setup-deploy/SKILL.md.tmpl +220 -0
- package/.agents/skills/gstack/ship/SKILL.md +1457 -0
- package/.agents/skills/gstack/ship/SKILL.md.tmpl +528 -0
- package/.agents/skills/gstack/supabase/config.sh +10 -0
- package/.agents/skills/gstack/supabase/functions/community-pulse/index.ts +59 -0
- package/.agents/skills/gstack/supabase/functions/telemetry-ingest/index.ts +135 -0
- package/.agents/skills/gstack/supabase/functions/update-check/index.ts +37 -0
- package/.agents/skills/gstack/supabase/migrations/001_telemetry.sql +89 -0
- package/.agents/skills/gstack/test/analytics.test.ts +277 -0
- package/.agents/skills/gstack/test/codex-e2e.test.ts +197 -0
- package/.agents/skills/gstack/test/fixtures/coverage-audit-fixture.ts +76 -0
- package/.agents/skills/gstack/test/fixtures/eval-baselines.json +7 -0
- package/.agents/skills/gstack/test/fixtures/qa-eval-checkout-ground-truth.json +43 -0
- package/.agents/skills/gstack/test/fixtures/qa-eval-ground-truth.json +43 -0
- package/.agents/skills/gstack/test/fixtures/qa-eval-spa-ground-truth.json +43 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-design-slop.css +86 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-design-slop.html +41 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-enum-diff.rb +30 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-enum.rb +27 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-vuln.rb +14 -0
- package/.agents/skills/gstack/test/gemini-e2e.test.ts +173 -0
- package/.agents/skills/gstack/test/gen-skill-docs.test.ts +1049 -0
- package/.agents/skills/gstack/test/global-discover.test.ts +187 -0
- package/.agents/skills/gstack/test/helpers/codex-session-runner.ts +282 -0
- package/.agents/skills/gstack/test/helpers/e2e-helpers.ts +239 -0
- package/.agents/skills/gstack/test/helpers/eval-store.test.ts +548 -0
- package/.agents/skills/gstack/test/helpers/eval-store.ts +689 -0
- package/.agents/skills/gstack/test/helpers/gemini-session-runner.test.ts +104 -0
- package/.agents/skills/gstack/test/helpers/gemini-session-runner.ts +201 -0
- package/.agents/skills/gstack/test/helpers/llm-judge.ts +130 -0
- package/.agents/skills/gstack/test/helpers/observability.test.ts +283 -0
- package/.agents/skills/gstack/test/helpers/session-runner.test.ts +96 -0
- package/.agents/skills/gstack/test/helpers/session-runner.ts +357 -0
- package/.agents/skills/gstack/test/helpers/skill-parser.ts +206 -0
- package/.agents/skills/gstack/test/helpers/touchfiles.ts +260 -0
- package/.agents/skills/gstack/test/hook-scripts.test.ts +373 -0
- package/.agents/skills/gstack/test/skill-e2e-browse.test.ts +293 -0
- package/.agents/skills/gstack/test/skill-e2e-deploy.test.ts +279 -0
- package/.agents/skills/gstack/test/skill-e2e-design.test.ts +614 -0
- package/.agents/skills/gstack/test/skill-e2e-plan.test.ts +538 -0
- package/.agents/skills/gstack/test/skill-e2e-qa-bugs.test.ts +194 -0
- package/.agents/skills/gstack/test/skill-e2e-qa-workflow.test.ts +412 -0
- package/.agents/skills/gstack/test/skill-e2e-review.test.ts +535 -0
- package/.agents/skills/gstack/test/skill-e2e-workflow.test.ts +586 -0
- package/.agents/skills/gstack/test/skill-e2e.test.ts +3325 -0
- package/.agents/skills/gstack/test/skill-llm-eval.test.ts +787 -0
- package/.agents/skills/gstack/test/skill-parser.test.ts +179 -0
- package/.agents/skills/gstack/test/skill-routing-e2e.test.ts +605 -0
- package/.agents/skills/gstack/test/skill-validation.test.ts +1520 -0
- package/.agents/skills/gstack/test/telemetry.test.ts +278 -0
- package/.agents/skills/gstack/test/touchfiles.test.ts +262 -0
- package/.agents/skills/gstack/unfreeze/SKILL.md +40 -0
- package/.agents/skills/gstack/unfreeze/SKILL.md.tmpl +38 -0
- package/README.md +12 -7
- package/README_KO.md +12 -6
- package/package.json +3 -2
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for E2E observability infrastructure.
|
|
3
|
+
*
|
|
4
|
+
* Tests heartbeat, progress.log, NDJSON persistence, savePartial(),
|
|
5
|
+
* finalize() partial-file preservation, failure transcript paths, watcher rendering,
|
|
6
|
+
* and non-fatal I/O guarantees.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
|
10
|
+
import * as fs from 'fs';
|
|
11
|
+
import * as path from 'path';
|
|
12
|
+
import * as os from 'os';
|
|
13
|
+
import { sanitizeTestName } from './session-runner';
|
|
14
|
+
import { EvalCollector } from './eval-store';
|
|
15
|
+
import { renderDashboard } from '../../scripts/eval-watch';
|
|
16
|
+
import type { HeartbeatData, PartialData } from '../../scripts/eval-watch';
|
|
17
|
+
|
|
18
|
+
// Scratch directory for the test currently running; assigned in beforeEach.
let tmpDir: string;

// Give every test its own freshly created temp directory.
beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'obs-test-');
  tmpDir = fs.mkdtempSync(prefix);
});

// Best-effort removal of the scratch directory; a cleanup failure must not
// fail the test, so any error is deliberately ignored.
afterEach(() => {
  try {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  } catch {
    /* cleanup is best-effort */
  }
});
|
|
27
|
+
|
|
28
|
+
// --- Tests 1-5, 8, 11: session-runner name sanitizing and source-level checks ---
|
|
29
|
+
|
|
30
|
+
describe('session-runner observability', () => {
|
|
31
|
+
test('1: sanitizeTestName strips slashes and leading dashes', () => {
  // Each case pairs a raw test name with the sanitized form it must produce.
  const cases = [
    { raw: '/plan-ceo-review', want: 'plan-ceo-review' },
    { raw: 'browse-basic', want: 'browse-basic' },
    { raw: '/qa/deep/test', want: 'qa-deep-test' },
    { raw: '///leading', want: 'leading' },
  ];

  for (const { raw, want } of cases) {
    expect(sanitizeTestName(raw)).toBe(want);
  }
});
|
|
37
|
+
|
|
38
|
+
test('2: heartbeat file path uses ~/.gstack-dev/e2e-live.json', () => {
  // Source-level check only: session-runner.ts is read from disk as text and
  // searched for the heartbeat file name and the atomic-write helper name.
  // Nothing is imported or executed here; the actual heartbeat write is
  // covered by the E2E suite.
  const sessionRunnerSrc = fs.readFileSync(
    path.resolve(__dirname, 'session-runner.ts'), 'utf-8'
  );
  expect(sessionRunnerSrc).toContain("'e2e-live.json'");
  expect(sessionRunnerSrc).toContain('atomicWriteSync');
});
|
|
48
|
+
|
|
49
|
+
test('3: heartbeat JSON schema has expected fields', () => {
  // Substring check against the runner's source text: every heartbeat field
  // name must appear somewhere in session-runner.ts.
  const runnerPath = path.resolve(__dirname, 'session-runner.ts');
  const runnerSource = fs.readFileSync(runnerPath, 'utf-8');

  const requiredFields = [
    'runId',
    'startedAt',
    'currentTest',
    'status',
    'turn',
    'toolCount',
    'lastTool',
    'lastToolAt',
    'elapsedSec',
  ];
  requiredFields.forEach((fieldName) => {
    expect(runnerSource).toContain(fieldName);
  });

  // completedTests was removed per plan and must not reappear in the source.
  expect(runnerSource).not.toContain('completedTests');
});
|
|
60
|
+
|
|
61
|
+
test('4: progress.log format matches expected pattern', () => {
  // Progress lines look like: "  [Ns] turn T tool #C: Name(...)".
  // stderr and progress.log share the same progressLine variable, so the
  // source must mention that variable, the log file name, and the append call.
  const runnerPath = path.resolve(__dirname, 'session-runner.ts');
  const runnerSource = fs.readFileSync(runnerPath, 'utf-8');

  for (const marker of ['progressLine', "'progress.log'", 'appendFileSync']) {
    expect(runnerSource).toContain(marker);
  }
});
|
|
71
|
+
|
|
72
|
+
test('5: NDJSON file uses sanitized test name', () => {
  // Source-level check: the runner text must reference both the sanitized
  // name variable and the .ndjson extension.
  const runnerPath = path.resolve(__dirname, 'session-runner.ts');
  const runnerSource = fs.readFileSync(runnerPath, 'utf-8');

  for (const marker of ['safeName', '.ndjson']) {
    expect(runnerSource).toContain(marker);
  }
});
|
|
79
|
+
|
|
80
|
+
test('8: failure transcript goes to runDir when available', () => {
  const runnerPath = path.resolve(__dirname, 'session-runner.ts');
  const runnerSource = fs.readFileSync(runnerPath, 'utf-8');

  // The source must prefer runDir and fall back to workingDirectory, and
  // must name the transcript with the -failure.json suffix.
  const expectedSnippets = ['runDir || path.join(workingDirectory', '-failure.json'];
  for (const snippet of expectedSnippets) {
    expect(runnerSource).toContain(snippet);
  }
});
|
|
88
|
+
|
|
89
|
+
test('11: all new I/O is wrapped in try/catch (non-fatal)', () => {
  const runnerPath = path.resolve(__dirname, 'session-runner.ts');
  const runnerSource = fs.readFileSync(runnerPath, 'utf-8');

  // Each non-fatal I/O path is marked with a "/* non-fatal */" comment.
  // Original had 2 (promptFile unlink + failure transcript); 4 more were
  // added (runDir creation, progress.log, heartbeat, NDJSON append).
  const markers = runnerSource.match(/\/\* non-fatal \*\//g);
  const nonFatalCount = markers === null ? 0 : markers.length;

  expect(nonFatalCount).toBeGreaterThanOrEqual(6);
});
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
// --- Tests 6, 7: eval-store savePartial() and finalize() ---
|
|
102
|
+
|
|
103
|
+
describe('eval-store observability', () => {
|
|
104
|
+
test('6: savePartial() writes valid JSON with _partial: true', () => {
  const evalDir = path.join(tmpDir, 'evals');
  const partialPath = path.join(evalDir, '_partial-e2e.json');
  const collector = new EvalCollector('e2e', evalDir);

  // Recording one test is expected to leave a partial snapshot on disk.
  collector.addTest({
    name: 'test-one',
    suite: 'test',
    tier: 'e2e',
    passed: true,
    duration_ms: 1000,
    cost_usd: 0.05,
    exit_reason: 'success',
  });

  expect(fs.existsSync(partialPath)).toBe(true);

  const snapshot = JSON.parse(fs.readFileSync(partialPath, 'utf-8'));
  expect(snapshot._partial).toBe(true);
  expect(snapshot.tests).toHaveLength(1);

  const [entry] = snapshot.tests;
  expect(entry.name).toBe('test-one');
  expect(entry.exit_reason).toBe('success');

  expect(snapshot.schema_version).toBe(1);
  expect(snapshot.total_tests).toBe(1);
  expect(snapshot.passed).toBe(1);
});
|
|
130
|
+
|
|
131
|
+
test('6b: savePartial() accumulates multiple tests', () => {
  const evalDir = path.join(tmpDir, 'evals');
  const partialPath = path.join(evalDir, '_partial-e2e.json');
  const collector = new EvalCollector('e2e', evalDir);

  // One passing entry followed by one timed-out entry with diagnostics.
  collector.addTest({
    name: 'test-one',
    suite: 'test',
    tier: 'e2e',
    passed: true,
    duration_ms: 1000,
    cost_usd: 0.05,
  });
  collector.addTest({
    name: 'test-two',
    suite: 'test',
    tier: 'e2e',
    passed: false,
    duration_ms: 2000,
    cost_usd: 0.10,
    exit_reason: 'timeout',
    timeout_at_turn: 5,
    last_tool_call: 'Bash(ls)',
  });

  const snapshot = JSON.parse(fs.readFileSync(partialPath, 'utf-8'));

  // Totals must reflect both entries.
  expect(snapshot.tests).toHaveLength(2);
  expect(snapshot.total_tests).toBe(2);
  expect(snapshot.passed).toBe(1);
  expect(snapshot.failed).toBe(1);

  // Diagnostic fields of the failed entry must survive serialization.
  const failedEntry = snapshot.tests[1];
  expect(failedEntry.exit_reason).toBe('timeout');
  expect(failedEntry.timeout_at_turn).toBe(5);
  expect(failedEntry.last_tool_call).toBe('Bash(ls)');
});
|
|
155
|
+
|
|
156
|
+
test('7: finalize() preserves partial file alongside final', async () => {
  const evalDir = path.join(tmpDir, 'evals');
  const partialPath = path.join(evalDir, '_partial-e2e.json');
  const collector = new EvalCollector('e2e', evalDir);

  collector.addTest({
    name: 'test-one',
    suite: 'test',
    tier: 'e2e',
    passed: true,
    duration_ms: 1000,
    cost_usd: 0.05,
  });
  expect(fs.existsSync(partialPath)).toBe(true);

  await collector.finalize();

  // The partial file is kept for observability; finalize() never removes it.
  expect(fs.existsSync(partialPath)).toBe(true);

  // A final eval file (a .json whose name has no leading underscore) must
  // exist as well.
  const finalFiles = fs
    .readdirSync(evalDir)
    .filter((fileName) => !fileName.startsWith('_') && fileName.endsWith('.json'));
  expect(finalFiles.length).toBeGreaterThanOrEqual(1);
});
|
|
177
|
+
|
|
178
|
+
test('EvalTestEntry includes diagnostic fields', () => {
  const evalDir = path.join(tmpDir, 'evals');
  const partialPath = path.join(evalDir, '_partial-e2e.json');
  const collector = new EvalCollector('e2e', evalDir);

  // A failed entry carrying the optional diagnostic fields; timeout_at_turn
  // is passed explicitly as undefined.
  collector.addTest({
    name: 'diagnostic-test',
    suite: 'test',
    tier: 'e2e',
    passed: false,
    duration_ms: 5000,
    cost_usd: 0.20,
    exit_reason: 'error_max_turns',
    timeout_at_turn: undefined,
    last_tool_call: 'Write(review-output.md)',
  });

  const snapshot = JSON.parse(fs.readFileSync(partialPath, 'utf-8'));
  const entry = snapshot.tests[0];
  expect(entry.exit_reason).toBe('error_max_turns');
  expect(entry.last_tool_call).toBe('Write(review-output.md)');
});
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
// --- Tests 9, 10: watcher dashboard rendering ---
|
|
199
|
+
|
|
200
|
+
describe('eval-watch dashboard', () => {
|
|
201
|
+
test('9: renderDashboard shows completed tests and current test', () => {
  // Live run state; lastToolAt is "now", so the heartbeat is not stale.
  const heartbeat: HeartbeatData = {
    runId: '20260314-143022',
    startedAt: '2026-03-14T14:30:22Z',
    currentTest: 'plan-ceo-review',
    status: 'running',
    turn: 4,
    toolCount: 3,
    lastTool: 'Write(review-output.md)',
    lastToolAt: new Date().toISOString(),
    elapsedSec: 285,
  };

  // Two tests already finished in this run.
  const partial: PartialData = {
    tests: [
      { name: 'browse basic', passed: true, cost_usd: 0.07, duration_ms: 24000, turns_used: 6 },
      { name: '/review', passed: true, cost_usd: 0.17, duration_ms: 63000, turns_used: 13 },
    ],
    total_cost_usd: 0.24,
    _partial: true,
  };

  const rendered = renderDashboard(heartbeat, partial);

  const expectedFragments = [
    // run ID
    '20260314-143022',
    // completed tests and their costs
    'browse basic',
    '/review',
    '$0.07',
    '$0.17',
    // test currently in flight
    'plan-ceo-review',
    'turn 4',
    'Write(review-output.md)',
  ];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }

  // A recent lastToolAt must not trigger the stale warning.
  expect(rendered).not.toContain('STALE');
});
|
|
242
|
+
|
|
243
|
+
test('10: renderDashboard warns on stale heartbeat', () => {
  // Last tool activity happened 15 minutes ago.
  const fifteenMinutesMs = 15 * 60 * 1000;
  const staleTime = new Date(Date.now() - fifteenMinutesMs).toISOString();

  const heartbeat: HeartbeatData = {
    runId: '20260314-143022',
    startedAt: '2026-03-14T14:30:22Z',
    currentTest: 'plan-ceo-review',
    status: 'running',
    turn: 4,
    toolCount: 3,
    lastTool: 'Write(review-output.md)',
    lastToolAt: staleTime,
    elapsedSec: 900,
  };

  const rendered = renderDashboard(heartbeat, null);

  for (const warning of ['STALE', 'may have crashed']) {
    expect(rendered).toContain(warning);
  }
});
|
|
263
|
+
|
|
264
|
+
test('renderDashboard handles no active run', () => {
  // With neither heartbeat nor partial data, the dashboard output must
  // contain the idle message and the text 'bun test'.
  const rendered = renderDashboard(null, null);

  for (const fragment of ['No active run', 'bun test']) {
    expect(rendered).toContain(fragment);
  }
});
|
|
269
|
+
|
|
270
|
+
test('renderDashboard handles partial-only (heartbeat gone)', () => {
  // Only completed-test data is available; there is no heartbeat.
  const completed = { name: 'browse basic', passed: true, cost_usd: 0.07, duration_ms: 24000 };
  const partial: PartialData = {
    tests: [completed],
    total_cost_usd: 0.07,
    _partial: true,
  };

  const rendered = renderDashboard(null, partial);

  for (const fragment of ['browse basic', '$0.07']) {
    expect(rendered).toContain(fragment);
  }
});
|
|
283
|
+
});
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { describe, test, expect } from 'bun:test';
|
|
2
|
+
import { parseNDJSON } from './session-runner';
|
|
3
|
+
|
|
4
|
+
// Fixture: minimal NDJSON session (system init, assistant with tool_use, tool result, assistant text, assistant text + tool_use, result)
|
|
5
|
+
// Six raw NDJSON lines, one JSON event per string, shared by the parseNDJSON tests.
const FIXTURE_LINES = [
|
|
6
|
+
// system init event
'{"type":"system","subtype":"init","session_id":"test-123"}',
|
|
7
|
+
// assistant event #1: a single Bash tool_use
'{"type":"assistant","message":{"content":[{"type":"tool_use","id":"tu1","name":"Bash","input":{"command":"echo hello"}}]}}',
|
|
8
|
+
// user event carrying the tool result for tu1
'{"type":"user","tool_use_result":{"tool_use_id":"tu1","stdout":"hello\\n","stderr":""}}',
|
|
9
|
+
// assistant event #2: text only
'{"type":"assistant","message":{"content":[{"type":"text","text":"The command printed hello."}]}}',
|
|
10
|
+
// assistant event #3: text followed by a Read tool_use
'{"type":"assistant","message":{"content":[{"type":"text","text":"Let me also read a file."},{"type":"tool_use","id":"tu2","name":"Read","input":{"file_path":"/tmp/test"}}]}}',
|
|
11
|
+
// final result event with cost, turn count and usage
'{"type":"result","subtype":"success","total_cost_usd":0.05,"num_turns":3,"usage":{"input_tokens":100,"output_tokens":50},"result":"Done."}',
|
|
12
|
+
];
|
|
13
|
+
|
|
14
|
+
describe('parseNDJSON', () => {
|
|
15
|
+
test('parses valid NDJSON with system + assistant + result events', () => {
  const { transcript } = parseNDJSON(FIXTURE_LINES);

  // All six fixture lines are kept, in order: system first, result last.
  expect(transcript).toHaveLength(6);
  expect(transcript[0].type).toBe('system');
  expect(transcript[5].type).toBe('result');
});
|
|
21
|
+
|
|
22
|
+
test('extracts tool calls from assistant.message.content[].type === tool_use', () => {
  const parsed = parseNDJSON(FIXTURE_LINES);

  // The fixture contains two tool_use blocks: Bash, then Read.
  expect(parsed.toolCalls).toHaveLength(2);

  const [bashCall, readCall] = parsed.toolCalls;
  expect(bashCall).toEqual({
    tool: 'Bash',
    input: { command: 'echo hello' },
    output: '',
  });
  expect(readCall).toEqual({
    tool: 'Read',
    input: { file_path: '/tmp/test' },
    output: '',
  });

  expect(parsed.toolCallCount).toBe(2);
});
|
|
37
|
+
|
|
38
|
+
test('skips malformed lines without throwing', () => {
  // Three valid events interleaved with two lines that are not valid JSON.
  const mixedInput = [
    '{"type":"system"}',
    'this is not json',
    '{"type":"assistant","message":{"content":[{"type":"text","text":"ok"}]}}',
    '{incomplete json',
    '{"type":"result","subtype":"success","result":"done"}',
  ];

  const parsed = parseNDJSON(mixedInput);

  // Only system, assistant and result survive.
  expect(parsed.transcript).toHaveLength(3);
  expect(parsed.resultLine?.subtype).toBe('success');
});
|
|
50
|
+
|
|
51
|
+
test('skips empty and whitespace-only lines', () => {
  // Blank, space-only and tab-only lines surround two real events.
  const paddedInput = [
    '',
    '  ',
    '{"type":"system"}',
    '\t',
    '{"type":"result","subtype":"success","result":"ok"}',
  ];

  const { transcript } = parseNDJSON(paddedInput);
  expect(transcript).toHaveLength(2);
});
|
|
62
|
+
|
|
63
|
+
test('extracts resultLine from type: "result" event', () => {
  const parsed = parseNDJSON(FIXTURE_LINES);
  expect(parsed.resultLine).not.toBeNull();

  // Every field of the fixture's result event must be carried through.
  const summary = parsed.resultLine;
  expect(summary.subtype).toBe('success');
  expect(summary.total_cost_usd).toBe(0.05);
  expect(summary.num_turns).toBe(3);
  expect(summary.result).toBe('Done.');
});
|
|
71
|
+
|
|
72
|
+
test('counts turns correctly — one per assistant event, not per text block', () => {
  // The fixture has 3 assistant events: tool_use, text, and text+tool_use.
  const { turnCount } = parseNDJSON(FIXTURE_LINES);
  expect(turnCount).toBe(3);
});
|
|
77
|
+
|
|
78
|
+
test('handles empty input', () => {
  // No lines in: empty collections, zero counters, and a null resultLine.
  const emptyResult = parseNDJSON([]);

  expect(emptyResult.transcript).toHaveLength(0);
  expect(emptyResult.resultLine).toBeNull();
  expect(emptyResult.turnCount).toBe(0);
  expect(emptyResult.toolCallCount).toBe(0);
  expect(emptyResult.toolCalls).toHaveLength(0);
});
|
|
86
|
+
|
|
87
|
+
test('handles assistant event with no content array', () => {
  // Assistant events without message.content, or without a message at all,
  // still count as turns but contribute no tool calls.
  const contentlessEvents = ['{"type":"assistant","message":{}}', '{"type":"assistant"}'];

  const { turnCount, toolCalls } = parseNDJSON(contentlessEvents);
  expect(turnCount).toBe(2);
  expect(toolCalls).toHaveLength(0);
});
|
|
96
|
+
});
|