@runchr/gstack-antigravity 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @runchr/gstack-antigravity might be problematic. Click here for more details.
- package/.agents/skills/gstack/.agents/skills/gstack/SKILL.md +651 -0
- package/.agents/skills/gstack/.agents/skills/gstack-autoplan/SKILL.md +678 -0
- package/.agents/skills/gstack/.agents/skills/gstack-benchmark/SKILL.md +482 -0
- package/.agents/skills/gstack/.agents/skills/gstack-browse/SKILL.md +511 -0
- package/.agents/skills/gstack/.agents/skills/gstack-canary/SKILL.md +486 -0
- package/.agents/skills/gstack/.agents/skills/gstack-careful/SKILL.md +50 -0
- package/.agents/skills/gstack/.agents/skills/gstack-cso/SKILL.md +607 -0
- package/.agents/skills/gstack/.agents/skills/gstack-design-consultation/SKILL.md +615 -0
- package/.agents/skills/gstack/.agents/skills/gstack-design-review/SKILL.md +988 -0
- package/.agents/skills/gstack/.agents/skills/gstack-document-release/SKILL.md +604 -0
- package/.agents/skills/gstack/.agents/skills/gstack-freeze/SKILL.md +67 -0
- package/.agents/skills/gstack/.agents/skills/gstack-guard/SKILL.md +62 -0
- package/.agents/skills/gstack/.agents/skills/gstack-investigate/SKILL.md +415 -0
- package/.agents/skills/gstack/.agents/skills/gstack-land-and-deploy/SKILL.md +873 -0
- package/.agents/skills/gstack/.agents/skills/gstack-office-hours/SKILL.md +986 -0
- package/.agents/skills/gstack/.agents/skills/gstack-plan-ceo-review/SKILL.md +1268 -0
- package/.agents/skills/gstack/.agents/skills/gstack-plan-design-review/SKILL.md +668 -0
- package/.agents/skills/gstack/.agents/skills/gstack-plan-eng-review/SKILL.md +826 -0
- package/.agents/skills/gstack/.agents/skills/gstack-qa/SKILL.md +1006 -0
- package/.agents/skills/gstack/.agents/skills/gstack-qa-only/SKILL.md +626 -0
- package/.agents/skills/gstack/.agents/skills/gstack-retro/SKILL.md +1065 -0
- package/.agents/skills/gstack/.agents/skills/gstack-review/SKILL.md +704 -0
- package/.agents/skills/gstack/.agents/skills/gstack-setup-browser-cookies/SKILL.md +325 -0
- package/.agents/skills/gstack/.agents/skills/gstack-setup-deploy/SKILL.md +450 -0
- package/.agents/skills/gstack/.agents/skills/gstack-ship/SKILL.md +1312 -0
- package/.agents/skills/gstack/.agents/skills/gstack-unfreeze/SKILL.md +36 -0
- package/.agents/skills/gstack/.agents/skills/gstack-upgrade/SKILL.md +220 -0
- package/.agents/skills/gstack/.env.example +5 -0
- package/.agents/skills/gstack/.github/workflows/skill-docs.yml +17 -0
- package/.agents/skills/gstack/AGENTS.md +49 -0
- package/.agents/skills/gstack/ARCHITECTURE.md +359 -0
- package/.agents/skills/gstack/BROWSER.md +271 -0
- package/.agents/skills/gstack/CHANGELOG.md +800 -0
- package/.agents/skills/gstack/CLAUDE.md +284 -0
- package/.agents/skills/gstack/CONTRIBUTING.md +370 -0
- package/.agents/skills/gstack/ETHOS.md +129 -0
- package/.agents/skills/gstack/LICENSE +21 -0
- package/.agents/skills/gstack/README.md +228 -0
- package/.agents/skills/gstack/SKILL.md +657 -0
- package/.agents/skills/gstack/SKILL.md.tmpl +281 -0
- package/.agents/skills/gstack/TODOS.md +564 -0
- package/.agents/skills/gstack/VERSION +1 -0
- package/.agents/skills/gstack/autoplan/SKILL.md +689 -0
- package/.agents/skills/gstack/autoplan/SKILL.md.tmpl +416 -0
- package/.agents/skills/gstack/benchmark/SKILL.md +489 -0
- package/.agents/skills/gstack/benchmark/SKILL.md.tmpl +233 -0
- package/.agents/skills/gstack/bin/dev-setup +68 -0
- package/.agents/skills/gstack/bin/dev-teardown +56 -0
- package/.agents/skills/gstack/bin/gstack-analytics +191 -0
- package/.agents/skills/gstack/bin/gstack-community-dashboard +113 -0
- package/.agents/skills/gstack/bin/gstack-config +38 -0
- package/.agents/skills/gstack/bin/gstack-diff-scope +71 -0
- package/.agents/skills/gstack/bin/gstack-global-discover.ts +591 -0
- package/.agents/skills/gstack/bin/gstack-repo-mode +93 -0
- package/.agents/skills/gstack/bin/gstack-review-log +9 -0
- package/.agents/skills/gstack/bin/gstack-review-read +12 -0
- package/.agents/skills/gstack/bin/gstack-slug +15 -0
- package/.agents/skills/gstack/bin/gstack-telemetry-log +158 -0
- package/.agents/skills/gstack/bin/gstack-telemetry-sync +127 -0
- package/.agents/skills/gstack/bin/gstack-update-check +196 -0
- package/.agents/skills/gstack/browse/SKILL.md +517 -0
- package/.agents/skills/gstack/browse/SKILL.md.tmpl +141 -0
- package/.agents/skills/gstack/browse/bin/find-browse +21 -0
- package/.agents/skills/gstack/browse/bin/remote-slug +14 -0
- package/.agents/skills/gstack/browse/scripts/build-node-server.sh +48 -0
- package/.agents/skills/gstack/browse/src/browser-manager.ts +634 -0
- package/.agents/skills/gstack/browse/src/buffers.ts +137 -0
- package/.agents/skills/gstack/browse/src/bun-polyfill.cjs +109 -0
- package/.agents/skills/gstack/browse/src/cli.ts +420 -0
- package/.agents/skills/gstack/browse/src/commands.ts +111 -0
- package/.agents/skills/gstack/browse/src/config.ts +150 -0
- package/.agents/skills/gstack/browse/src/cookie-import-browser.ts +417 -0
- package/.agents/skills/gstack/browse/src/cookie-picker-routes.ts +207 -0
- package/.agents/skills/gstack/browse/src/cookie-picker-ui.ts +541 -0
- package/.agents/skills/gstack/browse/src/find-browse.ts +61 -0
- package/.agents/skills/gstack/browse/src/meta-commands.ts +269 -0
- package/.agents/skills/gstack/browse/src/platform.ts +17 -0
- package/.agents/skills/gstack/browse/src/read-commands.ts +335 -0
- package/.agents/skills/gstack/browse/src/server.ts +369 -0
- package/.agents/skills/gstack/browse/src/snapshot.ts +398 -0
- package/.agents/skills/gstack/browse/src/url-validation.ts +91 -0
- package/.agents/skills/gstack/browse/src/write-commands.ts +352 -0
- package/.agents/skills/gstack/browse/test/bun-polyfill.test.ts +72 -0
- package/.agents/skills/gstack/browse/test/commands.test.ts +1836 -0
- package/.agents/skills/gstack/browse/test/config.test.ts +250 -0
- package/.agents/skills/gstack/browse/test/cookie-import-browser.test.ts +397 -0
- package/.agents/skills/gstack/browse/test/cookie-picker-routes.test.ts +205 -0
- package/.agents/skills/gstack/browse/test/find-browse.test.ts +50 -0
- package/.agents/skills/gstack/browse/test/fixtures/basic.html +33 -0
- package/.agents/skills/gstack/browse/test/fixtures/cursor-interactive.html +22 -0
- package/.agents/skills/gstack/browse/test/fixtures/dialog.html +15 -0
- package/.agents/skills/gstack/browse/test/fixtures/empty.html +2 -0
- package/.agents/skills/gstack/browse/test/fixtures/forms.html +55 -0
- package/.agents/skills/gstack/browse/test/fixtures/qa-eval-checkout.html +108 -0
- package/.agents/skills/gstack/browse/test/fixtures/qa-eval-spa.html +98 -0
- package/.agents/skills/gstack/browse/test/fixtures/qa-eval.html +51 -0
- package/.agents/skills/gstack/browse/test/fixtures/responsive.html +49 -0
- package/.agents/skills/gstack/browse/test/fixtures/snapshot.html +55 -0
- package/.agents/skills/gstack/browse/test/fixtures/spa.html +24 -0
- package/.agents/skills/gstack/browse/test/fixtures/states.html +17 -0
- package/.agents/skills/gstack/browse/test/fixtures/upload.html +25 -0
- package/.agents/skills/gstack/browse/test/gstack-config.test.ts +125 -0
- package/.agents/skills/gstack/browse/test/gstack-update-check.test.ts +467 -0
- package/.agents/skills/gstack/browse/test/handoff.test.ts +235 -0
- package/.agents/skills/gstack/browse/test/path-validation.test.ts +63 -0
- package/.agents/skills/gstack/browse/test/platform.test.ts +37 -0
- package/.agents/skills/gstack/browse/test/snapshot.test.ts +467 -0
- package/.agents/skills/gstack/browse/test/test-server.ts +57 -0
- package/.agents/skills/gstack/browse/test/url-validation.test.ts +72 -0
- package/.agents/skills/gstack/canary/SKILL.md +493 -0
- package/.agents/skills/gstack/canary/SKILL.md.tmpl +220 -0
- package/.agents/skills/gstack/careful/SKILL.md +59 -0
- package/.agents/skills/gstack/careful/SKILL.md.tmpl +57 -0
- package/.agents/skills/gstack/careful/bin/check-careful.sh +112 -0
- package/.agents/skills/gstack/codex/SKILL.md +677 -0
- package/.agents/skills/gstack/codex/SKILL.md.tmpl +356 -0
- package/.agents/skills/gstack/conductor.json +6 -0
- package/.agents/skills/gstack/cso/SKILL.md +615 -0
- package/.agents/skills/gstack/cso/SKILL.md.tmpl +376 -0
- package/.agents/skills/gstack/design-consultation/SKILL.md +625 -0
- package/.agents/skills/gstack/design-consultation/SKILL.md.tmpl +369 -0
- package/.agents/skills/gstack/design-review/SKILL.md +998 -0
- package/.agents/skills/gstack/design-review/SKILL.md.tmpl +262 -0
- package/.agents/skills/gstack/docs/images/github-2013.png +0 -0
- package/.agents/skills/gstack/docs/images/github-2026.png +0 -0
- package/.agents/skills/gstack/docs/skills.md +877 -0
- package/.agents/skills/gstack/document-release/SKILL.md +613 -0
- package/.agents/skills/gstack/document-release/SKILL.md.tmpl +357 -0
- package/.agents/skills/gstack/freeze/SKILL.md +82 -0
- package/.agents/skills/gstack/freeze/SKILL.md.tmpl +80 -0
- package/.agents/skills/gstack/freeze/bin/check-freeze.sh +68 -0
- package/.agents/skills/gstack/gstack-upgrade/SKILL.md +226 -0
- package/.agents/skills/gstack/gstack-upgrade/SKILL.md.tmpl +224 -0
- package/.agents/skills/gstack/guard/SKILL.md +82 -0
- package/.agents/skills/gstack/guard/SKILL.md.tmpl +80 -0
- package/.agents/skills/gstack/investigate/SKILL.md +435 -0
- package/.agents/skills/gstack/investigate/SKILL.md.tmpl +196 -0
- package/.agents/skills/gstack/land-and-deploy/SKILL.md +880 -0
- package/.agents/skills/gstack/land-and-deploy/SKILL.md.tmpl +575 -0
- package/.agents/skills/gstack/office-hours/SKILL.md +996 -0
- package/.agents/skills/gstack/office-hours/SKILL.md.tmpl +624 -0
- package/.agents/skills/gstack/package.json +55 -0
- package/.agents/skills/gstack/plan-ceo-review/SKILL.md +1277 -0
- package/.agents/skills/gstack/plan-ceo-review/SKILL.md.tmpl +838 -0
- package/.agents/skills/gstack/plan-design-review/SKILL.md +676 -0
- package/.agents/skills/gstack/plan-design-review/SKILL.md.tmpl +314 -0
- package/.agents/skills/gstack/plan-eng-review/SKILL.md +836 -0
- package/.agents/skills/gstack/plan-eng-review/SKILL.md.tmpl +279 -0
- package/.agents/skills/gstack/qa/SKILL.md +1016 -0
- package/.agents/skills/gstack/qa/SKILL.md.tmpl +316 -0
- package/.agents/skills/gstack/qa/references/issue-taxonomy.md +85 -0
- package/.agents/skills/gstack/qa/templates/qa-report-template.md +126 -0
- package/.agents/skills/gstack/qa-only/SKILL.md +633 -0
- package/.agents/skills/gstack/qa-only/SKILL.md.tmpl +101 -0
- package/.agents/skills/gstack/retro/SKILL.md +1072 -0
- package/.agents/skills/gstack/retro/SKILL.md.tmpl +833 -0
- package/.agents/skills/gstack/review/SKILL.md +849 -0
- package/.agents/skills/gstack/review/SKILL.md.tmpl +259 -0
- package/.agents/skills/gstack/review/TODOS-format.md +62 -0
- package/.agents/skills/gstack/review/checklist.md +190 -0
- package/.agents/skills/gstack/review/design-checklist.md +132 -0
- package/.agents/skills/gstack/review/greptile-triage.md +220 -0
- package/.agents/skills/gstack/scripts/analytics.ts +190 -0
- package/.agents/skills/gstack/scripts/dev-skill.ts +82 -0
- package/.agents/skills/gstack/scripts/eval-compare.ts +96 -0
- package/.agents/skills/gstack/scripts/eval-list.ts +116 -0
- package/.agents/skills/gstack/scripts/eval-select.ts +86 -0
- package/.agents/skills/gstack/scripts/eval-summary.ts +187 -0
- package/.agents/skills/gstack/scripts/eval-watch.ts +172 -0
- package/.agents/skills/gstack/scripts/gen-skill-docs.ts +2414 -0
- package/.agents/skills/gstack/scripts/skill-check.ts +167 -0
- package/.agents/skills/gstack/setup +269 -0
- package/.agents/skills/gstack/setup-browser-cookies/SKILL.md +330 -0
- package/.agents/skills/gstack/setup-browser-cookies/SKILL.md.tmpl +74 -0
- package/.agents/skills/gstack/setup-deploy/SKILL.md +459 -0
- package/.agents/skills/gstack/setup-deploy/SKILL.md.tmpl +220 -0
- package/.agents/skills/gstack/ship/SKILL.md +1457 -0
- package/.agents/skills/gstack/ship/SKILL.md.tmpl +528 -0
- package/.agents/skills/gstack/supabase/config.sh +10 -0
- package/.agents/skills/gstack/supabase/functions/community-pulse/index.ts +59 -0
- package/.agents/skills/gstack/supabase/functions/telemetry-ingest/index.ts +135 -0
- package/.agents/skills/gstack/supabase/functions/update-check/index.ts +37 -0
- package/.agents/skills/gstack/supabase/migrations/001_telemetry.sql +89 -0
- package/.agents/skills/gstack/test/analytics.test.ts +277 -0
- package/.agents/skills/gstack/test/codex-e2e.test.ts +197 -0
- package/.agents/skills/gstack/test/fixtures/coverage-audit-fixture.ts +76 -0
- package/.agents/skills/gstack/test/fixtures/eval-baselines.json +7 -0
- package/.agents/skills/gstack/test/fixtures/qa-eval-checkout-ground-truth.json +43 -0
- package/.agents/skills/gstack/test/fixtures/qa-eval-ground-truth.json +43 -0
- package/.agents/skills/gstack/test/fixtures/qa-eval-spa-ground-truth.json +43 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-design-slop.css +86 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-design-slop.html +41 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-enum-diff.rb +30 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-enum.rb +27 -0
- package/.agents/skills/gstack/test/fixtures/review-eval-vuln.rb +14 -0
- package/.agents/skills/gstack/test/gemini-e2e.test.ts +173 -0
- package/.agents/skills/gstack/test/gen-skill-docs.test.ts +1049 -0
- package/.agents/skills/gstack/test/global-discover.test.ts +187 -0
- package/.agents/skills/gstack/test/helpers/codex-session-runner.ts +282 -0
- package/.agents/skills/gstack/test/helpers/e2e-helpers.ts +239 -0
- package/.agents/skills/gstack/test/helpers/eval-store.test.ts +548 -0
- package/.agents/skills/gstack/test/helpers/eval-store.ts +689 -0
- package/.agents/skills/gstack/test/helpers/gemini-session-runner.test.ts +104 -0
- package/.agents/skills/gstack/test/helpers/gemini-session-runner.ts +201 -0
- package/.agents/skills/gstack/test/helpers/llm-judge.ts +130 -0
- package/.agents/skills/gstack/test/helpers/observability.test.ts +283 -0
- package/.agents/skills/gstack/test/helpers/session-runner.test.ts +96 -0
- package/.agents/skills/gstack/test/helpers/session-runner.ts +357 -0
- package/.agents/skills/gstack/test/helpers/skill-parser.ts +206 -0
- package/.agents/skills/gstack/test/helpers/touchfiles.ts +260 -0
- package/.agents/skills/gstack/test/hook-scripts.test.ts +373 -0
- package/.agents/skills/gstack/test/skill-e2e-browse.test.ts +293 -0
- package/.agents/skills/gstack/test/skill-e2e-deploy.test.ts +279 -0
- package/.agents/skills/gstack/test/skill-e2e-design.test.ts +614 -0
- package/.agents/skills/gstack/test/skill-e2e-plan.test.ts +538 -0
- package/.agents/skills/gstack/test/skill-e2e-qa-bugs.test.ts +194 -0
- package/.agents/skills/gstack/test/skill-e2e-qa-workflow.test.ts +412 -0
- package/.agents/skills/gstack/test/skill-e2e-review.test.ts +535 -0
- package/.agents/skills/gstack/test/skill-e2e-workflow.test.ts +586 -0
- package/.agents/skills/gstack/test/skill-e2e.test.ts +3325 -0
- package/.agents/skills/gstack/test/skill-llm-eval.test.ts +787 -0
- package/.agents/skills/gstack/test/skill-parser.test.ts +179 -0
- package/.agents/skills/gstack/test/skill-routing-e2e.test.ts +605 -0
- package/.agents/skills/gstack/test/skill-validation.test.ts +1520 -0
- package/.agents/skills/gstack/test/telemetry.test.ts +278 -0
- package/.agents/skills/gstack/test/touchfiles.test.ts +262 -0
- package/.agents/skills/gstack/unfreeze/SKILL.md +40 -0
- package/.agents/skills/gstack/unfreeze/SKILL.md.tmpl +38 -0
- package/README.md +12 -7
- package/README_KO.md +12 -6
- package/package.json +3 -2
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# Greptile Comment Triage
|
|
2
|
+
|
|
3
|
+
Shared reference for fetching, filtering, and classifying Greptile review comments on GitHub PRs. Both `/review` (Step 2.5) and `/ship` (Step 3.75) reference this document.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Fetch
|
|
8
|
+
|
|
9
|
+
Run these commands to detect the PR and fetch comments. Both API calls run in parallel.
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
REPO=$(gh repo view --json nameWithOwner --jq '.nameWithOwner' 2>/dev/null)
|
|
13
|
+
PR_NUMBER=$(gh pr view --json number --jq '.number' 2>/dev/null)
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
**If either fails or is empty:** Skip Greptile triage silently. This integration is additive — the workflow works without it.
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Fetch line-level review comments AND top-level PR comments in parallel
|
|
20
|
+
gh api repos/$REPO/pulls/$PR_NUMBER/comments \
|
|
21
|
+
--jq '.[] | select(.user.login == "greptile-apps[bot]") | select(.position != null) | {id: .id, path: .path, line: .line, body: .body, html_url: .html_url, source: "line-level"}' > /tmp/greptile_line.json &
|
|
22
|
+
gh api repos/$REPO/issues/$PR_NUMBER/comments \
|
|
23
|
+
--jq '.[] | select(.user.login == "greptile-apps[bot]") | {id: .id, body: .body, html_url: .html_url, source: "top-level"}' > /tmp/greptile_top.json &
|
|
24
|
+
wait
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
**If either API call fails, or there are zero Greptile comments across both endpoints:** Skip silently.
|
|
28
|
+
|
|
29
|
+
The `position != null` filter on line-level comments automatically skips outdated comments from force-pushed code.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Suppressions Check
|
|
34
|
+
|
|
35
|
+
Derive the project-specific history path:
|
|
36
|
+
```bash
|
|
37
|
+
REMOTE_SLUG=$(browse/bin/remote-slug 2>/dev/null || ~/.claude/skills/gstack/browse/bin/remote-slug 2>/dev/null || basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)")
|
|
38
|
+
PROJECT_HISTORY="$HOME/.gstack/projects/$REMOTE_SLUG/greptile-history.md"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Read `$PROJECT_HISTORY` if it exists (per-project suppressions). Each line records a previous triage outcome:
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
<date> | <repo> | <type:fp|fix|already-fixed> | <file-pattern> | <category>
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**Categories** (fixed set): `race-condition`, `null-check`, `error-handling`, `style`, `type-safety`, `security`, `performance`, `correctness`, `other`
|
|
48
|
+
|
|
49
|
+
Match each fetched comment against entries where:
|
|
50
|
+
- `type == fp` (only suppress known false positives, not previously fixed real issues)
|
|
51
|
+
- `repo` matches the current repo
|
|
52
|
+
- `file-pattern` matches the comment's file path
|
|
53
|
+
- `category` matches the issue type in the comment
|
|
54
|
+
|
|
55
|
+
Skip matched comments as **SUPPRESSED**.
|
|
56
|
+
|
|
57
|
+
If the history file doesn't exist or has unparseable lines, skip those lines and continue — never fail on a malformed history file.
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Classify
|
|
62
|
+
|
|
63
|
+
For each non-suppressed comment:
|
|
64
|
+
|
|
65
|
+
1. **Line-level comments:** Read the file at the indicated `path:line` and surrounding context (±10 lines)
|
|
66
|
+
2. **Top-level comments:** Read the full comment body
|
|
67
|
+
3. Cross-reference the comment against the full diff (`git diff origin/main`) and the review checklist
|
|
68
|
+
4. Classify:
|
|
69
|
+
- **VALID & ACTIONABLE** — a real bug, race condition, security issue, or correctness problem that exists in the current code
|
|
70
|
+
- **VALID BUT ALREADY FIXED** — a real issue that was addressed in a subsequent commit on the branch. Identify the fixing commit SHA.
|
|
71
|
+
- **FALSE POSITIVE** — the comment misunderstands the code, flags something handled elsewhere, or is stylistic noise
|
|
72
|
+
- **SUPPRESSED** — already filtered in the suppressions check above
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Reply APIs
|
|
77
|
+
|
|
78
|
+
When replying to Greptile comments, use the correct endpoint based on comment source:
|
|
79
|
+
|
|
80
|
+
**Line-level comments** (from `pulls/$PR/comments`):
|
|
81
|
+
```bash
|
|
82
|
+
gh api repos/$REPO/pulls/$PR_NUMBER/comments/$COMMENT_ID/replies \
|
|
83
|
+
-f body="<reply text>"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Top-level comments** (from `issues/$PR/comments`):
|
|
87
|
+
```bash
|
|
88
|
+
gh api repos/$REPO/issues/$PR_NUMBER/comments \
|
|
89
|
+
-f body="<reply text>"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**If a reply POST fails** (e.g., PR was closed, no write permission): warn and continue. Do not stop the workflow for a failed reply.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Reply Templates
|
|
97
|
+
|
|
98
|
+
Use these templates for every Greptile reply. Always include concrete evidence — never post vague replies.
|
|
99
|
+
|
|
100
|
+
### Tier 1 (First response) — Friendly, evidence-included
|
|
101
|
+
|
|
102
|
+
**For FIXES (user chose to fix the issue):**
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
**Fixed** in `<commit-sha>`.
|
|
106
|
+
|
|
107
|
+
\`\`\`diff
|
|
108
|
+
- <old problematic line(s)>
|
|
109
|
+
+ <new fixed line(s)>
|
|
110
|
+
\`\`\`
|
|
111
|
+
|
|
112
|
+
**Why:** <1-sentence explanation of what was wrong and how the fix addresses it>
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**For ALREADY FIXED (issue addressed in a prior commit on the branch):**
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
**Already fixed** in `<commit-sha>`.
|
|
119
|
+
|
|
120
|
+
**What was done:** <1-2 sentences describing how the existing commit addresses this issue>
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
**For FALSE POSITIVES (the comment is incorrect):**
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
**Not a bug.** <1 sentence directly stating why this is incorrect>
|
|
127
|
+
|
|
128
|
+
**Evidence:**
|
|
129
|
+
- <specific code reference showing the pattern is safe/correct>
|
|
130
|
+
- <e.g., "The nil check is handled by `ActiveRecord::FinderMethods#find` which raises RecordNotFound, not nil">
|
|
131
|
+
|
|
132
|
+
**Suggested re-rank:** This appears to be a `<style|noise|misread>` issue, not a `<what Greptile called it>`. Consider lowering severity.
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Tier 2 (Greptile re-flags after prior reply) — Firm, overwhelming evidence
|
|
136
|
+
|
|
137
|
+
Use Tier 2 when escalation detection (below) identifies a prior GStack reply on the same thread. Include maximum evidence to close the discussion.
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
**This has been reviewed and confirmed as [intentional/already-fixed/not-a-bug].**
|
|
141
|
+
|
|
142
|
+
\`\`\`diff
|
|
143
|
+
<full relevant diff showing the change or safe pattern>
|
|
144
|
+
\`\`\`
|
|
145
|
+
|
|
146
|
+
**Evidence chain:**
|
|
147
|
+
1. <file:line permalink showing the safe pattern or fix>
|
|
148
|
+
2. <commit SHA where it was addressed, if applicable>
|
|
149
|
+
3. <architecture rationale or design decision, if applicable>
|
|
150
|
+
|
|
151
|
+
**Suggested re-rank:** Please recalibrate — this is a `<actual category>` issue, not `<claimed category>`. [Link to specific file change permalink if helpful]
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Escalation Detection
|
|
157
|
+
|
|
158
|
+
Before composing a reply, check if a prior GStack reply already exists on this comment thread:
|
|
159
|
+
|
|
160
|
+
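1. **For line-level comments:** List the PR's review comments via `gh api repos/$REPO/pulls/$PR_NUMBER/comments` and select the entries whose `in_reply_to_id` equals `$COMMENT_ID` (the `.../comments/$COMMENT_ID/replies` endpoint only accepts POST for creating a reply). Check if any reply body contains GStack markers: `**Fixed**`, `**Not a bug.**`, `**Already fixed**`.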
|
|
161
|
+
|
|
162
|
+
2. **For top-level comments:** Scan the fetched issue comments for replies posted after the Greptile comment that contain GStack markers.
|
|
163
|
+
|
|
164
|
+
3. **If a prior GStack reply exists AND Greptile posted again on the same file+category:** Use Tier 2 (firm) templates.
|
|
165
|
+
|
|
166
|
+
4. **If no prior GStack reply exists:** Use Tier 1 (friendly) templates.
|
|
167
|
+
|
|
168
|
+
If escalation detection fails (API error, ambiguous thread): default to Tier 1. Never escalate on ambiguity.
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## Severity Assessment & Re-ranking
|
|
173
|
+
|
|
174
|
+
When classifying comments, also assess whether Greptile's implied severity matches reality:
|
|
175
|
+
|
|
176
|
+
- If Greptile flags something as a **security/correctness/race-condition** issue but it's actually a **style/performance** nit: include `**Suggested re-rank:**` in the reply requesting the category be corrected.
|
|
177
|
+
- If Greptile flags a low-severity style issue as if it were critical: push back in the reply.
|
|
178
|
+
- Always be specific about why the re-ranking is warranted — cite code and line numbers, not opinions.
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## History File Writes
|
|
183
|
+
|
|
184
|
+
Before writing, ensure both directories exist:
|
|
185
|
+
```bash
|
|
186
|
+
REMOTE_SLUG=$(browse/bin/remote-slug 2>/dev/null || ~/.claude/skills/gstack/browse/bin/remote-slug 2>/dev/null || basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)")
|
|
187
|
+
mkdir -p "$HOME/.gstack/projects/$REMOTE_SLUG"
|
|
188
|
+
mkdir -p ~/.gstack
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Append one line per triage outcome to **both** files (per-project for suppressions, global for retro):
|
|
192
|
+
- `~/.gstack/projects/$REMOTE_SLUG/greptile-history.md` (per-project)
|
|
193
|
+
- `~/.gstack/greptile-history.md` (global aggregate)
|
|
194
|
+
|
|
195
|
+
Format:
|
|
196
|
+
```
|
|
197
|
+
<YYYY-MM-DD> | <owner/repo> | <type> | <file-pattern> | <category>
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Example entries:
|
|
201
|
+
```
|
|
202
|
+
2026-03-13 | garrytan/myapp | fp | app/services/auth_service.rb | race-condition
|
|
203
|
+
2026-03-13 | garrytan/myapp | fix | app/models/user.rb | null-check
|
|
204
|
+
2026-03-13 | garrytan/myapp | already-fixed | lib/payments.rb | error-handling
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## Output Format
|
|
210
|
+
|
|
211
|
+
Include a Greptile summary in the output header:
|
|
212
|
+
```
|
|
213
|
+
+ N Greptile comments (X valid, Y fixed, Z FP)
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
For each classified comment, show:
|
|
217
|
+
- Classification tag: `[VALID]`, `[FIXED]`, `[FALSE POSITIVE]`, `[SUPPRESSED]`
|
|
218
|
+
- File:line reference (for line-level) or `[top-level]` (for top-level)
|
|
219
|
+
- One-line body summary
|
|
220
|
+
- Permalink URL (the `html_url` field)
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* analytics — CLI for viewing gstack skill usage statistics.
|
|
4
|
+
*
|
|
5
|
+
* Reads ~/.gstack/analytics/skill-usage.jsonl and displays:
|
|
6
|
+
* - Top skills by invocation count
|
|
7
|
+
* - Per-repo skill breakdown
|
|
8
|
+
* - Safety hook fire events
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* bun run scripts/analytics.ts [--period 7d|30d|all]
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import * as fs from 'fs';
|
|
15
|
+
import * as path from 'path';
|
|
16
|
+
import * as os from 'os';
|
|
17
|
+
|
|
18
|
+
/**
 * One record from the skill-usage JSONL log.
 */
export interface AnalyticsEvent {
  /** Skill name; the report prints it with a leading "/" and counts invocations per skill. */
  skill: string;
  /** Timestamp string; parsed with `new Date(ts)` when filtering by period. */
  ts: string;
  /** Repository label used to group the "By Repo" section of the report. */
  repo: string;
  /** Event kind; the value 'hook_fire' marks a safety-hook event, anything else counts as a skill invocation. */
  event?: string;
  /** Hook pattern name; 'hook_fire' events are counted per pattern. */
  pattern?: string;
}
|
|
25
|
+
|
|
26
|
+
// Location of the JSONL usage log: <home dir>/.gstack/analytics/skill-usage.jsonl
const ANALYTICS_FILE = path.join(os.homedir(), '.gstack', 'analytics', 'skill-usage.jsonl');
|
|
27
|
+
|
|
28
|
+
/**
 * Turn JSONL text into a list of analytics events.
 *
 * Each line is trimmed; blank lines, lines that are not valid JSON, and JSON
 * values that are not a non-null object with a string `ts` are dropped
 * without raising.
 *
 * @param content - Raw file contents, one JSON document per line.
 * @returns The accepted events, in file order.
 */
export function parseJSONL(content: string): AnalyticsEvent[] {
  // Only `ts` is validated; other fields are taken as-is from the record.
  const isEvent = (value: unknown): value is AnalyticsEvent =>
    typeof value === 'object' &&
    value !== null &&
    typeof (value as { ts?: unknown }).ts === 'string';

  return content
    .split('\n')
    .map(raw => raw.trim())
    .filter(text => text.length > 0)
    .flatMap(text => {
      try {
        const parsed: unknown = JSON.parse(text);
        return isEvent(parsed) ? [parsed] : [];
      } catch {
        // Malformed line: ignore it and keep going.
        return [];
      }
    });
}
|
|
47
|
+
|
|
48
|
+
/**
 * Keep only the events that fall inside a trailing time window.
 *
 * `period` is either "all" or "<N>d" (N days back from now). Any other
 * string is treated like "all" and the input array is returned unchanged.
 * Events whose `ts` does not parse as a date are excluded from windowed
 * results.
 *
 * @param events - Events to filter.
 * @param period - "all" or a day count such as "7d" / "30d".
 * @returns The same array for "all"/unrecognised periods, otherwise a new array.
 */
export function filterByPeriod(events: AnalyticsEvent[], period: string): AnalyticsEvent[] {
  const parsed = /^(\d+)d$/.exec(period);
  if (period === 'all' || parsed === null) {
    return events;
  }

  const windowMs = parseInt(parsed[1], 10) * 24 * 60 * 60 * 1000;
  // Go through Date so an out-of-range cutoff becomes NaN and matches nothing.
  const cutoffMs = new Date(Date.now() - windowMs).getTime();

  const kept: AnalyticsEvent[] = [];
  for (const event of events) {
    const stampMs = new Date(event.ts).getTime();
    if (!isNaN(stampMs) && stampMs >= cutoffMs) {
      kept.push(event);
    }
  }
  return kept;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Format a plain-text usage report from a list of events.
 *
 * Sections, in order: title and rule, period label, "Top Skills" (descending
 * count), "By Repo" (repos alphabetical, skills by descending count),
 * "Safety Hook Events" (descending count) and a totals line. A section is
 * omitted when it has no entries.
 *
 * Events whose `event` is 'hook_fire' count as hook fires; every other event
 * counts as a skill invocation.
 *
 * @param events - Events to summarise (already filtered by the caller).
 * @param period - "all" or an "<N>d" string; used only for the period label.
 * @returns The report as a single newline-joined string.
 */
export function formatReport(events: AnalyticsEvent[], period: string = 'all'): string {
  // Records read from disk are not guaranteed to carry `skill`/`repo`. A missing
  // value used to throw on `.length` / `.localeCompare`, so bucket it under
  // "unknown" and coerce everything else to a string.
  const keyOf = (value: unknown): string => (value == null ? 'unknown' : String(value));
  const bump = (counts: Map<string, number>, key: string): void => {
    counts.set(key, (counts.get(key) || 0) + 1);
  };
  const byCountDesc = (a: [string, number], b: [string, number]): number => b[1] - a[1];
  const plural = (n: number): string => (n === 1 ? '' : 's');
  // Dot leader that pads a row toward 25 columns, never fewer than two dots.
  const leader = (label: string, suffix: string): string =>
    ' ' + '.'.repeat(Math.max(2, 25 - label.length - suffix.length)) + ' ';

  const skillEvents = events.filter(e => e.event !== 'hook_fire');
  const hookEvents = events.filter(e => e.event === 'hook_fire');

  const lines: string[] = [];
  lines.push('gstack skill usage analytics');
  lines.push('\u2550'.repeat(39));
  lines.push('');

  const periodLabel = period === 'all' ? 'all time' : `last ${period.replace('d', ' days')}`;
  lines.push(`Period: ${periodLabel}`);

  // Top Skills
  const skillCounts = new Map<string, number>();
  for (const e of skillEvents) {
    bump(skillCounts, keyOf(e.skill));
  }

  if (skillCounts.size > 0) {
    lines.push('');
    lines.push('Top Skills');

    for (const [name, count] of [...skillCounts.entries()].sort(byCountDesc)) {
      const label = `/${name}`;
      const suffix = `${count} invocation${plural(count)}`;
      lines.push(` ${label}${leader(label, suffix)}${suffix}`);
    }
  }

  // By Repo
  const repoSkills = new Map<string, Map<string, number>>();
  for (const e of skillEvents) {
    const repo = keyOf(e.repo);
    let perRepo = repoSkills.get(repo);
    if (!perRepo) {
      perRepo = new Map();
      repoSkills.set(repo, perRepo);
    }
    bump(perRepo, keyOf(e.skill));
  }

  if (repoSkills.size > 0) {
    lines.push('');
    lines.push('By Repo');

    const sortedRepos = [...repoSkills.entries()].sort((a, b) => a[0].localeCompare(b[0]));
    for (const [repo, skills] of sortedRepos) {
      const parts = [...skills.entries()].sort(byCountDesc).map(([s, c]) => `${s}(${c})`);
      lines.push(` ${repo}: ${parts.join(' ')}`);
    }
  }

  // Safety Hook Events (hook events without a pattern are counted only in the total)
  const hookCounts = new Map<string, number>();
  for (const e of hookEvents) {
    if (e.pattern) {
      bump(hookCounts, e.pattern);
    }
  }

  if (hookCounts.size > 0) {
    lines.push('');
    lines.push('Safety Hook Events');

    for (const [pattern, count] of [...hookCounts.entries()].sort(byCountDesc)) {
      const suffix = `${count} fire${plural(count)}`;
      lines.push(` ${pattern}${leader(pattern, suffix)}${suffix}`);
    }
  }

  // Total
  const totalSkills = skillEvents.length;
  const totalHooks = hookEvents.length;
  lines.push('');
  lines.push(`Total: ${totalSkills} skill invocation${plural(totalSkills)}, ${totalHooks} hook fire${plural(totalHooks)}`);

  return lines.join('\n');
}
|
|
154
|
+
|
|
155
|
+
/**
 * CLI entry point for the analytics report.
 *
 * Reads an optional `--period <value>` flag (default 'all'), loads the
 * analytics file, and prints the formatted report for that period. When the
 * file is missing, blank, or yields no parsed events, it prints
 * 'No analytics data found.' and exits with status 0.
 */
function main() {
  // Shared exit path for every "nothing to report" situation.
  const exitNoData = () => {
    console.log('No analytics data found.');
    process.exit(0);
  };

  // Parse --period flag. A trailing --period without a value is ignored, and
  // the value that follows a recognised flag is consumed (never re-examined).
  const argv = process.argv.slice(2);
  let period = 'all';
  let idx = 0;
  while (idx < argv.length) {
    const hasValue = idx + 1 < argv.length;
    if (argv[idx] === '--period' && hasValue) {
      period = argv[idx + 1];
      idx += 2;
    } else {
      idx += 1;
    }
  }

  // Read file
  if (!fs.existsSync(ANALYTICS_FILE)) exitNoData();

  const raw = fs.readFileSync(ANALYTICS_FILE, 'utf-8').trim();
  if (raw.length === 0) exitNoData();

  const events = parseJSONL(raw);
  if (events.length === 0) exitNoData();

  const inPeriod = filterByPeriod(events, period);
  console.log(formatReport(inPeriod, period));
}

// Only run the CLI when this file is the program entry point, not when imported.
if (import.meta.main) {
  main();
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* dev:skill — Watch mode for SKILL.md template development.
|
|
4
|
+
*
|
|
5
|
+
* Watches .tmpl files, regenerates SKILL.md files on change,
|
|
6
|
+
* validates all $B commands immediately.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { validateSkill } from '../test/helpers/skill-parser';
|
|
10
|
+
import { execSync } from 'child_process';
|
|
11
|
+
import * as fs from 'fs';
|
|
12
|
+
import * as path from 'path';
|
|
13
|
+
|
|
14
|
+
// Parent of the directory that import.meta.dir reports for this script.
const ROOT = path.resolve(import.meta.dir, '..');

// Template sources paired with the generated file each one produces.
// `tmpl` is an absolute path; `output` is relative to ROOT. The empty
// directory name stands for the top-level template.
const TEMPLATES = ['', 'browse'].map((dir) => ({
  tmpl: path.join(ROOT, dir, 'SKILL.md.tmpl'),
  output: path.posix.join(dir, 'SKILL.md'),
}));
|
|
20
|
+
|
|
21
|
+
/**
 * Regenerate the SKILL.md files, then validate the commands in each one.
 *
 * The generator script is run synchronously from ROOT. If it fails, the error
 * is printed and validation is skipped. Otherwise every TEMPLATES output that
 * exists on disk is passed to validateSkill and a one-line verdict is logged,
 * followed by one line per problem when any were found.
 */
function regenerateAndValidate() {
  // Regenerate
  try {
    // stdio 'pipe' captures the generator's output instead of echoing it;
    // on failure the captured stderr is read from the thrown error.
    execSync('bun run scripts/gen-skill-docs.ts', { cwd: ROOT, stdio: 'pipe' });
  } catch (err: any) {
    const reason = err.stderr?.toString().trim() || err.message;
    console.log(` [gen] ERROR: ${reason}`);
    return;
  }

  // Validate each generated file
  TEMPLATES.forEach(({ output }) => {
    const generated = path.join(ROOT, output);
    if (!fs.existsSync(generated)) return;

    const { valid, invalid, snapshotFlagErrors } = validateSkill(generated);

    const isClean = invalid.length === 0 && snapshotFlagErrors.length === 0;
    if (isClean) {
      console.log(` [check] \u2705 ${output} — ${valid.length} commands, all valid`);
      return;
    }

    console.log(` [check] \u274c ${output} (${valid.length} valid)`);
    invalid.forEach((bad) => {
      console.log(` Unknown command: '${bad.command}' at line ${bad.line}`);
    });
    snapshotFlagErrors.forEach((flagErr) => {
      console.log(` ${flagErr.error} at line ${flagErr.command.line}`);
    });
  });
}
|
|
53
|
+
|
|
54
|
+
// Initial run
|
|
55
|
+
console.log(' [watch] Watching *.md.tmpl files...');
regenerateAndValidate();

// Also watch commands.ts and snapshot.ts (source of truth changes)
const SOURCE_FILES = [
  path.join(ROOT, 'browse', 'src', 'commands.ts'),
  path.join(ROOT, 'browse', 'src', 'snapshot.ts'),
];

// fs.watch commonly reports a single save as several events in quick
// succession. Changes are collected for a short window so that one save
// triggers one regeneration rather than several back-to-back synchronous
// generator runs.
const DEBOUNCE_MS = 100;
const changedFiles = new Set<string>();
let flushTimer: ReturnType<typeof setTimeout> | undefined;

/** Report every file that changed during the window, then regenerate once. */
function flushChanges() {
  flushTimer = undefined;
  for (const file of changedFiles) {
    console.log(`\n [watch] ${path.relative(ROOT, file)} changed`);
  }
  changedFiles.clear();
  regenerateAndValidate();
}

/**
 * Watch one file and schedule a debounced regeneration when it changes.
 * Files that do not exist when the script starts are skipped.
 */
function watchForChanges(file: string) {
  if (!fs.existsSync(file)) return;
  fs.watch(file, () => {
    changedFiles.add(file);
    // Restart the window on every event so a burst collapses into one run.
    if (flushTimer !== undefined) clearTimeout(flushTimer);
    flushTimer = setTimeout(flushChanges, DEBOUNCE_MS);
  });
}

// Watch for changes
for (const { tmpl } of TEMPLATES) watchForChanges(tmpl);
for (const src of SOURCE_FILES) watchForChanges(src);

// The active watchers keep the process running until it is interrupted.
console.log(' [watch] Press Ctrl+C to stop\n');
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Compare two eval runs from ~/.gstack-dev/evals/
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* bun run eval:compare # compare two most recent of same tier
|
|
7
|
+
* bun run eval:compare <file> # compare file against its predecessor
|
|
8
|
+
* bun run eval:compare <file-a> <file-b> # compare two specific files
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import * as fs from 'fs';
|
|
12
|
+
import * as path from 'path';
|
|
13
|
+
import * as os from 'os';
|
|
14
|
+
import {
|
|
15
|
+
findPreviousRun,
|
|
16
|
+
compareEvalResults,
|
|
17
|
+
formatComparison,
|
|
18
|
+
} from '../test/helpers/eval-store';
|
|
19
|
+
import type { EvalResult } from '../test/helpers/eval-store';
|
|
20
|
+
|
|
21
|
+
const EVAL_DIR = path.join(os.homedir(), '.gstack-dev', 'evals');
|
|
22
|
+
|
|
23
|
+
/**
 * Load and parse one eval result file.
 *
 * @param filepath Absolute path, or a path relative to EVAL_DIR.
 * @returns The parsed JSON content of the file.
 *
 * Exits the process with status 1 (after printing the reason to stderr) when
 * the file does not exist, cannot be read, or does not contain valid JSON.
 */
function loadResult(filepath: string): EvalResult {
  // Resolve relative to EVAL_DIR if not absolute
  const resolved = path.isAbsolute(filepath) ? filepath : path.join(EVAL_DIR, filepath);
  if (!fs.existsSync(resolved)) {
    console.error(`File not found: ${resolved}`);
    process.exit(1);
  }
  try {
    return JSON.parse(fs.readFileSync(resolved, 'utf-8'));
  } catch (err: any) {
    // A truncated or hand-edited file should fail the same way a missing one
    // does: a short message naming the file, not an uncaught stack trace.
    console.error(`Could not read eval result ${resolved}: ${err.message}`);
    process.exit(1);
  }
}
|
|
32
|
+
|
|
33
|
+
const args = process.argv.slice(2);

/** Print an informational message and end the process with a success status. */
function exitQuietly(message: string): never {
  console.log(message);
  process.exit(0);
}

// The pair of result files to compare, chosen from the argument count below.
let beforeFile: string;
let afterFile: string;

switch (args.length) {
  case 2: {
    // Two explicit files
    [beforeFile, afterFile] = args;
    break;
  }
  case 1: {
    // One file — find its predecessor
    afterFile = args[0];
    const target = path.isAbsolute(afterFile) ? afterFile : path.join(EVAL_DIR, afterFile);
    const { tier, branch } = loadResult(target);
    const earlier = findPreviousRun(EVAL_DIR, tier, branch, target);
    if (!earlier) exitQuietly('No previous run found to compare against.');
    beforeFile = earlier;
    break;
  }
  default: {
    // No args — find two most recent of the same tier
    let newestFirst: string[];
    try {
      const jsonNames = fs.readdirSync(EVAL_DIR).filter((name) => name.endsWith('.json'));
      newestFirst = jsonNames.sort().reverse();
    } catch {
      exitQuietly('No eval runs yet. Run: EVALS=1 bun run test:evals');
    }

    if (newestFirst.length < 2) {
      exitQuietly('Need at least 2 eval runs to compare. Run evals again.');
    }

    // Most recent file (last in filename order)
    afterFile = path.join(EVAL_DIR, newestFirst[0]);
    const { tier, branch } = loadResult(afterFile);
    const earlier = findPreviousRun(EVAL_DIR, tier, branch, afterFile);
    if (!earlier) exitQuietly('No previous run of the same tier found to compare against.');
    beforeFile = earlier;
    break;
  }
}

const beforeResult = loadResult(beforeFile);
const afterResult = loadResult(afterFile);

// Warn if different tiers
const tiersDiffer = beforeResult.tier !== afterResult.tier;
if (tiersDiffer) {
  console.warn(`Warning: comparing different tiers (${beforeResult.tier} vs ${afterResult.tier})`);
}

// Warn on schema mismatch
const schemasDiffer = beforeResult.schema_version !== afterResult.schema_version;
if (schemasDiffer) {
  console.warn(`Warning: schema version mismatch (${beforeResult.schema_version} vs ${afterResult.schema_version})`);
}

const comparison = compareEvalResults(beforeResult, afterResult, beforeFile, afterFile);
console.log(formatComparison(comparison));
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* List eval runs from ~/.gstack-dev/evals/
|
|
4
|
+
*
|
|
5
|
+
* Usage: bun run eval:list [--branch <name>] [--tier e2e|llm-judge] [--limit N]
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import * as fs from 'fs';
|
|
9
|
+
import * as path from 'path';
|
|
10
|
+
import * as os from 'os';
|
|
11
|
+
|
|
12
|
+
const EVAL_DIR = path.join(os.homedir(), '.gstack-dev', 'evals');
|
|
13
|
+
|
|
14
|
+
// Parse args
|
|
15
|
+
const args = process.argv.slice(2);
let filterBranch: string | null = null;
let filterTier: string | null = null;
let limit = 20;

for (let i = 0; i < args.length; i++) {
  if (args[i] === '--branch' && args[i + 1]) { filterBranch = args[++i]; }
  else if (args[i] === '--tier' && args[i + 1]) { filterTier = args[++i]; }
  else if (args[i] === '--limit' && args[i + 1]) {
    // Only accept a non-negative integer. NaN would silently show zero rows
    // and a negative value would drop rows from the end of the list.
    const requested = Number.parseInt(args[++i], 10);
    if (Number.isInteger(requested) && requested >= 0) {
      limit = requested;
    } else {
      console.error(`Ignoring invalid --limit value; showing up to ${limit} runs.`);
    }
  }
}

// Read eval files
let files: string[];
try {
  files = fs.readdirSync(EVAL_DIR).filter(f => f.endsWith('.json'));
} catch {
  // An unreadable or missing directory is treated as "nothing recorded yet".
  console.log('No eval runs yet. Run: EVALS=1 bun run test:evals');
  process.exit(0);
}

if (files.length === 0) {
  console.log('No eval runs yet. Run: EVALS=1 bun run test:evals');
  process.exit(0);
}

// Parse top-level fields from each file
interface RunSummary {
  file: string;
  timestamp: string;
  branch: string;
  tier: string;
  version: string;
  passed: number;
  total: number;
  cost: number;
  duration: number;
  turns: number;
}

const runs: RunSummary[] = [];
for (const file of files) {
  try {
    const data = JSON.parse(fs.readFileSync(path.join(EVAL_DIR, file), 'utf-8'));
    if (filterBranch && data.branch !== filterBranch) continue;
    if (filterTier && data.tier !== filterTier) continue;
    // Turns are recorded per test; sum them for the run.
    const totalTurns = (data.tests || []).reduce((s: number, t: any) => s + (t.turns_used || 0), 0);
    runs.push({
      file,
      timestamp: data.timestamp || '',
      branch: data.branch || 'unknown',
      tier: data.tier || 'unknown',
      version: data.version || '?',
      passed: data.passed || 0,
      total: data.total_tests || 0,
      cost: data.total_cost_usd || 0,
      duration: data.total_duration_ms || 0,
      turns: totalTurns,
    });
  } catch { continue; } // unreadable or malformed files are left out of the listing
}

// Sort by timestamp descending
runs.sort((a, b) => b.timestamp.localeCompare(a.timestamp));

// Apply limit
const displayed = runs.slice(0, limit);

// Column layout for the Branch column: names longer than BRANCH_MAX are cut
// and given an ellipsis, and every cell is padded to BRANCH_WIDTH.
const BRANCH_WIDTH = 25;
const BRANCH_MAX = 23;

// Print table
console.log('');
console.log(`Eval History (${runs.length} total runs)`);
console.log('═'.repeat(105));
console.log(
  ' ' +
  'Date'.padEnd(17) +
  'Branch'.padEnd(BRANCH_WIDTH) +
  'Tier'.padEnd(12) +
  'Pass'.padEnd(8) +
  'Cost'.padEnd(8) +
  'Turns'.padEnd(7) +
  'Duration'.padEnd(10) +
  'Version'
);
console.log('─'.repeat(105));

for (const run of displayed) {
  const date = run.timestamp.replace('T', ' ').slice(0, 16);
  // Pad after truncating so a shortened name still fills the whole column and
  // the columns to its right stay aligned.
  const shownBranch = run.branch.length > BRANCH_MAX
    ? run.branch.slice(0, BRANCH_MAX - 3) + '...'
    : run.branch;
  const branch = shownBranch.padEnd(BRANCH_WIDTH);
  const pass = `${run.passed}/${run.total}`.padEnd(8);
  const cost = `$${run.cost.toFixed(2)}`.padEnd(8);
  // Zero turns / zero duration are shown as blank cells.
  const turns = run.turns > 0 ? `${run.turns}t`.padEnd(7) : ''.padEnd(7);
  const dur = run.duration > 0 ? `${Math.round(run.duration / 1000)}s`.padEnd(10) : ''.padEnd(10);
  console.log(` ${date.padEnd(17)}${branch}${run.tier.padEnd(12)}${pass}${cost}${turns}${dur}v${run.version}`);
}

console.log('─'.repeat(105));

// Totals cover every run that matched the filters, not only the displayed rows.
const totalCost = runs.reduce((s, r) => s + r.cost, 0);
const totalDur = runs.reduce((s, r) => s + r.duration, 0);
const totalTurns = runs.reduce((s, r) => s + r.turns, 0);
console.log(` ${runs.length} runs | $${totalCost.toFixed(2)} total | ${totalTurns} turns | ${Math.round(totalDur / 1000)}s | Showing: ${displayed.length}`);
console.log(` Dir: ${EVAL_DIR}`);
console.log('');
|