opengstack 0.13.10 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/AGENTS.md +4 -4
  2. package/CLAUDE.md +127 -110
  3. package/README.md +10 -5
  4. package/SKILL.md +500 -70
  5. package/bin/opengstack.js +69 -69
  6. package/{skills/land-and-deploy/SKILL.md → commands/autoplan.md} +7 -25
  7. package/{skills/benchmark/SKILL.md → commands/benchmark.md} +84 -108
  8. package/{skills/browse/SKILL.md → commands/browse.md} +60 -81
  9. package/{skills/ship/SKILL.md → commands/canary.md} +7 -27
  10. package/{skills/careful/SKILL.md → commands/careful.md} +2 -22
  11. package/{skills/canary/SKILL.md → commands/codex.md} +7 -26
  12. package/{skills/connect-chrome/SKILL.md → commands/connect-chrome.md} +7 -24
  13. package/commands/cso.md +70 -0
  14. package/commands/design-consultation.md +70 -0
  15. package/commands/design-review.md +70 -0
  16. package/commands/design-shotgun.md +70 -0
  17. package/commands/document-release.md +70 -0
  18. package/{skills/freeze/SKILL.md → commands/freeze.md} +3 -29
  19. package/{skills/guard/SKILL.md → commands/guard.md} +4 -35
  20. package/commands/investigate.md +70 -0
  21. package/commands/land-and-deploy.md +70 -0
  22. package/commands/office-hours.md +70 -0
  23. package/{skills/gstack-upgrade/SKILL.md → commands/opengstack-upgrade.md} +64 -79
  24. package/commands/plan-ceo-review.md +70 -0
  25. package/commands/plan-design-review.md +70 -0
  26. package/commands/plan-eng-review.md +70 -0
  27. package/commands/qa-only.md +70 -0
  28. package/commands/qa.md +70 -0
  29. package/commands/retro.md +70 -0
  30. package/commands/review.md +70 -0
  31. package/{skills/setup-browser-cookies/SKILL.md → commands/setup-browser-cookies.md} +22 -40
  32. package/commands/setup-deploy.md +70 -0
  33. package/commands/ship.md +70 -0
  34. package/commands/unfreeze.md +25 -0
  35. package/docs/designs/CHROME_VS_CHROMIUM_EXPLORATION.md +9 -9
  36. package/docs/designs/CONDUCTOR_CHROME_SIDEBAR_INTEGRATION.md +2 -2
  37. package/docs/designs/CONDUCTOR_SESSION_API.md +16 -16
  38. package/docs/designs/DESIGN_SHOTGUN.md +74 -74
  39. package/docs/designs/DESIGN_TOOLS_V1.md +111 -111
  40. package/docs/skills.md +483 -202
  41. package/package.json +42 -43
  42. package/scripts/analytics.ts +188 -0
  43. package/scripts/dev-skill.ts +83 -0
  44. package/scripts/discover-skills.ts +39 -0
  45. package/scripts/eval-compare.ts +97 -0
  46. package/scripts/eval-list.ts +117 -0
  47. package/scripts/eval-select.ts +86 -0
  48. package/scripts/eval-summary.ts +188 -0
  49. package/scripts/eval-watch.ts +172 -0
  50. package/scripts/gen-skill-docs.ts +473 -0
  51. package/scripts/resolvers/browse.ts +129 -0
  52. package/scripts/resolvers/codex-helpers.ts +133 -0
  53. package/scripts/resolvers/composition.ts +48 -0
  54. package/scripts/resolvers/confidence.ts +37 -0
  55. package/scripts/resolvers/constants.ts +50 -0
  56. package/scripts/resolvers/design.ts +950 -0
  57. package/scripts/resolvers/index.ts +59 -0
  58. package/scripts/resolvers/learnings.ts +96 -0
  59. package/scripts/resolvers/preamble.ts +505 -0
  60. package/scripts/resolvers/review.ts +884 -0
  61. package/scripts/resolvers/testing.ts +573 -0
  62. package/scripts/resolvers/types.ts +45 -0
  63. package/scripts/resolvers/utility.ts +421 -0
  64. package/scripts/skill-check.ts +190 -0
  65. package/scripts/cleanup.py +0 -100
  66. package/scripts/filter-skills.sh +0 -114
  67. package/scripts/filter_skills.py +0 -164
  68. package/scripts/install-skills.js +0 -60
  69. package/skills/autoplan/SKILL.md +0 -96
  70. package/skills/autoplan/SKILL.md.tmpl +0 -694
  71. package/skills/benchmark/SKILL.md.tmpl +0 -222
  72. package/skills/browse/SKILL.md.tmpl +0 -131
  73. package/skills/browse/bin/find-browse +0 -21
  74. package/skills/browse/bin/remote-slug +0 -14
  75. package/skills/browse/scripts/build-node-server.sh +0 -48
  76. package/skills/browse/src/activity.ts +0 -208
  77. package/skills/browse/src/browser-manager.ts +0 -959
  78. package/skills/browse/src/buffers.ts +0 -137
  79. package/skills/browse/src/bun-polyfill.cjs +0 -109
  80. package/skills/browse/src/cli.ts +0 -678
  81. package/skills/browse/src/commands.ts +0 -128
  82. package/skills/browse/src/config.ts +0 -150
  83. package/skills/browse/src/cookie-import-browser.ts +0 -625
  84. package/skills/browse/src/cookie-picker-routes.ts +0 -230
  85. package/skills/browse/src/cookie-picker-ui.ts +0 -688
  86. package/skills/browse/src/find-browse.ts +0 -61
  87. package/skills/browse/src/meta-commands.ts +0 -550
  88. package/skills/browse/src/platform.ts +0 -17
  89. package/skills/browse/src/read-commands.ts +0 -358
  90. package/skills/browse/src/server.ts +0 -1192
  91. package/skills/browse/src/sidebar-agent.ts +0 -280
  92. package/skills/browse/src/sidebar-utils.ts +0 -21
  93. package/skills/browse/src/snapshot.ts +0 -407
  94. package/skills/browse/src/url-validation.ts +0 -95
  95. package/skills/browse/src/write-commands.ts +0 -364
  96. package/skills/browse/test/activity.test.ts +0 -120
  97. package/skills/browse/test/adversarial-security.test.ts +0 -32
  98. package/skills/browse/test/browser-manager-unit.test.ts +0 -17
  99. package/skills/browse/test/bun-polyfill.test.ts +0 -72
  100. package/skills/browse/test/commands.test.ts +0 -2075
  101. package/skills/browse/test/compare-board.test.ts +0 -342
  102. package/skills/browse/test/config.test.ts +0 -316
  103. package/skills/browse/test/cookie-import-browser.test.ts +0 -519
  104. package/skills/browse/test/cookie-picker-routes.test.ts +0 -260
  105. package/skills/browse/test/file-drop.test.ts +0 -271
  106. package/skills/browse/test/find-browse.test.ts +0 -50
  107. package/skills/browse/test/findport.test.ts +0 -191
  108. package/skills/browse/test/fixtures/basic.html +0 -33
  109. package/skills/browse/test/fixtures/cursor-interactive.html +0 -22
  110. package/skills/browse/test/fixtures/dialog.html +0 -15
  111. package/skills/browse/test/fixtures/empty.html +0 -2
  112. package/skills/browse/test/fixtures/forms.html +0 -55
  113. package/skills/browse/test/fixtures/iframe.html +0 -30
  114. package/skills/browse/test/fixtures/network-idle.html +0 -30
  115. package/skills/browse/test/fixtures/qa-eval-checkout.html +0 -108
  116. package/skills/browse/test/fixtures/qa-eval-spa.html +0 -98
  117. package/skills/browse/test/fixtures/qa-eval.html +0 -51
  118. package/skills/browse/test/fixtures/responsive.html +0 -49
  119. package/skills/browse/test/fixtures/snapshot.html +0 -55
  120. package/skills/browse/test/fixtures/spa.html +0 -24
  121. package/skills/browse/test/fixtures/states.html +0 -17
  122. package/skills/browse/test/fixtures/upload.html +0 -25
  123. package/skills/browse/test/gstack-config.test.ts +0 -138
  124. package/skills/browse/test/gstack-update-check.test.ts +0 -514
  125. package/skills/browse/test/handoff.test.ts +0 -235
  126. package/skills/browse/test/path-validation.test.ts +0 -91
  127. package/skills/browse/test/platform.test.ts +0 -37
  128. package/skills/browse/test/server-auth.test.ts +0 -65
  129. package/skills/browse/test/sidebar-agent-roundtrip.test.ts +0 -226
  130. package/skills/browse/test/sidebar-agent.test.ts +0 -199
  131. package/skills/browse/test/sidebar-integration.test.ts +0 -320
  132. package/skills/browse/test/sidebar-unit.test.ts +0 -96
  133. package/skills/browse/test/snapshot.test.ts +0 -467
  134. package/skills/browse/test/state-ttl.test.ts +0 -35
  135. package/skills/browse/test/test-server.ts +0 -57
  136. package/skills/browse/test/url-validation.test.ts +0 -72
  137. package/skills/browse/test/watch.test.ts +0 -129
  138. package/skills/canary/SKILL.md.tmpl +0 -212
  139. package/skills/careful/SKILL.md.tmpl +0 -56
  140. package/skills/careful/bin/check-careful.sh +0 -112
  141. package/skills/codex/SKILL.md +0 -90
  142. package/skills/codex/SKILL.md.tmpl +0 -417
  143. package/skills/connect-chrome/SKILL.md.tmpl +0 -195
  144. package/skills/cso/ACKNOWLEDGEMENTS.md +0 -14
  145. package/skills/cso/SKILL.md +0 -93
  146. package/skills/cso/SKILL.md.tmpl +0 -606
  147. package/skills/design-consultation/SKILL.md +0 -94
  148. package/skills/design-consultation/SKILL.md.tmpl +0 -415
  149. package/skills/design-review/SKILL.md +0 -94
  150. package/skills/design-review/SKILL.md.tmpl +0 -290
  151. package/skills/design-shotgun/SKILL.md +0 -91
  152. package/skills/design-shotgun/SKILL.md.tmpl +0 -285
  153. package/skills/document-release/SKILL.md +0 -91
  154. package/skills/document-release/SKILL.md.tmpl +0 -359
  155. package/skills/freeze/SKILL.md.tmpl +0 -77
  156. package/skills/freeze/bin/check-freeze.sh +0 -79
  157. package/skills/gstack-upgrade/SKILL.md.tmpl +0 -222
  158. package/skills/guard/SKILL.md.tmpl +0 -77
  159. package/skills/investigate/SKILL.md +0 -105
  160. package/skills/investigate/SKILL.md.tmpl +0 -194
  161. package/skills/land-and-deploy/SKILL.md.tmpl +0 -881
  162. package/skills/office-hours/SKILL.md +0 -96
  163. package/skills/office-hours/SKILL.md.tmpl +0 -645
  164. package/skills/plan-ceo-review/SKILL.md +0 -94
  165. package/skills/plan-ceo-review/SKILL.md.tmpl +0 -811
  166. package/skills/plan-design-review/SKILL.md +0 -92
  167. package/skills/plan-design-review/SKILL.md.tmpl +0 -446
  168. package/skills/plan-eng-review/SKILL.md +0 -93
  169. package/skills/plan-eng-review/SKILL.md.tmpl +0 -303
  170. package/skills/qa/SKILL.md +0 -95
  171. package/skills/qa/SKILL.md.tmpl +0 -316
  172. package/skills/qa/references/issue-taxonomy.md +0 -85
  173. package/skills/qa/templates/qa-report-template.md +0 -126
  174. package/skills/qa-only/SKILL.md +0 -89
  175. package/skills/qa-only/SKILL.md.tmpl +0 -101
  176. package/skills/retro/SKILL.md +0 -89
  177. package/skills/retro/SKILL.md.tmpl +0 -820
  178. package/skills/review/SKILL.md +0 -92
  179. package/skills/review/SKILL.md.tmpl +0 -281
  180. package/skills/review/TODOS-format.md +0 -62
  181. package/skills/review/checklist.md +0 -220
  182. package/skills/review/design-checklist.md +0 -132
  183. package/skills/review/greptile-triage.md +0 -220
  184. package/skills/setup-browser-cookies/SKILL.md.tmpl +0 -81
  185. package/skills/setup-deploy/SKILL.md +0 -92
  186. package/skills/setup-deploy/SKILL.md.tmpl +0 -215
  187. package/skills/ship/SKILL.md.tmpl +0 -636
  188. package/skills/unfreeze/SKILL.md +0 -37
  189. package/skills/unfreeze/SKILL.md.tmpl +0 -36
package/package.json CHANGED
@@ -1,47 +1,46 @@
1
1
  {
2
- "name": "opengstack",
3
- "version": "0.13.10",
4
- "private": false,
5
- "description": "AI Engineering Workflow - SKILL.md files that give AI agents structured roles for software development. Forked from gstack but scrubbed clean of all the YC/Garry Tan cruft and telemetry.",
6
- "keywords": [
7
- "ai-agents",
8
- "claude",
9
- "workflow",
10
- "skills",
11
- "agent-instructions",
12
- "gstack",
13
- "opengstack"
14
- ],
15
- "homepage": "https://github.com/Ambisphaeric/opengstack#readme",
16
- "bugs": {
17
- "url": "https://github.com/Ambisphaeric/opengstack/issues"
18
- },
19
- "repository": {
20
- "type": "git",
21
- "url": "git+https://github.com/Ambisphaeric/opengstack.git"
22
- },
23
- "license": "MIT",
24
- "author": "Ambisphaeric",
25
- "type": "commonjs",
26
- "main": "SKILL.md",
27
- "bin": {
28
- "opengstack": "./bin/opengstack.js"
29
- },
30
- "directories": {
31
- "doc": "docs"
32
- },
33
- "files": [
34
- "SKILL.md",
35
- "CLAUDE.md",
36
- "AGENTS.md",
37
- "README.md",
38
- "skills/",
39
- "scripts/",
40
- "docs/",
41
- "bin/"
42
- ],
43
- "scripts": {
44
- "postinstall": "node scripts/install-skills.js",
2
+ "name": "opengstack",
3
+ "version": "0.14.2",
4
+ "private": false,
5
+ "description": "AI Engineering Workflow - Native slash commands for OpenCode. Open source AI engineering workflow. No telemetry. No tracking.",
6
+ "keywords": [
7
+ "ai-agents",
8
+ "claude",
9
+ "workflow",
10
+ "skills",
11
+ "agent-instructions",
12
+ "opengstack",
13
+ "opengstack"
14
+ ],
15
+ "homepage": "https://github.com/Ambisphaeric/opengstack#readme",
16
+ "bugs": {
17
+ "url": "https://github.com/Ambisphaeric/opengstack/issues"
18
+ },
19
+ "repository": {
20
+ "type": "git",
21
+ "url": "git+https://github.com/Ambisphaeric/OpenGStack.git"
22
+ },
23
+ "license": "MIT",
24
+ "author": "Ambisphaeric",
25
+ "type": "commonjs",
26
+ "main": "SKILL.md",
27
+ "bin": {
28
+ "opengstack": "./bin/opengstack.js"
29
+ },
30
+ "directories": {
31
+ "doc": "docs"
32
+ },
33
+ "files": [
34
+ "SKILL.md",
35
+ "CLAUDE.md",
36
+ "AGENTS.md",
37
+ "README.md",
38
+ "commands/",
39
+ "scripts/",
40
+ "docs/",
41
+ "bin/"
42
+ ],
43
+ "scripts": {
45
44
  "test": "echo \"Error: no test specified\" && exit 1"
46
45
  }
47
46
  }
@@ -0,0 +1,188 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * analytics — CLI for viewing opengstack skill usage statistics.
4
+ *
5
+ * - Top skills by invocation count
6
+ * - Per-repo skill breakdown
7
+ * - Safety hook fire events
8
+ *
9
+ * Usage: bun run scripts/analytics.ts [--period <N>d|all]
10
+ */
11
+
12
+ import * as fs from 'fs';
13
+ import * as path from 'path';
14
+ import * as os from 'os';
15
+
16
/** One record read from the skill-usage JSONL log. */
export interface AnalyticsEvent {
  /** Skill name; shown as `/<skill>` in the "Top Skills" section of the report. */
  skill: string;
  /** Timestamp string; parsed with `new Date(ts)` when filtering by period. */
  ts: string;
  /** Repository label used to group the "By Repo" section of the report. */
  repo: string;
  /** Event kind; `'hook_fire'` marks a safety-hook event, any other value counts as a skill invocation. */
  event?: string;
  /** Hook pattern that fired; only counted for `'hook_fire'` events. */
  pattern?: string;
}
23
+
24
+ const ANALYTICS_FILE = path.join(os.homedir(), '.OpenGStack', 'analytics', 'skill-usage.jsonl');
25
+
26
/**
 * Convert JSONL text into a list of analytics events.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * objects carrying a string `ts` field are dropped silently.
 *
 * @param content Raw file contents, one JSON document per line.
 * @returns The accepted events, in the order they appear in `content`.
 */
export function parseJSONL(content: string): AnalyticsEvent[] {
  const rows = content
    .split('\n')
    .map(raw => raw.trim())
    .filter(raw => raw.length > 0);

  const accepted: AnalyticsEvent[] = [];
  for (const row of rows) {
    let candidate: any;
    try {
      candidate = JSON.parse(row);
    } catch {
      // Malformed line: ignore it and keep going.
      continue;
    }

    const isRecord = candidate !== null && typeof candidate === 'object';
    if (isRecord && typeof candidate.ts === 'string') {
      accepted.push(candidate as AnalyticsEvent);
    }
  }
  return accepted;
}
45
+
46
/**
 * Keep only the events that fall inside a look-back window.
 *
 * `period` is either "all" or "<N>d" (for example "7d" or "30d"). Any other
 * value is treated like "all": the input array itself is returned unchanged.
 * For a windowed period, events whose `ts` cannot be parsed as a date are
 * excluded.
 *
 * @param events Events to filter.
 * @param period Window specification.
 * @returns The same array for "all"/unrecognized periods, otherwise a new filtered array.
 */
export function filterByPeriod(events: AnalyticsEvent[], period: string): AnalyticsEvent[] {
  const windowSpec = /^(\d+)d$/.exec(period);
  if (period === 'all' || windowSpec === null) {
    return events;
  }

  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  const dayCount = parseInt(windowSpec[1], 10);
  // Going through Date keeps an out-of-range cutoff as NaN, which excludes every event.
  const cutoffMs = new Date(Date.now() - dayCount * MS_PER_DAY).getTime();

  return events.filter(({ ts }) => {
    const eventMs = new Date(ts).getTime();
    return !isNaN(eventMs) && eventMs >= cutoffMs;
  });
}
63
+
64
/** Human-readable label for a period string; anything that is not "<N>d" is reported as all time. */
function describePeriod(period: string): string {
  const windowSpec = /^(\d+)d$/.exec(period);
  if (windowSpec === null) return 'all time';
  const dayCount = parseInt(windowSpec[1], 10);
  return `last ${windowSpec[1]} day${dayCount === 1 ? '' : 's'}`;
}

/** Use `value` when it is a non-empty string, otherwise fall back to "unknown". */
function labelOrUnknown(value: unknown): string {
  return typeof value === 'string' && value !== '' ? value : 'unknown';
}

/** Increment the counter stored under `key`. */
function bumpCount(counts: Map<string, number>, key: string): void {
  counts.set(key, (counts.get(key) || 0) + 1);
}

/** Build one "label ..... suffix" row; the dot leader is at least two dots long. */
function leaderRow(label: string, suffix: string): string {
  const dotLen = Math.max(2, 25 - label.length - suffix.length);
  return ` ${label} ${'.'.repeat(dotLen)} ${suffix}`;
}

/**
 * Render a plain-text usage report for a list of events.
 *
 * The report contains a header, the period label, and up to three sections
 * ("Top Skills", "By Repo", "Safety Hook Events" — each omitted when empty),
 * followed by a "Total" line. Events with `event === 'hook_fire'` are treated
 * as hook events; every other event counts as a skill invocation.
 *
 * Events are only guaranteed to carry a string `ts`, so a missing `skill` or
 * `repo` is grouped under "unknown" instead of breaking the sort.
 *
 * @param events Events to summarize (already filtered by the caller).
 * @param period Period the events were filtered with; only used for the label.
 * @returns The report as a single newline-joined string.
 */
export function formatReport(events: AnalyticsEvent[], period: string = 'all'): string {
  const skillEvents = events.filter(e => e.event !== 'hook_fire');
  const hookEvents = events.filter(e => e.event === 'hook_fire');

  const lines: string[] = [
    'opengstack skill usage analytics',
    '\u2550'.repeat(39),
    '',
    `Period: ${describePeriod(period)}`,
  ];

  // Top Skills: invocation count per skill, most used first.
  const skillCounts = new Map<string, number>();
  for (const e of skillEvents) {
    bumpCount(skillCounts, labelOrUnknown(e.skill));
  }

  if (skillCounts.size > 0) {
    lines.push('', 'Top Skills');
    const sorted = [...skillCounts.entries()].sort((a, b) => b[1] - a[1]);
    for (const [name, count] of sorted) {
      lines.push(leaderRow(`/${name}`, `${count} invocation${count === 1 ? '' : 's'}`));
    }
  }

  // By Repo: per-repository skill counts, repositories in alphabetical order.
  const repoSkills = new Map<string, Map<string, number>>();
  for (const e of skillEvents) {
    const repo = labelOrUnknown(e.repo);
    let perSkill = repoSkills.get(repo);
    if (perSkill === undefined) {
      perSkill = new Map();
      repoSkills.set(repo, perSkill);
    }
    bumpCount(perSkill, labelOrUnknown(e.skill));
  }

  if (repoSkills.size > 0) {
    lines.push('', 'By Repo');
    const sortedRepos = [...repoSkills.entries()].sort((a, b) => a[0].localeCompare(b[0]));
    for (const [repo, skills] of sortedRepos) {
      const parts = [...skills.entries()]
        .sort((a, b) => b[1] - a[1])
        .map(([s, c]) => `${s}(${c})`);
      lines.push(` ${repo}: ${parts.join(' ')}`);
    }
  }

  // Safety Hook Events: fire count per pattern; hook events without a pattern are not listed.
  const hookCounts = new Map<string, number>();
  for (const e of hookEvents) {
    if (e.pattern) {
      bumpCount(hookCounts, e.pattern);
    }
  }

  if (hookCounts.size > 0) {
    lines.push('', 'Safety Hook Events');
    const sortedHooks = [...hookCounts.entries()].sort((a, b) => b[1] - a[1]);
    for (const [pattern, count] of sortedHooks) {
      lines.push(leaderRow(pattern, `${count} fire${count === 1 ? '' : 's'}`));
    }
  }

  // Total
  const totalSkills = skillEvents.length;
  const totalHooks = hookEvents.length;
  lines.push('');
  lines.push(`Total: ${totalSkills} skill invocation${totalSkills === 1 ? '' : 's'}, ${totalHooks} hook fire${totalHooks === 1 ? '' : 's'}`);

  return lines.join('\n');
}
152
+
153
/**
 * CLI entry point: reads the analytics log, applies the optional
 * `--period <value>` flag, and prints the formatted report to stdout.
 *
 * Prints "No analytics data found." and exits with status 0 when the log file
 * is missing, blank, or contains no usable events.
 */
function main() {
  const argv = process.argv.slice(2);

  // The last `--period <value>` pair wins; a trailing `--period` without a value is ignored.
  let period = 'all';
  let idx = 0;
  while (idx < argv.length) {
    if (argv[idx] === '--period' && idx + 1 < argv.length) {
      period = argv[idx + 1];
      idx += 2;
    } else {
      idx += 1;
    }
  }

  // Shared exit path for every "nothing to show" situation.
  const bailIfEmpty = (isEmpty: boolean): void => {
    if (isEmpty) {
      console.log('No analytics data found.');
      process.exit(0);
    }
  };

  bailIfEmpty(!fs.existsSync(ANALYTICS_FILE));

  const content = fs.readFileSync(ANALYTICS_FILE, 'utf-8').trim();
  bailIfEmpty(!content);

  const events = parseJSONL(content);
  bailIfEmpty(events.length === 0);

  console.log(formatReport(filterByPeriod(events, period), period));
}

// Only run the CLI when this file is the program entry point, not when it is imported.
if (import.meta.main) {
  main();
}
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * dev:skill — Watch mode for SKILL.md template development.
4
+ *
5
+ * Watches .tmpl files, regenerates SKILL.md files on change,
6
+ * validates all $B commands immediately.
7
+ */
8
+
9
+ import { validateSkill } from '../test/helpers/skill-parser';
10
+ import { discoverTemplates } from './discover-skills';
11
+ import { execSync } from 'child_process';
12
+ import * as fs from 'fs';
13
+ import * as path from 'path';
14
+
15
+ const ROOT = path.resolve(import.meta.dir, '..');
16
+
17
// Discovered templates: `tmpl` is joined onto ROOT so it can be watched directly,
// while `output` is kept exactly as discoverTemplates returned it.
const TEMPLATES = discoverTemplates(ROOT).map(({ tmpl, output }) => {
  const watchedTmpl = path.join(ROOT, tmpl);
  return { tmpl: watchedTmpl, output };
});
21
+
22
/**
 * Rebuild the generated skill docs, then validate every generated file that
 * exists on disk and print one status line per file.
 *
 * If the generation command fails, its error output is printed and the
 * validation pass is skipped.
 */
function regenerateAndValidate() {
  // Step 1: regenerate by running the generator script from ROOT.
  try {
    execSync('bun run scripts/gen-skill-docs.ts', { cwd: ROOT, stdio: 'pipe' });
  } catch (err: any) {
    const detail = err.stderr?.toString().trim() || err.message;
    console.log(` [gen] ERROR: ${detail}`);
    return;
  }

  // Step 2: validate each output that was actually produced.
  for (const entry of TEMPLATES) {
    const target = path.join(ROOT, entry.output);
    if (!fs.existsSync(target)) {
      continue;
    }

    const { valid, invalid, snapshotFlagErrors } = validateSkill(target);
    const isClean = invalid.length === 0 && snapshotFlagErrors.length === 0;

    if (isClean) {
      console.log(` [check] \u2705 ${entry.output} — ${valid.length} commands, all valid`);
      continue;
    }

    console.log(` [check] \u274c ${entry.output} (${valid.length} valid)`);
    invalid.forEach(bad => {
      console.log(` Unknown command: '${bad.command}' at line ${bad.line}`);
    });
    snapshotFlagErrors.forEach(flagErr => {
      console.log(` ${flagErr.error} at line ${flagErr.command.line}`);
    });
  }
}
54
+
55
// Initial run
console.log(' [watch] Watching *.md.tmpl files...');
regenerateAndValidate();

// A single save can make fs.watch fire several times in quick succession.
// Changes are collected for a short window so each burst triggers one
// regeneration instead of several back-to-back synchronous runs.
const DEBOUNCE_MS = 100;
const changedFiles = new Set<string>();
let flushTimer: ReturnType<typeof setTimeout> | undefined;

/** Report every file that changed during the debounce window, then regenerate once. */
function flushChanges() {
  flushTimer = undefined;
  for (const file of changedFiles) {
    console.log(`\n [watch] ${path.relative(ROOT, file)} changed`);
  }
  changedFiles.clear();
  regenerateAndValidate();
}

/** Watch one file (skipped when it does not exist) and queue a regeneration on change. */
function watchFile(file: string) {
  if (!fs.existsSync(file)) return;
  fs.watch(file, () => {
    changedFiles.add(file);
    if (flushTimer !== undefined) clearTimeout(flushTimer);
    flushTimer = setTimeout(flushChanges, DEBOUNCE_MS);
  });
}

// Watch for changes
for (const { tmpl } of TEMPLATES) {
  watchFile(tmpl);
}

// Also watch commands.ts and snapshot.ts (source of truth changes)
const SOURCE_FILES = [
  path.join(ROOT, 'browse', 'src', 'commands.ts'),
  path.join(ROOT, 'browse', 'src', 'snapshot.ts'),
];

for (const src of SOURCE_FILES) {
  watchFile(src);
}

// Keep alive
console.log(' [watch] Press Ctrl+C to stop\n');
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Shared discovery for SKILL.md and .tmpl files.
3
+ * Scans root + one level of subdirs, skipping node_modules/.git/dist.
4
+ */
5
+
6
+ import * as fs from 'fs';
7
+ import * as path from 'path';
8
+
9
// Directory names that are never scanned for skills.
const SKIP = new Set(['node_modules', '.git', 'dist']);

/** List the names of the immediate subdirectories of `root`, excluding dot-directories and SKIP entries. */
function subdirs(root: string): string[] {
  return fs.readdirSync(root, { withFileTypes: true })
    .filter(d => d.isDirectory() && !d.name.startsWith('.') && !SKIP.has(d.name))
    .map(d => d.name);
}

/**
 * Look for `filename` directly in `root` and in each scanned subdirectory.
 *
 * @returns Root-relative paths ("name" or "dir/name") of the files that exist,
 *          with the root-level entry first, then subdirectories in readdir order.
 */
function findInRootAndSubdirs(root: string, filename: string): string[] {
  return ['', ...subdirs(root)]
    .map(dir => (dir ? `${dir}/${filename}` : filename))
    .filter(rel => fs.existsSync(path.join(root, rel)));
}

/**
 * Find every SKILL.md.tmpl in `root` and one level of subdirectories.
 *
 * @returns For each template, its root-relative path (`tmpl`) and the path of
 *          the file it generates (`output`, the same path without ".tmpl").
 */
export function discoverTemplates(root: string): Array<{ tmpl: string; output: string }> {
  return findInRootAndSubdirs(root, 'SKILL.md.tmpl').map(tmpl => ({
    tmpl,
    output: tmpl.replace(/\.tmpl$/, ''),
  }));
}

/**
 * Find every SKILL.md in `root` and one level of subdirectories.
 *
 * @returns Root-relative paths of the SKILL.md files that exist.
 */
export function discoverSkillFiles(root: string): string[] {
  return findInRootAndSubdirs(root, 'SKILL.md');
}
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Compare two eval runs from ~/.opengstack-dev/evals/
4
+ *
5
+ * Usage:
6
+ * bun run eval:compare # compare two most recent of same tier
7
+ * bun run eval:compare <file> # compare file against its predecessor
8
+ * bun run eval:compare <file-a> <file-b> # compare two specific files
9
+ */
10
+
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import * as os from 'os';
14
+ import {
15
+ findPreviousRun,
16
+ compareEvalResults,
17
+ formatComparison,
18
+ getProjectEvalDir,
19
+ } from '../test/helpers/eval-store';
20
+ import type { EvalResult } from '../test/helpers/eval-store';
21
+
22
+ const EVAL_DIR = getProjectEvalDir();
23
+
24
/**
 * Load and parse one eval result file.
 *
 * Relative paths are resolved against EVAL_DIR; absolute paths are used as-is.
 * On any failure (file missing, unreadable, or not valid JSON) a one-line
 * message is written to stderr and the process exits with status 1, so the
 * CLI never dies with a raw stack trace.
 *
 * @param filepath Absolute path, or a path relative to EVAL_DIR.
 * @returns The parsed file contents (not validated against the EvalResult shape).
 */
function loadResult(filepath: string): EvalResult {
  // Resolve relative to EVAL_DIR if not absolute
  const resolved = path.isAbsolute(filepath) ? filepath : path.join(EVAL_DIR, filepath);
  if (!fs.existsSync(resolved)) {
    console.error(`File not found: ${resolved}`);
    process.exit(1);
  }
  try {
    return JSON.parse(fs.readFileSync(resolved, 'utf-8'));
  } catch (err: any) {
    console.error(`Could not load eval result ${resolved}: ${err?.message ?? err}`);
    process.exit(1);
  }
}
33
+
34
/** Print an informational message and stop with a success status. */
function exitQuietly(message: string): never {
  console.log(message);
  process.exit(0);
}

/**
 * Decide which two run files to compare, based on the CLI arguments:
 * two arguments name both files explicitly, one argument is compared against
 * its previous run, and any other argument count compares the most recent
 * run against its previous run.
 */
function selectRuns(cliArgs: string[]): { beforeFile: string; afterFile: string } {
  if (cliArgs.length === 2) {
    // Two explicit files
    const [beforeFile, afterFile] = cliArgs;
    return { beforeFile, afterFile };
  }

  if (cliArgs.length === 1) {
    // One file — find its predecessor
    const afterFile = cliArgs[0];
    const absolute = path.isAbsolute(afterFile) ? afterFile : path.join(EVAL_DIR, afterFile);
    const latest = loadResult(absolute);
    const predecessor = findPreviousRun(EVAL_DIR, latest.tier, latest.branch, absolute);
    if (!predecessor) {
      exitQuietly('No previous run found to compare against.');
    }
    return { beforeFile: predecessor, afterFile };
  }

  // No args — start from the newest file (names sorted in descending order)
  let candidates: string[];
  try {
    const jsonNames = fs.readdirSync(EVAL_DIR).filter(f => f.endsWith('.json'));
    candidates = jsonNames.sort().reverse();
  } catch {
    exitQuietly('No eval runs yet. Run: EVALS=1 bun run test:evals');
  }

  if (candidates.length < 2) {
    exitQuietly('Need at least 2 eval runs to compare. Run evals again.');
  }

  // Most recent file
  const afterFile = path.join(EVAL_DIR, candidates[0]);
  const latest = loadResult(afterFile);
  const predecessor = findPreviousRun(EVAL_DIR, latest.tier, latest.branch, afterFile);
  if (!predecessor) {
    exitQuietly('No previous run of the same tier found to compare against.');
  }
  return { beforeFile: predecessor, afterFile };
}

const { beforeFile, afterFile } = selectRuns(process.argv.slice(2));

const beforeResult = loadResult(beforeFile);
const afterResult = loadResult(afterFile);

// Warn if different tiers
const tiersDiffer = beforeResult.tier !== afterResult.tier;
if (tiersDiffer) {
  console.warn(`Warning: comparing different tiers (${beforeResult.tier} vs ${afterResult.tier})`);
}

// Warn on schema mismatch
const schemasDiffer = beforeResult.schema_version !== afterResult.schema_version;
if (schemasDiffer) {
  console.warn(`Warning: schema version mismatch (${beforeResult.schema_version} vs ${afterResult.schema_version})`);
}

const comparison = compareEvalResults(beforeResult, afterResult, beforeFile, afterFile);
console.log(formatComparison(comparison));
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * List eval runs from ~/.opengstack-dev/evals/
4
+ *
5
+ * Usage: bun run eval:list [--branch <name>] [--tier e2e|llm-judge] [--limit N]
6
+ */
7
+
8
+ import * as fs from 'fs';
9
+ import * as path from 'path';
10
+ import * as os from 'os';
11
+ import { getProjectEvalDir } from '../test/helpers/eval-store';
12
+
13
+ const EVAL_DIR = getProjectEvalDir();
14
+
15
// Parse args
const args = process.argv.slice(2);
const DEFAULT_LIMIT = 20;
let filterBranch: string | null = null;
let filterTier: string | null = null;
let limit = DEFAULT_LIMIT;

for (let i = 0; i < args.length; i++) {
  if (args[i] === '--branch' && args[i + 1]) { filterBranch = args[++i]; }
  else if (args[i] === '--tier' && args[i + 1]) { filterTier = args[++i]; }
  else if (args[i] === '--limit' && args[i + 1]) {
    const rawLimit = args[++i];
    const parsedLimit = Number.parseInt(rawLimit, 10);
    // A NaN or negative limit would silently hide rows (slice(0, NaN) is empty), so reject it.
    if (Number.isInteger(parsedLimit) && parsedLimit >= 0) {
      limit = parsedLimit;
    } else {
      console.error(`Ignoring invalid --limit value "${rawLimit}"; using ${DEFAULT_LIMIT}.`);
    }
  }
}

// Read eval files
let files: string[];
try {
  files = fs.readdirSync(EVAL_DIR).filter(f => f.endsWith('.json'));
} catch {
  // An unreadable or missing eval directory is reported the same way as an empty one.
  console.log('No eval runs yet. Run: EVALS=1 bun run test:evals');
  process.exit(0);
}

if (files.length === 0) {
  console.log('No eval runs yet. Run: EVALS=1 bun run test:evals');
  process.exit(0);
}

// Parse top-level fields from each file
interface RunSummary {
  file: string;
  timestamp: string;
  branch: string;
  tier: string;
  version: string;
  passed: number;
  total: number;
  cost: number;
  duration: number;
  turns: number;
}

// Eval files are untyped JSON; coerce each field so the sorting and formatting
// below (localeCompare, padEnd, toFixed) never receive an unexpected type.
const textOr = (value: unknown, fallback: string): string =>
  typeof value === 'string' && value !== '' ? value : fallback;
const numberOrZero = (value: unknown): number =>
  typeof value === 'number' && Number.isFinite(value) ? value : 0;

const runs: RunSummary[] = [];
for (const file of files) {
  try {
    const data = JSON.parse(fs.readFileSync(path.join(EVAL_DIR, file), 'utf-8'));
    if (filterBranch && data.branch !== filterBranch) continue;
    if (filterTier && data.tier !== filterTier) continue;
    const totalTurns = (data.tests || []).reduce((s: number, t: any) => s + (t.turns_used || 0), 0);
    runs.push({
      file,
      timestamp: textOr(data.timestamp, ''),
      branch: textOr(data.branch, 'unknown'),
      tier: textOr(data.tier, 'unknown'),
      version: String(data.version || '?'),
      passed: numberOrZero(data.passed),
      total: numberOrZero(data.total_tests),
      cost: numberOrZero(data.total_cost_usd),
      duration: numberOrZero(data.total_duration_ms),
      turns: numberOrZero(totalTurns),
    });
  } catch { continue; } // best-effort listing: unreadable or malformed files are skipped
}

// Sort by timestamp descending
runs.sort((a, b) => b.timestamp.localeCompare(a.timestamp));

// Apply limit
const displayed = runs.slice(0, limit);

// Print table
console.log('');
console.log(`Eval History (${runs.length} total runs)`);
console.log('═'.repeat(105));
console.log(
  ' ' +
  'Date'.padEnd(17) +
  'Branch'.padEnd(25) +
  'Tier'.padEnd(12) +
  'Pass'.padEnd(8) +
  'Cost'.padEnd(8) +
  'Turns'.padEnd(7) +
  'Duration'.padEnd(10) +
  'Version'
);
console.log('─'.repeat(105));

for (const run of displayed) {
  const date = run.timestamp.replace('T', ' ').slice(0, 16);
  // Truncate long branch names, then pad either form to the full column width
  // so the columns to the right stay aligned.
  const shownBranch = run.branch.length > 23 ? run.branch.slice(0, 20) + '...' : run.branch;
  const branch = shownBranch.padEnd(25);
  const pass = `${run.passed}/${run.total}`.padEnd(8);
  const cost = `$${run.cost.toFixed(2)}`.padEnd(8);
  const turns = run.turns > 0 ? `${run.turns}t`.padEnd(7) : ''.padEnd(7);
  const dur = run.duration > 0 ? `${Math.round(run.duration / 1000)}s`.padEnd(10) : ''.padEnd(10);
  console.log(` ${date.padEnd(17)}${branch}${run.tier.padEnd(12)}${pass}${cost}${turns}${dur}v${run.version}`);
}

console.log('─'.repeat(105));

// Footer totals cover every matching run, not only the displayed rows.
const totalCost = runs.reduce((s, r) => s + r.cost, 0);
const totalDur = runs.reduce((s, r) => s + r.duration, 0);
const totalTurns = runs.reduce((s, r) => s + r.turns, 0);
console.log(` ${runs.length} runs | $${totalCost.toFixed(2)} total | ${totalTurns} turns | ${Math.round(totalDur / 1000)}s | Showing: ${displayed.length}`);
console.log(` Dir: ${EVAL_DIR}`);
console.log('');