@pellux/goodvibes-agent 0.1.53 → 0.1.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/docs/deployment-and-services.md +1 -0
- package/docs/release-and-publishing.md +3 -2
- package/package.json +2 -5
- package/scripts/check-bun.sh +0 -2
- package/src/input/agent-workspace.ts +88 -3
- package/src/input/command-registry.ts +0 -1
- package/src/input/commands/local-runtime.ts +6 -7
- package/src/input/commands/operator-runtime.ts +0 -50
- package/src/input/commands/product-runtime.ts +3 -129
- package/src/input/commands.ts +0 -4
- package/src/main.ts +28 -1
- package/src/panels/builtin/operations.ts +0 -12
- package/src/panels/builtin/shared.ts +0 -2
- package/src/runtime/services.ts +109 -1
- package/src/version.ts +1 -1
- package/src/input/commands/eval.ts +0 -217
- package/src/panels/eval-panel.ts +0 -399
package/src/runtime/services.ts
CHANGED
|
@@ -136,6 +136,114 @@ function ensureConfiguredModelIsRoutable(providerRegistry: ProviderRegistry, con
|
|
|
136
136
|
});
|
|
137
137
|
}
|
|
138
138
|
|
|
139
|
+
const PROVIDER_STARTUP_PLACEHOLDER_API_KEY = 'goodvibes-agent-startup-placeholder';
|
|
140
|
+
|
|
141
|
+
type ProviderRegistryConstructionOptions = ConstructorParameters<typeof ProviderRegistry>[0];
|
|
142
|
+
|
|
143
|
+
type ProviderStartupEnv = {
|
|
144
|
+
readonly providerId: string;
|
|
145
|
+
readonly envVars: readonly string[];
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
type MutableApiKeyProvider = {
|
|
149
|
+
apiKey: string;
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
type MutableConfiguredProvider = {
|
|
153
|
+
configured: boolean;
|
|
154
|
+
};
|
|
155
|
+
|
|
156
|
+
const PROVIDER_STARTUP_PLACEHOLDER_ENVS: readonly ProviderStartupEnv[] = [
|
|
157
|
+
{ providerId: 'openai', envVars: ['OPENAI_API_KEY', 'OPENAI_KEY'] },
|
|
158
|
+
{ providerId: 'inceptionlabs', envVars: ['INCEPTION_API_KEY'] },
|
|
159
|
+
{ providerId: 'openrouter', envVars: ['OPENROUTER_API_KEY'] },
|
|
160
|
+
{ providerId: 'aihubmix', envVars: ['AIHUBMIX_API_KEY'] },
|
|
161
|
+
{ providerId: 'groq', envVars: ['GROQ_API_KEY'] },
|
|
162
|
+
{ providerId: 'cerebras', envVars: ['CEREBRAS_API_KEY'] },
|
|
163
|
+
{ providerId: 'mistral', envVars: ['MISTRAL_API_KEY'] },
|
|
164
|
+
{ providerId: 'ollama-cloud', envVars: ['OLLAMA_CLOUD_API_KEY', 'OLLAMA_API_KEY'] },
|
|
165
|
+
{ providerId: 'huggingface', envVars: ['HF_API_KEY', 'HUGGINGFACE_API_KEY', 'HF_TOKEN'] },
|
|
166
|
+
{ providerId: 'nvidia', envVars: ['NVIDIA_API_KEY'] },
|
|
167
|
+
{ providerId: 'llm7', envVars: ['LLM7_API_KEY'] },
|
|
168
|
+
{ providerId: 'deepseek', envVars: ['DEEPSEEK_API_KEY'] },
|
|
169
|
+
{ providerId: 'fireworks', envVars: ['FIREWORKS_API_KEY'] },
|
|
170
|
+
{ providerId: 'microsoft-foundry', envVars: ['AZURE_OPENAI_API_KEY'] },
|
|
171
|
+
{ providerId: 'moonshot', envVars: ['MOONSHOT_API_KEY'] },
|
|
172
|
+
{ providerId: 'qianfan', envVars: ['QIANFAN_API_KEY'] },
|
|
173
|
+
{ providerId: 'qwen', envVars: ['QWEN_API_KEY', 'DASHSCOPE_API_KEY', 'MODELSTUDIO_API_KEY'] },
|
|
174
|
+
{ providerId: 'sglang', envVars: ['SGLANG_API_KEY'] },
|
|
175
|
+
{ providerId: 'stepfun', envVars: ['STEPFUN_API_KEY'] },
|
|
176
|
+
{ providerId: 'together', envVars: ['TOGETHER_API_KEY'] },
|
|
177
|
+
{ providerId: 'venice', envVars: ['VENICE_API_KEY'] },
|
|
178
|
+
{ providerId: 'volcengine', envVars: ['VOLCANO_ENGINE_API_KEY'] },
|
|
179
|
+
{ providerId: 'xai', envVars: ['XAI_API_KEY'] },
|
|
180
|
+
{ providerId: 'xiaomi', envVars: ['XIAOMI_API_KEY'] },
|
|
181
|
+
{ providerId: 'zai', envVars: ['ZAI_API_KEY', 'Z_AI_API_KEY'] },
|
|
182
|
+
{ providerId: 'cloudflare-ai-gateway', envVars: ['CLOUDFLARE_AI_GATEWAY_API_KEY'] },
|
|
183
|
+
{ providerId: 'vercel-ai-gateway', envVars: ['AI_GATEWAY_API_KEY'] },
|
|
184
|
+
{ providerId: 'litellm', envVars: ['LITELLM_API_KEY'] },
|
|
185
|
+
{ providerId: 'copilot-proxy', envVars: ['COPILOT_PROXY_API_KEY'] },
|
|
186
|
+
];
|
|
187
|
+
|
|
188
|
+
function hasMutableApiKeyProvider(value: unknown): value is MutableApiKeyProvider {
|
|
189
|
+
if (typeof value !== 'object' || value === null) return false;
|
|
190
|
+
const candidate = value as { readonly apiKey?: unknown };
|
|
191
|
+
return typeof candidate.apiKey === 'string';
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
function hasMutableConfiguredProvider(value: unknown): value is MutableConfiguredProvider {
|
|
195
|
+
if (typeof value !== 'object' || value === null) return false;
|
|
196
|
+
const candidate = value as { readonly configured?: unknown };
|
|
197
|
+
return typeof candidate.configured === 'boolean';
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
function hasAnyConfiguredEnv(envVars: readonly string[]): boolean {
|
|
201
|
+
return envVars.some((envVar) => {
|
|
202
|
+
const value = process.env[envVar];
|
|
203
|
+
return typeof value === 'string' && value.trim().length > 0;
|
|
204
|
+
});
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
export function createLaunchTolerantProviderRegistry(options: ProviderRegistryConstructionOptions): ProviderRegistry {
|
|
208
|
+
const placeholders = PROVIDER_STARTUP_PLACEHOLDER_ENVS
|
|
209
|
+
.filter((entry) => !hasAnyConfiguredEnv(entry.envVars))
|
|
210
|
+
.map((entry) => ({ providerId: entry.providerId, envVar: entry.envVars[0] }))
|
|
211
|
+
.filter((entry): entry is { readonly providerId: string; readonly envVar: string } => typeof entry.envVar === 'string');
|
|
212
|
+
|
|
213
|
+
if (placeholders.length === 0) {
|
|
214
|
+
return new ProviderRegistry(options);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
const previousValues = new Map<string, string | undefined>();
|
|
218
|
+
for (const placeholder of placeholders) {
|
|
219
|
+
previousValues.set(placeholder.envVar, process.env[placeholder.envVar]);
|
|
220
|
+
process.env[placeholder.envVar] = PROVIDER_STARTUP_PLACEHOLDER_API_KEY;
|
|
221
|
+
}
|
|
222
|
+
let providerRegistry: ProviderRegistry;
|
|
223
|
+
try {
|
|
224
|
+
providerRegistry = new ProviderRegistry(options);
|
|
225
|
+
} finally {
|
|
226
|
+
for (const [envVar, previousValue] of previousValues) {
|
|
227
|
+
if (previousValue === undefined) {
|
|
228
|
+
delete process.env[envVar];
|
|
229
|
+
} else {
|
|
230
|
+
process.env[envVar] = previousValue;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
for (const placeholder of placeholders) {
|
|
236
|
+
const provider = providerRegistry.get(placeholder.providerId);
|
|
237
|
+
if (hasMutableApiKeyProvider(provider) && provider.apiKey === PROVIDER_STARTUP_PLACEHOLDER_API_KEY) {
|
|
238
|
+
provider.apiKey = '';
|
|
239
|
+
}
|
|
240
|
+
if (hasMutableConfiguredProvider(provider)) {
|
|
241
|
+
provider.configured = false;
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
return providerRegistry;
|
|
245
|
+
}
|
|
246
|
+
|
|
139
247
|
export interface RuntimeServicesOptions {
|
|
140
248
|
readonly runtimeBus: RuntimeEventBus;
|
|
141
249
|
readonly runtimeStore: RuntimeStore;
|
|
@@ -285,7 +393,7 @@ export function createRuntimeServices(options: RuntimeServicesOptions): RuntimeS
|
|
|
285
393
|
const modelLimitsService = new ModelLimitsService({
|
|
286
394
|
cachePath: shellPaths.resolveUserPath(GOODVIBES_AGENT_SURFACE_ROOT, 'model-limits.json'),
|
|
287
395
|
});
|
|
288
|
-
const providerRegistry = new ProviderRegistry({
|
|
396
|
+
const providerRegistry = createLaunchTolerantProviderRegistry({
|
|
289
397
|
configManager,
|
|
290
398
|
subscriptionManager,
|
|
291
399
|
secretsManager,
|
package/src/version.ts
CHANGED
|
@@ -6,7 +6,7 @@ import { join } from 'node:path';
|
|
|
6
6
|
// The prebuild script updates the fallback value before compilation.
|
|
7
7
|
// Uses import.meta.dir (Bun) to locate package.json relative to this file,
|
|
8
8
|
// which is correct regardless of the process working directory.
|
|
9
|
-
let _version = '0.1.53';
|
|
9
|
+
let _version = '0.1.55';
|
|
10
10
|
let _sdkVersion = '0.33.35';
|
|
11
11
|
try {
|
|
12
12
|
const pkg = JSON.parse(readFileSync(join(import.meta.dir, '..', 'package.json'), 'utf-8')) as {
|
|
@@ -1,217 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* /eval command handler.
|
|
3
|
-
*
|
|
4
|
-
* Implements the Evaluation Harness commands:
|
|
5
|
-
*
|
|
6
|
-
* /eval list — List all available eval suites
|
|
7
|
-
* /eval run <suite> --yes — Run a named suite (or 'all')
|
|
8
|
-
* /eval compare <baseline-file> — Compare last run against a baseline file
|
|
9
|
-
* /eval gate <suite> --yes — Run suite and apply CI gate (exits 1 on regression)
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import type { SlashCommand, CommandContext } from '../command-registry.ts';
|
|
13
|
-
import { EvalRunner } from '@/runtime/index.ts';
|
|
14
|
-
import { BUILTIN_SUITES } from '@/runtime/index.ts';
|
|
15
|
-
import { formatScorecard } from '@/runtime/index.ts';
|
|
16
|
-
import { loadBaseline, captureBaseline, formatBaselineComparison, writeBaseline } from '@/runtime/index.ts';
|
|
17
|
-
import type { EvalRegistry } from '../../panels/eval-panel.ts';
|
|
18
|
-
import { formatSuiteResult, formatGateResult } from '@/runtime/index.ts';
|
|
19
|
-
import { requireShellPaths } from './runtime-services.ts';
|
|
20
|
-
import { summarizeError } from '@pellux/goodvibes-sdk/platform/utils';
|
|
21
|
-
import { requireYesFlag, stripYesFlag } from './confirmation.ts';
|
|
22
|
-
|
|
23
|
-
// ── Subcommand helpers ────────────────────────────────────────────────────────
|
|
24
|
-
|
|
25
|
-
function printSuiteList(context: CommandContext): void {
|
|
26
|
-
context.print('[eval] Available suites:');
|
|
27
|
-
for (const [name, scenarios] of Object.entries(BUILTIN_SUITES)) {
|
|
28
|
-
context.print(` ${name} (${scenarios.length} scenarios)`);
|
|
29
|
-
for (const s of scenarios) {
|
|
30
|
-
context.print(` - ${s.id}: ${s.name}`);
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
context.print('[eval] Usage: /eval run <suite> --yes or /eval run all --yes');
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
function getRegistry(context: CommandContext): EvalRegistry | undefined {
|
|
37
|
-
return context.extensions.evalRegistry;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
// ── /eval list ────────────────────────────────────────────────────────────────
|
|
41
|
-
|
|
42
|
-
function handleList(_args: string[], context: CommandContext): void {
|
|
43
|
-
printSuiteList(context);
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
// ── /eval run ────────────────────────────────────────────────────────────────
|
|
47
|
-
|
|
48
|
-
async function handleRun(args: string[], context: CommandContext): Promise<void> {
|
|
49
|
-
const { rest, yes } = stripYesFlag(args);
|
|
50
|
-
const suiteName = rest[0] ?? 'all';
|
|
51
|
-
const registry = getRegistry(context);
|
|
52
|
-
|
|
53
|
-
const suitesToRun =
|
|
54
|
-
suiteName === 'all'
|
|
55
|
-
? Object.keys(BUILTIN_SUITES)
|
|
56
|
-
: BUILTIN_SUITES[suiteName]
|
|
57
|
-
? [suiteName]
|
|
58
|
-
: null;
|
|
59
|
-
|
|
60
|
-
if (!suitesToRun) {
|
|
61
|
-
context.print(`[eval] Unknown suite: "${suiteName}". Run /eval list to see available suites.`);
|
|
62
|
-
return;
|
|
63
|
-
}
|
|
64
|
-
if (!yes) {
|
|
65
|
-
requireYesFlag(context, `run eval suite ${suiteName}`, '/eval run <suite|all> --yes');
|
|
66
|
-
return;
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
const runner = new EvalRunner();
|
|
70
|
-
registry?.setRunning(true);
|
|
71
|
-
|
|
72
|
-
for (const name of suitesToRun) {
|
|
73
|
-
const scenarios = BUILTIN_SUITES[name];
|
|
74
|
-
if (!scenarios) continue;
|
|
75
|
-
|
|
76
|
-
context.print(`[eval] Running suite: ${name} (${scenarios.length} scenarios)...`);
|
|
77
|
-
const result = await runner.runSuite(name, scenarios);
|
|
78
|
-
registry?.push(result);
|
|
79
|
-
|
|
80
|
-
context.print(formatSuiteResult(result));
|
|
81
|
-
|
|
82
|
-
for (const r of result.results) {
|
|
83
|
-
context.print(formatScorecard(r.scorecard));
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
registry?.setRunning(false);
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
// ── /eval compare ─────────────────────────────────────────────────────────────
|
|
91
|
-
|
|
92
|
-
async function handleCompare(args: string[], context: CommandContext): Promise<void> {
|
|
93
|
-
const baselineFile = args[0] ?? '.goodvibes/eval/baseline.json';
|
|
94
|
-
const registry = getRegistry(context);
|
|
95
|
-
const projectRoot = requireShellPaths(context).workingDirectory;
|
|
96
|
-
const suiteResults = registry?.getSuiteResults() ?? [];
|
|
97
|
-
|
|
98
|
-
if (suiteResults.length === 0) {
|
|
99
|
-
context.print('[eval] No suite results to compare. Run /eval run <suite> --yes first.');
|
|
100
|
-
return;
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
const baseline = await loadBaseline(baselineFile, projectRoot);
|
|
104
|
-
if (!baseline) {
|
|
105
|
-
context.print(`[eval] Baseline file not found: ${baselineFile}`);
|
|
106
|
-
context.print('[eval] Tip: run /eval gate <suite> [baseline-file] --save-baseline --yes to create a baseline.');
|
|
107
|
-
return;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
for (const result of suiteResults) {
|
|
111
|
-
context.print(formatBaselineComparison(baseline, result));
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
// ── /eval gate ────────────────────────────────────────────────────────────────
|
|
116
|
-
|
|
117
|
-
async function handleGate(args: string[], context: CommandContext): Promise<void> {
|
|
118
|
-
const { rest, yes } = stripYesFlag(args);
|
|
119
|
-
const positional = rest.filter((arg) => arg !== '--save-baseline');
|
|
120
|
-
const suiteName = positional[0];
|
|
121
|
-
const baselineFile = positional[1] ?? '.goodvibes/eval/baseline.json';
|
|
122
|
-
const saveFlag = rest.includes('--save-baseline');
|
|
123
|
-
const projectRoot = requireShellPaths(context).workingDirectory;
|
|
124
|
-
|
|
125
|
-
if (!suiteName) {
|
|
126
|
-
context.print('[eval] Usage: /eval gate <suite> [baseline-file] [--save-baseline] --yes');
|
|
127
|
-
return;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
const scenarios = BUILTIN_SUITES[suiteName];
|
|
131
|
-
if (!scenarios) {
|
|
132
|
-
context.print(`[eval] Unknown suite: "${suiteName}". Run /eval list to see available suites.`);
|
|
133
|
-
return;
|
|
134
|
-
}
|
|
135
|
-
if (!yes) {
|
|
136
|
-
requireYesFlag(context, `run eval gate ${suiteName}`, '/eval gate <suite> [baseline-file] [--save-baseline] --yes');
|
|
137
|
-
return;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
const registry = getRegistry(context);
|
|
141
|
-
const runner = new EvalRunner();
|
|
142
|
-
|
|
143
|
-
context.print(`[eval] Gate: running suite "${suiteName}"...`);
|
|
144
|
-
registry?.setRunning(true);
|
|
145
|
-
const fresh = await runner.runSuite(suiteName, scenarios);
|
|
146
|
-
registry?.push(fresh);
|
|
147
|
-
registry?.setRunning(false);
|
|
148
|
-
|
|
149
|
-
const baseline = await loadBaseline(baselineFile, projectRoot);
|
|
150
|
-
const gate = runner.evaluateGate(fresh, baseline);
|
|
151
|
-
registry?.pushGate(gate);
|
|
152
|
-
|
|
153
|
-
context.print(formatGateResult(gate));
|
|
154
|
-
|
|
155
|
-
if (saveFlag || !baseline) {
|
|
156
|
-
const label = suiteName ?? 'latest';
|
|
157
|
-
const newBaseline = captureBaseline(label, [fresh]);
|
|
158
|
-
try {
|
|
159
|
-
await writeBaseline(baselineFile, newBaseline, projectRoot);
|
|
160
|
-
context.print(`[eval] Baseline saved to ${baselineFile}`);
|
|
161
|
-
} catch (err) {
|
|
162
|
-
context.print(`[eval] Warning: could not save baseline: ${summarizeError(err)}`);
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
if (!gate.passed) {
|
|
167
|
-
context.print(`[eval] Gate FAILED: ${gate.regressions.length} regression(s) detected.`);
|
|
168
|
-
} else {
|
|
169
|
-
context.print('[eval] Gate PASSED.');
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
// ── Top-level command ─────────────────────────────────────────────────────────
|
|
174
|
-
|
|
175
|
-
export const evalCommand: SlashCommand = {
|
|
176
|
-
name: 'eval',
|
|
177
|
-
description: 'Evaluation harness: run benchmark suites, compare baselines, and gate regressions.',
|
|
178
|
-
usage: '<subcommand> [args]',
|
|
179
|
-
argsHint: 'list|run <suite> --yes|compare <baseline>|gate <suite> --yes',
|
|
180
|
-
handler: async (args: string[], context: CommandContext): Promise<void> => {
|
|
181
|
-
const [sub, ...rest] = args;
|
|
182
|
-
|
|
183
|
-
switch (sub) {
|
|
184
|
-
case 'list':
|
|
185
|
-
case 'ls':
|
|
186
|
-
handleList(rest, context);
|
|
187
|
-
break;
|
|
188
|
-
|
|
189
|
-
case 'run':
|
|
190
|
-
await handleRun(rest, context);
|
|
191
|
-
break;
|
|
192
|
-
|
|
193
|
-
case 'compare':
|
|
194
|
-
case 'cmp':
|
|
195
|
-
await handleCompare(rest, context);
|
|
196
|
-
break;
|
|
197
|
-
|
|
198
|
-
case 'gate':
|
|
199
|
-
await handleGate(rest, context);
|
|
200
|
-
break;
|
|
201
|
-
|
|
202
|
-
default: {
|
|
203
|
-
const usage = [
|
|
204
|
-
'Usage: /eval <subcommand>',
|
|
205
|
-
' list — List all available eval suites',
|
|
206
|
-
' run <suite|all> --yes — Run a named suite (or all suites)',
|
|
207
|
-
' compare [baseline-file] — Compare last results against baseline',
|
|
208
|
-
' gate <suite> [baseline-file] --yes',
|
|
209
|
-
' — Run suite and apply regression gate',
|
|
210
|
-
' --save-baseline — Save fresh run as new baseline',
|
|
211
|
-
].join('\n');
|
|
212
|
-
context.print(usage);
|
|
213
|
-
break;
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
},
|
|
217
|
-
};
|