@elsium-ai/cli 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +2382 -0
- package/dist/commands/cost.d.ts +2 -0
- package/dist/commands/cost.d.ts.map +1 -0
- package/dist/commands/dev.d.ts +2 -0
- package/dist/commands/dev.d.ts.map +1 -0
- package/dist/commands/eval.d.ts +2 -0
- package/dist/commands/eval.d.ts.map +1 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/prompt.d.ts +2 -0
- package/dist/commands/prompt.d.ts.map +1 -0
- package/dist/commands/trace.d.ts +2 -0
- package/dist/commands/trace.d.ts.map +1 -0
- package/dist/commands/xray.d.ts +2 -0
- package/dist/commands/xray.d.ts.map +1 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -0
- package/package.json +34 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,2382 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
// @bun
|
|
3
|
+
// Bundler runtime helpers: lazy module initialisation and live export bindings.
var __defProp = Object.defineProperty;

// Identity function; bound below to build getters that return a fixed value.
var __returnValue = (v) => v;

/**
 * Setter installed on exported bindings. Called with `this` bound to the
 * export map; stores a getter under `name` that always returns `newValue`.
 */
function __exportSetter(name, newValue) {
  const constantGetter = __returnValue.bind(null, newValue);
  this[name] = constantGetter;
}

/**
 * Define every enumerable key of `all` on `target` as an accessor property
 * whose getter is the function currently stored in `all[name]`.
 */
var __export = (target, all) => {
  for (var name in all) {
    const descriptor = {
      get: all[name],
      set: __exportSetter.bind(all, name),
      enumerable: true,
      configurable: true
    };
    __defProp(target, name, descriptor);
  }
};

/**
 * Wrap a module initialiser so it runs at most once. The initialiser is
 * cleared before it is invoked (and is called with `0`), so a re-entrant call
 * made during initialisation returns the not-yet-assigned result.
 */
var __esm = (fn, res) => () => {
  if (fn) {
    const initializer = fn;
    fn = 0;
    res = initializer(0);
  }
  return res;
};
|
|
18
|
+
|
|
19
|
+
// ../core/src/errors.ts
|
|
20
|
+
var ElsiumError;
|
|
21
|
+
var init_errors = __esm(() => {
|
|
22
|
+
/**
 * Error type carrying structured details (code, provider, HTTP status,
 * retry hints, metadata) alongside the usual message.
 */
ElsiumError = class ElsiumError extends Error {
  code;
  provider;
  model;
  statusCode;
  retryable;
  retryAfterMs;
  cause;
  metadata;

  /**
   * @param {object} details - Must provide `message`; every other field is
   *   copied onto the instance as-is (missing fields become `undefined`).
   */
  constructor(details) {
    const {
      message,
      code,
      provider,
      model,
      statusCode,
      retryable,
      retryAfterMs,
      cause,
      metadata
    } = details;
    super(message);
    this.name = "ElsiumError";
    this.code = code;
    this.provider = provider;
    this.model = model;
    this.statusCode = statusCode;
    this.retryable = retryable;
    this.retryAfterMs = retryAfterMs;
    this.cause = cause;
    this.metadata = metadata;
  }

  /** Plain-object view of the error; `cause` is intentionally not included. */
  toJSON() {
    const { name, code, message, provider, model, statusCode, retryable, retryAfterMs, metadata } = this;
    return { name, code, message, provider, model, statusCode, retryable, retryAfterMs, metadata };
  }

  /** Generic provider failure; `retryable` defaults to false when not given. */
  static providerError(message, opts) {
    const details = {
      code: "PROVIDER_ERROR",
      message,
      provider: opts.provider,
      statusCode: opts.statusCode,
      retryable: opts.retryable ?? false,
      cause: opts.cause
    };
    return new ElsiumError(details);
  }

  /** HTTP 429 from `provider`; always retryable. */
  static rateLimit(provider, retryAfterMs) {
    const details = {
      code: "RATE_LIMIT",
      message: `Rate limited by ${provider}`,
      provider,
      statusCode: 429,
      retryable: true,
      retryAfterMs
    };
    return new ElsiumError(details);
  }

  /** HTTP 401 from `provider`; never retryable. */
  static authError(provider) {
    const details = {
      code: "AUTH_ERROR",
      message: `Authentication failed for ${provider}. Check your API key.`,
      provider,
      statusCode: 401,
      retryable: false
    };
    return new ElsiumError(details);
  }

  /** Request to `provider` exceeded `timeoutMs`; retryable. */
  static timeout(provider, timeoutMs) {
    const details = {
      code: "TIMEOUT",
      message: `Request to ${provider} timed out after ${timeoutMs}ms`,
      provider,
      retryable: true
    };
    return new ElsiumError(details);
  }

  /** Validation failure with optional free-form metadata; not retryable. */
  static validation(message, metadata) {
    const details = {
      code: "VALIDATION_ERROR",
      message,
      retryable: false,
      metadata
    };
    return new ElsiumError(details);
  }

  /** Token budget overrun; records both figures in `metadata`. */
  static budgetExceeded(spent, budget) {
    const details = {
      code: "BUDGET_EXCEEDED",
      message: `Token budget exceeded: spent ${spent}, budget ${budget}`,
      retryable: false,
      metadata: { spent, budget }
    };
    return new ElsiumError(details);
  }
};
|
|
110
|
+
});
|
|
111
|
+
// ../core/src/utils.ts
|
|
112
|
+
import { randomBytes } from "crypto";
|
|
113
|
+
/**
 * Produce `bytes` random bytes from `randomBytes`, hex-encoded.
 * @param {number} bytes - Number of random bytes to draw.
 * @returns {string} Hex string, two characters per byte.
 */
function cryptoHex(bytes) {
  const entropy = randomBytes(bytes);
  return entropy.toString("hex");
}
|
|
116
|
+
/**
 * Build an identifier of the form `<prefix>_<base36 timestamp>_<8 hex chars>`.
 * @param {string} [prefix="els"] - Leading tag for the id.
 * @returns {string}
 */
function generateId(prefix = "els") {
  const timeComponent = Date.now().toString(36);
  const randomSuffix = cryptoHex(4);
  return `${prefix}_${timeComponent}_${randomSuffix}`;
}
|
|
121
|
+
/**
 * Build a trace identifier: `trc_<base36 timestamp>_<12 hex chars>`.
 * @returns {string}
 */
function generateTraceId() {
  const stamp = Date.now().toString(36);
  const entropy = cryptoHex(6);
  return `trc_${stamp}_${entropy}`;
}
|
|
126
|
+
/**
 * Resolve after roughly `ms` milliseconds (scheduled with `setTimeout`).
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
async function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
129
|
+
var init_utils = () => {};
|
|
130
|
+
|
|
131
|
+
// ../core/src/stream.ts
|
|
132
|
+
/**
 * Decide whether a checkpoint is due: some text has accumulated and at least
 * `intervalMs` has elapsed since `lastCheckpointTime`.
 * @param {number} lastCheckpointTime - Epoch milliseconds of the last checkpoint.
 * @param {number} intervalMs - Minimum gap between checkpoints.
 * @param {number} textLength - Characters accumulated so far.
 * @returns {boolean}
 */
function shouldEmitCheckpoint(lastCheckpointTime, intervalMs, textLength) {
  if (!(textLength > 0)) {
    return false;
  }
  return Date.now() - lastCheckpointTime >= intervalMs;
}
|
|
136
|
+
/**
 * Snapshot the text streamed so far.
 * @param {string} textAccumulator - Text received up to this point.
 * @param {number} eventIndex - Count of events seen so far.
 * @param {number} now - Timestamp to record on the checkpoint.
 * @returns {{id: string, timestamp: number, text: string, tokensSoFar: number, eventIndex: number}}
 */
function createCheckpoint(textAccumulator, eventIndex, now) {
  const id = generateId("ckpt");
  // Rough token estimate: one token per 1.5 characters, rounded up.
  const tokensSoFar = Math.ceil(textAccumulator.length / 1.5);
  return { id, timestamp: now, text: textAccumulator, tokensSoFar, eventIndex };
}
|
|
145
|
+
/**
 * Coerce any thrown value into an `Error`. Existing `Error` instances are
 * returned unchanged; anything else is stringified into a new `Error`.
 */
function toError(err) {
  if (err instanceof Error) {
    return err;
  }
  return new Error(String(err));
}
|
|
148
|
+
/**
 * Yield the single event that reports a stream failure.
 *
 * With no accumulated text this is a plain `error` event. Otherwise
 * `onPartialRecovery` (if supplied) is invoked first and a `recovery` event
 * carrying the partial text is yielded.
 */
function* emitErrorEvent(err, textAccumulator, onPartialRecovery) {
  const error = toError(err);
  if (!(textAccumulator.length > 0)) {
    yield { type: "error", error };
    return;
  }
  onPartialRecovery?.(textAccumulator, error);
  yield { type: "recovery", partialText: textAccumulator, error };
}
|
|
157
|
+
/**
 * Build an ElsiumStream fed by a push-style executor.
 *
 * `executor(emit)` must return a promise. Each `emit(event)` either resolves
 * a consumer that is already waiting in `next()` or appends to an internal
 * buffer capped at MAX_BUFFER_SIZE; events arriving while the buffer is full
 * are counted as dropped. When the executor settles:
 *   - fulfilled: if anything was dropped, one `error` event describing the
 *     overflow is delivered, then the stream completes;
 *   - rejected: the error is stored, a waiting consumer receives an `error`
 *     event, and later `next()` calls reject once the buffer is drained.
 *
 * @param {(emit: (event: object) => void) => Promise<unknown>} executor
 * @returns {ElsiumStream}
 */
function createStream(executor) {
  // Resolver of the consumer currently parked in next(), if any.
  let resolve2 = null;
  const buffer = [];
  let done = false;
  let error = null;
  let dropped = 0;
  const source = {
    [Symbol.asyncIterator]() {
      return {
        next() {
          // Buffered events are always drained before done/error is reported.
          if (buffer.length > 0) {
            const value = buffer.shift();
            return Promise.resolve({ value, done: false });
          }
          if (done) {
            return Promise.resolve({ value: undefined, done: true });
          }
          if (error) {
            return Promise.reject(error);
          }
          return new Promise((r) => {
            resolve2 = r;
          });
        }
      };
    }
  };
  // Hand an event to the waiting consumer, or queue it regardless of the cap.
  const deliver = (event) => {
    if (resolve2) {
      const r = resolve2;
      resolve2 = null;
      r({ value: event, done: false });
    } else {
      buffer.push(event);
    }
  };
  const emit = (event) => {
    if (resolve2 || buffer.length < MAX_BUFFER_SIZE) {
      deliver(event);
    } else {
      dropped++;
    }
  };
  executor(emit).then(() => {
    if (dropped > 0) {
      // Bypass the cap here: routed through emit(), this notice would itself
      // be dropped whenever the buffer is still full, and the consumer would
      // never learn that events were lost.
      deliver({
        type: "error",
        error: new Error(`Stream buffer overflow: ${dropped} events dropped`)
      });
    }
    done = true;
    if (resolve2) {
      const r = resolve2;
      resolve2 = null;
      r({ value: undefined, done: true });
    }
  }).catch((e) => {
    error = e instanceof Error ? e : new Error(String(e));
    if (resolve2) {
      resolve2({ value: { type: "error", error }, done: false });
      resolve2 = null;
    }
  });
  return new ElsiumStream(source);
}
|
|
219
|
+
var ElsiumStream, MAX_BUFFER_SIZE = 1e4;
|
|
220
|
+
var init_stream = __esm(() => {
|
|
221
|
+
init_utils();
|
|
222
|
+
/**
 * Wrapper around an async-iterable of stream events with helpers to collect
 * text, build a response summary, transform the stream, or add checkpointing.
 */
ElsiumStream = class ElsiumStream {
  source;
  // Set once this stream's own async iterator has been started.
  iterating = false;

  /** @param {AsyncIterable<object>} source - Event source to wrap. */
  constructor(source) {
    this.source = source;
  }

  /**
   * Iterate the raw events. A second iteration through this method throws.
   */
  async* [Symbol.asyncIterator]() {
    if (this.iterating) {
      throw new Error("ElsiumStream supports only a single consumer");
    }
    this.iterating = true;
    yield* this.source;
  }

  /** Async-iterable over the `text` of each `text_delta` event. */
  text() {
    const source = this.source;
    return {
      async* [Symbol.asyncIterator]() {
        for await (const event of source) {
          if (event.type === "text_delta") {
            yield event.text;
          }
        }
      }
    };
  }

  /** Concatenate every text delta until the source ends. */
  async toText() {
    const parts = [];
    for await (const text of this.text()) {
      parts.push(text);
    }
    return parts.join("");
  }

  /**
   * Collect text deltas for at most `timeoutMs` milliseconds.
   *
   * Returns whatever text arrived before the deadline or before the source
   * ended. If the source throws, the error is rethrown only when no text was
   * collected; otherwise the partial text is returned.
   *
   * @param {number} timeoutMs - Overall time budget in milliseconds.
   * @returns {Promise<string>}
   */
  async toTextWithTimeout(timeoutMs) {
    const parts = [];
    const deadline = Date.now() + timeoutMs;
    const iterator = this.source[Symbol.asyncIterator]();
    const timedOut = Symbol("timedOut");
    // True when we stop waiting while a next() call is still outstanding.
    let nextPending = false;
    try {
      while (true) {
        const remaining = deadline - Date.now();
        if (remaining <= 0)
          break;
        let timer;
        const timeoutPromise = new Promise((resolve2) => {
          timer = setTimeout(() => resolve2(timedOut), remaining);
        });
        let result;
        try {
          result = await Promise.race([iterator.next(), timeoutPromise]);
        } finally {
          // Clear the timer on rejection too, so it cannot outlive the call.
          clearTimeout(timer);
        }
        if (result === timedOut) {
          nextPending = true;
          break;
        }
        if (result.done)
          break;
        const event = result.value;
        if (event.type === "text_delta") {
          parts.push(event.text);
        }
      }
    } catch (err) {
      if (parts.length === 0)
        throw err;
    } finally {
      if (nextPending) {
        // An async generator queues return() behind the outstanding next(),
        // so awaiting it would block until the stalled source produces an
        // event and defeat the timeout. Request cleanup without waiting.
        try {
          Promise.resolve(iterator.return?.()).catch(() => {});
        } catch {
          // Best-effort cleanup only; the timeout result still stands.
        }
      } else {
        await iterator.return?.();
      }
    }
    return parts.join("");
  }

  /**
   * Drain the source and summarise it. Only `text_delta` and `message_end`
   * events are inspected; `usage` and `stopReason` stay `null` if no
   * `message_end` event is seen.
   */
  async toResponse() {
    const parts = [];
    let usage = null;
    let stopReason = null;
    for await (const event of this.source) {
      switch (event.type) {
        case "text_delta":
          parts.push(event.text);
          break;
        case "message_end":
          usage = event.usage;
          stopReason = event.stopReason;
          break;
      }
    }
    return { text: parts.join(""), usage, stopReason };
  }

  /** Wrap `transform(source)` in a new ElsiumStream. */
  pipe(transform) {
    return new ElsiumStream(transform(this.source));
  }

  /**
   * Return a stream that forwards every event, interleaves `checkpoint`
   * events (at most one per `checkpointIntervalMs`, once text exists), and
   * converts a source failure into a final `recovery` or `error` event.
   *
   * @param {{checkpointIntervalMs?: number, onCheckpoint?: Function, onPartialRecovery?: Function}} [options]
   */
  resilient(options = {}) {
    const { checkpointIntervalMs = 1000, onCheckpoint, onPartialRecovery } = options;
    const source = this.source;
    const resilientSource = {
      async* [Symbol.asyncIterator]() {
        let lastCheckpointTime = Date.now();
        let textAccumulator = "";
        let eventIndex = 0;
        try {
          for await (const event of source) {
            eventIndex++;
            if (event.type === "text_delta") {
              textAccumulator += event.text;
            }
            yield event;
            if (shouldEmitCheckpoint(lastCheckpointTime, checkpointIntervalMs, textAccumulator.length)) {
              const now = Date.now();
              const checkpoint = createCheckpoint(textAccumulator, eventIndex, now);
              onCheckpoint?.(checkpoint);
              yield { type: "checkpoint", checkpoint };
              lastCheckpointTime = now;
            }
          }
        } catch (err) {
          yield* emitErrorEvent(err, textAccumulator, onPartialRecovery);
        }
      }
    };
    return new ElsiumStream(resilientSource);
  }
};
|
|
336
|
+
});
|
|
337
|
+
|
|
338
|
+
// ../core/src/logger.ts
|
|
339
|
+
var init_logger = () => {};
|
|
340
|
+
|
|
341
|
+
// ../core/src/config.ts
|
|
342
|
+
var init_config = __esm(() => {
|
|
343
|
+
init_errors();
|
|
344
|
+
});
|
|
345
|
+
|
|
346
|
+
// ../core/src/circuit-breaker.ts
|
|
347
|
+
var init_circuit_breaker = __esm(() => {
|
|
348
|
+
init_errors();
|
|
349
|
+
});
|
|
350
|
+
|
|
351
|
+
// ../core/src/dedup.ts
|
|
352
|
+
var init_dedup = __esm(() => {
|
|
353
|
+
init_errors();
|
|
354
|
+
});
|
|
355
|
+
|
|
356
|
+
// ../core/src/policy.ts
|
|
357
|
+
var init_policy = __esm(() => {
|
|
358
|
+
init_errors();
|
|
359
|
+
init_utils();
|
|
360
|
+
});
|
|
361
|
+
|
|
362
|
+
// ../core/src/shutdown.ts
|
|
363
|
+
var init_shutdown = __esm(() => {
|
|
364
|
+
init_errors();
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
// ../core/src/index.ts
|
|
368
|
+
var init_src = __esm(() => {
|
|
369
|
+
init_errors();
|
|
370
|
+
init_stream();
|
|
371
|
+
init_logger();
|
|
372
|
+
init_config();
|
|
373
|
+
init_utils();
|
|
374
|
+
init_circuit_breaker();
|
|
375
|
+
init_dedup();
|
|
376
|
+
init_policy();
|
|
377
|
+
init_shutdown();
|
|
378
|
+
});
|
|
379
|
+
|
|
380
|
+
// ../testing/src/mock-provider.ts
|
|
381
|
+
/**
 * Create an in-memory provider that replays canned responses.
 *
 * Responses are served in order from `responses`; once exhausted,
 * `defaultResponse` (or an empty-content response) is used. Every request is
 * recorded in `calls` and passed to `onRequest` when supplied.
 *
 * @param {{responses?: object[], defaultResponse?: object, onRequest?: Function}} [options]
 * @returns {object} Provider exposing `complete`, `stream`, `listModels`,
 *   `reset`, plus `calls` and `callCount` accessors.
 */
function mockProvider(options = {}) {
  const { responses = [], defaultResponse, onRequest } = options;
  const calls = [];
  let callIndex = 0;

  // Pick the response config for the current call and advance the cursor.
  function getNextResponse() {
    if (callIndex < responses.length) {
      return responses[callIndex++];
    }
    callIndex++;
    return defaultResponse ? defaultResponse : { content: "" };
  }

  // Push message_start, one text_delta per word, then message_end.
  async function emitStreamEvents(emit, config, request) {
    if (config.delay) {
      await new Promise((r) => setTimeout(r, config.delay));
    }
    emit({
      type: "message_start",
      id: generateId("msg"),
      // Same model fallback chain as buildResponse().
      model: config.model ?? request?.model ?? "mock-model"
    });
    const content = config.content ?? "";
    if (content) {
      const words = content.split(" ");
      words.forEach((word, index) => {
        // Re-attach the separator only between words so the concatenated
        // deltas equal `content` exactly (no extra trailing space).
        const text = index < words.length - 1 ? `${word} ` : word;
        if (text) {
          emit({ type: "text_delta", text });
        }
      });
    }
    emit({
      type: "message_end",
      usage: {
        inputTokens: config.usage?.inputTokens ?? 10,
        outputTokens: config.usage?.outputTokens ?? 5,
        totalTokens: config.usage?.totalTokens ?? 15
      },
      stopReason: config.stopReason ?? "end_turn"
    });
  }

  // Build the full (non-streaming) response object for complete().
  function buildResponse(config, request) {
    const model = config.model ?? request.model ?? "mock-model";
    const content = config.content ?? "";
    const toolCalls = config.toolCalls?.map((tc) => ({
      id: tc.id ?? generateId("tc"),
      name: tc.name,
      arguments: tc.arguments
    }));
    const usage = {
      inputTokens: config.usage?.inputTokens ?? Math.ceil(content.length / 4),
      outputTokens: config.usage?.outputTokens ?? Math.ceil(content.length / 4),
      totalTokens: 0,
      ...config.usage
    };
    // totalTokens is always recomputed from the two components.
    usage.totalTokens = usage.inputTokens + usage.outputTokens;
    const message = {
      role: "assistant",
      content,
      ...toolCalls?.length ? { toolCalls } : {}
    };
    return {
      id: generateId("msg"),
      message,
      usage,
      cost: { inputCost: 0, outputCost: 0, totalCost: 0, currency: "USD" },
      model,
      provider: "mock",
      stopReason: config.stopReason ?? (toolCalls?.length ? "tool_use" : "end_turn"),
      latencyMs: config.delay ?? 0,
      traceId: generateTraceId()
    };
  }

  return {
    name: "mock",
    defaultModel: "mock-model",
    get calls() {
      return calls;
    },
    get callCount() {
      return calls.length;
    },
    async complete(request) {
      calls.push(request);
      onRequest?.(request);
      const config = getNextResponse();
      if (config.delay) {
        await new Promise((r) => setTimeout(r, config.delay));
      }
      return buildResponse(config, request);
    },
    stream(request) {
      calls.push(request);
      onRequest?.(request);
      const config = getNextResponse();
      return createStream((emit) => emitStreamEvents(emit, config, request));
    },
    async listModels() {
      return ["mock-model"];
    },
    reset() {
      calls.length = 0;
      callIndex = 0;
    }
  };
}
|
|
487
|
+
var init_mock_provider = __esm(() => {
|
|
488
|
+
init_src();
|
|
489
|
+
});
|
|
490
|
+
|
|
491
|
+
// ../testing/src/fixtures.ts
|
|
492
|
+
import { createHash } from "crypto";
|
|
493
|
+
/**
 * Fingerprint a message list: `role:content` pairs joined with `|`, hashed
 * with SHA-256 and truncated to the first 16 hex characters.
 * @param {{role: string, content: string}[]} messages
 * @returns {string}
 */
function hashMessages(messages) {
  const serialized = messages.map(({ role, content }) => `${role}:${content}`).join("|");
  const digest = createHash("sha256").update(serialized).digest("hex");
  return digest.slice(0, 16);
}
|
|
497
|
+
/**
 * Bundle recorded request/response entries under a name.
 *
 * @param {string} name - Fixture name.
 * @param {object[]} entries - Items shaped `{request, response, timestamp?}`.
 * @returns {object} Fixture with `toProvider(options)` and `toJSON()`.
 *   `toProvider()` replays responses in order. With
 *   `{matching: "request-hash"}`, `complete()` first looks the request up by
 *   message hash and only falls back to in-order replay on a miss.
 *   `toJSON()` returns a JSON string, not an object.
 */
function createFixture(name, entries) {
  // Reduce messages to role + string content, so fixture entries and incoming
  // requests are hashed from the same representation.
  const normalizeMessages = (messages) => messages.map((m) => ({
    role: m.role,
    content: typeof m.content === "string" ? m.content : "[complex]"
  }));
  return {
    name,
    entries,
    toProvider(options) {
      const provider = mockProvider({
        responses: entries.map((e) => e.response)
      });
      if (options?.matching !== "request-hash") {
        return provider;
      }
      const hashMap = new Map();
      for (const entry of entries) {
        hashMap.set(hashMessages(normalizeMessages(entry.request.messages)), entry.response);
      }
      const originalComplete = provider.complete.bind(provider);
      const wrapped = Object.create(provider);
      wrapped.complete = async (request) => {
        const hash = hashMessages(normalizeMessages(request.messages));
        const matched = hashMap.get(hash);
        if (matched) {
          // Hash hits are answered by a throwaway provider; log the request
          // on the shared one so `calls` / `callCount` still reflect it.
          provider.calls.push(request);
          return mockProvider({ responses: [matched] }).complete(request);
        }
        return originalComplete(request);
      };
      return wrapped;
    },
    toJSON() {
      return JSON.stringify({
        name,
        entries: entries.map((e) => ({
          ...e,
          // Stamp entries that were created without a timestamp.
          timestamp: e.timestamp ?? new Date().toISOString()
        }))
      }, null, 2);
    }
  };
}
|
|
542
|
+
/**
 * Rebuild a fixture from JSON text, reading its `name` and `entries` fields.
 * @param {string} json - Serialized fixture.
 * @returns {object} Fixture created via `createFixture`.
 */
function loadFixture(json) {
  const { name, entries } = JSON.parse(json);
  return createFixture(name, entries);
}
|
|
546
|
+
/**
 * Create a recorder that captures `complete()` traffic of wrapped providers.
 *
 * @returns {object} `wrap(provider)` returns an object inheriting from the
 *   provider whose `complete` logs each request/response pair;
 *   `getEntries()` returns a copy of the log; `toFixture(name)` builds a
 *   fixture from a copy of the log; `clear()` empties it.
 */
function createRecorder() {
  const entries = [];

  // Non-string message content is replaced by a placeholder.
  const flattenContent = (content) => (typeof content === "string" ? content : "[complex]");

  const describeRequest = (request) => ({
    messages: request.messages.map((m) => ({ role: m.role, content: flattenContent(m.content) })),
    model: request.model,
    system: request.system
  });

  const describeResponse = (response) => ({
    content: typeof response.message.content === "string" ? response.message.content : "",
    toolCalls: response.message.toolCalls,
    stopReason: response.stopReason,
    usage: response.usage,
    model: response.model
  });

  return {
    wrap(provider) {
      const forward = provider.complete.bind(provider);
      const recording = Object.create(provider);
      recording.complete = async (request) => {
        const response = await forward(request);
        entries.push({
          request: describeRequest(request),
          response: describeResponse(response),
          timestamp: new Date().toISOString()
        });
        return response;
      };
      return recording;
    },
    getEntries() {
      return entries.slice();
    },
    toFixture(name) {
      return createFixture(name, entries.slice());
    },
    clear() {
      entries.length = 0;
    }
  };
}
|
|
587
|
+
var init_fixtures = __esm(() => {
|
|
588
|
+
init_mock_provider();
|
|
589
|
+
});
|
|
590
|
+
|
|
591
|
+
// ../testing/src/eval.ts
|
|
592
|
+
/**
 * Check that `output` contains `criterion.value`. The comparison is
 * case-insensitive unless `criterion.caseSensitive` is truthy.
 * @returns {{type: "contains", passed: boolean, message: string}}
 */
function evaluateContains(output, criterion) {
  const { value, caseSensitive } = criterion;
  const normalize = (text) => (caseSensitive ? text : text.toLowerCase());
  const needle = normalize(value);
  const passed = normalize(output).includes(needle);
  const message = passed ? `Contains "${value}"` : `Does not contain "${value}"`;
  return { type: "contains", passed, message };
}
|
|
602
|
+
/**
 * Check that `output` does NOT contain `criterion.value`. The comparison is
 * case-insensitive unless `criterion.caseSensitive` is truthy.
 * @returns {{type: "not_contains", passed: boolean, message: string}}
 */
function evaluateNotContains(output, criterion) {
  const fold = (text) => (criterion.caseSensitive ? text : text.toLowerCase());
  const forbidden = fold(criterion.value);
  const found = fold(output).includes(forbidden);
  return {
    type: "not_contains",
    passed: !found,
    message: found
      ? `Contains "${criterion.value}" (should not)`
      : `Does not contain "${criterion.value}"`
  };
}
|
|
612
|
+
/**
 * Test `output` against the regular expression described by
 * `criterion.pattern` and optional `criterion.flags`.
 *
 * An invalid pattern or flag string is reported as a failed criterion rather
 * than thrown, so one malformed criterion cannot abort a whole eval run.
 *
 * @returns {{type: "matches", passed: boolean, message: string}}
 */
function evaluateMatches(output, criterion) {
  const { pattern, flags } = criterion;
  let regex;
  try {
    regex = new RegExp(pattern, flags);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return {
      type: "matches",
      passed: false,
      message: `Invalid pattern /${pattern}/: ${reason}`
    };
  }
  // A fresh RegExp per call means `g`/`y` flags carry no lastIndex state over.
  const passed = regex.test(output);
  return {
    type: "matches",
    passed,
    message: passed ? `Matches /${pattern}/` : `Does not match /${pattern}/`
  };
}
|
|
621
|
+
/**
 * Check that `output.length` is at least `criterion.value`.
 * @returns {{type: "length_min", passed: boolean, message: string}}
 */
function evaluateLengthMin(output, criterion) {
  const actual = output.length;
  const minimum = criterion.value;
  const passed = actual >= minimum;
  const relation = passed ? ">=" : "<";
  return {
    type: "length_min",
    passed,
    message: `Length ${actual} ${relation} ${minimum}`
  };
}
|
|
629
|
+
/**
 * Check that `output.length` does not exceed `criterion.value`.
 * @returns {{type: "length_max", passed: boolean, message: string}}
 */
function evaluateLengthMax(output, criterion) {
  const actual = output.length;
  const maximum = criterion.value;
  const passed = actual <= maximum;
  const relation = passed ? "<=" : ">";
  return {
    type: "length_max",
    passed,
    message: `Length ${actual} ${relation} ${maximum}`
  };
}
|
|
637
|
+
/**
 * Check that `output` parses with `JSON.parse`.
 * @returns {{type: "json_valid", passed: boolean, message: string}}
 */
function evaluateJsonValid(output) {
  let passed = true;
  try {
    JSON.parse(output);
  } catch {
    passed = false;
  }
  return {
    type: "json_valid",
    passed,
    message: passed ? "Valid JSON" : "Invalid JSON"
  };
}
|
|
645
|
+
/**
 * Parse `output` as JSON and compare it with `criterion.schema` via
 * `matchesSchema`. Any exception raised while parsing or matching is
 * reported as "Invalid JSON".
 * @returns {{type: "json_matches", passed: boolean, message: string}}
 */
function evaluateJsonMatches(output, criterion) {
  let passed;
  try {
    passed = matchesSchema(JSON.parse(output), criterion.schema);
  } catch {
    return { type: "json_matches", passed: false, message: "Invalid JSON" };
  }
  const message = passed ? "JSON matches schema" : "JSON does not match schema";
  return { type: "json_matches", passed, message };
}
|
|
658
|
+
/**
 * Run the caller-supplied predicate `criterion.fn(output)` and report its
 * return value as `passed`, labelled with `criterion.name`.
 * @returns {{type: string, passed: *, message: string}}
 */
function evaluateCustom(output, criterion) {
  const passed = criterion.fn(output);
  const verdict = passed ? "passed" : "failed";
  return {
    type: `custom:${criterion.name}`,
    passed,
    message: `Custom check "${criterion.name}" ${verdict}`
  };
}
|
|
666
|
+
/**
 * Approximate similarity as the fraction of distinct reference words (longer
 * than three characters, lower-cased, whitespace-split) that also occur in
 * `output`. Passes when that fraction reaches `criterion.threshold`
 * (default 0.7).
 *
 * Output words are de-duplicated before counting, so repeating one matching
 * word cannot inflate the score. An empty reference word set scores 0.
 *
 * @returns {{type: "semantic_similarity", passed: boolean, message: string}}
 */
function evaluateSemanticSimilarity(output, criterion) {
  const significantWords = (text) =>
    new Set(text.toLowerCase().split(/\s+/).filter((w) => w.length > 3));
  const refWords = significantWords(criterion.reference);
  const outWords = significantWords(output);
  let overlap = 0;
  for (const word of refWords) {
    if (outWords.has(word)) {
      overlap++;
    }
  }
  const score = refWords.size > 0 ? overlap / refWords.size : 0;
  const threshold = criterion.threshold ?? 0.7;
  const passed = score >= threshold;
  const scorePct = (score * 100).toFixed(0);
  const thresholdPct = (threshold * 100).toFixed(0);
  return {
    type: "semantic_similarity",
    passed,
    message: passed
      ? `Semantic similarity ${scorePct}% >= ${thresholdPct}%`
      : `Semantic similarity ${scorePct}% < ${thresholdPct}%`
  };
}
|
|
679
|
+
/**
 * Estimate how many of `criterion.facts` are reflected in `output`.
 *
 * A fact counts as found when more than half of its words longer than three
 * characters occur (as substrings, case-insensitively) in the output. A fact
 * with no such words falls back to a case-insensitive match of the whole
 * phrase; without that fallback it could never be verified. Passes when the
 * found fraction reaches `criterion.threshold` (default 0.7); an empty fact
 * list scores 1.
 *
 * @returns {{type: "factual_accuracy", passed: boolean, message: string}}
 */
function evaluateFactualAccuracy(output, criterion) {
  const facts = criterion.facts;
  const outputLower = output.toLowerCase();
  let matchedFacts = 0;
  for (const fact of facts) {
    const factLower = fact.toLowerCase();
    const factWords = factLower.split(/\s+/).filter((w) => w.length > 3);
    if (factWords.length === 0) {
      // Only short words: compare the whole phrase instead.
      const phrase = factLower.trim();
      if (phrase.length > 0 && outputLower.includes(phrase)) {
        matchedFacts++;
      }
      continue;
    }
    const matches = factWords.filter((w) => outputLower.includes(w)).length;
    if (matches / factWords.length > 0.5) {
      matchedFacts++;
    }
  }
  const score = facts.length > 0 ? matchedFacts / facts.length : 1;
  const threshold = criterion.threshold ?? 0.7;
  const passed = score >= threshold;
  return {
    type: "factual_accuracy",
    passed,
    message: passed
      ? `Factual accuracy: ${matchedFacts}/${facts.length} facts verified`
      : `Factual accuracy: only ${matchedFacts}/${facts.length} facts found`
  };
}
|
|
699
|
+
/**
 * Dispatch a synchronous criterion to its evaluator based on `criterion.type`.
 *
 * `llm_judge` cannot be evaluated synchronously and is reported as failed
 * here. An unrecognised type is also reported as a failed result instead of
 * returning `undefined`, which callers would trip over when reading
 * `.passed`.
 *
 * @param {string} output - Text under evaluation.
 * @param {{type: string}} criterion - Criterion descriptor.
 * @returns {{type: string, passed: boolean, message: string}}
 */
function evaluateCriterion(output, criterion) {
  switch (criterion.type) {
    case "contains":
      return evaluateContains(output, criterion);
    case "not_contains":
      return evaluateNotContains(output, criterion);
    case "matches":
      return evaluateMatches(output, criterion);
    case "length_min":
      return evaluateLengthMin(output, criterion);
    case "length_max":
      return evaluateLengthMax(output, criterion);
    case "json_valid":
      return evaluateJsonValid(output);
    case "json_matches":
      return evaluateJsonMatches(output, criterion);
    case "custom":
      return evaluateCustom(output, criterion);
    case "llm_judge":
      return { type: "llm_judge", passed: false, message: "LLM judge requires async evaluation" };
    case "semantic_similarity":
      return evaluateSemanticSimilarity(output, criterion);
    case "factual_accuracy":
      return evaluateFactualAccuracy(output, criterion);
    default:
      return {
        type: String(criterion.type),
        passed: false,
        message: `Unknown criterion type "${criterion.type}"`
      };
  }
}
|
|
725
|
+
/**
 * Shallow structural check of a parsed JSON value against `schema`.
 *
 * `value` must be a non-null object containing every key of `schema`. When a
 * schema entry is a string it names the expected type: any `typeof` result
 * ("string", "number", "boolean", "object", ...), plus "array" and "null",
 * which `typeof` alone cannot express. Non-string schema entries only
 * require the key to be present.
 *
 * @param {*} value - Parsed JSON value.
 * @param {Object<string, *>} schema - Map of key to expected type name.
 * @returns {boolean}
 */
function matchesSchema(value, schema) {
  if (typeof value !== "object" || value === null)
    return false;
  const hasType = (actual, expected) => {
    if (expected === "array")
      return Array.isArray(actual);
    if (expected === "null")
      return actual === null;
    return typeof actual === expected;
  };
  for (const key of Object.keys(schema)) {
    if (!(key in value))
      return false;
    const expectedType = schema[key];
    if (typeof expectedType === "string" && !hasType(value[key], expectedType))
      return false;
  }
  return true;
}
|
|
741
|
+
/**
 * Build the failed case result used when the runner itself throws.
 * @param {object} evalCase - Case descriptor (`name`, `input`, optional `tags`).
 * @param {*} error - Value thrown by the runner.
 * @param {number} startTime - `performance.now()` reading taken at case start.
 * @returns {object} Result with score 0, empty output and one `error` criterion.
 */
function makeRunnerErrorResult(evalCase, error, startTime) {
  const reason = error instanceof Error ? error.message : String(error);
  const failure = {
    type: "error",
    passed: false,
    message: `Runner error: ${reason}`
  };
  const elapsed = performance.now() - startTime;
  return {
    name: evalCase.name,
    passed: false,
    score: 0,
    criteria: [failure],
    input: evalCase.input,
    output: "",
    durationMs: Math.round(elapsed),
    tags: evalCase.tags ?? []
  };
}
|
|
759
|
+
/**
 * Check that `output` includes the case's `expected` text (case-sensitive).
 * @returns {{type: "expected", passed: boolean, message: string}}
 */
function checkExpected(output, expected) {
  if (output.includes(expected)) {
    return { type: "expected", passed: true, message: "Output contains expected text" };
  }
  return {
    type: "expected",
    passed: false,
    message: `Output does not contain expected "${expected}"`
  };
}
|
|
767
|
+
/**
 * Ask `criterion.judge` to score `output`.
 *
 * The judge receives `criterion.prompt`, a blank line, the label
 * "Output to evaluate:" and the output. Its `score` must reach
 * `criterion.threshold` (default 0.7) to pass. Any exception is converted
 * into a failed result.
 *
 * @returns {Promise<{type: "llm_judge", passed: boolean, message: string}>}
 */
async function evaluateLlmJudge(output, criterion) {
  const verdict = (passed, message) => ({ type: "llm_judge", passed, message });
  try {
    const fullPrompt = `${criterion.prompt}\n\nOutput to evaluate:\n${output}`;
    const result = await criterion.judge(fullPrompt);
    const threshold = criterion.threshold ?? 0.7;
    if (result.score >= threshold) {
      return verdict(true, `LLM judge score: ${result.score.toFixed(2)} (${result.reasoning})`);
    }
    return verdict(false, `LLM judge score: ${result.score.toFixed(2)} < ${threshold} (${result.reasoning})`);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return verdict(false, `LLM judge error: ${reason}`);
  }
}
|
|
789
|
+
/**
 * Evaluate one case's checks in order: the optional `expected` substring
 * check first, then every entry of `criteria`. `llm_judge` criteria are
 * awaited; all others are evaluated synchronously.
 * @returns {Promise<object[]>} One result per check, in evaluation order.
 */
async function evaluateAllCriteria(output, evalCase) {
  const outcomes = evalCase.expected !== undefined
    ? [checkExpected(output, evalCase.expected)]
    : [];
  for (const criterion of evalCase.criteria ?? []) {
    const outcome = criterion.type === "llm_judge"
      ? await evaluateLlmJudge(output, criterion)
      : evaluateCriterion(output, criterion);
    outcomes.push(outcome);
  }
  return outcomes;
}
|
|
803
|
+
/**
 * Execute one eval case: call `runner(evalCase.input)`, evaluate every
 * criterion against the output, and summarise. A runner exception yields the
 * runner-error result. A case with no checks passes with score 1.
 * @returns {Promise<object>} Case result (`name`, `passed`, `score`,
 *   `criteria`, `input`, `output`, `durationMs`, `tags`).
 */
async function runCase(evalCase, runner) {
  const startTime = performance.now();
  let output;
  try {
    output = await runner(evalCase.input);
  } catch (error) {
    return makeRunnerErrorResult(evalCase, error, startTime);
  }
  const criteria = await evaluateAllCriteria(output, evalCase);
  const total = criteria.length;
  const passedCount = criteria.reduce((count, c) => (c.passed ? count + 1 : count), 0);
  const noChecks = total === 0;
  return {
    name: evalCase.name,
    passed: noChecks || passedCount === total,
    score: noChecks ? 1 : passedCount / total,
    criteria,
    input: evalCase.input,
    output,
    durationMs: Math.round(performance.now() - startTime),
    tags: evalCase.tags ?? []
  };
}
|
|
827
|
+
/**
 * Run every case of a suite and aggregate the results.
 *
 * With `concurrency` of 1 or less (the default) cases run one at a time.
 * Otherwise they run in consecutive batches of `concurrency` cases, each
 * batch awaited in full before the next starts.
 *
 * @param {{name: string, cases: object[], runner: Function, concurrency?: number}} config
 * @returns {Promise<object>} Summary with `name`, `total`, `passed`,
 *   `failed`, `score` (0 for an empty suite), `results`, `durationMs`.
 */
async function runEvalSuite(config) {
  const startTime = performance.now();
  const concurrency = config.concurrency ?? 1;
  const results = [];
  const execute = (evalCase) => runCase(evalCase, config.runner);
  if (concurrency <= 1) {
    for (const evalCase of config.cases) {
      results.push(await execute(evalCase));
    }
  } else {
    let offset = 0;
    while (offset < config.cases.length) {
      const batch = config.cases.slice(offset, offset + concurrency);
      const settled = await Promise.all(batch.map((c) => execute(c)));
      results.push(...settled);
      offset += concurrency;
    }
  }
  const total = results.length;
  const passed = results.filter((r) => r.passed).length;
  return {
    name: config.name,
    total,
    passed,
    failed: total - passed,
    score: total > 0 ? passed / total : 0,
    results,
    durationMs: Math.round(performance.now() - startTime)
  };
}
|
|
854
|
+
/**
 * Render a suite result (as returned by `runEvalSuite`) as plain text.
 *
 * The report has a title, a rule, one PASS/FAIL line per case, the messages
 * of failed criteria under each failing case, a second rule and a score line.
 *
 * @param {object} result - Suite result with name, results, score, passed, total, durationMs.
 * @returns {string} Newline-joined report ending in a trailing newline.
 */
function formatEvalReport(result) {
  const rule = ` ${"\u2500".repeat(50)}`;
  const caseLines = result.results.flatMap((r) => {
    const heading = ` [${r.passed ? "PASS" : "FAIL"}] ${r.name} (${r.durationMs}ms)`;
    if (r.passed) {
      return [heading];
    }
    // Only failing criteria are listed under a failing case.
    const failures = r.criteria.filter((c) => !c.passed).map((c) => ` ${c.message}`);
    return [heading, ...failures];
  });
  const summary = ` Score: ${(result.score * 100).toFixed(1)}% | ${result.passed}/${result.total} passed | ${result.durationMs}ms`;
  return [`\n Eval Suite: ${result.name}`, rule, ...caseLines, rule, summary, ""].join("\n");
}
|
|
876
|
+
|
|
877
|
+
// ../testing/src/snapshot.ts
|
|
878
|
+
import { createHash as createHash2 } from "crypto";
|
|
879
|
+
/**
 * Create an in-memory snapshot store keyed by snapshot name.
 *
 * @param {Iterable<object>} [existing] - Snapshots to preload; each is indexed by its `name`.
 * @returns {object} Store exposing `get`, `set`, `getAll` and `toJSON` (pretty-printed JSON array).
 */
function createSnapshotStore(existing) {
  const byName = new Map();
  for (const snap of existing || []) {
    byName.set(snap.name, snap);
  }
  const listAll = () => [...byName.values()];
  return {
    get: (name) => byName.get(name),
    set: (name, snapshot) => {
      byName.set(name, snapshot);
    },
    getAll: () => listAll(),
    toJSON: () => JSON.stringify(listAll(), null, 2)
  };
}
|
|
901
|
+
/**
 * Compute the SHA-256 digest of an output.
 *
 * @param {string|Buffer} output - Data to hash.
 * @returns {string} Lower-case hex digest.
 */
function hashOutput(output) {
  const hasher = createHash2("sha256");
  hasher.update(output);
  return hasher.digest("hex");
}
|
|
904
|
+
/**
 * Run `runner`, hash its output and compare against the stored snapshot.
 *
 * - no stored snapshot: the new one is stored, status "new"
 * - same hash: the store is left untouched, status "match"
 * - different hash: the stored snapshot is replaced, status "changed"
 *
 * @param {string} name - Snapshot name (store key).
 * @param {object} store - Store exposing `get(name)` and `set(name, snapshot)`.
 * @param {() => any} runner - Produces the output to snapshot.
 * @param {object} [request] - Optional request metadata (`system`, `messages`, `model`) recorded with the snapshot.
 * @returns {Promise<object>} `{ name, status, currentHash, output }` plus `previousHash` when a snapshot existed.
 */
async function testSnapshot(name, store, runner, request) {
  const output = await runner();
  const currentHash = hashOutput(output);
  const previous = store.get(name);

  // Non-string message content is not persisted; it is replaced by a marker.
  const messages =
    request?.messages?.map((m) => ({
      role: m.role,
      content: typeof m.content === "string" ? m.content : "[complex]"
    })) ?? [];
  const snapshot = {
    name,
    request: { system: request?.system, messages, model: request?.model },
    outputHash: currentHash,
    timestamp: new Date().toISOString()
  };

  if (previous && previous.outputHash === currentHash) {
    return { name, status: "match", previousHash: previous.outputHash, currentHash, output };
  }

  store.set(name, snapshot);
  if (!previous) {
    return { name, status: "new", currentHash, output };
  }
  return { name, status: "changed", previousHash: previous.outputHash, currentHash, output };
}
|
|
943
|
+
// Bundler-generated module initializer for snapshot.ts: a no-op that takes no arguments and returns undefined.
var init_snapshot = () => {};
|
|
944
|
+
|
|
945
|
+
// ../testing/src/prompts.ts
|
|
946
|
+
/**
 * Define a prompt by returning a shallow copy of the given config.
 *
 * @param {object} config - Prompt definition.
 * @returns {object} New object with the same own enumerable properties.
 */
function definePrompt(config) {
  return Object.assign({}, config);
}
|
|
949
|
+
/**
 * Compare one line position of two texts.
 *
 * @param {string|undefined} fromLine - Line in the old text (undefined when past its end).
 * @param {string|undefined} toLine - Line in the new text (undefined when past its end).
 * @param {number} lineNumber - Line number attached to every emitted change.
 * @returns {object[]} One "added", "removed" or "unchanged" entry, or a "removed" + "added" pair when both lines exist and differ.
 */
function compareLine(fromLine, toLine, lineNumber) {
  const change = (type, content) => ({ type, lineNumber, content });
  if (fromLine === undefined) {
    return [change("added", toLine)];
  }
  if (toLine === undefined) {
    return [change("removed", fromLine)];
  }
  return fromLine === toLine
    ? [change("unchanged", fromLine)]
    : [change("removed", fromLine), change("added", toLine)];
}
|
|
964
|
+
/**
 * Build a positional (index-by-index) line diff of two line arrays.
 *
 * Lines are compared at the same index only; no alignment is attempted.
 *
 * @param {string[]} fromLines - Lines of the old text.
 * @param {string[]} toLines - Lines of the new text.
 * @returns {object[]} Flat list of the entries produced by `compareLine`, numbered from 1.
 */
function computeLineChanges(fromLines, toLines) {
  const rowCount = Math.max(fromLines.length, toLines.length);
  const perRow = Array.from({ length: rowCount }, (_, idx) =>
    compareLine(fromLines[idx], toLines[idx], idx + 1)
  );
  return perRow.flat();
}
|
|
972
|
+
/**
 * Create an in-memory registry of versioned prompts.
 *
 * Prompts are stored per name and per `prompt.version`. Versions are ordered
 * by splitting on "." and comparing the parts numerically, so they are
 * expected to be dot-separated numbers (for example "1.10.0" sorts after "1.2.0").
 *
 * All methods close over the registry state instead of using `this`, so they
 * keep working when destructured or passed around as callbacks.
 *
 * @returns {object} Registry with `register`, `get`, `getLatest`, `list`, `getVersions`, `diff` and `render`.
 */
function createPromptRegistry() {
  const store = new Map();

  // Numeric, part-wise comparison; missing parts count as 0.
  function compareVersions(a, b) {
    const aParts = a.split(".").map(Number);
    const bParts = b.split(".").map(Number);
    const length = Math.max(aParts.length, bParts.length);
    for (let i = 0; i < length; i++) {
      const aVal = aParts[i] ?? 0;
      const bVal = bParts[i] ?? 0;
      if (aVal !== bVal) {
        return aVal - bVal;
      }
    }
    return 0;
  }

  const sortedVersions = (versions) => [...versions.keys()].sort(compareVersions);

  /** Store `prompt` under `name`, replacing any prompt with the same version. */
  function register(name, prompt) {
    let versions = store.get(name);
    if (!versions) {
      versions = new Map();
      store.set(name, versions);
    }
    versions.set(prompt.version, prompt);
  }

  /** Return the highest registered version of `name`, or undefined. */
  function getLatest(name) {
    const versions = store.get(name);
    if (!versions || versions.size === 0) {
      return undefined;
    }
    const sorted = sortedVersions(versions);
    return versions.get(sorted[sorted.length - 1]);
  }

  /** Return the given version of `name`, or the latest when no version is given. */
  function get(name, version) {
    const versions = store.get(name);
    if (!versions) {
      return undefined;
    }
    if (version) {
      return versions.get(version);
    }
    return getLatest(name);
  }

  /** List every prompt name with its versions in ascending order. */
  function list() {
    return [...store].map(([name, versions]) => ({
      name,
      versions: sortedVersions(versions)
    }));
  }

  /** Return the versions of `name` in ascending order ([] when unknown). */
  function getVersions(name) {
    const versions = store.get(name);
    return versions ? sortedVersions(versions) : [];
  }

  /** Positional line diff between two versions; null when either is missing. */
  function diff(name, fromVersion, toVersion) {
    const versions = store.get(name);
    if (!versions) {
      return null;
    }
    const from = versions.get(fromVersion);
    const to = versions.get(toVersion);
    if (!from || !to) {
      return null;
    }
    const changes = computeLineChanges(from.content.split("\n"), to.content.split("\n"));
    return { name, fromVersion, toVersion, changes };
  }

  /**
   * Substitute `{{key}}` placeholders in a prompt's content.
   *
   * @throws {Error} When the prompt (or the requested version) is not registered.
   */
  function render(name, variables, version) {
    const prompt = get(name, version);
    if (!prompt) {
      throw new Error(`Prompt "${name}" not found${version ? ` (version ${version})` : ""}`);
    }
    let rendered = prompt.content;
    for (const [key, value] of Object.entries(variables ?? {})) {
      // A replacer function inserts the value verbatim. A plain string
      // replacement would interpret "$$", "$&", "$1" and similar patterns,
      // corrupting values such as prices or shell snippets.
      rendered = rendered.replaceAll(`{{${key}}}`, () => value);
    }
    return rendered;
  }

  return { register, get, getLatest, list, getVersions, diff, render };
}
|
|
1054
|
+
|
|
1055
|
+
// ../testing/src/regression.ts
|
|
1056
|
+
import { mkdirSync, readFileSync as readFileSync2, writeFileSync } from "fs";
|
|
1057
|
+
import { dirname } from "path";
|
|
1058
|
+
/**
 * Build the regression result reported when there is nothing to compare.
 *
 * @param {string} name - Suite name.
 * @returns {object} Result with zero counts and scores and fresh, empty regression/improvement lists.
 */
function makeEmptyResult(name) {
  const zeroed = {
    totalCases: 0,
    regressions: [],
    improvements: [],
    unchanged: 0,
    overallScore: 0,
    baselineScore: 0
  };
  return { name, ...zeroed };
}
|
|
1069
|
+
/**
 * Score the current output of a regression case.
 *
 * @param {any} input - Case input, forwarded to the scorer.
 * @param {any} currentOutput - Output produced by the current run.
 * @param {any} baselineOutput - Output recorded in the baseline.
 * @param {(input: any, output: any) => any} [scorer] - Custom scorer; when given, its result is returned as-is.
 * @returns {Promise<number>} Without a scorer: 1 when the outputs are strictly equal, otherwise 0.5.
 */
async function scoreCase(input, currentOutput, baselineOutput, scorer) {
  if (!scorer) {
    const identical = currentOutput === baselineOutput;
    return identical ? 1 : 0.5;
  }
  return scorer(input, currentOutput);
}
|
|
1074
|
+
/**
 * File a case detail under regressions or improvements based on its score delta.
 *
 * A delta below -0.1 is a regression and a delta above 0.1 is an improvement;
 * anything in between (including exactly ±0.1) is treated as unchanged.
 *
 * @param {object} detail - Case detail carrying a numeric `delta`.
 * @param {object[]} regressions - Receives the detail when it regressed.
 * @param {object[]} improvements - Receives the detail when it improved.
 * @returns {boolean} true when the case is unchanged, false when it was filed in a list.
 */
function classifyDetail(detail, regressions, improvements) {
  const { delta } = detail;
  const bucket = delta < -0.1 ? regressions : delta > 0.1 ? improvements : null;
  if (bucket === null) {
    return true;
  }
  bucket.push(detail);
  return false;
}
|
|
1085
|
+
/**
 * Re-run every baseline case and compare the new scores with the recorded ones.
 *
 * Cases are executed one after another. Each case is scored with `scoreCase`
 * and classified with `classifyDetail`. Scores are averaged over
 * `baseline.cases.length`, so the caller is expected to pass a non-empty baseline.
 *
 * @param {string} name - Suite name copied into the result.
 * @param {object} baseline - Baseline with a `cases` array of `{ input, output, score }`.
 * @param {(input: any) => any} runner - Produces the current output for an input.
 * @param {Function} [scorer] - Optional custom scorer forwarded to `scoreCase`.
 * @returns {Promise<object>} Result with totalCases, regressions, improvements, unchanged, overallScore, baselineScore.
 */
async function compareWithBaseline(name, baseline, runner, scorer) {
  const regressions = [];
  const improvements = [];
  const caseCount = baseline.cases.length;

  let baselineTotal = 0;
  for (const recorded of baseline.cases) {
    baselineTotal += recorded.score;
  }
  const baselineScore = baselineTotal / caseCount;

  let unchanged = 0;
  let currentTotal = 0;
  for (const recorded of baseline.cases) {
    const currentOutput = await runner(recorded.input);
    const currentScore = await scoreCase(recorded.input, currentOutput, recorded.output, scorer);
    currentTotal += currentScore;
    const isUnchanged = classifyDetail(
      {
        input: recorded.input,
        baselineOutput: recorded.output,
        currentOutput,
        baselineScore: recorded.score,
        currentScore,
        delta: currentScore - recorded.score
      },
      regressions,
      improvements
    );
    if (isUnchanged) {
      unchanged += 1;
    }
  }

  return {
    name,
    totalCases: caseCount,
    regressions,
    improvements,
    unchanged,
    overallScore: currentTotal / caseCount,
    baselineScore
  };
}
|
|
1117
|
+
/**
 * Create a regression suite backed by a JSON baseline file.
 *
 * The baseline is `{ name, cases, createdAt, updatedAt }`, where each case is
 * `{ input, output, score, timestamp }`. `load` and `save` are declared async
 * but use synchronous file-system calls.
 *
 * @param {string} name - Suite name, used for new baselines and for results.
 * @returns {object} Suite with a read-only `baseline` getter and `load`, `save`, `addCase`, `run`.
 */
function createRegressionSuite(name) {
  let baseline = null;

  // Lazily create the empty baseline shared by `save` and `addCase`.
  const ensureBaseline = () => {
    if (!baseline) {
      const now = Date.now();
      baseline = { name, cases: [], createdAt: now, updatedAt: now };
    }
    return baseline;
  };

  return {
    get baseline() {
      return baseline;
    },
    /**
     * Load a baseline from `path`. Best effort: a missing, unreadable or
     * unparseable file leaves the suite without a baseline instead of throwing.
     */
    async load(path) {
      try {
        const parsed = JSON.parse(readFileSync2(path, "utf-8"));
        // Reject JSON that is not baseline-shaped. Without a `cases` array,
        // `run` and `addCase` would later fail with a TypeError.
        const looksValid = parsed !== null && typeof parsed === "object" && Array.isArray(parsed.cases);
        baseline = looksValid ? parsed : null;
      } catch {
        baseline = null;
      }
    },
    /** Write the baseline (an empty one if none exists yet) to `path`, creating parent directories. */
    async save(path) {
      const current = ensureBaseline();
      mkdirSync(dirname(path), { recursive: true });
      writeFileSync(path, JSON.stringify(current, null, 2));
    },
    /** Add a case, or replace the existing case whose `input` is strictly equal. */
    addCase(input, output, score) {
      const current = ensureBaseline();
      const entry = { input, output, score, timestamp: Date.now() };
      const existing = current.cases.findIndex((c) => c.input === input);
      if (existing >= 0) {
        current.cases[existing] = entry;
      } else {
        current.cases.push(entry);
      }
      current.updatedAt = Date.now();
    },
    /** Compare the current behaviour against the baseline; an empty result when there is nothing to compare. */
    async run(runner, scorer) {
      if (!baseline || baseline.cases.length === 0) {
        return makeEmptyResult(name);
      }
      return compareWithBaseline(name, baseline, runner, scorer);
    }
  };
}
|
|
1168
|
+
// Bundler-generated module initializer for regression.ts: a no-op that takes no arguments and returns undefined.
var init_regression = () => {};
|
|
1169
|
+
|
|
1170
|
+
// ../testing/src/replay.ts
|
|
1171
|
+
/**
 * Create a recorder that captures request/response pairs of a completion function.
 *
 * @returns {object} Recorder with:
 *   - `wrap(completeFn)`: async wrapper that records each successful call with a timestamp
 *   - `getEntries()`: shallow copy of the recorded entries
 *   - `toJSON()`: pretty-printed JSON of the entries
 *   - `clear()`: drop all entries
 */
function createReplayRecorder() {
  const log = [];
  const wrap = (completeFn) => async (request) => {
    // A rejected call propagates to the caller and is not recorded.
    const response = await completeFn(request);
    log.push({ request, response, timestamp: Date.now() });
    return response;
  };
  return {
    wrap,
    getEntries: () => log.slice(),
    toJSON: () => JSON.stringify(log, null, 2),
    clear: () => {
      log.length = 0;
    }
  };
}
|
|
1196
|
+
/**
 * Create a player that replays recorded responses in order.
 *
 * @param {string|Iterable<object>} entriesOrJson - Recorded entries, or their JSON serialisation.
 * @returns {object} Player with a `remaining` getter and `complete(request)`, which
 *   ignores the request, resolves with the next recorded response and rejects
 *   with "Replay exhausted" once every entry has been consumed.
 */
function createReplayPlayer(entriesOrJson) {
  let recorded;
  if (typeof entriesOrJson === "string") {
    recorded = JSON.parse(entriesOrJson);
  } else {
    recorded = [...entriesOrJson];
  }
  let cursor = 0;
  return {
    get remaining() {
      return recorded.length - cursor;
    },
    async complete(_request) {
      if (cursor < recorded.length) {
        const entry = recorded[cursor];
        cursor += 1;
        return entry.response;
      }
      throw new Error("Replay exhausted: no more recorded responses");
    }
  };
}
|
|
1213
|
+
|
|
1214
|
+
// ../testing/src/pinning.ts
|
|
1215
|
+
import { createHash as createHash3 } from "crypto";
|
|
1216
|
+
/**
 * Compute the SHA-256 digest of the input.
 *
 * @param {string|Buffer} input - Data to hash.
 * @returns {string} Lower-case hex digest.
 */
function sha256(input) {
  const hasher = createHash3("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
|
|
1219
|
+
/**
 * Create an in-memory store of output pins.
 *
 * Preloaded pins are keyed by `"<promptHash>:<configHash>"`.
 *
 * @param {Iterable<object>} [existing] - Pins to preload.
 * @returns {object} Store with `get`, `set`, `delete` (returns whether a pin was removed), `getAll` and `toJSON`.
 */
function createPinStore(existing) {
  const byKey = new Map();
  for (const pin of existing || []) {
    byKey.set(`${pin.promptHash}:${pin.configHash}`, pin);
  }
  const listAll = () => [...byKey.values()];
  return {
    get: (key) => byKey.get(key),
    set: (key, pin) => {
      byKey.set(key, pin);
    },
    delete: (key) => byKey.delete(key),
    getAll: () => listAll(),
    toJSON: () => JSON.stringify(listAll(), null, 2)
  };
}
|
|
1245
|
+
/**
 * Run `runner` and compare its output with the pin stored for this prompt/config.
 *
 * The store key is `"<sha256(prompt)>:<sha256(JSON of model, temperature, seed)>"`.
 * - no stored pin: the new pin is stored, status "new"
 * - same output hash: the store is left untouched, status "match"
 * - different hash: throws when `options.assert` is set, otherwise the stored
 *   pin is replaced and the status is "mismatch"
 *
 * @param {string} name - Label used in the mismatch error message.
 * @param {object} store - Pin store exposing `get(key)` and `set(key, pin)`.
 * @param {() => any} runner - Produces the output to pin.
 * @param {object} config - `{ prompt, model, temperature, seed }`.
 * @param {object} [options] - `{ assert }` to throw on mismatch.
 * @returns {Promise<object>} `{ status, pin }` plus `previousPin` when one existed.
 * @throws ElsiumError.validation error on mismatch when `options.assert` is truthy.
 */
async function pinOutput(name, store, runner, config, options) {
  const { prompt, model, temperature, seed } = config;
  const promptHash = sha256(prompt);
  const configHash = sha256(JSON.stringify({ model, temperature, seed }));
  const key = `${promptHash}:${configHash}`;

  const output = await runner();
  const outputHash = sha256(output);
  const pin = {
    promptHash,
    configHash,
    outputHash,
    outputText: output,
    model,
    createdAt: Date.now()
  };

  const previousPin = store.get(key);
  if (previousPin && previousPin.outputHash === outputHash) {
    return { status: "match", pin, previousPin };
  }
  if (previousPin && options?.assert) {
    throw ElsiumError.validation(`Pin mismatch for "${name}": expected hash ${previousPin.outputHash}, got ${outputHash}`);
  }

  store.set(key, pin);
  return previousPin ? { status: "mismatch", pin, previousPin } : { status: "new", pin };
}
|
|
1277
|
+
// Bundler-generated lazy initializer for pinning.ts. `__esm` runs the wrapped
// function at most once; it initializes the core module via `init_src()`,
// presumably so `ElsiumError` is defined before `pinOutput` needs it.
var init_pinning = __esm(() => {
  init_src();
});
|
|
1280
|
+
|
|
1281
|
+
// ../testing/src/determinism.ts
|
|
1282
|
+
/**
 * Call `fn` repeatedly and check that it keeps producing the same output.
 *
 * Outputs are de-duplicated with a `Set`, so they are compared by identity or
 * primitive value. Variance is `(uniqueOutputs - 1) / (runs - 1)`: 0 when all
 * runs agree, 1 when every run differs.
 *
 * @param {(seed: any) => any} fn - Function under test; receives `options.seed`.
 * @param {object} [options] - `{ runs = 5, seed, tolerance = 0 }`.
 * @returns {Promise<object>} `{ deterministic, runs, uniqueOutputs, outputs, variance }`.
 * @throws ElsiumError.validation error when outputs differ and the tolerance is 0.
 */
async function assertDeterministic(fn, options) {
  const runs = options?.runs ?? 5;
  const seed = options?.seed;
  const tolerance = options?.tolerance ?? 0;

  const outputs = [];
  for (let attempt = 0; attempt < runs; attempt++) {
    outputs.push(await fn(seed));
  }

  const uniqueOutputs = new Set(outputs).size;
  const variance = runs > 1 ? (uniqueOutputs - 1) / (runs - 1) : 0;
  const deterministic = variance <= tolerance;

  // With a non-zero tolerance the caller inspects the result instead of catching.
  if (tolerance === 0 && !deterministic) {
    throw ElsiumError.validation(`Non-deterministic output: ${uniqueOutputs} unique outputs across ${runs} runs (variance: ${variance.toFixed(3)})`);
  }
  return { deterministic, runs, uniqueOutputs, outputs, variance };
}
|
|
1306
|
+
/**
 * Call `fn` several times with a pause between calls and report whether the
 * output stayed the same. Unlike `assertDeterministic`, this never throws on
 * differing outputs; the caller inspects `stable`.
 *
 * @param {(seed: any) => any} fn - Function under test; receives `options.seed`.
 * @param {object} [options] - `{ intervalMs = 100, runs = 3, seed }`.
 * @returns {Promise<object>} `{ stable, runs, uniqueOutputs, outputs, variance }`, where
 *   `outputs` holds `{ output, timestamp }` records.
 */
async function assertStable(fn, options) {
  const intervalMs = options?.intervalMs ?? 100;
  const runs = options?.runs ?? 3;
  const seed = options?.seed;

  const outputs = [];
  for (let attempt = 0; attempt < runs; attempt++) {
    // Pause between runs only, not before the first one.
    if (attempt !== 0) {
      await sleep(intervalMs);
    }
    const output = await fn(seed);
    outputs.push({ output, timestamp: Date.now() });
  }

  const uniqueOutputs = new Set(outputs.map((record) => record.output)).size;
  return {
    stable: uniqueOutputs === 1,
    runs,
    uniqueOutputs,
    outputs,
    variance: runs > 1 ? (uniqueOutputs - 1) / (runs - 1) : 0
  };
}
|
|
1329
|
+
// Bundler-generated lazy initializer for determinism.ts. `__esm` runs the
// wrapped function at most once; it initializes the core module via
// `init_src()`, presumably so `ElsiumError` and `sleep` are defined before use.
var init_determinism = __esm(() => {
  init_src();
});
|
|
1332
|
+
|
|
1333
|
+
// ../testing/src/index.ts
|
|
1334
|
+
// Namespace object for the bundled testing package (../testing/src/index.ts).
// `__export` installs an enumerable getter for every name listed below, so each
// property resolves to the current value of the binding when it is read.
var exports_src = {};
__export(exports_src, {
  testSnapshot: () => testSnapshot,
  runEvalSuite: () => runEvalSuite,
  pinOutput: () => pinOutput,
  mockProvider: () => mockProvider,
  loadFixture: () => loadFixture,
  hashOutput: () => hashOutput,
  formatEvalReport: () => formatEvalReport,
  definePrompt: () => definePrompt,
  createSnapshotStore: () => createSnapshotStore,
  createReplayRecorder: () => createReplayRecorder,
  createReplayPlayer: () => createReplayPlayer,
  createRegressionSuite: () => createRegressionSuite,
  createRecorder: () => createRecorder,
  createPromptRegistry: () => createPromptRegistry,
  createPinStore: () => createPinStore,
  createFixture: () => createFixture,
  assertStable: () => assertStable,
  assertDeterministic: () => assertDeterministic
});
|
|
1355
|
+
// Bundler-generated lazy initializer for the testing package entry point.
// `__esm` runs the wrapped function at most once; it calls the initializer of
// each testing sub-module in the order listed here.
var init_src2 = __esm(() => {
  init_mock_provider();
  init_fixtures();
  init_snapshot();
  init_regression();
  init_pinning();
  init_determinism();
});
|
|
1363
|
+
|
|
1364
|
+
// src/commands/cost.ts
|
|
1365
|
+
import { existsSync, readFileSync } from "fs";
|
|
1366
|
+
import { join } from "path";
|
|
1367
|
+
// Location of the cost report, relative to the directory the CLI is run from.
var COST_FILE = ".elsium/cost-report.json";
|
|
1368
|
+
/**
 * `elsium cost`: print the cost report stored at `COST_FILE` under the current
 * working directory.
 *
 * When no report exists, a short explanation of how reports are produced is
 * printed instead. A report that cannot be read, parsed or formatted is
 * reported on stderr and the process exits with status 1.
 *
 * @param {string[]} args - CLI arguments (currently unused).
 * @returns {Promise<void>}
 */
async function costCommand(args) {
  const reportPath = join(process.cwd(), COST_FILE);

  if (existsSync(reportPath)) {
    const rule = ` ${"\u2500".repeat(50)}`;
    try {
      const report = JSON.parse(readFileSync(reportPath, "utf-8"));
      console.log("\n ElsiumAI Cost Report");
      console.log(rule);
      console.log(` Generated: ${report.timestamp}`);
      console.log();
      console.log(` Total Cost: $${report.totalCost.toFixed(6)}`);
      console.log(` Total Tokens: ${report.totalTokens.toLocaleString()}`);
      console.log(` Input Tokens: ${report.totalInputTokens.toLocaleString()}`);
      console.log(` Output Tokens: ${report.totalOutputTokens.toLocaleString()}`);
      console.log(` Total API Calls: ${report.callCount}`);
      console.log();
      const perModel = Object.entries(report.byModel);
      if (perModel.length > 0) {
        console.log(" By Model:");
        console.log(rule);
        for (const [model, stats] of perModel) {
          console.log(` ${model}`);
          console.log(` Cost: $${stats.cost.toFixed(6)}`);
          console.log(` Tokens: ${stats.tokens.toLocaleString()}`);
          console.log(` Calls: ${stats.calls}`);
        }
      }
      console.log();
    } catch (err) {
      console.error("Failed to read cost report:", err instanceof Error ? err.message : err);
      process.exit(1);
    }
    return;
  }

  console.log(`
 No cost report found.

 Cost reports are generated automatically when you run your app
 with observability enabled:

 const app = createApp({
 observe: {
 costTracking: true,
 },
 })

 The report will be saved to .elsium/cost-report.json
`);
}
|
|
1416
|
+
|
|
1417
|
+
// src/commands/dev.ts
|
|
1418
|
+
import { spawn } from "child_process";
|
|
1419
|
+
import { existsSync as existsSync2 } from "fs";
|
|
1420
|
+
import { resolve } from "path";
|
|
1421
|
+
/**
 * `elsium dev [entry]`: run the entry file under `bun --watch`.
 *
 * The entry file defaults to `src/index.ts` and must resolve to a path inside
 * the current working directory. The child inherits stdio, SIGINT is forwarded
 * to it, and this process exits with the child's exit status.
 *
 * @param {string[]} args - CLI arguments; `args[0]` is the optional entry file.
 * @returns {Promise<void>}
 */
async function devCommand(args) {
  const entryFile = args[0] ?? "src/index.ts";
  const cwd = process.cwd();
  const fullPath = resolve(cwd, entryFile);
  if (!isInsideProjectDir(cwd, fullPath)) {
    console.error("Error: entry file must be within the project directory");
    process.exit(1);
  }
  if (!existsSync2(fullPath)) {
    console.error(`Entry file not found: ${entryFile}`);
    console.error("Run this command from your ElsiumAI project root.");
    process.exit(1);
  }
  console.log("\n ElsiumAI Dev Server");
  console.log(` Watching: ${entryFile}`);
  console.log(" Press Ctrl+C to stop\n");
  const child = spawn("bun", ["--watch", entryFile], {
    stdio: "inherit",
    cwd
  });
  child.on("error", (err) => {
    console.error("Failed to start dev server:", err.message);
    process.exit(1);
  });
  child.on("exit", (code, signal) => {
    // When the child is terminated by a signal, `code` is null. That must not
    // be reported as a successful (0) exit.
    process.exit(code ?? (signal ? 1 : 0));
  });
  process.on("SIGINT", () => {
    child.kill("SIGINT");
  });
}

// True when `target` is `projectDir` itself or lies beneath it. Uses the
// platform's path separator (resolved paths use "\" on Windows) and copes with
// a `projectDir` that already ends in a separator, such as the filesystem root.
function isInsideProjectDir(projectDir, target) {
  if (target === projectDir) {
    return true;
  }
  const separator = process.platform === "win32" ? "\\" : "/";
  const prefix = projectDir.endsWith(separator) ? projectDir : `${projectDir}${separator}`;
  return target.startsWith(prefix);
}
|
|
1454
|
+
|
|
1455
|
+
// src/commands/eval.ts
|
|
1456
|
+
import { existsSync as existsSync3 } from "fs";
|
|
1457
|
+
import { join as join2 } from "path";
|
|
1458
|
+
/**
 * `elsium eval <file>`: load an eval suite module and run it.
 *
 * The module's default export (or the module itself) must provide `name`,
 * `cases` and `runner`. The formatted report is printed, and the process exits
 * with status 1 when any case fails or the suite cannot be loaded or run.
 * Without a file argument the usage text is printed.
 *
 * @param {string[]} args - CLI arguments; `args[0]` is the eval file, relative to the cwd or absolute.
 * @returns {Promise<void>}
 */
async function evalCommand(args) {
  const evalFile = args[0];
  if (!evalFile) {
    console.log(`
 Usage: elsium eval <file>

 Run an evaluation suite against your prompts.

 Examples:
 elsium eval ./evals/suite.ts
 elsium eval ./evals/quality.ts

 Eval file should export a default EvalSuiteConfig:

 import { type EvalSuiteConfig } from '@elsium-ai/testing'

 export default {
 name: 'my-eval',
 cases: [
 {
 name: 'test-1',
 input: 'What is TypeScript?',
 criteria: [
 { type: 'contains', value: 'typed' },
 { type: 'length_min', value: 20 },
 ],
 },
 ],
 runner: async (input) => {
 // Call your agent/LLM here
 return response
 },
 } satisfies EvalSuiteConfig
`);
    return;
  }
  // `resolve` keeps an absolute path as-is. `join` would re-root it under the
  // cwd, so an absolute eval file was always reported as "not found".
  const fullPath = resolve(process.cwd(), evalFile);
  if (!existsSync3(fullPath)) {
    console.error(`Eval file not found: ${evalFile}`);
    process.exit(1);
    return;
  }
  try {
    const mod = await import(fullPath);
    const config = mod.default ?? mod;
    if (!config.name || !config.cases || !config.runner) {
      console.error("Eval file must export a valid EvalSuiteConfig with name, cases, and runner.");
      process.exit(1);
    }
    // The testing package is initialised lazily, only when a suite actually runs.
    const { runEvalSuite: runEvalSuite2, formatEvalReport: formatEvalReport2 } = await Promise.resolve().then(() => (init_src2(), exports_src));
    console.log(`\n Running eval suite: ${config.name}`);
    console.log(` Cases: ${config.cases.length}\n`);
    const result = await runEvalSuite2(config);
    console.log(formatEvalReport2(result));
    if (result.failed > 0) {
      process.exit(1);
    }
  } catch (err2) {
    console.error("Failed to run eval:", err2 instanceof Error ? err2.message : err2);
    process.exit(1);
  }
}
|
|
1521
|
+
|
|
1522
|
+
// src/commands/init.ts
|
|
1523
|
+
import { existsSync as existsSync4, mkdirSync as mkdirSync2, writeFileSync as writeFileSync2 } from "fs";
|
|
1524
|
+
import { join as join3 } from "path";
|
|
1525
|
+
/**
 * `elsium init [name]`: scaffold a new ElsiumAI project.
 *
 * Creates `<cwd>/<name>` (default "my-elsium-app") with the standard directory
 * layout and starter files, then prints the created files and the next steps.
 * Exits with status 1 when the target directory already exists.
 *
 * @param {string[]} args - CLI arguments; `args[0]` is the optional project name.
 * @returns {Promise<void>}
 */
async function initCommand(args) {
  const projectName = args[0] ?? "my-elsium-app";
  const projectDir = join3(process.cwd(), projectName);
  if (existsSync4(projectDir)) {
    console.error(`Directory "${projectName}" already exists.`);
    process.exit(1);
  }
  console.log(`\n Creating ElsiumAI project: ${projectName}\n`);

  const inProject = (relativePath) => join3(projectDir, relativePath);

  // Every directory that receives a starter file must exist before writing.
  const directories = [
    "src/agents",
    "src/tools",
    "src/policies",
    "src/gateway",
    "src/workflows",
    "evals",
    "test/agents",
    ".elsium/baselines",
    ".elsium/recordings"
  ];
  for (const directory of directories) {
    mkdirSync2(inProject(directory), { recursive: true });
  }

  const files = [
    ["package.json", packageJsonContent(projectName)],
    ["tsconfig.json", tsconfigContent()],
    ["biome.json", biomeJsonContent()],
    [".env.example", envExampleContent()],
    [".gitignore", gitignoreContent()],
    ["elsium.config.ts", configContent()],
    ["src/index.ts", indexContent()],
    ["src/gateway/mesh.ts", meshContent()],
    ["src/policies/default.ts", policiesContent()],
    ["src/tools/example.ts", toolContent()],
    ["src/agents/assistant.ts", agentContent()],
    ["src/workflows/example.ts", workflowContent()],
    ["evals/quality.eval.ts", qualityEvalContent()],
    ["evals/determinism.eval.ts", determinismEvalContent()],
    ["test/agents/assistant.test.ts", testContent()],
    [".elsium/baselines/.gitkeep", ""],
    [".elsium/recordings/.gitkeep", ""],
    ["README.md", readmeContent(projectName)]
  ];
  files.forEach(([filePath, content]) => {
    writeFileSync2(inProject(filePath), content);
  });

  console.log(" Created files:");
  files.forEach(([filePath]) => {
    console.log(` ${projectName}/${filePath}`);
  });
  console.log();
  console.log(" Next steps:");
  console.log(` cd ${projectName}`);
  console.log(" cp .env.example .env # add your API keys");
  console.log(" bun install");
  console.log(" bun run dev");
  console.log();
}
|
|
1584
|
+
/**
 * Generate the `package.json` of a scaffolded project.
 *
 * @param {string} projectName - Value of the manifest's `name` field.
 * @returns {string} Two-space indented JSON followed by a newline.
 */
function packageJsonContent(projectName) {
  const scripts = {
    dev: "elsium dev",
    start: "bun src/index.ts",
    test: "vitest run",
    eval: "elsium eval evals/quality.eval.ts",
    "eval:determinism": "elsium eval evals/determinism.eval.ts",
    lint: "biome check .",
    format: "biome check --write ."
  };
  const dependencies = {
    "@elsium-ai/core": "^0.1.0",
    "@elsium-ai/gateway": "^0.1.0",
    "@elsium-ai/agents": "^0.1.0",
    "@elsium-ai/tools": "^0.1.0",
    "@elsium-ai/workflows": "^0.1.0",
    "@elsium-ai/observe": "^0.1.0",
    "@elsium-ai/app": "^0.1.0",
    zod: "^3.23.0"
  };
  const devDependencies = {
    "@elsium-ai/testing": "^0.1.0",
    "@biomejs/biome": "^1.9.0",
    "bun-types": "^1.3.0",
    typescript: "^5.7.0",
    vitest: "^3.0.0"
  };
  const manifest = {
    name: projectName,
    version: "0.1.0",
    type: "module",
    scripts,
    dependencies,
    devDependencies
  };
  return `${JSON.stringify(manifest, null, 2)}\n`;
}
|
|
1618
|
+
/**
 * Generate the `tsconfig.json` of a scaffolded project.
 *
 * @returns {string} Two-space indented JSON followed by a newline.
 */
function tsconfigContent() {
  const compilerOptions = {
    target: "ESNext",
    module: "ESNext",
    moduleResolution: "bundler",
    strict: true,
    esModuleInterop: true,
    skipLibCheck: true,
    types: ["bun-types"]
  };
  const include = ["src", "evals", "test", "elsium.config.ts"];
  return `${JSON.stringify({ compilerOptions, include }, null, 2)}\n`;
}
|
|
1633
|
+
/**
 * Generate the `biome.json` (linter and formatter settings) of a scaffolded project.
 *
 * @returns {string} Two-space indented JSON followed by a newline.
 */
function biomeJsonContent() {
  const linter = { enabled: true, rules: { recommended: true } };
  const formatter = { enabled: true, indentStyle: "tab", lineWidth: 100 };
  const settings = {
    $schema: "https://biomejs.dev/schemas/1.9.0/schema.json",
    organizeImports: { enabled: true },
    linter,
    formatter
  };
  return `${JSON.stringify(settings, null, 2)}\n`;
}
|
|
1649
|
+
/**
 * Generate the `.env.example` of a scaffolded project.
 *
 * @returns {string} Placeholder provider API keys, one per line, newline-terminated.
 */
function envExampleContent() {
  const lines = [
    "# Provider API keys \u2014 add at least one",
    "ANTHROPIC_API_KEY=your-anthropic-key-here",
    "OPENAI_API_KEY=your-openai-key-here",
    ""
  ];
  return lines.join("\n");
}
|
|
1655
|
+
/**
 * Generate the `.gitignore` of a scaffolded project.
 *
 * @returns {string} Ignore patterns, one per line, newline-terminated.
 */
function gitignoreContent() {
  const patterns = [
    "node_modules/",
    "dist/",
    ".env",
    ".env.*",
    "!.env.example",
    ".elsium/recordings/*.json"
  ];
  return `${patterns.join("\n")}\n`;
}
|
|
1664
|
+
/**
 * Generate the `elsium.config.ts` of a scaffolded project: gateway provider,
 * default model, observability flags and server port.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function configContent() {
  const lines = [
    "import type { AppConfig } from '@elsium-ai/app'",
    "import { env } from '@elsium-ai/core'",
    "",
    "const config = {",
    "gateway: {",
    "providers: {",
    "anthropic: { apiKey: env('ANTHROPIC_API_KEY') },",
    "},",
    "defaultModel: 'claude-sonnet-4-6',",
    "},",
    "observe: {",
    "tracing: true,",
    "costTracking: true,",
    "},",
    "server: {",
    "port: 3000,",
    "},",
    "} satisfies Omit<AppConfig, 'agents'>",
    "",
    "export default config",
    ""
  ];
  return lines.join("\n");
}
|
|
1687
|
+
/**
 * Generate the `src/index.ts` of a scaffolded project: creates the app from
 * the project config plus the assistant agent and starts listening.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function indexContent() {
  const lines = [
    "import { createApp } from '@elsium-ai/app'",
    "import config from '../elsium.config'",
    "import { assistant } from './agents/assistant'",
    "",
    "const app = createApp({",
    "...config,",
    "agents: [assistant],",
    "})",
    "",
    "app.listen()",
    ""
  ];
  return lines.join("\n");
}
|
|
1700
|
+
/**
 * Generate the `src/gateway/mesh.ts` of a scaffolded project: a provider mesh
 * with Anthropic and OpenAI, a fallback strategy and a circuit breaker.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function meshContent() {
  const lines = [
    "import { createProviderMesh } from '@elsium-ai/gateway'",
    "import { env } from '@elsium-ai/core'",
    "",
    "export const mesh = createProviderMesh({",
    "providers: [",
    "{ name: 'anthropic', config: { apiKey: env('ANTHROPIC_API_KEY') }, priority: 1 },",
    "{ name: 'openai', config: { apiKey: env('OPENAI_API_KEY') }, priority: 2 },",
    "],",
    "strategy: 'fallback',",
    "circuitBreaker: {",
    "failureThreshold: 3,",
    "resetTimeoutMs: 30_000,",
    "},",
    "})",
    ""
  ];
  return lines.join("\n");
}
|
|
1717
|
+
/**
 * Build the TypeScript source of the scaffolded policy-set module.
 *
 * @returns {string} Newline-terminated module source exporting `policies`.
 */
function policiesContent() {
  const lines = [
    "import { createPolicySet, modelAccessPolicy, costLimitPolicy } from '@elsium-ai/core'",
    "",
    "export const policies = createPolicySet([",
    "modelAccessPolicy(['claude-sonnet-4-6', 'claude-haiku-4-5', 'gpt-4o']),",
    "costLimitPolicy(5.0),",
    "])",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
1726
|
+
/**
 * Build the TypeScript source of the scaffolded example tool module.
 *
 * @returns {string} Newline-terminated module source exporting `calculatorTool`.
 */
function toolContent() {
  const lines = [
    "import { defineTool } from '@elsium-ai/tools'",
    "import { z } from 'zod'",
    "",
    "export const calculatorTool = defineTool({",
    "name: 'calculator',",
    "description: 'Add two numbers together',",
    "input: z.object({",
    "a: z.number().describe('First number'),",
    "b: z.number().describe('Second number'),",
    "}),",
    "handler: async ({ a, b }) => {",
    "return { result: a + b }",
    "},",
    "})",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
1743
|
+
/**
 * Build the TypeScript source of the scaffolded assistant agent module.
 *
 * @returns {string} Newline-terminated module source exporting `assistant`.
 */
function agentContent() {
  const lines = [
    "import { defineAgent } from '@elsium-ai/agents'",
    "import { mesh } from '../gateway/mesh'",
    "import { calculatorTool } from '../tools/example'",
    "",
    "export const assistant = defineAgent(",
    "{",
    "name: 'assistant',",
    "system: 'You are a helpful AI assistant. Use the calculator tool for math questions.',",
    "model: 'claude-sonnet-4-6',",
    "tools: [calculatorTool],",
    "guardrails: {",
    "maxIterations: 10,",
    "semantic: {",
    "relevance: { enabled: true, threshold: 0.5 },",
    "},",
    "},",
    "confidence: {",
    "hallucinationRisk: true,",
    "relevanceScore: true,",
    "},",
    "},",
    "{ complete: (req) => mesh.complete(req) },",
    ")",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
1769
|
+
/**
 * Build the TypeScript source of the scaffolded two-step research workflow module.
 *
 * @returns {string} Newline-terminated module source exporting `researchWorkflow`.
 */
function workflowContent() {
  // Plain double-quoted strings keep the embedded `${input}` placeholders literal.
  const lines = [
    "import { defineWorkflow, step } from '@elsium-ai/workflows'",
    "import { assistant } from '../agents/assistant'",
    "import { extractText } from '@elsium-ai/core'",
    "",
    "export const researchWorkflow = defineWorkflow({",
    "name: 'research-pipeline',",
    "steps: [",
    "step('research', {",
    "handler: async (input: string) => {",
    "const result = await assistant.run(`Research this topic: ${input}`)",
    "return extractText(result.message.content)",
    "},",
    "}),",
    "step('summarise', {",
    "handler: async (input: string) => {",
    "const result = await assistant.run(`Summarise in two sentences: ${input}`)",
    "return extractText(result.message.content)",
    "},",
    "}),",
    "],",
    "})",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
1793
|
+
/**
 * Build the TypeScript source of the scaffolded "quality" eval suite.
 *
 * @returns {string} Newline-terminated module source with a default-exported suite config.
 */
function qualityEvalContent() {
  const lines = [
    "import type { EvalSuiteConfig } from '@elsium-ai/testing'",
    "import { assistant } from '../src/agents/assistant'",
    "import { extractText } from '@elsium-ai/core'",
    "",
    "export default {",
    "name: 'quality',",
    "cases: [",
    "{",
    "name: 'factual-answer',",
    "input: 'What is 2 + 2?',",
    "criteria: [{ type: 'contains', value: '4' }],",
    "},",
    "{",
    "name: 'polite-greeting',",
    "input: 'Hello!',",
    "criteria: [{ type: 'contains', value: 'Hello' }],",
    "},",
    "],",
    "runner: async (input) => {",
    "const result = await assistant.run(input)",
    "return extractText(result.message.content)",
    "},",
    "} satisfies EvalSuiteConfig",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
1819
|
+
/**
 * Build the TypeScript source of the scaffolded "determinism" eval suite.
 *
 * @returns {string} Newline-terminated module source with a default-exported suite config.
 */
function determinismEvalContent() {
  const lines = [
    "import type { EvalSuiteConfig } from '@elsium-ai/testing'",
    "import { assertDeterministic } from '@elsium-ai/testing'",
    "import { assistant } from '../src/agents/assistant'",
    "import { extractText } from '@elsium-ai/core'",
    "",
    "export default {",
    "name: 'determinism',",
    "cases: [",
    "{",
    "name: 'stable-math',",
    "input: 'What is 2 + 2? Reply with just the number.',",
    "},",
    "],",
    "runner: async (input) => {",
    "const result = await assertDeterministic(",
    "async () => {",
    "const res = await assistant.run(input)",
    "return extractText(res.message.content)",
    "},",
    "{ runs: 3, tolerance: 0 },",
    ")",
    "return result.outputs[0]",
    "},",
    "} satisfies EvalSuiteConfig",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
1846
|
+
/**
 * Build the TypeScript source of the scaffolded unit-test file for the assistant agent.
 *
 * @returns {string} Newline-terminated test-module source.
 */
function testContent() {
  const lines = [
    "import { describe, it, expect } from 'vitest'",
    "import { mockProvider, createReplayRecorder, createReplayPlayer } from '@elsium-ai/testing'",
    "import { defineAgent } from '@elsium-ai/agents'",
    "import { calculatorTool } from '../../src/tools/example'",
    "",
    "describe('assistant agent', () => {",
    "it('should respond to a greeting', async () => {",
    "const mock = mockProvider({",
    "responses: [{ content: 'Hello! How can I help you today?' }],",
    "})",
    "",
    "const agent = defineAgent(",
    "{",
    "name: 'test-assistant',",
    "system: 'You are a helpful AI assistant.',",
    "model: 'mock',",
    "tools: [calculatorTool],",
    "},",
    "{ complete: (req) => mock.complete(req) },",
    ")",
    "",
    "const result = await agent.run('Hello!')",
    "expect(result.message.content).toContain('Hello')",
    "expect(mock.callCount).toBe(1)",
    "})",
    "",
    "it('should replay recorded interactions', async () => {",
    "const recorder = createReplayRecorder()",
    "const mock = mockProvider({",
    "responses: [{ content: 'The answer is 4.' }],",
    "})",
    "",
    "const wrappedComplete = recorder.wrap((req) => mock.complete(req))",
    "await wrappedComplete({",
    "model: 'mock',",
    "messages: [{ role: 'user', content: [{ type: 'text', text: 'What is 2+2?' }] }],",
    "})",
    "",
    "const player = createReplayPlayer(recorder.toJSON())",
    "const replayed = await player.complete({",
    "model: 'mock',",
    "messages: [{ role: 'user', content: [{ type: 'text', text: 'What is 2+2?' }] }],",
    "})",
    "",
    "expect(replayed).toBeDefined()",
    "})",
    "})",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
1896
|
+
/**
 * Build the README markdown for a scaffolded project.
 *
 * @param {string} projectName Name interpolated into the top-level heading.
 * @returns {string} Newline-terminated markdown document.
 */
function readmeContent(projectName) {
  const lines = [
    `# ${projectName}`,
    "",
    "Built with [ElsiumAI](https://github.com/elsium-ai/elsium-ai).",
    "",
    "## Quick start",
    "",
    "```bash",
    "cp .env.example .env # add your API keys",
    "bun install",
    "bun run dev",
    "```",
    "",
    "## Scripts",
    "",
    "| Command | Description |",
    "| --- | --- |",
    "| `bun run dev` | Start the dev server |",
    "| `bun run test` | Run unit tests |",
    "| `bun run eval` | Run quality eval suite |",
    "| `bun run eval:determinism` | Run determinism eval |",
    "| `bun run lint` | Lint with Biome |",
    "| `bun run format` | Auto-format with Biome |",
    "",
    "## Project structure",
    "",
    "- **src/agents/** \u2014 Agent definitions with guardrails",
    "- **src/tools/** \u2014 Tool schemas validated by Zod",
    "- **src/policies/** \u2014 Policy sets (model allowlist, cost caps)",
    "- **src/gateway/** \u2014 Provider mesh with circuit breaker",
    "- **src/workflows/** \u2014 Multi-step workflows",
    "- **evals/** \u2014 Eval suites (quality + determinism)",
    "- **test/** \u2014 Unit tests with mock providers and replay",
  ];
  return `${lines.join("\n")}\n`;
}
|
|
1931
|
+
|
|
1932
|
+
// src/commands/prompt.ts
|
|
1933
|
+
import { existsSync as existsSync5, readFileSync as readFileSync3, readdirSync } from "fs";
|
|
1934
|
+
import { join as join4 } from "path";
|
|
1935
|
+
// Directory, joined onto the current working directory by promptCommand, that is scanned for prompt JSON files.
var PROMPTS_DIR = ".elsium/prompts";
|
|
1936
|
+
/**
 * Print the usage text for the `elsium prompt` command to stdout.
 *
 * @returns {void}
 */
function showHelp() {
  const usage = [
    "",
    "ElsiumAI Prompt Manager",
    "",
    "Usage:",
    "elsium prompt list List all registered prompts",
    "elsium prompt diff <name> <v1> <v2> Show diff between versions",
    "elsium prompt history <name> Show version history",
    "elsium prompt show <name> [version] Show prompt content",
    "",
    "Prompts are stored in .elsium/prompts/ as JSON files.",
    "",
  ];
  console.log(usage.join("\n"));
}
|
|
1949
|
+
/**
 * Read every `*.json` file in a directory and return the usable prompt records.
 *
 * The prompt handlers dereference `name`, `version`, `variables` and `content`
 * without checking them, so records are validated and normalized here:
 * - files that cannot be read or parsed are skipped (best-effort, as before);
 * - JSON values that are not plain objects, or that lack a string `name` or a
 *   string/number `version`, are skipped;
 * - `version` is coerced to a string, `variables` to an array and `content`
 *   to a string so sorting, joining and splitting downstream cannot throw.
 *
 * @param {string} promptsPath Directory to scan.
 * @returns {object[]} Normalized prompt records in file-name order; empty when
 *   the directory does not exist.
 */
function loadPromptFiles(promptsPath) {
  if (!existsSync5(promptsPath)) {
    return [];
  }
  // Sort the names so output order does not depend on the OS directory order.
  const fileNames = readdirSync(promptsPath)
    .filter((f) => f.endsWith(".json"))
    .sort();
  const prompts = [];
  for (const fileName of fileNames) {
    let parsed;
    try {
      parsed = JSON.parse(readFileSync3(join4(promptsPath, fileName), "utf-8"));
    } catch {
      // Unreadable or malformed file: deliberately ignored, matching the original behavior.
      continue;
    }
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      continue;
    }
    const hasName = typeof parsed.name === "string";
    const hasVersion = typeof parsed.version === "string" || typeof parsed.version === "number";
    if (!hasName || !hasVersion) {
      continue;
    }
    prompts.push({
      ...parsed,
      version: String(parsed.version),
      variables: Array.isArray(parsed.variables) ? parsed.variables : [],
      content: typeof parsed.content === "string" ? parsed.content : "",
    });
  }
  return prompts;
}
|
|
1961
|
+
/**
 * Print every prompt name found under `promptsPath` together with its versions.
 *
 * @param {string} promptsPath Directory passed through to `loadPromptFiles`.
 * @returns {void}
 */
function handleList(promptsPath) {
  const records = loadPromptFiles(promptsPath);
  if (records.length === 0) {
    console.log("\nNo prompts found. Store prompts in .elsium/prompts/\n");
    return;
  }

  // Group versions by prompt name, keeping first-seen order of names.
  const versionsByName = new Map();
  for (const record of records) {
    const known = versionsByName.get(record.name);
    if (known) {
      known.push(record.version);
    } else {
      versionsByName.set(record.name, [record.version]);
    }
  }

  console.log(`\nRegistered Prompts (${versionsByName.size})`);
  console.log(` ${"\u2500".repeat(50)}`);
  versionsByName.forEach((versions, name) => {
    console.log(` ${name} \u2014 ${versions.length} version(s): ${versions.join(", ")}`);
  });
  console.log();
}
|
|
1985
|
+
/**
 * Print the version history of one prompt, oldest version first.
 *
 * Versions are ordered with numeric-aware collation so that "1.10.0" sorts
 * after "1.9.0"; a plain string comparison would place it before. Records
 * without a `variables` array are reported as having no variables instead of
 * throwing.
 *
 * @param {string} promptsPath Directory passed through to `loadPromptFiles`.
 * @param {string} name Prompt name to report on.
 * @returns {void}
 */
function handleHistory(promptsPath, name) {
  const files = loadPromptFiles(promptsPath)
    .filter((p) => p.name === name)
    .sort((a, b) => String(a.version).localeCompare(String(b.version), undefined, { numeric: true }));
  if (files.length === 0) {
    console.log(`
No versions found for prompt "${name}".
`);
    return;
  }
  console.log(`
Prompt History: ${name} (${files.length} versions)`);
  console.log(` ${"\u2500".repeat(50)}`);
  for (const p of files) {
    const variables = Array.isArray(p.variables) ? p.variables : [];
    console.log(` v${p.version} \u2014 ${variables.length} variables: ${variables.join(", ") || "none"}`);
  }
  console.log();
}
|
|
2001
|
+
/**
 * Print one prompt: the requested version, or the highest version when none is given.
 *
 * "Highest" is decided with numeric-aware collation so "1.10.0" beats "1.9.0";
 * a plain string comparison would pick "1.9.0". The candidate list is copied
 * before sorting, and a missing `variables` array is reported as "none"
 * instead of throwing.
 *
 * @param {string} promptsPath Directory passed through to `loadPromptFiles`.
 * @param {string} name Prompt name to display.
 * @param {string} [version] Exact version to display; latest when omitted.
 * @returns {void}
 */
function handleShow(promptsPath, name, version) {
  const files = loadPromptFiles(promptsPath).filter((p) => p.name === name);
  const prompt = version
    ? files.find((p) => String(p.version) === version)
    : [...files].sort((a, b) => String(b.version).localeCompare(String(a.version), undefined, { numeric: true }))[0];
  if (!prompt) {
    console.log(`
Prompt "${name}"${version ? ` v${version}` : ""} not found.
`);
    return;
  }
  const variables = Array.isArray(prompt.variables) ? prompt.variables : [];
  console.log(`
Prompt: ${prompt.name} v${prompt.version}`);
  console.log(` Variables: ${variables.join(", ") || "none"}`);
  console.log(` ${"\u2500".repeat(50)}`);
  console.log(prompt.content);
  console.log(` ${"\u2500".repeat(50)}
`);
}
|
|
2018
|
+
/**
 * Print a line-by-line diff of two texts to stdout.
 *
 * Lines are aligned with a longest-common-subsequence table rather than by
 * position, so inserting or deleting one line no longer marks every following
 * line as changed. Output markers are unchanged: ` - ` for a line only in
 * `fromLines`, ` + ` for a line only in `toLines`, and a single leading space
 * for a line present in both. A replaced line still prints its removal before
 * its addition.
 *
 * @param {string[]} fromLines Lines of the old text.
 * @param {string[]} toLines Lines of the new text.
 * @returns {void}
 */
function printDiffLines(fromLines, toLines) {
  const fromCount = fromLines.length;
  const toCount = toLines.length;

  // common[i][j] = length of the longest common subsequence of
  // fromLines[i..] and toLines[j..]; the extra row/column of zeros is the base case.
  const common = Array.from({ length: fromCount + 1 }, () => new Array(toCount + 1).fill(0));
  for (let i = fromCount - 1; i >= 0; i--) {
    for (let j = toCount - 1; j >= 0; j--) {
      common[i][j] = fromLines[i] === toLines[j]
        ? common[i + 1][j + 1] + 1
        : Math.max(common[i + 1][j], common[i][j + 1]);
    }
  }

  let i = 0;
  let j = 0;
  while (i < fromCount && j < toCount) {
    if (fromLines[i] === toLines[j]) {
      console.log(` ${fromLines[i]}`);
      i++;
      j++;
    } else if (common[i + 1][j] >= common[i][j + 1]) {
      // Ties prefer the removal so a changed line prints "-" before "+".
      console.log(` - ${fromLines[i]}`);
      i++;
    } else {
      console.log(` + ${toLines[j]}`);
      j++;
    }
  }
  for (; i < fromCount; i++) {
    console.log(` - ${fromLines[i]}`);
  }
  for (; j < toCount; j++) {
    console.log(` + ${toLines[j]}`);
  }
}
|
|
2035
|
+
/**
 * Print a diff between two stored versions of one prompt.
 *
 * Writes an error and calls `process.exit(1)` when either version is missing.
 *
 * @param {string} promptsPath Directory passed through to `loadPromptFiles`.
 * @param {string} name Prompt name.
 * @param {string} v1 Version on the "from" side.
 * @param {string} v2 Version on the "to" side.
 * @returns {void}
 */
function handleDiff(promptsPath, name, v1, v2) {
  const candidates = loadPromptFiles(promptsPath).filter((p) => p.name === name);
  const pickVersion = (wanted) => candidates.find((p) => p.version === wanted);
  const from = pickVersion(v1);
  const to = pickVersion(v2);

  const bothFound = Boolean(from) && Boolean(to);
  if (!bothFound) {
    console.error(` Could not find both versions: ${v1} and ${v2}`);
    process.exit(1);
  }

  const rule = ` ${"\u2500".repeat(50)}`;
  console.log(`\nDiff: ${name} v${v1} \u2192 v${v2}`);
  console.log(rule);
  const oldLines = from.content.split("\n");
  const newLines = to.content.split("\n");
  printDiffLines(oldLines, newLines);
  console.log(`${rule}\n`);
}
|
|
2052
|
+
/**
 * Entry point for `elsium prompt <subcommand>`.
 *
 * Dispatches to the list/history/show/diff handlers, prints help when no
 * subcommand (or `--help`/`-h`) is given, and exits with status 1 on a
 * missing argument or an unknown subcommand.
 *
 * @param {string[]} args Arguments following `prompt` on the command line.
 * @returns {Promise<void>}
 */
async function promptCommand(args) {
  const [subcommand, name, third, fourth] = args;
  const wantsHelp = !subcommand || subcommand === "--help" || subcommand === "-h";
  if (wantsHelp) {
    showHelp();
    return;
  }

  const promptsPath = join4(process.cwd(), PROMPTS_DIR);
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };

  if (subcommand === "list") {
    handleList(promptsPath);
  } else if (subcommand === "history") {
    if (!name) {
      fail(" Please provide a prompt name: elsium prompt history <name>");
    }
    handleHistory(promptsPath, name);
  } else if (subcommand === "show") {
    if (!name) {
      fail(" Please provide a prompt name: elsium prompt show <name> [version]");
    }
    handleShow(promptsPath, name, third);
  } else if (subcommand === "diff") {
    if (!name || !third || !fourth) {
      fail(" Usage: elsium prompt diff <name> <v1> <v2>");
    }
    handleDiff(promptsPath, name, third, fourth);
  } else {
    console.error(` Unknown subcommand: ${subcommand}`);
    console.log(' Run "elsium prompt --help" for usage information.');
    process.exit(1);
  }
}
|
|
2098
|
+
|
|
2099
|
+
// src/commands/trace.ts
|
|
2100
|
+
import { existsSync as existsSync6, readFileSync as readFileSync4, readdirSync as readdirSync2 } from "fs";
|
|
2101
|
+
import { join as join5 } from "path";
|
|
2102
|
+
// Directory, joined onto the current working directory by traceCommand, that holds one `<traceId>.json` file per trace.
var TRACES_DIR = ".elsium/traces";
|
|
2103
|
+
/**
 * Map a span status to its short display label.
 *
 * @param {string} status Span status value.
 * @returns {string} "OK" for "ok", "ERR" for "error", "..." for anything else.
 */
function formatStatus(status) {
  switch (status) {
    case "ok":
      return "OK";
    case "error":
      return "ERR";
    default:
      return "...";
  }
}
|
|
2110
|
+
/**
 * Format a span duration for display.
 *
 * A duration of exactly 0 is a real measurement and is shown as "0ms"; the
 * previous truthiness test reported it as unknown. Every other falsy value
 * (undefined, null, NaN, "") still yields "?".
 *
 * @param {number} [durationMs] Duration in milliseconds, if recorded.
 * @returns {string} `"<n>ms"`, or "?" when no duration is available.
 */
function formatDuration(durationMs) {
  const isKnown = durationMs === 0 || Boolean(durationMs);
  return isKnown ? `${durationMs}ms` : "?";
}
|
|
2113
|
+
/**
 * Print guidance shown when no traces directory exists: how to enable tracing
 * and how to use the `elsium trace` command.
 *
 * @returns {void}
 */
function showNoTracesHelp() {
  const message = [
    "",
    "No traces found.",
    "",
    "Traces are recorded when you run your app with tracing enabled:",
    "",
    "const app = createApp({",
    "observe: {",
    "tracing: true,",
    "},",
    "})",
    "",
    "Usage:",
    "elsium trace List recent traces",
    "elsium trace <id> Inspect a specific trace",
    "",
  ];
  console.log(message.join("\n"));
}
|
|
2130
|
+
/**
 * Print a one-line summary for up to 20 trace files in `tracesPath`.
 *
 * Files are taken in reverse file-name order (presumably newest first; this
 * relies on trace file names sorting chronologically — confirm against the
 * writer). Each file is read and parsed on its own, so a corrupt or
 * unexpectedly shaped file is reported on stderr and skipped instead of
 * aborting the whole listing.
 *
 * @param {string} tracesPath Directory containing `*.json` trace files.
 * @returns {void}
 */
function listTraces(tracesPath) {
  if (!existsSync6(tracesPath)) {
    showNoTracesHelp();
    return;
  }

  let files;
  try {
    files = readdirSync2(tracesPath).filter((f) => f.endsWith(".json")).sort().reverse().slice(0, 20);
  } catch (err2) {
    console.error("Failed to read traces:", err2 instanceof Error ? err2.message : err2);
    return;
  }

  if (files.length === 0) {
    console.log(`
No traces recorded yet.
`);
    return;
  }

  console.log(`
Recent Traces (${files.length})`);
  console.log(` ${"\u2500".repeat(60)}`);
  for (const file of files) {
    let root;
    try {
      const data = JSON.parse(readFileSync4(join5(tracesPath, file), "utf-8"));
      if (!Array.isArray(data)) {
        throw new Error("expected an array of spans");
      }
      // Prefer the span without a parent; fall back to the first span.
      root = data.find((s) => !s.parentId) ?? data[0];
    } catch (err2) {
      console.error(` Skipping ${file}:`, err2 instanceof Error ? err2.message : err2);
      continue;
    }
    if (root) {
      const status = formatStatus(root.status);
      const duration = formatDuration(root.durationMs);
      console.log(` [${status}] ${root.traceId} ${root.name} ${duration}`);
    }
  }
  console.log();
}
|
|
2160
|
+
/**
 * Print the span tree stored in `<tracesPath>/<traceId>.json`.
 *
 * Calls `process.exit(1)` when the file does not exist or cannot be read,
 * parsed or rendered.
 *
 * @param {string} tracesPath Directory containing trace files.
 * @param {string} traceId Trace identifier (file name without `.json`).
 * @returns {void}
 */
function showTrace(tracesPath, traceId) {
  const tracePath = join5(tracesPath, `${traceId}.json`);
  const found = existsSync6(tracePath);
  if (!found) {
    console.error(`Trace not found: ${traceId}`);
    process.exit(1);
  }

  try {
    const rawText = readFileSync4(tracePath, "utf-8");
    const spans = JSON.parse(rawText);
    console.log(`\nTrace: ${traceId}`);
    console.log(` Spans: ${spans.length}`);
    console.log(` ${"\u2500".repeat(60)}`);
    // Every span without a parent starts its own tree.
    spans
      .filter((s) => !s.parentId)
      .forEach((rootSpan) => {
        printSpanTree(rootSpan, spans, 0);
      });
    console.log();
  } catch (failure) {
    const detail = failure instanceof Error ? failure.message : failure;
    console.error("Failed to read trace:", detail);
    process.exit(1);
  }
}
|
|
2182
|
+
/**
 * Entry point for `elsium trace [id]`.
 *
 * Lists recent traces when no id is given. Otherwise the id must consist only
 * of letters, digits, `_` and `-` (it is used to build a file name); an id
 * with any other character exits with status 1.
 *
 * @param {string[]} args Arguments following `trace` on the command line.
 * @returns {Promise<void>}
 */
async function traceCommand(args) {
  const [traceId] = args;
  const tracesPath = join5(process.cwd(), TRACES_DIR);

  if (!traceId) {
    listTraces(tracesPath);
    return;
  }

  const isSafeId = /^[a-zA-Z0-9_-]+$/.test(traceId);
  if (!isSafeId) {
    console.error("Invalid trace ID format");
    process.exit(1);
  }
  showTrace(tracesPath, traceId);
}
|
|
2195
|
+
/**
 * Recursively print a span, its metadata, its events and its child spans.
 *
 * Spans read from disk may lack `metadata` or `events`; both are treated as
 * empty instead of throwing. A duration of exactly 0 is printed as "0ms"
 * rather than being omitted.
 *
 * @param {object} span Span to print.
 * @param {object[]} allSpans Every span of the trace, searched for children by `parentId`.
 * @param {number} depth Nesting level, used to widen the indentation.
 * @returns {void}
 */
function printSpanTree(span, allSpans, depth) {
  const indent = ` ${" ".repeat(depth)}`;
  const status = formatStatus(span.status);
  const hasDuration = span.durationMs === 0 || Boolean(span.durationMs);
  const duration = hasDuration ? `${span.durationMs}ms` : "";
  const kind = span.kind ? `[${span.kind}]` : "";
  console.log(`${indent}${kind} ${span.name} (${status}) ${duration}`);
  for (const [key, value] of Object.entries(span.metadata ?? {})) {
    console.log(`${indent} ${key}: ${JSON.stringify(value)}`);
  }
  for (const event of span.events ?? []) {
    console.log(`${indent} > ${event.name}${event.data ? `: ${JSON.stringify(event.data)}` : ""}`);
  }
  const children = allSpans.filter((s) => s.parentId === span.id);
  for (const child of children) {
    printSpanTree(child, allSpans, depth + 1);
  }
}
|
|
2212
|
+
|
|
2213
|
+
// src/commands/xray.ts
|
|
2214
|
+
import { existsSync as existsSync7, readFileSync as readFileSync5 } from "fs";
|
|
2215
|
+
import { join as join6 } from "path";
|
|
2216
|
+
// Path, joined onto the current working directory by xrayCommand, of the JSON file holding recorded X-Ray entries.
var XRAY_FILE = ".elsium/xray-history.json";
|
|
2217
|
+
/**
 * Entry point for `elsium xray`: print recorded LLM call details.
 *
 * Supported flags, accepted in any order:
 * - `--help` / `-h` (first argument only): print usage and return;
 * - `--trace <id>`: print the entry whose `traceId` matches;
 * - `--last [N]`: print the first N entries of the history file (5 when N is
 *   omitted); N must be a positive integer;
 * - `--raw`: also print the raw request/response of each entry.
 *
 * Previously only the first argument was inspected, so `--raw --last 5`
 * silently ignored `--last`, and a non-numeric or negative N produced a
 * misleading "no data" message or a wrong slice.
 *
 * Exits with status 1 on invalid arguments, an unknown trace id, or an
 * unreadable/invalid history file.
 *
 * @param {string[]} args Arguments following `xray` on the command line.
 * @returns {Promise<void>}
 */
async function xrayCommand(args) {
  const flag = args[0];
  if (flag === "--help" || flag === "-h") {
    console.log(`
ElsiumAI X-Ray \u2014 Inspect LLM calls

Usage:
elsium xray Show last call
elsium xray --last N Show last N calls
elsium xray --trace <id> Show call by trace ID
elsium xray --raw Show raw request/response

X-Ray data is captured when xray mode is enabled:

const gw = gateway({ ..., xray: true })
`);
    return;
  }
  const xrayPath = join6(process.cwd(), XRAY_FILE);
  if (!existsSync7(xrayPath)) {
    console.log(`
No X-Ray data found.

Enable X-Ray mode on your gateway to capture LLM call details:

const gw = gateway({ provider: 'anthropic', apiKey: '...', xray: true })
await gw.complete({ messages: [...] })
console.log(gw.lastCall())

X-Ray data will be saved to .elsium/xray-history.json
`);
    return;
  }
  try {
    const entries = JSON.parse(readFileSync5(xrayPath, "utf-8"));
    if (!Array.isArray(entries)) {
      throw new Error("expected an array of entries");
    }
    const showRaw = args.includes("--raw");

    const traceIndex = args.indexOf("--trace");
    if (traceIndex !== -1) {
      const traceId = args[traceIndex + 1];
      if (!traceId) {
        console.error(" Please provide a trace ID: elsium xray --trace <id>");
        process.exit(1);
        return;
      }
      const entry = entries.find((e) => e.traceId === traceId);
      if (!entry) {
        console.error(` Trace not found: ${traceId}`);
        process.exit(1);
        return;
      }
      printEntry(entry, showRaw);
      return;
    }

    let count = 1;
    const lastIndex = args.indexOf("--last");
    if (lastIndex !== -1) {
      const rawCount = args[lastIndex + 1];
      if (rawCount === undefined || rawCount.startsWith("--")) {
        // `--last` given without a number keeps the original default of 5.
        count = 5;
      } else {
        count = Number(rawCount);
        if (!Number.isInteger(count) || count < 1) {
          console.error(` Invalid value for --last: ${rawCount} (expected a positive integer)`);
          process.exit(1);
          return;
        }
      }
    }

    // NOTE(review): takes entries from the front of the file, which assumes
    // the history is stored newest-first — confirm against the writer.
    const toShow = entries.slice(0, count);
    if (toShow.length === 0) {
      console.log(`
No X-Ray data recorded yet.
`);
      return;
    }
    console.log(`
ElsiumAI X-Ray \u2014 ${toShow.length} call(s)`);
    console.log(` ${"\u2500".repeat(60)}`);
    for (const entry of toShow) {
      printEntry(entry, showRaw);
    }
  } catch (err2) {
    console.error("Failed to read X-Ray data:", err2 instanceof Error ? err2.message : err2);
    process.exit(1);
  }
}
|
|
2286
|
+
/**
 * Print one recorded X-Ray entry, optionally with its raw request/response.
 *
 * Entries come from a JSON file on disk, so missing or malformed fields are
 * tolerated: an unparseable timestamp or a missing cost prints "unknown",
 * missing token counts print "?", and missing `request`/`response` objects
 * are treated as empty. Previously any of these threw and aborted the whole
 * command.
 *
 * @param {object} entry Recorded call (traceId, timestamp, provider, model,
 *   latencyMs, usage, cost, and optionally request/response).
 * @param {boolean} [raw=false] Also print request and response details.
 * @returns {void}
 */
function printEntry(entry, raw = false) {
  const recordedAt = new Date(entry.timestamp);
  // toISOString() throws a RangeError on an invalid date, so check first.
  const time = Number.isNaN(recordedAt.getTime()) ? "unknown" : recordedAt.toISOString();
  const usage = entry.usage ?? {};
  const totalCost = entry.cost?.totalCost;
  const cost = typeof totalCost === "number" ? `$${totalCost.toFixed(6)}` : "unknown";
  console.log(`
Trace: ${entry.traceId}
Time: ${time}
Provider: ${entry.provider}
Model: ${entry.model}
Latency: ${entry.latencyMs}ms
Tokens: ${usage.inputTokens ?? "?"} in / ${usage.outputTokens ?? "?"} out (${usage.totalTokens ?? "?"} total)
Cost: ${cost}`);
  if (raw) {
    const request = entry.request ?? {};
    const response = entry.response ?? {};
    // NOTE(review): request headers are printed verbatim; confirm the recorder
    // redacts API keys before they reach the history file.
    console.log(`
\u2500\u2500 Request \u2500\u2500
${request.method} ${request.url}
Headers: ${JSON.stringify(request.headers, null, 4)}
Body: ${JSON.stringify(request.body, null, 4)}

\u2500\u2500 Response \u2500\u2500
Status: ${response.status}
Headers: ${JSON.stringify(response.headers, null, 4)}
Body: ${JSON.stringify(response.body, null, 4)}`);
  }
  console.log(` ${"\u2500".repeat(60)}`);
}
|
|
2309
|
+
|
|
2310
|
+
// src/cli.ts
|
|
2311
|
+
var VERSION = "0.1.0";
|
|
2312
|
+
var HELP = `
|
|
2313
|
+
ElsiumAI CLI v${VERSION}
|
|
2314
|
+
|
|
2315
|
+
Usage: elsium <command> [options]
|
|
2316
|
+
|
|
2317
|
+
Commands:
|
|
2318
|
+
init [name] Scaffold a new ElsiumAI project
|
|
2319
|
+
dev Start development server with hot reload
|
|
2320
|
+
eval [file] Run evaluation suite
|
|
2321
|
+
cost Show cost report from last run
|
|
2322
|
+
trace [id] Inspect trace from last run
|
|
2323
|
+
xray Inspect LLM calls (X-Ray mode)
|
|
2324
|
+
prompt Manage prompt versions
|
|
2325
|
+
|
|
2326
|
+
Options:
|
|
2327
|
+
--help, -h Show this help message
|
|
2328
|
+
--version, -v Show version
|
|
2329
|
+
|
|
2330
|
+
Examples:
|
|
2331
|
+
elsium init my-ai-app
|
|
2332
|
+
elsium dev
|
|
2333
|
+
elsium eval ./evals/suite.ts
|
|
2334
|
+
elsium cost
|
|
2335
|
+
elsium trace trc_abc123
|
|
2336
|
+
elsium xray --last 5
|
|
2337
|
+
elsium prompt list
|
|
2338
|
+
`;
|
|
2339
|
+
/**
 * CLI entry point: parse `process.argv` and run the requested command.
 *
 * Prints help (exit 0) when no command or `--help`/`-h` is given, prints the
 * version (exit 0) for `--version`/`-v`, and prints an error plus help
 * (exit 1) for an unknown command.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const [command, ...commandArgs] = process.argv.slice(2);

  const wantsHelp = !command || command === "--help" || command === "-h";
  if (wantsHelp) {
    console.log(HELP);
    process.exit(0);
  }
  const wantsVersion = command === "--version" || command === "-v";
  if (wantsVersion) {
    console.log(VERSION);
    process.exit(0);
  }

  // A Map (not a plain object) so names such as "toString" are never matched;
  // arrow wrappers keep each handler lookup lazy.
  const handlers = new Map([
    ["init", (rest) => initCommand(rest)],
    ["dev", (rest) => devCommand(rest)],
    ["eval", (rest) => evalCommand(rest)],
    ["cost", (rest) => costCommand(rest)],
    ["trace", (rest) => traceCommand(rest)],
    ["xray", (rest) => xrayCommand(rest)],
    ["prompt", (rest) => promptCommand(rest)],
  ]);

  const run = handlers.get(command);
  if (run) {
    await run(commandArgs);
    return;
  }
  console.error(`Unknown command: ${command}`);
  console.log(HELP);
  process.exit(1);
}
|
|
2378
|
+
// Run the CLI; any rejection from main() is reported on stderr and turned into exit code 1.
main().catch((failure) => {
  let message;
  if (failure instanceof Error) {
    message = failure.message;
  } else {
    message = String(failure);
  }
  console.error("Error:", message);
  process.exit(1);
});
|