@elsium-ai/cli 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js ADDED
@@ -0,0 +1,2382 @@
1
+ #!/usr/bin/env bun
2
+ // @bun
3
// Bundler runtime helpers (emitted by the Bun bundler prelude).
var __defProp = Object.defineProperty;

/** Identity function; bound with a value to build constant getters. */
var __returnValue = function (value) {
  return value;
};

/**
 * Setter installed on exported bindings. `this` is the getter table; the
 * entry for `name` is replaced with a getter that returns `newValue`.
 */
function __exportSetter(name, newValue) {
  const constantGetter = __returnValue.bind(null, newValue);
  this[name] = constantGetter;
}

/**
 * Defines one enumerable, configurable accessor on `target` for every key of
 * `all`, using `all[name]` as the getter.
 */
var __export = function (target, all) {
  for (var name in all) {
    const descriptor = {
      get: all[name],
      enumerable: true,
      configurable: true,
      set: __exportSetter.bind(all, name)
    };
    __defProp(target, name, descriptor);
  }
};

/**
 * Wraps a module initializer so it runs at most once; later calls return the
 * cached result. The initializer is cleared before it is invoked and is
 * called with the argument 0.
 */
var __esm = function (fn, res) {
  return function () {
    if (fn) {
      const initializer = fn;
      fn = 0;
      res = initializer(0);
    }
    return res;
  };
};
18
+
19
+ // ../core/src/errors.ts
20
/**
 * Structured error type. The class is assigned lazily: `ElsiumError` stays
 * undefined until `init_errors()` has been called once.
 */
var ElsiumError;
var init_errors = __esm(() => {
  ElsiumError = class ElsiumError extends Error {
    code;
    provider;
    model;
    statusCode;
    retryable;
    retryAfterMs;
    cause;
    metadata;

    /**
     * @param details - object carrying `message` plus the optional fields
     *   copied below (`code`, `provider`, `model`, `statusCode`, `retryable`,
     *   `retryAfterMs`, `cause`, `metadata`).
     */
    constructor(details) {
      super(details.message);
      this.name = "ElsiumError";
      const copiedFields = [
        "code",
        "provider",
        "model",
        "statusCode",
        "retryable",
        "retryAfterMs",
        "cause",
        "metadata"
      ];
      for (const field of copiedFields) {
        this[field] = details[field];
      }
    }

    /** Plain-object view of the error; `cause` is intentionally not included. */
    toJSON() {
      const {
        name,
        code,
        message,
        provider,
        model,
        statusCode,
        retryable,
        retryAfterMs,
        metadata
      } = this;
      return {
        name,
        code,
        message,
        provider,
        model,
        statusCode,
        retryable,
        retryAfterMs,
        metadata
      };
    }

    /** Builds a PROVIDER_ERROR; `retryable` defaults to false when nullish. */
    static providerError(message, opts) {
      const details = {
        code: "PROVIDER_ERROR",
        message,
        provider: opts.provider,
        statusCode: opts.statusCode,
        retryable: opts.retryable ?? false,
        cause: opts.cause
      };
      return new ElsiumError(details);
    }

    /** Builds a retryable RATE_LIMIT error with status 429. */
    static rateLimit(provider, retryAfterMs) {
      const details = {
        code: "RATE_LIMIT",
        message: `Rate limited by ${provider}`,
        provider,
        statusCode: 429,
        retryable: true,
        retryAfterMs
      };
      return new ElsiumError(details);
    }

    /** Builds a non-retryable AUTH_ERROR with status 401. */
    static authError(provider) {
      const details = {
        code: "AUTH_ERROR",
        message: `Authentication failed for ${provider}. Check your API key.`,
        provider,
        statusCode: 401,
        retryable: false
      };
      return new ElsiumError(details);
    }

    /** Builds a retryable TIMEOUT error. */
    static timeout(provider, timeoutMs) {
      const details = {
        code: "TIMEOUT",
        message: `Request to ${provider} timed out after ${timeoutMs}ms`,
        provider,
        retryable: true
      };
      return new ElsiumError(details);
    }

    /** Builds a non-retryable VALIDATION_ERROR carrying `metadata`. */
    static validation(message, metadata) {
      const details = {
        code: "VALIDATION_ERROR",
        message,
        retryable: false,
        metadata
      };
      return new ElsiumError(details);
    }

    /** Builds a non-retryable BUDGET_EXCEEDED error recording both amounts. */
    static budgetExceeded(spent, budget) {
      const details = {
        code: "BUDGET_EXCEEDED",
        message: `Token budget exceeded: spent ${spent}, budget ${budget}`,
        retryable: false,
        metadata: { spent, budget }
      };
      return new ElsiumError(details);
    }
  };
});
111
+ // ../core/src/utils.ts
112
+ import { randomBytes } from "crypto";
113
/**
 * Returns `bytes` random bytes from the crypto module, hex-encoded
 * (two characters per byte).
 */
function cryptoHex(bytes) {
  const entropy = randomBytes(bytes);
  return entropy.toString("hex");
}
116
/**
 * Builds an identifier of the form `<prefix>_<base36 timestamp>_<8 hex chars>`.
 * @param prefix - leading tag, "els" when omitted.
 */
function generateId(prefix = "els") {
  const stamp = Date.now().toString(36);
  const suffix = cryptoHex(4);
  return `${prefix}_${stamp}_${suffix}`;
}
121
/** Builds a trace identifier: `trc_<base36 timestamp>_<12 hex chars>`. */
function generateTraceId() {
  const stamp = Date.now().toString(36);
  const suffix = cryptoHex(6);
  return `trc_${stamp}_${suffix}`;
}
126
/** Resolves (with undefined) after roughly `ms` milliseconds. */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
129
/** Module initializer for utils; nothing to set up, so it is a no-op. */
var init_utils = function () {};
130
+
131
+ // ../core/src/stream.ts
132
/**
 * Decides whether a checkpoint is due: at least `intervalMs` must have passed
 * since `lastCheckpointTime` and some text must have been accumulated.
 */
function shouldEmitCheckpoint(lastCheckpointTime, intervalMs, textLength) {
  if (!(textLength > 0)) {
    return false;
  }
  return Date.now() - lastCheckpointTime >= intervalMs;
}
136
/**
 * Builds a checkpoint record for the text streamed so far.
 * `tokensSoFar` is an estimate derived from the text length (length / 1.5,
 * rounded up).
 */
function createCheckpoint(textAccumulator, eventIndex, now) {
  const checkpoint = {
    id: generateId("ckpt"),
    timestamp: now,
    text: textAccumulator
  };
  checkpoint.tokensSoFar = Math.ceil(textAccumulator.length / 1.5);
  checkpoint.eventIndex = eventIndex;
  return checkpoint;
}
145
/** Returns `err` unchanged when it is an Error; otherwise wraps its string form. */
function toError(err) {
  if (err instanceof Error) {
    return err;
  }
  return new Error(String(err));
}
148
/**
 * Yields exactly one event describing a stream failure: a "recovery" event
 * (after notifying `onPartialRecovery`) when some text was already received,
 * otherwise a plain "error" event.
 */
function* emitErrorEvent(err, textAccumulator, onPartialRecovery) {
  const error = toError(err);
  const hasPartialText = textAccumulator.length > 0;
  if (!hasPartialText) {
    yield { type: "error", error };
    return;
  }
  onPartialRecovery?.(textAccumulator, error);
  yield { type: "recovery", partialText: textAccumulator, error };
}
157
/**
 * Creates an ElsiumStream fed by `executor`.
 *
 * `executor(emit)` must return a promise. Events passed to `emit` are handed
 * straight to a waiting consumer, or buffered (up to MAX_BUFFER_SIZE) until
 * one asks. Events that arrive while the buffer is full are counted as
 * dropped; when the executor finishes, a single "error" event reporting the
 * drop count is queued.
 *
 * Fix over the previous version: the overflow notification used to go through
 * the capped `emit`, so it was itself discarded whenever the buffer was still
 * full. It now bypasses the cap and is always delivered.
 *
 * Only one outstanding `next()` call is supported at a time.
 *
 * @param executor - function receiving `emit` and returning a promise.
 * @returns an ElsiumStream wrapping the produced events.
 */
function createStream(executor) {
  // Resolver of the consumer's outstanding next() call, if any.
  let pendingResolve = null;
  const buffer = [];
  let done = false;
  let error = null;
  let dropped = 0;

  // Gives `result` to a waiting consumer; returns false when nobody is waiting.
  const handOff = (result) => {
    if (!pendingResolve) {
      return false;
    }
    const resolve = pendingResolve;
    pendingResolve = null;
    resolve(result);
    return true;
  };

  const source = {
    [Symbol.asyncIterator]() {
      return {
        next() {
          // Buffered events are drained before completion or failure is reported.
          if (buffer.length > 0) {
            return Promise.resolve({ value: buffer.shift(), done: false });
          }
          if (done) {
            return Promise.resolve({ value: undefined, done: true });
          }
          if (error) {
            return Promise.reject(error);
          }
          return new Promise((resolve) => {
            pendingResolve = resolve;
          });
        }
      };
    }
  };

  const emit = (event) => {
    if (handOff({ value: event, done: false })) {
      return;
    }
    if (buffer.length < MAX_BUFFER_SIZE) {
      buffer.push(event);
    } else {
      dropped++;
    }
  };

  executor(emit).then(() => {
    if (dropped > 0) {
      const overflow = {
        type: "error",
        error: new Error(`Stream buffer overflow: ${dropped} events dropped`)
      };
      // Deliberately ignores MAX_BUFFER_SIZE so the notice cannot be lost.
      if (!handOff({ value: overflow, done: false })) {
        buffer.push(overflow);
      }
    }
    done = true;
    handOff({ value: undefined, done: true });
  }).catch((e) => {
    error = e instanceof Error ? e : new Error(String(e));
    // A waiting consumer gets the failure as an event; later next() calls reject.
    handOff({ value: { type: "error", error }, done: false });
  });

  return new ElsiumStream(source);
}
219
var ElsiumStream;
// Upper bound on events buffered by createStream before they are dropped.
var MAX_BUFFER_SIZE = 1e4;

/**
 * Lazily defines `ElsiumStream`, a wrapper around an async-iterable event
 * source with helpers for collecting text and building derived streams.
 */
var init_stream = __esm(() => {
  init_utils();
  ElsiumStream = class ElsiumStream {
    source;
    iterating = false;

    /** @param source - async iterable of stream events. */
    constructor(source) {
      this.source = source;
    }

    /**
     * Iterates the raw events. Throws if this stream object has already been
     * iterated through this method.
     */
    async *[Symbol.asyncIterator]() {
      if (this.iterating) {
        throw new Error("ElsiumStream supports only a single consumer");
      }
      this.iterating = true;
      yield* this.source;
    }

    /** Returns an async iterable of the `text` of every "text_delta" event. */
    text() {
      const source = this.source;
      return {
        async *[Symbol.asyncIterator]() {
          for await (const event of source) {
            if (event.type === "text_delta") {
              yield event.text;
            }
          }
        }
      };
    }

    /** Consumes the stream and returns all text deltas concatenated. */
    async toText() {
      const parts = [];
      for await (const text of this.text()) {
        parts.push(text);
      }
      return parts.join("");
    }

    /**
     * Collects text until the stream ends or `timeoutMs` elapses, whichever
     * comes first, and returns whatever text was gathered.
     *
     * A source error is rethrown only when no text was collected; otherwise
     * the partial text is returned (best effort).
     *
     * Fixes over the previous version:
     * - the per-iteration timer is cleared in `finally`, so a rejecting
     *   `next()` no longer leaves a live timer behind;
     * - after a timeout the iterator's `return()` is requested but not
     *   awaited. For an async-generator source, `return()` is queued behind
     *   the still-pending `next()`, so awaiting it blocked past the deadline.
     */
    async toTextWithTimeout(timeoutMs) {
      const parts = [];
      const deadline = Date.now() + timeoutMs;
      const iterator = this.source[Symbol.asyncIterator]();
      // Sentinel that distinguishes "timer fired" from a real end of stream.
      const timeoutResult = { value: undefined, done: true };
      let timedOut = false;
      try {
        while (true) {
          const remaining = deadline - Date.now();
          if (remaining <= 0) {
            break;
          }
          let timer;
          const timeoutPromise = new Promise((resolve) => {
            timer = setTimeout(() => resolve(timeoutResult), remaining);
          });
          let result;
          try {
            result = await Promise.race([iterator.next(), timeoutPromise]);
          } finally {
            clearTimeout(timer);
          }
          if (result === timeoutResult) {
            timedOut = true;
            break;
          }
          if (result.done) {
            break;
          }
          const event = result.value;
          if (event.type === "text_delta") {
            parts.push(event.text);
          }
        }
      } catch (err) {
        if (parts.length === 0) {
          throw err;
        }
      } finally {
        if (timedOut) {
          // Fire-and-forget cleanup: the result is irrelevant once we gave up,
          // and a failure here must not surface as an unhandled rejection.
          Promise.resolve().then(() => iterator.return?.()).catch(() => {});
        } else {
          await iterator.return?.();
        }
      }
      return parts.join("");
    }

    /**
     * Consumes the stream and returns `{ text, usage, stopReason }`, where
     * usage and stopReason come from the last "message_end" event (null when
     * none was seen).
     */
    async toResponse() {
      const parts = [];
      let usage = null;
      let stopReason = null;
      for await (const event of this.source) {
        if (event.type === "text_delta") {
          parts.push(event.text);
        } else if (event.type === "message_end") {
          usage = event.usage;
          stopReason = event.stopReason;
        }
      }
      return { text: parts.join(""), usage, stopReason };
    }

    /** Returns a new stream over `transform(source)`. */
    pipe(transform) {
      return new ElsiumStream(transform(this.source));
    }

    /**
     * Returns a stream that forwards every event, interleaves "checkpoint"
     * events at most once per `checkpointIntervalMs` (default 1000), and
     * converts a source failure into a final "recovery" or "error" event
     * instead of throwing.
     */
    resilient(options = {}) {
      const { checkpointIntervalMs = 1000, onCheckpoint, onPartialRecovery } = options;
      const source = this.source;
      const resilientSource = {
        async *[Symbol.asyncIterator]() {
          let lastCheckpointTime = Date.now();
          let textAccumulator = "";
          let eventIndex = 0;
          try {
            for await (const event of source) {
              eventIndex++;
              if (event.type === "text_delta") {
                textAccumulator += event.text;
              }
              yield event;
              if (shouldEmitCheckpoint(lastCheckpointTime, checkpointIntervalMs, textAccumulator.length)) {
                const now = Date.now();
                const checkpoint = createCheckpoint(textAccumulator, eventIndex, now);
                onCheckpoint?.(checkpoint);
                yield { type: "checkpoint", checkpoint };
                lastCheckpointTime = now;
              }
            }
          } catch (err) {
            yield* emitErrorEvent(err, textAccumulator, onPartialRecovery);
          }
        }
      };
      return new ElsiumStream(resilientSource);
    }
  };
});
337
+
338
+ // ../core/src/logger.ts
339
/** Logger module initializer; nothing to set up. */
var init_logger = function () {};

// ../core/src/config.ts
var init_config = __esm(function () {
  init_errors();
});

// ../core/src/circuit-breaker.ts
var init_circuit_breaker = __esm(function () {
  init_errors();
});

// ../core/src/dedup.ts
var init_dedup = __esm(function () {
  init_errors();
});

// ../core/src/policy.ts
var init_policy = __esm(function () {
  init_errors();
  init_utils();
});

// ../core/src/shutdown.ts
var init_shutdown = __esm(function () {
  init_errors();
});

// ../core/src/index.ts
/** Runs every core module initializer, in bundle order, at most once. */
var init_src = __esm(function () {
  const initializers = [
    init_errors,
    init_stream,
    init_logger,
    init_config,
    init_utils,
    init_circuit_breaker,
    init_dedup,
    init_policy,
    init_shutdown
  ];
  for (const initialize of initializers) {
    initialize();
  }
});
379
+
380
+ // ../testing/src/mock-provider.ts
381
/**
 * Creates an in-memory provider that replays canned responses.
 *
 * Responses are served in order from `options.responses`; once exhausted,
 * `options.defaultResponse` (or an empty-content response) is used. Every
 * request is recorded in `calls`, and `onRequest` is invoked per request.
 *
 * Fixes over the previous version:
 * - streamed text deltas no longer carry an extra trailing space, so the
 *   concatenated stream text equals the configured `content`;
 * - `stream()` now falls back to `request.model` like `complete()` does;
 * - nullish token counts in `config.usage` can no longer override the
 *   computed defaults in `complete()` responses.
 *
 * @param options - `{ responses?, defaultResponse?, onRequest? }`.
 * @returns provider object with `complete`, `stream`, `listModels`, `reset`,
 *   and the `calls` / `callCount` accessors.
 */
function mockProvider(options = {}) {
  const { responses = [], defaultResponse, onRequest } = options;
  const calls = [];
  let callIndex = 0;

  // Picks the response config for the current call and advances the cursor.
  function getNextResponse() {
    const index = callIndex++;
    if (index < responses.length) {
      return responses[index];
    }
    return defaultResponse || { content: "" };
  }

  // Emits message_start, one text_delta per word, then message_end.
  async function emitStreamEvents(emit, config, request) {
    if (config.delay) {
      await new Promise((r) => setTimeout(r, config.delay));
    }
    emit({
      type: "message_start",
      id: generateId("msg"),
      model: config.model ?? request?.model ?? "mock-model"
    });
    const content = config.content ?? "";
    if (content) {
      const words = content.split(" ");
      const lastIndex = words.length - 1;
      words.forEach((word, index) => {
        // The separator is re-attached only between words, never after the last.
        const text = index < lastIndex ? `${word} ` : word;
        if (text) {
          emit({ type: "text_delta", text });
        }
      });
    }
    emit({
      type: "message_end",
      usage: {
        inputTokens: config.usage?.inputTokens ?? 10,
        outputTokens: config.usage?.outputTokens ?? 5,
        totalTokens: config.usage?.totalTokens ?? 15
      },
      stopReason: config.stopReason ?? "end_turn"
    });
  }

  // Builds the non-streaming response object for `complete()`.
  function buildResponse(config, request) {
    const model = config.model ?? request.model ?? "mock-model";
    const content = config.content ?? "";
    const toolCalls = config.toolCalls?.map((tc) => ({
      id: tc.id ?? generateId("tc"),
      name: tc.name,
      arguments: tc.arguments
    }));
    // Token counts default to a length-based estimate (length / 4, rounded up).
    const estimated = Math.ceil(content.length / 4);
    const inputTokens = config.usage?.inputTokens ?? estimated;
    const outputTokens = config.usage?.outputTokens ?? estimated;
    const usage = {
      ...config.usage,
      inputTokens,
      outputTokens,
      // The total is always derived from the two counts above.
      totalTokens: inputTokens + outputTokens
    };
    const message = {
      role: "assistant",
      content,
      ...(toolCalls?.length ? { toolCalls } : {})
    };
    return {
      id: generateId("msg"),
      message,
      usage,
      cost: { inputCost: 0, outputCost: 0, totalCost: 0, currency: "USD" },
      model,
      provider: "mock",
      stopReason: config.stopReason ?? (toolCalls?.length ? "tool_use" : "end_turn"),
      latencyMs: config.delay ?? 0,
      traceId: generateTraceId()
    };
  }

  return {
    name: "mock",
    defaultModel: "mock-model",
    /** Live list of every request received so far. */
    get calls() {
      return calls;
    },
    get callCount() {
      return calls.length;
    },
    async complete(request) {
      calls.push(request);
      onRequest?.(request);
      const config = getNextResponse();
      if (config.delay) {
        await new Promise((r) => setTimeout(r, config.delay));
      }
      return buildResponse(config, request);
    },
    stream(request) {
      calls.push(request);
      onRequest?.(request);
      const config = getNextResponse();
      return createStream((emit) => emitStreamEvents(emit, config, request));
    },
    async listModels() {
      return ["mock-model"];
    },
    /** Clears recorded calls and restarts the response sequence. */
    reset() {
      calls.length = 0;
      callIndex = 0;
    }
  };
}
487
/** Initializer for the mock-provider module: makes sure core is initialized. */
var init_mock_provider = __esm(function () {
  init_src();
});
490
+
491
+ // ../testing/src/fixtures.ts
492
+ import { createHash } from "crypto";
493
/**
 * Fingerprints a message list: joins `role:content` pairs with "|" and
 * returns the first 16 hex characters of the SHA-256 digest.
 */
function hashMessages(messages) {
  const serialized = messages.map((message) => `${message.role}:${message.content}`).join("|");
  const digest = createHash("sha256");
  digest.update(serialized);
  return digest.digest("hex").slice(0, 16);
}
497
/**
 * Creates a named fixture from recorded request/response entries.
 *
 * `toProvider()` returns a mock provider replaying the entries' responses in
 * order. With `{ matching: "request-hash" }`, `complete()` first looks for an
 * entry whose messages hash equals the request's and replays that response,
 * falling back to the sequential responses when nothing matches.
 *
 * Fixes over the previous version:
 * - hash-matched requests are now recorded in the provider's `calls`, so
 *   `calls` / `callCount` reflect every request, not only unmatched ones;
 * - entry messages are normalised the same way as request messages
 *   (non-string content becomes "[complex]") before hashing, so such entries
 *   can actually be matched.
 *
 * `toJSON()` returns a pretty-printed JSON string; entries lacking a
 * timestamp get the current time.
 */
function createFixture(name, entries) {
  // Reduces messages to the { role, content } shape used for hashing.
  const normalizeMessages = (messages) =>
    messages.map((m) => ({
      role: m.role,
      content: typeof m.content === "string" ? m.content : "[complex]"
    }));

  return {
    name,
    entries,
    toProvider(options) {
      const provider = mockProvider({
        responses: entries.map((e) => e.response)
      });
      if (options?.matching !== "request-hash") {
        return provider;
      }

      const responsesByHash = new Map();
      for (const entry of entries) {
        const hash = hashMessages(normalizeMessages(entry.request.messages));
        responsesByHash.set(hash, entry.response);
      }

      const sequentialComplete = provider.complete.bind(provider);
      const wrapped = Object.create(provider);
      wrapped.complete = async (request) => {
        const hash = hashMessages(normalizeMessages(request.messages));
        const matched = responsesByHash.get(hash);
        if (!matched) {
          return sequentialComplete(request);
        }
        // The single-use provider below has its own call log, so record the
        // request on the shared one to keep calls/callCount accurate.
        provider.calls.push(request);
        return mockProvider({ responses: [matched] }).complete(request);
      };
      return wrapped;
    },
    toJSON() {
      const serializable = {
        name,
        entries: entries.map((e) => ({
          ...e,
          timestamp: e.timestamp ?? new Date().toISOString()
        }))
      };
      return JSON.stringify(serializable, null, 2);
    }
  };
}
542
/** Parses a JSON string with `name` and `entries` fields into a fixture. */
function loadFixture(json) {
  const parsed = JSON.parse(json);
  const fixtureName = parsed.name;
  const fixtureEntries = parsed.entries;
  return createFixture(fixtureName, fixtureEntries);
}
546
/**
 * Creates a recorder that captures `complete()` traffic of wrapped providers.
 *
 * `wrap(provider)` returns an object inheriting from `provider` whose
 * `complete` forwards to the original and stores a simplified copy of the
 * request and response. Non-string message content is stored as "[complex]".
 */
function createRecorder() {
  const entries = [];

  // Simplified request stored alongside each response.
  const summarizeRequest = (request) => ({
    messages: request.messages.map((m) => ({
      role: m.role,
      content: typeof m.content === "string" ? m.content : "[complex]"
    })),
    model: request.model,
    system: request.system
  });

  // Simplified response; non-string content is stored as an empty string.
  const summarizeResponse = (response) => ({
    content: typeof response.message.content === "string" ? response.message.content : "",
    toolCalls: response.message.toolCalls,
    stopReason: response.stopReason,
    usage: response.usage,
    model: response.model
  });

  return {
    wrap(provider) {
      const forward = provider.complete.bind(provider);
      const recording = Object.create(provider);
      recording.complete = async (request) => {
        const response = await forward(request);
        const recordedRequest = summarizeRequest(request);
        const recordedResponse = summarizeResponse(response);
        entries.push({
          request: recordedRequest,
          response: recordedResponse,
          timestamp: new Date().toISOString()
        });
        return response;
      };
      return recording;
    },
    /** Returns a shallow copy of the recorded entries. */
    getEntries() {
      return entries.slice();
    },
    /** Builds a fixture from a snapshot of the recorded entries. */
    toFixture(name) {
      return createFixture(name, entries.slice());
    },
    clear() {
      entries.length = 0;
    }
  };
}
587
/** Initializer for the fixtures module: makes sure the mock provider is ready. */
var init_fixtures = __esm(function () {
  init_mock_provider();
});
590
+
591
+ // ../testing/src/eval.ts
592
/**
 * Checks that `output` contains `criterion.value`; the comparison ignores
 * case unless `criterion.caseSensitive` is truthy.
 */
function evaluateContains(output, criterion) {
  let needle = criterion.value;
  let haystack = output;
  if (!criterion.caseSensitive) {
    needle = needle.toLowerCase();
    haystack = haystack.toLowerCase();
  }
  const passed = haystack.includes(needle);
  let message;
  if (passed) {
    message = `Contains "${criterion.value}"`;
  } else {
    message = `Does not contain "${criterion.value}"`;
  }
  return { type: "contains", passed, message };
}
602
/**
 * Checks that `output` does NOT contain `criterion.value`; the comparison
 * ignores case unless `criterion.caseSensitive` is truthy.
 */
function evaluateNotContains(output, criterion) {
  let needle = criterion.value;
  let haystack = output;
  if (!criterion.caseSensitive) {
    needle = needle.toLowerCase();
    haystack = haystack.toLowerCase();
  }
  const found = haystack.includes(needle);
  if (found) {
    return {
      type: "not_contains",
      passed: false,
      message: `Contains "${criterion.value}" (should not)`
    };
  }
  return {
    type: "not_contains",
    passed: true,
    message: `Does not contain "${criterion.value}"`
  };
}
612
/**
 * Checks `output` against the regular expression built from
 * `criterion.pattern` and `criterion.flags`.
 *
 * Fix over the previous version: an invalid pattern or flag string used to
 * throw out of the evaluator (aborting the whole run); it is now reported as
 * a failed criterion with an explanatory message.
 *
 * @returns `{ type: "matches", passed, message }`.
 */
function evaluateMatches(output, criterion) {
  let regex;
  try {
    regex = new RegExp(criterion.pattern, criterion.flags);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return {
      type: "matches",
      passed: false,
      message: `Invalid pattern /${criterion.pattern}/: ${reason}`
    };
  }
  const passed = regex.test(output);
  return {
    type: "matches",
    passed,
    message: passed ? `Matches /${criterion.pattern}/` : `Does not match /${criterion.pattern}/`
  };
}
621
/** Checks that `output.length` is at least `criterion.value`. */
function evaluateLengthMin(output, criterion) {
  const actual = output.length;
  const minimum = criterion.value;
  if (actual >= minimum) {
    return { type: "length_min", passed: true, message: `Length ${actual} >= ${minimum}` };
  }
  return { type: "length_min", passed: false, message: `Length ${actual} < ${minimum}` };
}
629
/** Checks that `output.length` is at most `criterion.value`. */
function evaluateLengthMax(output, criterion) {
  const actual = output.length;
  const maximum = criterion.value;
  if (actual <= maximum) {
    return { type: "length_max", passed: true, message: `Length ${actual} <= ${maximum}` };
  }
  return { type: "length_max", passed: false, message: `Length ${actual} > ${maximum}` };
}
637
/** Checks that `output` parses as JSON. */
function evaluateJsonValid(output) {
  let passed = true;
  try {
    JSON.parse(output);
  } catch {
    passed = false;
  }
  const message = passed ? "Valid JSON" : "Invalid JSON";
  return { type: "json_valid", passed, message };
}
645
/**
 * Parses `output` as JSON and checks it against `criterion.schema` using
 * `matchesSchema`. Any exception (including a parse failure) is reported as
 * "Invalid JSON".
 */
function evaluateJsonMatches(output, criterion) {
  let passed;
  try {
    passed = matchesSchema(JSON.parse(output), criterion.schema);
  } catch {
    return { type: "json_matches", passed: false, message: "Invalid JSON" };
  }
  const message = passed ? "JSON matches schema" : "JSON does not match schema";
  return { type: "json_matches", passed, message };
}
658
/**
 * Runs the caller-supplied predicate `criterion.fn(output)`; its return value
 * is stored as `passed` unchanged.
 */
function evaluateCustom(output, criterion) {
  const passed = criterion.fn(output);
  const outcome = passed ? "passed" : "failed";
  return {
    type: `custom:${criterion.name}`,
    passed,
    message: `Custom check "${criterion.name}" ${outcome}`
  };
}
666
/**
 * Word-overlap similarity between `output` and `criterion.reference`.
 *
 * Both texts are lower-cased, split on whitespace, and reduced to the set of
 * words longer than three characters. The score is the fraction of reference
 * words that also occur in the output; it passes when the score reaches
 * `criterion.threshold` (default 0.7).
 *
 * Fix over the previous version: output words were counted with repetition,
 * so repeating one matching word could push the score above 100% and pass
 * the check. Each reference word now counts at most once.
 *
 * @returns `{ type: "semantic_similarity", passed, message }`.
 */
function evaluateSemanticSimilarity(output, criterion) {
  const significantWords = (text) =>
    new Set(text.toLowerCase().split(/\s+/).filter((w) => w.length > 3));

  const refWords = significantWords(criterion.reference);
  const outWords = significantWords(output);

  let overlap = 0;
  for (const word of refWords) {
    if (outWords.has(word)) {
      overlap++;
    }
  }

  // An empty reference word set gives a score of 0.
  const score = refWords.size > 0 ? overlap / refWords.size : 0;
  const threshold = criterion.threshold ?? 0.7;
  const passed = score >= threshold;
  const scorePct = (score * 100).toFixed(0);
  const thresholdPct = (threshold * 100).toFixed(0);
  return {
    type: "semantic_similarity",
    passed,
    message: passed
      ? `Semantic similarity ${scorePct}% >= ${thresholdPct}%`
      : `Semantic similarity ${scorePct}% < ${thresholdPct}%`
  };
}
679
/**
 * Keyword-based fact check. A fact counts as verified when more than half of
 * its words occur (as substrings, case-insensitively) in `output`. The check
 * passes when the share of verified facts reaches `criterion.threshold`
 * (default 0.7). With no facts at all the score is 1.
 *
 * Words of three characters or fewer are ignored as noise. Fix over the
 * previous version: a fact consisting only of such short words ended up with
 * no words to compare and could never be verified; in that case all of the
 * fact's words are now used instead.
 *
 * @returns `{ type: "factual_accuracy", passed, message }`.
 */
function evaluateFactualAccuracy(output, criterion) {
  const facts = criterion.facts;
  const outputLower = output.toLowerCase();
  let matchedFacts = 0;

  for (const fact of facts) {
    const words = fact.toLowerCase().split(/\s+/).filter((w) => w.length > 0);
    const significant = words.filter((w) => w.length > 3);
    const factWords = significant.length > 0 ? significant : words;
    const matches = factWords.filter((w) => outputLower.includes(w)).length;
    // Math.max guards the division when the fact has no words at all.
    if (matches / Math.max(factWords.length, 1) > 0.5) {
      matchedFacts++;
    }
  }

  const score = facts.length > 0 ? matchedFacts / facts.length : 1;
  const threshold = criterion.threshold ?? 0.7;
  const passed = score >= threshold;
  return {
    type: "factual_accuracy",
    passed,
    message: passed
      ? `Factual accuracy: ${matchedFacts}/${facts.length} facts verified`
      : `Factual accuracy: only ${matchedFacts}/${facts.length} facts found`
  };
}
699
/**
 * Dispatches a synchronous criterion to its evaluator based on
 * `criterion.type`.
 *
 * "llm_judge" cannot be evaluated synchronously and yields a failed result
 * here. Fix over the previous version: an unrecognised type used to fall out
 * of the switch and return `undefined`, which crashes code that reads
 * `.passed` from the result; it now yields a failed result naming the type.
 *
 * @returns a criterion result `{ type, passed, message }`.
 */
function evaluateCriterion(output, criterion) {
  switch (criterion.type) {
    case "contains":
      return evaluateContains(output, criterion);
    case "not_contains":
      return evaluateNotContains(output, criterion);
    case "matches":
      return evaluateMatches(output, criterion);
    case "length_min":
      return evaluateLengthMin(output, criterion);
    case "length_max":
      return evaluateLengthMax(output, criterion);
    case "json_valid":
      return evaluateJsonValid(output);
    case "json_matches":
      return evaluateJsonMatches(output, criterion);
    case "custom":
      return evaluateCustom(output, criterion);
    case "llm_judge":
      return { type: "llm_judge", passed: false, message: "LLM judge requires async evaluation" };
    case "semantic_similarity":
      return evaluateSemanticSimilarity(output, criterion);
    case "factual_accuracy":
      return evaluateFactualAccuracy(output, criterion);
    default:
      return {
        type: String(criterion.type),
        passed: false,
        message: `Unknown criterion type: ${String(criterion.type)}`
      };
  }
}
725
/**
 * Shallow schema check: `value` must be a non-null object containing every
 * key of `schema`; where the schema entry is a string, `typeof value[key]`
 * must equal it. Non-string schema entries only require the key to exist.
 */
function matchesSchema(value, schema) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return Object.keys(schema).every((key) => {
    if (!(key in value)) {
      return false;
    }
    const expectedType = schema[key];
    return typeof expectedType !== "string" || typeof value[key] === expectedType;
  });
}
741
/**
 * Builds the failed case result used when the runner itself threw: score 0,
 * empty output, and a single "error" criterion carrying the error text.
 */
function makeRunnerErrorResult(evalCase, error, startTime) {
  const reason = error instanceof Error ? error.message : String(error);
  const failure = {
    type: "error",
    passed: false,
    message: `Runner error: ${reason}`
  };
  const elapsedMs = Math.round(performance.now() - startTime);
  return {
    name: evalCase.name,
    passed: false,
    score: 0,
    criteria: [failure],
    input: evalCase.input,
    output: "",
    durationMs: elapsedMs,
    tags: evalCase.tags ?? []
  };
}
759
/** Checks that `output` contains the `expected` text (case-sensitive). */
function checkExpected(output, expected) {
  if (output.includes(expected)) {
    return { type: "expected", passed: true, message: "Output contains expected text" };
  }
  return {
    type: "expected",
    passed: false,
    message: `Output does not contain expected "${expected}"`
  };
}
767
/**
 * Asks `criterion.judge` to grade `output`. The judge receives the
 * criterion's prompt followed by the output and must resolve to
 * `{ score, reasoning }`. Passes when the score reaches `criterion.threshold`
 * (default 0.7). Any exception is reported as a failed result.
 */
async function evaluateLlmJudge(output, criterion) {
  try {
    const fullPrompt = `${criterion.prompt}\n\nOutput to evaluate:\n${output}`;
    const verdict = await criterion.judge(fullPrompt);
    const threshold = criterion.threshold ?? 0.7;
    const passed = verdict.score >= threshold;
    let message;
    if (passed) {
      message = `LLM judge score: ${verdict.score.toFixed(2)} (${verdict.reasoning})`;
    } else {
      message = `LLM judge score: ${verdict.score.toFixed(2)} < ${threshold} (${verdict.reasoning})`;
    }
    return { type: "llm_judge", passed, message };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return {
      type: "llm_judge",
      passed: false,
      message: `LLM judge error: ${reason}`
    };
  }
}
789
/**
 * Evaluates everything attached to a case, in order: the optional `expected`
 * text first, then each entry of `criteria`. "llm_judge" criteria are awaited;
 * all others are evaluated synchronously.
 */
async function evaluateAllCriteria(output, evalCase) {
  const outcomes = [];
  if (evalCase.expected !== undefined) {
    outcomes.push(checkExpected(output, evalCase.expected));
  }
  const criteria = evalCase.criteria ?? [];
  for (const criterion of criteria) {
    const outcome = criterion.type === "llm_judge"
      ? await evaluateLlmJudge(output, criterion)
      : evaluateCriterion(output, criterion);
    outcomes.push(outcome);
  }
  return outcomes;
}
803
/**
 * Runs one eval case: calls `runner` with the case input, evaluates every
 * criterion against the output, and returns the case result. A runner
 * exception produces the dedicated runner-error result instead. A case with
 * no criteria passes with score 1.
 */
async function runCase(evalCase, runner) {
  const startedAt = performance.now();
  let output;
  try {
    output = await runner(evalCase.input);
  } catch (error) {
    return makeRunnerErrorResult(evalCase, error, startedAt);
  }

  const criteria = await evaluateAllCriteria(output, evalCase);
  const totalCount = criteria.length;
  const passedCount = criteria.reduce((count, c) => (c.passed ? count + 1 : count), 0);
  const hasNoCriteria = totalCount === 0;

  return {
    name: evalCase.name,
    passed: hasNoCriteria || passedCount === totalCount,
    score: hasNoCriteria ? 1 : passedCount / totalCount,
    criteria,
    input: evalCase.input,
    output,
    durationMs: Math.round(performance.now() - startedAt),
    tags: evalCase.tags ?? []
  };
}
827
/**
 * Runs every case of a suite and aggregates the results.
 *
 * With `config.concurrency` of 1 or less (the default is 1) cases run one at
 * a time; otherwise they run in consecutive batches of that size, each batch
 * awaited before the next starts. The suite score is the share of passed
 * cases (0 for an empty suite).
 */
async function runEvalSuite(config) {
  const startedAt = performance.now();
  const concurrency = config.concurrency ?? 1;
  const results = [];

  if (concurrency <= 1) {
    for (const evalCase of config.cases) {
      const caseResult = await runCase(evalCase, config.runner);
      results.push(caseResult);
    }
  } else {
    let offset = 0;
    while (offset < config.cases.length) {
      const batch = config.cases.slice(offset, offset + concurrency);
      const pending = batch.map((c) => runCase(c, config.runner));
      results.push(...(await Promise.all(pending)));
      offset += concurrency;
    }
  }

  const total = results.length;
  const passed = results.filter((r) => r.passed).length;
  return {
    name: config.name,
    total,
    passed,
    failed: total - passed,
    score: total > 0 ? passed / total : 0,
    results,
    durationMs: Math.round(performance.now() - startedAt)
  };
}
854
/**
 * Renders a suite result as plain text: a header, one PASS/FAIL line per
 * case (followed by the messages of its failed criteria), and a summary line.
 */
function formatEvalReport(result) {
  const rule = ` ${"\u2500".repeat(50)}`;
  const lines = [`\nEval Suite: ${result.name}`, rule];

  for (const r of result.results) {
    const icon = r.passed ? "PASS" : "FAIL";
    lines.push(` [${icon}] ${r.name} (${r.durationMs}ms)`);
    if (r.passed) {
      continue;
    }
    // Only the criteria that failed are listed under a failing case.
    for (const c of r.criteria) {
      if (!c.passed) {
        lines.push(` ${c.message}`);
      }
    }
  }

  const percent = (result.score * 100).toFixed(1);
  lines.push(rule);
  lines.push(` Score: ${percent}% | ${result.passed}/${result.total} passed | ${result.durationMs}ms`);
  lines.push("");
  return lines.join("\n");
}
876
+
877
+ // ../testing/src/snapshot.ts
878
+ import { createHash as createHash2 } from "crypto";
879
/**
 * Creates an in-memory snapshot store keyed by snapshot name, optionally
 * pre-filled from `existing`. `toJSON()` returns a pretty-printed JSON string
 * of all snapshots.
 */
function createSnapshotStore(existing) {
  const byName = new Map();
  for (const snapshot of existing || []) {
    byName.set(snapshot.name, snapshot);
  }
  const listAll = () => [...byName.values()];

  return {
    get(name) {
      return byName.get(name);
    },
    set(name, snapshot) {
      byName.set(name, snapshot);
    },
    getAll() {
      return listAll();
    },
    toJSON() {
      return JSON.stringify(listAll(), null, 2);
    }
  };
}
901
/** Returns the full hex-encoded SHA-256 digest of `output`. */
function hashOutput(output) {
  const digest = createHash2("sha256");
  digest.update(output);
  return digest.digest("hex");
}
904
/**
 * Runs `runner`, hashes its output and compares it with the snapshot stored
 * under `name`.
 *
 * - no stored snapshot: stores one and reports status "new";
 * - same hash: reports "match" and leaves the store untouched;
 * - different hash: overwrites the stored snapshot and reports "changed".
 *
 * `request` is optional; non-string message content is stored as "[complex]".
 */
async function testSnapshot(name, store, runner, request) {
  const output = await runner();
  const currentHash = hashOutput(output);
  const previous = store.get(name);

  const recordedMessages = request?.messages?.map((m) => ({
    role: m.role,
    content: typeof m.content === "string" ? m.content : "[complex]"
  }));
  const snapshot = {
    name,
    request: {
      system: request?.system,
      messages: recordedMessages ?? [],
      model: request?.model
    },
    outputHash: currentHash,
    timestamp: new Date().toISOString()
  };

  if (!previous) {
    store.set(name, snapshot);
    return { name, status: "new", currentHash, output };
  }

  const previousHash = previous.outputHash;
  const isMatch = previousHash === currentHash;
  if (!isMatch) {
    store.set(name, snapshot);
  }
  return {
    name,
    status: isMatch ? "match" : "changed",
    previousHash,
    currentHash,
    output
  };
}
943
/** Module initializer for snapshot; nothing to set up, so it is a no-op. */
var init_snapshot = function () {};
944
+
945
+ // ../testing/src/prompts.ts
946
/** Returns a shallow copy of the given prompt configuration. */
function definePrompt(config) {
  const prompt = { ...config };
  return prompt;
}
949
/**
 * Compares one line position of two texts and returns the change records for
 * it: "added" when only the new text has the line, "removed" when only the
 * old text has it, a removed/added pair when both exist but differ, and
 * "unchanged" otherwise.
 */
function compareLine(fromLine, toLine, lineNumber) {
  const record = (type, content) => ({ type, lineNumber, content });

  if (fromLine === undefined) {
    return [record("added", toLine)];
  }
  if (toLine === undefined) {
    return [record("removed", fromLine)];
  }
  if (fromLine === toLine) {
    return [record("unchanged", fromLine)];
  }
  return [record("removed", fromLine), record("added", toLine)];
}
964
/**
 * Position-by-position comparison of two line arrays (line i against line i,
 * numbered from 1); returns the flattened list of change records.
 */
function computeLineChanges(fromLines, toLines) {
  const lineCount = Math.max(fromLines.length, toLines.length);
  const perLine = Array.from({ length: lineCount }, (_, index) =>
    compareLine(fromLines[index], toLines[index], index + 1)
  );
  return perLine.flat();
}
972
/**
 * Creates an in-memory registry of versioned prompts.
 *
 * Prompts are stored per name and per `prompt.version`. Versions are ordered
 * by comparing their dot-separated numeric parts (so "1.10.0" sorts after
 * "1.2.0"); missing parts count as 0.
 *
 * Fixes over the previous version:
 * - `render` inserted variable values as replacement *patterns*, so values
 *   containing `$&`, `$$`, etc. were rewritten; values are now inserted
 *   literally;
 * - `render` accepts a missing `variables` argument (treated as `{}`).
 */
function createPromptRegistry() {
  // name -> Map(version -> prompt)
  const store = new Map();

  // Numeric, part-wise comparison of dotted version strings.
  function compareVersions(a, b) {
    const aParts = a.split(".").map(Number);
    const bParts = b.split(".").map(Number);
    const length = Math.max(aParts.length, bParts.length);
    for (let i = 0; i < length; i++) {
      const aVal = aParts[i] ?? 0;
      const bVal = bParts[i] ?? 0;
      if (aVal !== bVal) {
        return aVal - bVal;
      }
    }
    return 0;
  }

  // Registered versions of `name`, oldest first ([] when unknown).
  function sortedVersions(name) {
    const versions = store.get(name);
    return versions ? [...versions.keys()].sort(compareVersions) : [];
  }

  return {
    /** Stores `prompt` under `name`, replacing any entry with the same version. */
    register(name, prompt) {
      let versions = store.get(name);
      if (!versions) {
        versions = new Map();
        store.set(name, versions);
      }
      versions.set(prompt.version, prompt);
    },

    /** Returns the given version of a prompt, or the latest when none is given. */
    get(name, version) {
      const versions = store.get(name);
      if (!versions) {
        return;
      }
      if (version) {
        return versions.get(version);
      }
      return this.getLatest(name);
    },

    /** Returns the highest registered version of `name`, if any. */
    getLatest(name) {
      const versions = store.get(name);
      if (!versions || versions.size === 0) {
        return;
      }
      const sorted = sortedVersions(name);
      return versions.get(sorted[sorted.length - 1]);
    },

    /** Lists every prompt name with its versions, oldest first. */
    list() {
      return [...store.keys()].map((name) => ({
        name,
        versions: sortedVersions(name)
      }));
    },

    getVersions(name) {
      return sortedVersions(name);
    },

    /**
     * Line-by-line comparison of two versions of a prompt; null when the
     * prompt or either version is unknown.
     */
    diff(name, fromVersion, toVersion) {
      const versions = store.get(name);
      if (!versions) {
        return null;
      }
      const from = versions.get(fromVersion);
      const to = versions.get(toVersion);
      if (!from || !to) {
        return null;
      }
      const changes = computeLineChanges(from.content.split("\n"), to.content.split("\n"));
      return { name, fromVersion, toVersion, changes };
    },

    /**
     * Substitutes every `{{key}}` placeholder with the matching value.
     * @throws Error when the prompt (or requested version) does not exist.
     */
    render(name, variables = {}, version) {
      const prompt = this.get(name, version);
      if (!prompt) {
        throw new Error(`Prompt "${name}" not found${version ? ` (version ${version})` : ""}`);
      }
      let rendered = prompt.content;
      for (const [key, value] of Object.entries(variables)) {
        // A replacer function keeps "$&", "$$", ... in the value literal.
        rendered = rendered.replaceAll(`{{${key}}}`, () => String(value));
      }
      return rendered;
    }
  };
}
1054
+
1055
+ // ../testing/src/regression.ts
1056
+ import { mkdirSync, readFileSync as readFileSync2, writeFileSync } from "fs";
1057
+ import { dirname } from "path";
1058
/**
 * Builds the regression result reported when there is nothing to compare:
 * zero cases, zero scores and empty regression/improvement lists.
 *
 * @param {string} name - Suite name echoed back in the result.
 * @returns {object} A fresh result object (new arrays on every call).
 */
function makeEmptyResult(name) {
  const emptyResult = { name };
  emptyResult.totalCases = 0;
  emptyResult.regressions = [];
  emptyResult.improvements = [];
  emptyResult.unchanged = 0;
  emptyResult.overallScore = 0;
  emptyResult.baselineScore = 0;
  return emptyResult;
}
1069
/**
 * Scores one case. With a custom `scorer` its result is returned as-is;
 * otherwise an exact match with the baseline output scores 1 and anything
 * else scores 0.5.
 *
 * @param {*} input - Case input, forwarded to the scorer.
 * @param {*} currentOutput - Output produced by the current run.
 * @param {*} baselineOutput - Output recorded in the baseline.
 * @param {Function} [scorer] - Optional `(input, currentOutput) => score`.
 * @returns {Promise<number>} The score for this case.
 */
async function scoreCase(input, currentOutput, baselineOutput, scorer) {
  if (!scorer) {
    const matchesBaseline = currentOutput === baselineOutput;
    return matchesBaseline ? 1 : 0.5;
  }
  return scorer(input, currentOutput);
}
1074
/**
 * Files a case detail under regressions (delta below -0.1) or improvements
 * (delta above 0.1) by pushing it onto the matching array.
 *
 * @param {{delta: number}} detail - Case detail to classify.
 * @param {Array} regressions - Receives the detail when it regressed.
 * @param {Array} improvements - Receives the detail when it improved.
 * @returns {boolean} true when the detail is within the ±0.1 band (unchanged).
 */
function classifyDetail(detail, regressions, improvements) {
  const { delta } = detail;
  let bucket = null;
  if (delta < -0.1) {
    bucket = regressions;
  } else if (delta > 0.1) {
    bucket = improvements;
  }
  if (bucket === null) {
    return true;
  }
  bucket.push(detail);
  return false;
}
1085
/**
 * Re-runs every baseline case through `runner`, scores the new output and
 * summarises how the run compares with the recorded baseline.
 *
 * Cases are executed one after another, in baseline order.
 *
 * @param {string} name - Suite name echoed back in the result.
 * @param {{cases: Array}} baseline - Baseline whose cases carry input, output and score.
 * @param {Function} runner - `(input) => output` for the current implementation.
 * @param {Function} [scorer] - Optional scorer forwarded to `scoreCase`.
 * @returns {Promise<object>} Totals, regressions, improvements and average scores.
 */
async function compareWithBaseline(name, baseline, runner, scorer) {
  let baselineTotal = 0;
  for (const recorded of baseline.cases) {
    baselineTotal += recorded.score;
  }
  const baselineScore = baselineTotal / baseline.cases.length;

  const regressions = [];
  const improvements = [];
  let unchanged = 0;
  let currentTotal = 0;
  for (const { input, output: baselineOutput, score: recordedScore } of baseline.cases) {
    const currentOutput = await runner(input);
    const currentScore = await scoreCase(input, currentOutput, baselineOutput, scorer);
    currentTotal += currentScore;
    const detail = {
      input,
      baselineOutput,
      currentOutput,
      baselineScore: recordedScore,
      currentScore,
      delta: currentScore - recordedScore
    };
    const withinBand = classifyDetail(detail, regressions, improvements);
    if (withinBand) {
      unchanged += 1;
    }
  }

  const totalCases = baseline.cases.length;
  return {
    name,
    totalCases,
    regressions,
    improvements,
    unchanged,
    overallScore: currentTotal / totalCases,
    baselineScore
  };
}
1117
/**
 * Creates a regression suite that keeps a baseline of `{input, output, score}`
 * cases, persists it as JSON and compares later runs against it.
 *
 * @param {string} name - Suite name stored in the baseline and in results.
 * @returns {object} Suite with a `baseline` getter and `load`, `save`,
 *   `addCase` and `run` methods.
 */
function createRegressionSuite(name) {
  let baseline = null;

  // Lazily creates the empty baseline shared by save() and addCase().
  function ensureBaseline() {
    if (!baseline) {
      const now = Date.now();
      baseline = { name, cases: [], createdAt: now, updatedAt: now };
    }
    return baseline;
  }

  return {
    /** Current baseline, or null when nothing has been loaded or added. */
    get baseline() {
      return baseline;
    },
    /**
     * Loads a baseline from `path`. Best-effort: a missing, unreadable or
     * invalid file leaves the suite without a baseline instead of throwing.
     */
    async load(path) {
      try {
        const parsed = JSON.parse(readFileSync2(path, "utf-8"));
        // Reject JSON that is not a baseline (no `cases` array); keeping it
        // would make addCase()/run() fail later on `baseline.cases`.
        const looksLikeBaseline = parsed !== null && typeof parsed === "object" && Array.isArray(parsed.cases);
        baseline = looksLikeBaseline ? parsed : null;
      } catch {
        baseline = null;
      }
    },
    /** Writes the baseline (an empty one if none exists) to `path`, creating parent directories. */
    async save(path) {
      const current = ensureBaseline();
      mkdirSync(dirname(path), { recursive: true });
      writeFileSync(path, JSON.stringify(current, null, 2));
    },
    /** Adds a case, replacing an existing case with the same (strictly equal) input. */
    addCase(input, output, score) {
      const current = ensureBaseline();
      const entry = { input, output, score, timestamp: Date.now() };
      const existing = current.cases.findIndex((c) => c.input === input);
      if (existing >= 0) {
        current.cases[existing] = entry;
      } else {
        current.cases.push(entry);
      }
      current.updatedAt = Date.now();
    },
    /** Compares `runner` against the baseline; yields an empty result when there are no cases. */
    async run(runner, scorer) {
      if (!baseline || baseline.cases.length === 0) {
        return makeEmptyResult(name);
      }
      return compareWithBaseline(name, baseline, runner, scorer);
    }
  };
}
1168
// Module initializer for ../testing/src/regression.ts; it has nothing to set up, so it is a no-op.
var init_regression = () => {};
1169
+
1170
+ // ../testing/src/replay.ts
1171
/**
 * Creates a recorder that captures request/response pairs of a completion function.
 *
 * @returns {object} Recorder exposing:
 *   - `wrap(completeFn)`: async wrapper that records each successful call,
 *   - `getEntries()`: shallow copy of the recorded entries,
 *   - `toJSON()`: entries serialised as 2-space indented JSON,
 *   - `clear()`: drops all recorded entries.
 */
function createReplayRecorder() {
  const recorded = [];

  const wrap = (completeFn) => async (request) => {
    const response = await completeFn(request);
    // Recorded only after completeFn resolved; a rejected call leaves no entry.
    recorded.push({ request, response, timestamp: Date.now() });
    return response;
  };

  const getEntries = () => recorded.slice();

  const toJSON = () => JSON.stringify(recorded, null, 2);

  const clear = () => {
    recorded.length = 0;
  };

  return { wrap, getEntries, toJSON, clear };
}
1196
/**
 * Creates a player that replays recorded responses in order, ignoring the
 * request passed to `complete`.
 *
 * @param {string|Iterable} entriesOrJson - Recorded entries, or their JSON text.
 * @returns {object} Player with a `remaining` getter and an async `complete`
 *   that throws once all responses have been consumed.
 */
function createReplayPlayer(entriesOrJson) {
  let entries;
  if (typeof entriesOrJson === "string") {
    entries = JSON.parse(entriesOrJson);
  } else {
    // Copy so later changes to the caller's collection do not affect playback.
    entries = [...entriesOrJson];
  }
  let cursor = 0;

  return {
    get remaining() {
      return entries.length - cursor;
    },
    async complete(_request) {
      if (entries.length <= cursor) {
        throw new Error("Replay exhausted: no more recorded responses");
      }
      const { response } = entries[cursor++];
      return response;
    }
  };
}
1213
+
1214
+ // ../testing/src/pinning.ts
1215
+ import { createHash as createHash3 } from "crypto";
1216
/**
 * Returns the SHA-256 digest of `input` as a hex string.
 *
 * @param {string|Buffer} input - Data to hash.
 * @returns {string} Hex-encoded digest.
 */
function sha256(input) {
  const hasher = createHash3("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
1219
/**
 * Creates an in-memory store of output pins.
 *
 * @param {Iterable} [existing] - Pins to preload; each is indexed under
 *   `"<promptHash>:<configHash>"`.
 * @returns {object} Store with `get`, `set`, `delete`, `getAll` and `toJSON`
 *   (2-space indented JSON array of all pins).
 */
function createPinStore(existing) {
  const pins = new Map();
  for (const pin of existing || []) {
    pins.set(`${pin.promptHash}:${pin.configHash}`, pin);
  }

  const snapshot = () => [...pins.values()];

  return {
    get(key) {
      return pins.get(key);
    },
    set(key, pin) {
      pins.set(key, pin);
    },
    delete(key) {
      return pins.delete(key);
    },
    getAll() {
      return snapshot();
    },
    toJSON() {
      return JSON.stringify(snapshot(), null, 2);
    }
  };
}
1245
/**
 * Runs `runner` and compares its output hash with the pin stored for the same
 * prompt and model configuration.
 *
 * The pin key is `"<sha256(prompt)>:<sha256({model, temperature, seed})>"`.
 *
 * @param {string} name - Label used in the mismatch error message.
 * @param {object} store - Pin store providing `get(key)` and `set(key, pin)`.
 * @param {Function} runner - Async function producing the output to pin.
 * @param {object} config - Supplies `prompt`, `model`, `temperature` and `seed`.
 * @param {{assert?: boolean}} [options] - With `assert`, a mismatch throws.
 * @returns {Promise<object>} `{status: "new" | "match" | "mismatch", pin, previousPin?}`;
 *   "new" and "mismatch" also write the fresh pin to the store.
 */
async function pinOutput(name, store, runner, config, options) {
  const promptHash = sha256(config.prompt);
  const fingerprint = {
    model: config.model,
    temperature: config.temperature,
    seed: config.seed
  };
  const configHash = sha256(JSON.stringify(fingerprint));
  const key = [promptHash, configHash].join(":");

  const output = await runner();
  const outputHash = sha256(output);
  const pin = {
    promptHash,
    configHash,
    outputHash,
    outputText: output,
    model: config.model,
    createdAt: Date.now()
  };

  const previousPin = store.get(key);
  if (previousPin && previousPin.outputHash === outputHash) {
    return { status: "match", pin, previousPin };
  }
  if (previousPin && options?.assert) {
    throw ElsiumError.validation(`Pin mismatch for "${name}": expected hash ${previousPin.outputHash}, got ${outputHash}`);
  }
  store.set(key, pin);
  return previousPin ? { status: "mismatch", pin, previousPin } : { status: "new", pin };
}
1277
// Module initializer for ../testing/src/pinning.ts. __esm runs the callback on
// the first call only; the callback initializes the module behind init_src,
// which this module depends on.
var init_pinning = __esm(() => {
  init_src();
});
1280
+
1281
+ // ../testing/src/determinism.ts
1282
/**
 * Calls `fn(seed)` several times in sequence and measures how many distinct
 * outputs it produced.
 *
 * `variance` is `(uniqueOutputs - 1) / (runs - 1)` (0 for a single run); the
 * run counts as deterministic when the variance does not exceed `tolerance`.
 *
 * @param {Function} fn - Async function under test; receives `options.seed`.
 * @param {{runs?: number, seed?: *, tolerance?: number}} [options] - Defaults:
 *   5 runs, tolerance 0.
 * @returns {Promise<object>} `{deterministic, runs, uniqueOutputs, outputs, variance}`.
 * @throws When the outputs differ and `tolerance` is 0.
 */
async function assertDeterministic(fn, options) {
  const runs = options?.runs ?? 5;
  const tolerance = options?.tolerance ?? 0;
  const seed = options?.seed;

  const outputs = [];
  let completed = 0;
  while (completed < runs) {
    outputs.push(await fn(seed));
    completed += 1;
  }

  const uniqueOutputs = new Set(outputs).size;
  let variance = 0;
  if (runs > 1) {
    variance = (uniqueOutputs - 1) / (runs - 1);
  }
  const deterministic = variance <= tolerance;

  const mustThrow = tolerance === 0 && !deterministic;
  if (mustThrow) {
    throw ElsiumError.validation(`Non-deterministic output: ${uniqueOutputs} unique outputs across ${runs} runs (variance: ${variance.toFixed(3)})`);
  }
  return { deterministic, runs, uniqueOutputs, outputs, variance };
}
1306
/**
 * Calls `fn(seed)` repeatedly, pausing `intervalMs` between calls, and reports
 * whether every call produced the same output.
 *
 * @param {Function} fn - Async function under test; receives `options.seed`.
 * @param {{intervalMs?: number, runs?: number, seed?: *}} [options] - Defaults:
 *   100 ms interval, 3 runs.
 * @returns {Promise<object>} `{stable, runs, uniqueOutputs, outputs, variance}`
 *   where each entry of `outputs` is `{output, timestamp}`.
 */
async function assertStable(fn, options) {
  const intervalMs = options?.intervalMs ?? 100;
  const runs = options?.runs ?? 3;
  const seed = options?.seed;

  const outputs = [];
  for (let attempt = 0; attempt < runs; attempt += 1) {
    // No pause before the first call, only between consecutive calls.
    if (attempt !== 0 && attempt > 0) {
      await sleep(intervalMs);
    }
    const output = await fn(seed);
    outputs.push({ output, timestamp: Date.now() });
  }

  const distinct = new Set();
  for (const sample of outputs) {
    distinct.add(sample.output);
  }
  const uniqueOutputs = distinct.size;
  const variance = runs > 1 ? (uniqueOutputs - 1) / (runs - 1) : 0;
  const stable = uniqueOutputs === 1;
  return { stable, runs, uniqueOutputs, outputs, variance };
}
1329
// Module initializer for ../testing/src/determinism.ts. __esm runs the callback
// on the first call only; the callback initializes the module behind init_src,
// which this module depends on.
var init_determinism = __esm(() => {
  init_src();
});
1332
+
1333
+ // ../testing/src/index.ts
1334
// Export namespace object for ../testing/src/index.ts. __export installs each
// entry as an enumerable getter, so a binding is read when the property is
// accessed rather than when this statement runs.
var exports_src = {};
__export(exports_src, {
  testSnapshot: () => testSnapshot,
  runEvalSuite: () => runEvalSuite,
  pinOutput: () => pinOutput,
  mockProvider: () => mockProvider,
  loadFixture: () => loadFixture,
  hashOutput: () => hashOutput,
  formatEvalReport: () => formatEvalReport,
  definePrompt: () => definePrompt,
  createSnapshotStore: () => createSnapshotStore,
  createReplayRecorder: () => createReplayRecorder,
  createReplayPlayer: () => createReplayPlayer,
  createRegressionSuite: () => createRegressionSuite,
  createRecorder: () => createRecorder,
  createPromptRegistry: () => createPromptRegistry,
  createPinStore: () => createPinStore,
  createFixture: () => createFixture,
  assertStable: () => assertStable,
  assertDeterministic: () => assertDeterministic
});
1355
// Initializer for the testing package entry point: runs the initializers of the
// testing sub-modules in a fixed order. __esm makes it run on the first call only.
var init_src2 = __esm(() => {
  init_mock_provider();
  init_fixtures();
  init_snapshot();
  init_regression();
  init_pinning();
  init_determinism();
});
1363
+
1364
+ // src/commands/cost.ts
1365
+ import { existsSync, readFileSync } from "fs";
1366
+ import { join } from "path";
1367
// Location of the cost report, relative to the current working directory.
var COST_FILE = ".elsium/cost-report.json";

/**
 * `elsium cost`: prints the cost report stored at COST_FILE under the current
 * working directory, or a setup hint when no report exists.
 *
 * Exits the process with code 1 when the report cannot be read or printed.
 *
 * @param {string[]} args - Command arguments (not used).
 * @returns {Promise<void>}
 */
async function costCommand(args) {
  const costPath = join(process.cwd(), COST_FILE);
  const reportExists = existsSync(costPath);
  if (!reportExists) {
    const hint = [
      "",
      "No cost report found.",
      "",
      "Cost reports are generated automatically when you run your app",
      "with observability enabled:",
      "",
      "const app = createApp({",
      "observe: {",
      "costTracking: true,",
      "},",
      "})",
      "",
      "The report will be saved to .elsium/cost-report.json",
      ""
    ];
    console.log(hint.join("\n"));
    return;
  }
  try {
    const report = JSON.parse(readFileSync(costPath, "utf-8"));
    console.log("\nElsiumAI Cost Report");
    console.log(` ${"\u2500".repeat(50)}`);
    console.log(` Generated: ${report.timestamp}`);
    console.log();
    console.log(` Total Cost: $${report.totalCost.toFixed(6)}`);
    console.log(` Total Tokens: ${report.totalTokens.toLocaleString()}`);
    console.log(` Input Tokens: ${report.totalInputTokens.toLocaleString()}`);
    console.log(` Output Tokens: ${report.totalOutputTokens.toLocaleString()}`);
    console.log(` Total API Calls: ${report.callCount}`);
    console.log();
    const modelNames = Object.keys(report.byModel);
    if (modelNames.length > 0) {
      console.log(" By Model:");
      console.log(` ${"\u2500".repeat(50)}`);
      Object.entries(report.byModel).forEach(([model, stats]) => {
        console.log(` ${model}`);
        console.log(` Cost: $${stats.cost.toFixed(6)}`);
        console.log(` Tokens: ${stats.tokens.toLocaleString()}`);
        console.log(` Calls: ${stats.calls}`);
      });
    }
    console.log();
  } catch (err) {
    const reason = err instanceof Error ? err.message : err;
    console.error("Failed to read cost report:", reason);
    process.exit(1);
  }
}
1416
+
1417
+ // src/commands/dev.ts
1418
+ import { spawn } from "child_process";
1419
+ import { existsSync as existsSync2 } from "fs";
1420
+ import { resolve } from "path";
1421
/**
 * `elsium dev`: runs the entry file with `bun --watch`, inheriting stdio.
 *
 * The entry file (default "src/index.ts") must resolve to a path inside the
 * current working directory and must exist; otherwise the process exits with
 * code 1. The child's exit code becomes this process's exit code, and SIGINT
 * is forwarded to the child.
 *
 * @param {string[]} args - `args[0]` is the optional entry file.
 * @returns {Promise<void>}
 */
async function devCommand(args) {
  const entryFile = args[0] ?? "src/index.ts";
  const cwd = process.cwd();
  const fullPath = resolve(cwd, entryFile);

  const insideProject = fullPath === cwd || fullPath.startsWith(`${cwd}/`);
  if (!insideProject) {
    console.error("Error: entry file must be within the project directory");
    process.exit(1);
  }
  const entryExists = existsSync2(fullPath);
  if (!entryExists) {
    console.error(`Entry file not found: ${entryFile}`);
    console.error("Run this command from your ElsiumAI project root.");
    process.exit(1);
  }

  const banner = [
    "\nElsiumAI Dev Server",
    ` Watching: ${entryFile}`,
    " Press Ctrl+C to stop\n"
  ];
  for (const line of banner) {
    console.log(line);
  }

  const child = spawn("bun", ["--watch", entryFile], {
    stdio: "inherit",
    cwd: process.cwd()
  });

  function onSpawnError(err) {
    console.error("Failed to start dev server:", err.message);
    process.exit(1);
  }
  function onChildExit(code) {
    process.exit(code ?? 0);
  }
  function onInterrupt() {
    child.kill("SIGINT");
  }

  child.on("error", onSpawnError);
  child.on("exit", onChildExit);
  process.on("SIGINT", onInterrupt);
}
1454
+
1455
+ // src/commands/eval.ts
1456
+ import { existsSync as existsSync3 } from "fs";
1457
+ import { join as join2 } from "path";
1458
/**
 * `elsium eval <file>`: imports an eval suite module and runs it.
 *
 * Without a file argument the usage text is printed. The module's default
 * export (or the module itself) must provide `name`, `cases` and `runner`.
 * The process exits with code 1 when the file is missing, the config is
 * invalid, the run throws, or at least one case failed.
 *
 * @param {string[]} args - `args[0]` is the eval file, relative to the current
 *   working directory or absolute.
 * @returns {Promise<void>}
 */
async function evalCommand(args) {
  const evalFile = args[0];
  if (!evalFile) {
    console.log(`
Usage: elsium eval <file>

Run an evaluation suite against your prompts.

Examples:
elsium eval ./evals/suite.ts
elsium eval ./evals/quality.ts

Eval file should export a default EvalSuiteConfig:

import { type EvalSuiteConfig } from '@elsium-ai/testing'

export default {
name: 'my-eval',
cases: [
{
name: 'test-1',
input: 'What is TypeScript?',
criteria: [
{ type: 'contains', value: 'typed' },
{ type: 'length_min', value: 20 },
],
},
],
runner: async (input) => {
// Call your agent/LLM here
return response
},
} satisfies EvalSuiteConfig
`);
    return;
  }
  // resolve() keeps an absolute evalFile intact; join() would glue it onto
  // the working directory and report an existing file as missing.
  const fullPath = resolve(process.cwd(), evalFile);
  if (!existsSync3(fullPath)) {
    console.error(`Eval file not found: ${evalFile}`);
    process.exit(1);
  }
  try {
    const mod = await import(fullPath);
    const config = mod.default ?? mod;
    if (!config.name || !config.cases || !config.runner) {
      console.error("Eval file must export a valid EvalSuiteConfig with name, cases, and runner.");
      process.exit(1);
    }
    // The testing module is initialized only once a valid suite is available.
    const { runEvalSuite: runEvalSuite2, formatEvalReport: formatEvalReport2 } = await Promise.resolve().then(() => (init_src2(), exports_src));
    console.log(`\nRunning eval suite: ${config.name}`);
    console.log(` Cases: ${config.cases.length}\n`);
    const result = await runEvalSuite2(config);
    console.log(formatEvalReport2(result));
    if (result.failed > 0) {
      process.exit(1);
    }
  } catch (err2) {
    console.error("Failed to run eval:", err2 instanceof Error ? err2.message : err2);
    process.exit(1);
  }
}
1521
+
1522
+ // src/commands/init.ts
1523
+ import { existsSync as existsSync4, mkdirSync as mkdirSync2, writeFileSync as writeFileSync2 } from "fs";
1524
+ import { join as join3 } from "path";
1525
/**
 * `elsium init [name]`: scaffolds a new project directory (default
 * "my-elsium-app") under the current working directory.
 *
 * Exits with code 1 when the target directory already exists. Otherwise it
 * creates the folder layout, writes the starter files and prints the list of
 * created files followed by the next steps.
 *
 * @param {string[]} args - `args[0]` is the optional project name.
 * @returns {Promise<void>}
 */
async function initCommand(args) {
  const projectName = args[0] ?? "my-elsium-app";
  const projectDir = join3(process.cwd(), projectName);
  if (existsSync4(projectDir)) {
    console.error(`Directory "${projectName}" already exists.`);
    process.exit(1);
  }
  console.log(`\nCreating ElsiumAI project: ${projectName}\n`);

  // Directories first, so every file below has an existing parent.
  const directories = [
    "src/agents",
    "src/tools",
    "src/policies",
    "src/gateway",
    "src/workflows",
    "evals",
    "test/agents",
    ".elsium/baselines",
    ".elsium/recordings"
  ];
  directories.forEach((dir) => {
    mkdirSync2(join3(projectDir, dir), { recursive: true });
  });

  const files = [
    ["package.json", packageJsonContent(projectName)],
    ["tsconfig.json", tsconfigContent()],
    ["biome.json", biomeJsonContent()],
    [".env.example", envExampleContent()],
    [".gitignore", gitignoreContent()],
    ["elsium.config.ts", configContent()],
    ["src/index.ts", indexContent()],
    ["src/gateway/mesh.ts", meshContent()],
    ["src/policies/default.ts", policiesContent()],
    ["src/tools/example.ts", toolContent()],
    ["src/agents/assistant.ts", agentContent()],
    ["src/workflows/example.ts", workflowContent()],
    ["evals/quality.eval.ts", qualityEvalContent()],
    ["evals/determinism.eval.ts", determinismEvalContent()],
    ["test/agents/assistant.test.ts", testContent()],
    [".elsium/baselines/.gitkeep", ""],
    [".elsium/recordings/.gitkeep", ""],
    ["README.md", readmeContent(projectName)]
  ];
  files.forEach(([filePath, content]) => {
    writeFileSync2(join3(projectDir, filePath), content);
  });

  console.log(" Created files:");
  files.forEach(([filePath]) => {
    console.log(` ${projectName}/${filePath}`);
  });
  console.log();

  console.log(" Next steps:");
  const nextSteps = [
    ` cd ${projectName}`,
    " cp .env.example .env # add your API keys",
    " bun install",
    " bun run dev"
  ];
  for (const stepLine of nextSteps) {
    console.log(stepLine);
  }
  console.log();
}
1584
/**
 * Builds the scaffolded project's package.json text.
 *
 * @param {string} projectName - Value of the manifest's `name` field.
 * @returns {string} 2-space indented JSON followed by a newline.
 */
function packageJsonContent(projectName) {
  const scripts = {
    dev: "elsium dev",
    start: "bun src/index.ts",
    test: "vitest run",
    eval: "elsium eval evals/quality.eval.ts",
    "eval:determinism": "elsium eval evals/determinism.eval.ts",
    lint: "biome check .",
    format: "biome check --write ."
  };
  const dependencies = {
    "@elsium-ai/core": "^0.1.0",
    "@elsium-ai/gateway": "^0.1.0",
    "@elsium-ai/agents": "^0.1.0",
    "@elsium-ai/tools": "^0.1.0",
    "@elsium-ai/workflows": "^0.1.0",
    "@elsium-ai/observe": "^0.1.0",
    "@elsium-ai/app": "^0.1.0",
    zod: "^3.23.0"
  };
  const devDependencies = {
    "@elsium-ai/testing": "^0.1.0",
    "@biomejs/biome": "^1.9.0",
    "bun-types": "^1.3.0",
    typescript: "^5.7.0",
    vitest: "^3.0.0"
  };
  const manifest = {
    name: projectName,
    version: "0.1.0",
    type: "module",
    scripts,
    dependencies,
    devDependencies
  };
  const serialized = JSON.stringify(manifest, null, 2);
  return `${serialized}\n`;
}
1618
/**
 * Builds the scaffolded project's tsconfig.json text.
 *
 * @returns {string} 2-space indented JSON followed by a newline.
 */
function tsconfigContent() {
  const compilerOptions = {
    target: "ESNext",
    module: "ESNext",
    moduleResolution: "bundler",
    strict: true,
    esModuleInterop: true,
    skipLibCheck: true,
    types: ["bun-types"]
  };
  const include = ["src", "evals", "test", "elsium.config.ts"];
  const serialized = JSON.stringify({ compilerOptions, include }, null, 2);
  return `${serialized}\n`;
}
1633
/**
 * Builds the scaffolded project's biome.json text.
 *
 * @returns {string} 2-space indented JSON followed by a newline.
 */
function biomeJsonContent() {
  const linter = {
    enabled: true,
    rules: { recommended: true }
  };
  const formatter = {
    enabled: true,
    indentStyle: "tab",
    lineWidth: 100
  };
  const biomeConfig = {
    $schema: "https://biomejs.dev/schemas/1.9.0/schema.json",
    organizeImports: { enabled: true },
    linter,
    formatter
  };
  return `${JSON.stringify(biomeConfig, null, 2)}\n`;
}
1649
/**
 * Builds the text of the scaffolded `.env.example` file.
 *
 * @returns {string} A comment line and one placeholder per provider API key.
 */
function envExampleContent() {
  const lines = [
    "# Provider API keys \u2014 add at least one",
    "ANTHROPIC_API_KEY=your-anthropic-key-here",
    "OPENAI_API_KEY=your-openai-key-here"
  ];
  return `${lines.join("\n")}\n`;
}
1655
/**
 * Builds the text of the scaffolded `.gitignore` file.
 *
 * @returns {string} One ignore pattern per line, newline-terminated.
 */
function gitignoreContent() {
  const patterns = [
    "node_modules/",
    "dist/",
    ".env",
    ".env.*",
    "!.env.example",
    ".elsium/recordings/*.json"
  ];
  return `${patterns.join("\n")}\n`;
}
1664
/**
 * Builds the source text of the scaffolded `elsium.config.ts`.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function configContent() {
  const lines = [
    "import type { AppConfig } from '@elsium-ai/app'",
    "import { env } from '@elsium-ai/core'",
    "",
    "const config = {",
    "gateway: {",
    "providers: {",
    "anthropic: { apiKey: env('ANTHROPIC_API_KEY') },",
    "},",
    "defaultModel: 'claude-sonnet-4-6',",
    "},",
    "observe: {",
    "tracing: true,",
    "costTracking: true,",
    "},",
    "server: {",
    "port: 3000,",
    "},",
    "} satisfies Omit<AppConfig, 'agents'>",
    "",
    "export default config"
  ];
  return `${lines.join("\n")}\n`;
}
1687
/**
 * Builds the source text of the scaffolded `src/index.ts` entry point.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function indexContent() {
  const lines = [
    "import { createApp } from '@elsium-ai/app'",
    "import config from '../elsium.config'",
    "import { assistant } from './agents/assistant'",
    "",
    "const app = createApp({",
    "...config,",
    "agents: [assistant],",
    "})",
    "",
    "app.listen()"
  ];
  return `${lines.join("\n")}\n`;
}
1700
/**
 * Builds the source text of the scaffolded `src/gateway/mesh.ts`.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function meshContent() {
  const lines = [
    "import { createProviderMesh } from '@elsium-ai/gateway'",
    "import { env } from '@elsium-ai/core'",
    "",
    "export const mesh = createProviderMesh({",
    "providers: [",
    "{ name: 'anthropic', config: { apiKey: env('ANTHROPIC_API_KEY') }, priority: 1 },",
    "{ name: 'openai', config: { apiKey: env('OPENAI_API_KEY') }, priority: 2 },",
    "],",
    "strategy: 'fallback',",
    "circuitBreaker: {",
    "failureThreshold: 3,",
    "resetTimeoutMs: 30_000,",
    "},",
    "})"
  ];
  return `${lines.join("\n")}\n`;
}
1717
/**
 * Builds the source text of the scaffolded `src/policies/default.ts`.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function policiesContent() {
  const lines = [
    "import { createPolicySet, modelAccessPolicy, costLimitPolicy } from '@elsium-ai/core'",
    "",
    "export const policies = createPolicySet([",
    "modelAccessPolicy(['claude-sonnet-4-6', 'claude-haiku-4-5', 'gpt-4o']),",
    "costLimitPolicy(5.0),",
    "])"
  ];
  return `${lines.join("\n")}\n`;
}
1726
/**
 * Builds the source text of the scaffolded `src/tools/example.ts`.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function toolContent() {
  const lines = [
    "import { defineTool } from '@elsium-ai/tools'",
    "import { z } from 'zod'",
    "",
    "export const calculatorTool = defineTool({",
    "name: 'calculator',",
    "description: 'Add two numbers together',",
    "input: z.object({",
    "a: z.number().describe('First number'),",
    "b: z.number().describe('Second number'),",
    "}),",
    "handler: async ({ a, b }) => {",
    "return { result: a + b }",
    "},",
    "})"
  ];
  return `${lines.join("\n")}\n`;
}
1743
/**
 * Builds the source text of the scaffolded `src/agents/assistant.ts`.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function agentContent() {
  const lines = [
    "import { defineAgent } from '@elsium-ai/agents'",
    "import { mesh } from '../gateway/mesh'",
    "import { calculatorTool } from '../tools/example'",
    "",
    "export const assistant = defineAgent(",
    "{",
    "name: 'assistant',",
    "system: 'You are a helpful AI assistant. Use the calculator tool for math questions.',",
    "model: 'claude-sonnet-4-6',",
    "tools: [calculatorTool],",
    "guardrails: {",
    "maxIterations: 10,",
    "semantic: {",
    "relevance: { enabled: true, threshold: 0.5 },",
    "},",
    "},",
    "confidence: {",
    "hallucinationRisk: true,",
    "relevanceScore: true,",
    "},",
    "},",
    "{ complete: (req) => mesh.complete(req) },",
    ")"
  ];
  return `${lines.join("\n")}\n`;
}
1769
/**
 * Builds the source text of the scaffolded `src/workflows/example.ts`.
 *
 * The generated code itself contains template literals, so the backticks and
 * `${input}` below are emitted verbatim.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function workflowContent() {
  const lines = [
    "import { defineWorkflow, step } from '@elsium-ai/workflows'",
    "import { assistant } from '../agents/assistant'",
    "import { extractText } from '@elsium-ai/core'",
    "",
    "export const researchWorkflow = defineWorkflow({",
    "name: 'research-pipeline',",
    "steps: [",
    "step('research', {",
    "handler: async (input: string) => {",
    "const result = await assistant.run(`Research this topic: ${input}`)",
    "return extractText(result.message.content)",
    "},",
    "}),",
    "step('summarise', {",
    "handler: async (input: string) => {",
    "const result = await assistant.run(`Summarise in two sentences: ${input}`)",
    "return extractText(result.message.content)",
    "},",
    "}),",
    "],",
    "})"
  ];
  return `${lines.join("\n")}\n`;
}
1793
/**
 * Builds the source text of the scaffolded `evals/quality.eval.ts`.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function qualityEvalContent() {
  const lines = [
    "import type { EvalSuiteConfig } from '@elsium-ai/testing'",
    "import { assistant } from '../src/agents/assistant'",
    "import { extractText } from '@elsium-ai/core'",
    "",
    "export default {",
    "name: 'quality',",
    "cases: [",
    "{",
    "name: 'factual-answer',",
    "input: 'What is 2 + 2?',",
    "criteria: [{ type: 'contains', value: '4' }],",
    "},",
    "{",
    "name: 'polite-greeting',",
    "input: 'Hello!',",
    "criteria: [{ type: 'contains', value: 'Hello' }],",
    "},",
    "],",
    "runner: async (input) => {",
    "const result = await assistant.run(input)",
    "return extractText(result.message.content)",
    "},",
    "} satisfies EvalSuiteConfig"
  ];
  return `${lines.join("\n")}\n`;
}
1819
/**
 * Builds the source text of the scaffolded `evals/determinism.eval.ts`.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function determinismEvalContent() {
  const lines = [
    "import type { EvalSuiteConfig } from '@elsium-ai/testing'",
    "import { assertDeterministic } from '@elsium-ai/testing'",
    "import { assistant } from '../src/agents/assistant'",
    "import { extractText } from '@elsium-ai/core'",
    "",
    "export default {",
    "name: 'determinism',",
    "cases: [",
    "{",
    "name: 'stable-math',",
    "input: 'What is 2 + 2? Reply with just the number.',",
    "},",
    "],",
    "runner: async (input) => {",
    "const result = await assertDeterministic(",
    "async () => {",
    "const res = await assistant.run(input)",
    "return extractText(res.message.content)",
    "},",
    "{ runs: 3, tolerance: 0 },",
    ")",
    "return result.outputs[0]",
    "},",
    "} satisfies EvalSuiteConfig"
  ];
  return `${lines.join("\n")}\n`;
}
1846
/**
 * Builds the source text of the scaffolded `test/agents/assistant.test.ts`.
 *
 * @returns {string} TypeScript source, newline-terminated.
 */
function testContent() {
  const importLines = [
    "import { describe, it, expect } from 'vitest'",
    "import { mockProvider, createReplayRecorder, createReplayPlayer } from '@elsium-ai/testing'",
    "import { defineAgent } from '@elsium-ai/agents'",
    "import { calculatorTool } from '../../src/tools/example'"
  ];
  const greetingTest = [
    "it('should respond to a greeting', async () => {",
    "const mock = mockProvider({",
    "responses: [{ content: 'Hello! How can I help you today?' }],",
    "})",
    "",
    "const agent = defineAgent(",
    "{",
    "name: 'test-assistant',",
    "system: 'You are a helpful AI assistant.',",
    "model: 'mock',",
    "tools: [calculatorTool],",
    "},",
    "{ complete: (req) => mock.complete(req) },",
    ")",
    "",
    "const result = await agent.run('Hello!')",
    "expect(result.message.content).toContain('Hello')",
    "expect(mock.callCount).toBe(1)",
    "})"
  ];
  const replayTest = [
    "it('should replay recorded interactions', async () => {",
    "const recorder = createReplayRecorder()",
    "const mock = mockProvider({",
    "responses: [{ content: 'The answer is 4.' }],",
    "})",
    "",
    "const wrappedComplete = recorder.wrap((req) => mock.complete(req))",
    "await wrappedComplete({",
    "model: 'mock',",
    "messages: [{ role: 'user', content: [{ type: 'text', text: 'What is 2+2?' }] }],",
    "})",
    "",
    "const player = createReplayPlayer(recorder.toJSON())",
    "const replayed = await player.complete({",
    "model: 'mock',",
    "messages: [{ role: 'user', content: [{ type: 'text', text: 'What is 2+2?' }] }],",
    "})",
    "",
    "expect(replayed).toBeDefined()",
    "})"
  ];
  const lines = [
    ...importLines,
    "",
    "describe('assistant agent', () => {",
    ...greetingTest,
    "",
    ...replayTest,
    "})"
  ];
  return `${lines.join("\n")}\n`;
}
1896
/**
 * Builds the text of the scaffolded project's README.md.
 *
 * @param {string} projectName - Used as the top-level heading.
 * @returns {string} Markdown text, newline-terminated.
 */
function readmeContent(projectName) {
  const quickStart = [
    "## Quick start",
    "",
    "```bash",
    "cp .env.example .env # add your API keys",
    "bun install",
    "bun run dev",
    "```"
  ];
  const scripts = [
    "## Scripts",
    "",
    "| Command | Description |",
    "| --- | --- |",
    "| `bun run dev` | Start the dev server |",
    "| `bun run test` | Run unit tests |",
    "| `bun run eval` | Run quality eval suite |",
    "| `bun run eval:determinism` | Run determinism eval |",
    "| `bun run lint` | Lint with Biome |",
    "| `bun run format` | Auto-format with Biome |"
  ];
  const structure = [
    "## Project structure",
    "",
    "- **src/agents/** \u2014 Agent definitions with guardrails",
    "- **src/tools/** \u2014 Tool schemas validated by Zod",
    "- **src/policies/** \u2014 Policy sets (model allowlist, cost caps)",
    "- **src/gateway/** \u2014 Provider mesh with circuit breaker",
    "- **src/workflows/** \u2014 Multi-step workflows",
    "- **evals/** \u2014 Eval suites (quality + determinism)",
    "- **test/** \u2014 Unit tests with mock providers and replay"
  ];
  const lines = [
    `# ${projectName}`,
    "",
    "Built with [ElsiumAI](https://github.com/elsium-ai/elsium-ai).",
    "",
    ...quickStart,
    "",
    ...scripts,
    "",
    ...structure
  ];
  return `${lines.join("\n")}\n`;
}
1931
+
1932
+ // src/commands/prompt.ts
1933
+ import { existsSync as existsSync5, readFileSync as readFileSync3, readdirSync } from "fs";
1934
+ import { join as join4 } from "path";
1935
// Directory holding the prompt JSON files; joined onto the current working directory by promptCommand.
var PROMPTS_DIR = ".elsium/prompts";
1936
/**
 * Prints the usage text of the `elsium prompt` command with a single
 * console.log call.
 */
function showHelp() {
  const usage = [
    "",
    "ElsiumAI Prompt Manager",
    "",
    "Usage:",
    "elsium prompt list List all registered prompts",
    "elsium prompt diff <name> <v1> <v2> Show diff between versions",
    "elsium prompt history <name> Show version history",
    "elsium prompt show <name> [version] Show prompt content",
    "",
    "Prompts are stored in .elsium/prompts/ as JSON files.",
    ""
  ];
  console.log(usage.join("\n"));
}
1949
/**
 * Reads and parses every `.json` file directly inside `promptsPath`.
 *
 * Files that cannot be read or parsed are skipped, as are files whose JSON
 * value is `null`.
 *
 * @param {string} promptsPath - Directory to scan.
 * @returns {Array} Parsed file contents; empty when the directory does not exist.
 */
function loadPromptFiles(promptsPath) {
  if (!existsSync5(promptsPath)) {
    return [];
  }
  const loaded = [];
  for (const fileName of readdirSync(promptsPath)) {
    if (!fileName.endsWith(".json")) {
      continue;
    }
    let parsed;
    try {
      parsed = JSON.parse(readFileSync3(join4(promptsPath, fileName), "utf-8"));
    } catch {
      // Best-effort listing: one broken file must not hide the others.
      continue;
    }
    if (parsed !== null) {
      loaded.push(parsed);
    }
  }
  return loaded;
}
1961
/**
 * `elsium prompt list`: prints every prompt name found under `promptsPath`
 * with its versions, in the order the files were loaded.
 *
 * @param {string} promptsPath - Directory containing the prompt JSON files.
 */
function handleList(promptsPath) {
  const allPrompts = loadPromptFiles(promptsPath);
  if (allPrompts.length === 0) {
    console.log("\nNo prompts found. Store prompts in .elsium/prompts/\n");
    return;
  }

  // Group the versions by prompt name, keeping first-seen order of names.
  const versionsByName = new Map();
  for (const data of allPrompts) {
    const known = versionsByName.get(data.name);
    if (known) {
      known.push(data.version);
    } else {
      versionsByName.set(data.name, [data.version]);
    }
  }

  console.log(`\nRegistered Prompts (${versionsByName.size})`);
  console.log(` ${"\u2500".repeat(50)}`);
  versionsByName.forEach((versions, name) => {
    console.log(` ${name} \u2014 ${versions.length} version(s): ${versions.join(", ")}`);
  });
  console.log();
}
1985
/**
 * `elsium prompt history <name>`: prints every stored version of a prompt in
 * ascending version order, with its variables.
 *
 * @param {string} promptsPath - Directory containing the prompt JSON files.
 * @param {string} name - Prompt name to look up.
 */
function handleHistory(promptsPath, name) {
  // Numeric collation orders "1.9.0" before "1.10.0"; a plain string
  // comparison would put "1.10.0" first.
  const byVersion = (a, b) => String(a.version).localeCompare(String(b.version), undefined, { numeric: true });
  const files = loadPromptFiles(promptsPath).filter((p) => p.name === name).sort(byVersion);
  if (files.length === 0) {
    console.log(`\nNo versions found for prompt "${name}".\n`);
    return;
  }
  console.log(`\nPrompt History: ${name} (${files.length} versions)`);
  console.log(` ${"\u2500".repeat(50)}`);
  for (const p of files) {
    // A prompt file without a `variables` array is listed as having none.
    const variables = Array.isArray(p.variables) ? p.variables : [];
    console.log(` v${p.version} \u2014 ${variables.length} variables: ${variables.join(", ") || "none"}`);
  }
  console.log();
}
2001
/**
 * `elsium prompt show <name> [version]`: prints a prompt's header, variables
 * and content. Without `version` the highest stored version is shown.
 *
 * @param {string} promptsPath - Directory containing the prompt JSON files.
 * @param {string} name - Prompt name to look up.
 * @param {string} [version] - Exact version to show.
 */
function handleShow(promptsPath, name, version) {
  const files = loadPromptFiles(promptsPath).filter((p) => p.name === name);
  // Numeric collation makes "1.10.0" newer than "1.9.0"; a plain string
  // comparison would pick "1.9.0" as the latest version.
  const newestFirst = (a, b) => String(b.version).localeCompare(String(a.version), undefined, { numeric: true });
  const prompt = version ? files.find((p) => p.version === version) : files.sort(newestFirst)[0];
  if (!prompt) {
    console.log(`\nPrompt "${name}"${version ? ` v${version}` : ""} not found.\n`);
    return;
  }
  // A prompt file without a `variables` array is shown as having none.
  const variables = Array.isArray(prompt.variables) ? prompt.variables : [];
  console.log(`\nPrompt: ${prompt.name} v${prompt.version}`);
  console.log(` Variables: ${variables.join(", ") || "none"}`);
  console.log(` ${"\u2500".repeat(50)}`);
  console.log(prompt.content);
  console.log(` ${"\u2500".repeat(50)}\n`);
}
2018
/**
 * Prints a positional line-by-line diff: lines are compared index by index,
 * not aligned. A line only in `toLines` is printed with "+", a line only in
 * `fromLines` with "-", a changed line as "-" followed by "+", and an
 * identical line without a marker.
 *
 * @param {string[]} fromLines - Lines of the old content.
 * @param {string[]} toLines - Lines of the new content.
 */
function printDiffLines(fromLines, toLines) {
  const rowCount = Math.max(fromLines.length, toLines.length);
  for (let row = 0; row < rowCount; row += 1) {
    const before = fromLines[row];
    const after = toLines[row];
    let emitted;
    if (before === undefined) {
      emitted = [` + ${after}`];
    } else if (after === undefined) {
      emitted = [` - ${before}`];
    } else if (before === after) {
      emitted = [` ${before}`];
    } else {
      emitted = [` - ${before}`, ` + ${after}`];
    }
    for (const text of emitted) {
      console.log(text);
    }
  }
}
2035
/**
 * Prints a line diff between two versions of the prompt called `name`.
 * Exits the process with status 1 when either version cannot be found.
 *
 * @param {string} promptsPath - Directory handed to `loadPromptFiles`.
 * @param {string} name - Prompt name to look up.
 * @param {string} v1 - Version shown as the "from" side.
 * @param {string} v2 - Version shown as the "to" side.
 * @returns {void}
 */
function handleDiff(promptsPath, name, v1, v2) {
  const candidates = loadPromptFiles(promptsPath).filter((entry) => entry.name === name);
  const pick = (wanted) => candidates.find((entry) => entry.version === wanted);
  const from = pick(v1);
  const to = pick(v2);
  if (!(from && to)) {
    console.error(` Could not find both versions: ${v1} and ${v2}`);
    process.exit(1);
  }
  const rule = ` ${"\u2500".repeat(50)}`;
  console.log(`\nDiff: ${name} v${v1} \u2192 v${v2}`);
  console.log(rule);
  printDiffLines(from.content.split("\n"), to.content.split("\n"));
  console.log(`${rule}\n`);
}
2052
/**
 * Entry point for `elsium prompt <subcommand>`.
 * Dispatches to the list / history / show / diff handlers, prints help when
 * no subcommand (or a help flag) is given, and exits with status 1 on a
 * missing argument or an unknown subcommand.
 *
 * @param {string[]} args - Arguments following `prompt` on the command line.
 * @returns {Promise<void>}
 */
async function promptCommand(args) {
  const subcommand = args[0];
  const wantsHelp = !subcommand || subcommand === "--help" || subcommand === "-h";
  if (wantsHelp) {
    showHelp();
    return;
  }

  const promptsPath = join4(process.cwd(), PROMPTS_DIR);

  if (subcommand === "list") {
    handleList(promptsPath);
    return;
  }

  if (subcommand === "history") {
    const name = args[1];
    if (!name) {
      console.error(" Please provide a prompt name: elsium prompt history <name>");
      process.exit(1);
    }
    handleHistory(promptsPath, name);
    return;
  }

  if (subcommand === "show") {
    const name = args[1];
    if (!name) {
      console.error(" Please provide a prompt name: elsium prompt show <name> [version]");
      process.exit(1);
    }
    handleShow(promptsPath, name, args[2]);
    return;
  }

  if (subcommand === "diff") {
    const name = args[1];
    const v1 = args[2];
    const v2 = args[3];
    if (!(name && v1 && v2)) {
      console.error(" Usage: elsium prompt diff <name> <v1> <v2>");
      process.exit(1);
    }
    handleDiff(promptsPath, name, v1, v2);
    return;
  }

  console.error(` Unknown subcommand: ${subcommand}`);
  console.log(' Run "elsium prompt --help" for usage information.');
  process.exit(1);
}
2098
+
2099
+ // src/commands/trace.ts
2100
+ import { existsSync as existsSync6, readFileSync as readFileSync4, readdirSync as readdirSync2 } from "fs";
2101
+ import { join as join5 } from "path";
2102
+ var TRACES_DIR = ".elsium/traces";
2103
/**
 * Maps a span status to the short tag shown in trace listings.
 *
 * @param {string} status - Span status as stored in the trace file.
 * @returns {string} "OK" for "ok", "ERR" for "error", "..." for anything else.
 */
function formatStatus(status) {
  switch (status) {
    case "ok":
      return "OK";
    case "error":
      return "ERR";
    default:
      return "...";
  }
}
2110
/**
 * Formats a span duration for display.
 *
 * @param {number} [durationMs] - Duration in milliseconds.
 * @returns {string} `<n>ms`, or "?" when no duration was recorded.
 */
function formatDuration(durationMs) {
  // 0 is a real measurement (a sub-millisecond span), not a missing value,
  // so it must not fall into the "?" branch with undefined/null.
  const hasDuration = Boolean(durationMs) || durationMs === 0;
  return hasDuration ? `${durationMs}ms` : "?";
}
2113
/**
 * Prints the guidance shown when no traces directory exists: how to turn
 * tracing on and how to invoke `elsium trace`.
 *
 * @returns {void}
 */
function showNoTracesHelp() {
  const message = [
    "",
    "No traces found.",
    "",
    "Traces are recorded when you run your app with tracing enabled:",
    "",
    "const app = createApp({",
    "observe: {",
    "tracing: true,",
    "},",
    "})",
    "",
    "Usage:",
    "elsium trace List recent traces",
    "elsium trace <id> Inspect a specific trace",
    "",
  ].join("\n");
  console.log(message);
}
2130
/**
 * Lists up to 20 trace files from `tracesPath`, taken from the end of the
 * file-name ordering, printing one summary line per trace (status, trace id,
 * root span name, duration).
 *
 * A trace file that cannot be read or parsed is reported on stderr and
 * skipped, so one corrupt file no longer hides every other trace.
 *
 * @param {string} tracesPath - Directory containing `<traceId>.json` files.
 * @returns {void}
 */
function listTraces(tracesPath) {
  if (!existsSync6(tracesPath)) {
    showNoTracesHelp();
    return;
  }

  let files;
  try {
    files = readdirSync2(tracesPath)
      .filter((f) => f.endsWith(".json"))
      .sort()
      .reverse()
      .slice(0, 20);
  } catch (err2) {
    console.error("Failed to read traces:", err2 instanceof Error ? err2.message : err2);
    return;
  }

  if (files.length === 0) {
    console.log(`\nNo traces recorded yet.\n`);
    return;
  }

  console.log(`\nRecent Traces (${files.length})`);
  console.log(` ${"\u2500".repeat(60)}`);
  for (const file of files) {
    // Parse each file on its own so a malformed one only costs its own row.
    try {
      const data = JSON.parse(readFileSync4(join5(tracesPath, file), "utf-8"));
      const spans = Array.isArray(data) ? data : [];
      // Prefer the span without a parent; fall back to the first span.
      const root = spans.find((s) => !s.parentId) ?? spans[0];
      if (root) {
        const status = formatStatus(root.status);
        const duration = formatDuration(root.durationMs);
        console.log(` [${status}] ${root.traceId} ${root.name} ${duration}`);
      }
    } catch (err2) {
      console.error(` Skipping ${file}:`, err2 instanceof Error ? err2.message : err2);
    }
  }
  console.log();
}
2160
/**
 * Prints the span tree stored in `<tracesPath>/<traceId>.json`.
 * Exits the process with status 1 when the file is missing or unreadable.
 *
 * @param {string} tracesPath - Directory containing trace files.
 * @param {string} traceId - Trace identifier; also the file's base name.
 * @returns {void}
 */
function showTrace(tracesPath, traceId) {
  const traceFile = join5(tracesPath, `${traceId}.json`);
  const fileMissing = !existsSync6(traceFile);
  if (fileMissing) {
    console.error(`Trace not found: ${traceId}`);
    process.exit(1);
  }
  try {
    const raw = readFileSync4(traceFile, "utf-8");
    const spans = JSON.parse(raw);
    console.log(`\nTrace: ${traceId}`);
    console.log(` Spans: ${spans.length}`);
    console.log(` ${"\u2500".repeat(60)}`);
    // Each span without a parent starts its own tree at depth 0.
    const topLevel = spans.filter((candidate) => !candidate.parentId);
    topLevel.forEach((top) => {
      printSpanTree(top, spans, 0);
    });
    console.log();
  } catch (err2) {
    const reason = err2 instanceof Error ? err2.message : err2;
    console.error("Failed to read trace:", reason);
    process.exit(1);
  }
}
2182
/**
 * Entry point for `elsium trace [id]`.
 * Without an id it lists recent traces; with an id it validates the id's
 * characters (letters, digits, `_`, `-`) and prints that trace, exiting
 * with status 1 when the id is malformed.
 *
 * @param {string[]} args - Arguments following `trace` on the command line.
 * @returns {Promise<void>}
 */
async function traceCommand(args) {
  const requestedId = args[0];
  const tracesPath = join5(process.cwd(), TRACES_DIR);

  if (requestedId) {
    // The id becomes part of a file name, so only a conservative character set is accepted.
    const wellFormed = /^[a-zA-Z0-9_-]+$/.test(requestedId);
    if (!wellFormed) {
      console.error("Invalid trace ID format");
      process.exit(1);
    }
    showTrace(tracesPath, requestedId);
    return;
  }

  listTraces(tracesPath);
}
2195
/**
 * Prints `span` and, recursively, every span whose `parentId` is this span's
 * id, indenting one extra space per nesting level.
 *
 * Spans without `metadata` or `events` are printed without those sections
 * instead of throwing, and a recorded duration of 0 ms is shown rather than
 * dropped.
 *
 * @param {object} span - Span to print.
 * @param {object[]} allSpans - Every span of the trace, searched for children.
 * @param {number} depth - Nesting level of `span`; 0 for a root span.
 * @returns {void}
 */
function printSpanTree(span, allSpans, depth) {
  const indent = ` ${" ".repeat(depth)}`;
  const status = formatStatus(span.status);
  // 0 is a real measurement; only a missing duration is left blank.
  const hasDuration = Boolean(span.durationMs) || span.durationMs === 0;
  const duration = hasDuration ? `${span.durationMs}ms` : "";
  const kind = span.kind ? `[${span.kind}]` : "";
  console.log(`${indent}${kind} ${span.name} (${status}) ${duration}`);
  for (const [key, value] of Object.entries(span.metadata ?? {})) {
    console.log(`${indent} ${key}: ${JSON.stringify(value)}`);
  }
  for (const event of span.events ?? []) {
    console.log(`${indent} > ${event.name}${event.data ? `: ${JSON.stringify(event.data)}` : ""}`);
  }
  const children = allSpans.filter((s) => s.parentId === span.id);
  for (const child of children) {
    printSpanTree(child, allSpans, depth + 1);
  }
}
2212
+
2213
+ // src/commands/xray.ts
2214
+ import { existsSync as existsSync7, readFileSync as readFileSync5 } from "fs";
2215
+ import { join as join6 } from "path";
2216
+ var XRAY_FILE = ".elsium/xray-history.json";
2217
/**
 * Entry point for `elsium xray`: prints recorded LLM calls from the X-Ray
 * history file under the current working directory.
 *
 * Supported arguments:
 *   (none)        show one call
 *   --last [N]    show N calls (5 when N is omitted)
 *   --trace <id>  show the call whose traceId equals <id>
 *   --raw         additionally print the raw request/response
 *
 * `--last`, `--trace` and `--raw` may appear in any order. An `N` that is not
 * a positive integer is rejected with an error instead of silently producing
 * an empty listing.
 *
 * @param {string[]} args - Arguments following `xray` on the command line.
 * @returns {Promise<void>}
 */
async function xrayCommand(args) {
  const flag = args[0];
  if (flag === "--help" || flag === "-h") {
    console.log(
      [
        "",
        "ElsiumAI X-Ray \u2014 Inspect LLM calls",
        "",
        "Usage:",
        "elsium xray Show last call",
        "elsium xray --last N Show last N calls",
        "elsium xray --trace <id> Show call by trace ID",
        "elsium xray --raw Show raw request/response",
        "",
        "X-Ray data is captured when xray mode is enabled:",
        "",
        "const gw = gateway({ ..., xray: true })",
        "",
      ].join("\n"),
    );
    return;
  }

  const xrayPath = join6(process.cwd(), XRAY_FILE);
  if (!existsSync7(xrayPath)) {
    console.log(
      [
        "",
        "No X-Ray data found.",
        "",
        "Enable X-Ray mode on your gateway to capture LLM call details:",
        "",
        "const gw = gateway({ provider: 'anthropic', apiKey: '...', xray: true })",
        "await gw.complete({ messages: [...] })",
        "console.log(gw.lastCall())",
        "",
        "X-Ray data will be saved to .elsium/xray-history.json",
        "",
      ].join("\n"),
    );
    return;
  }

  const showRaw = args.includes("--raw");
  const traceAt = args.indexOf("--trace");
  const lastAt = args.indexOf("--last");

  // Resolve and validate the arguments before touching the history file, and
  // outside the try block, so a usage error is never reported as a read failure.
  let traceId;
  let count = 1;
  if (traceAt !== -1) {
    traceId = args[traceAt + 1];
    if (!traceId) {
      console.error(" Please provide a trace ID: elsium xray --trace <id>");
      process.exit(1);
    }
  } else if (lastAt !== -1) {
    const rawCount = args[lastAt + 1];
    if (rawCount === undefined || rawCount.startsWith("--")) {
      // `--last` with no value (or followed by another flag) keeps the default of 5.
      count = 5;
    } else {
      count = Number.parseInt(rawCount, 10);
      if (Number.isNaN(count) || count < 1) {
        console.error(` Invalid value for --last: ${rawCount} (expected a positive integer)`);
        process.exit(1);
      }
    }
  }

  try {
    const entries = JSON.parse(readFileSync5(xrayPath, "utf-8"));
    if (!Array.isArray(entries)) {
      throw new Error("expected a JSON array of X-Ray entries");
    }

    if (traceAt !== -1) {
      const entry = entries.find((e) => e.traceId === traceId);
      if (!entry) {
        console.error(` Trace not found: ${traceId}`);
        process.exit(1);
      }
      printEntry(entry, showRaw);
      return;
    }

    // NOTE(review): this takes entries from the FRONT of the file, which is
    // "the last N calls" only if the history is written newest-first;
    // confirm against the code that writes the history file.
    const toShow = entries.slice(0, count);
    if (toShow.length === 0) {
      console.log(`\nNo X-Ray data recorded yet.\n`);
      return;
    }
    console.log(`\nElsiumAI X-Ray \u2014 ${toShow.length} call(s)`);
    console.log(` ${"\u2500".repeat(60)}`);
    for (const entry of toShow) {
      printEntry(entry, showRaw);
    }
  } catch (err2) {
    console.error("Failed to read X-Ray data:", err2 instanceof Error ? err2.message : err2);
    process.exit(1);
  }
}
2286
/**
 * Prints one X-Ray history entry: trace id, time, provider, model, latency,
 * token usage and cost, optionally followed by the raw request and response.
 *
 * Missing or malformed fields (invalid timestamp, absent `usage`, `cost`,
 * `request` or `response`) are rendered as placeholders instead of throwing,
 * so one incomplete entry does not abort the whole command.
 *
 * @param {object} entry - Entry read from the X-Ray history file.
 * @param {boolean} [raw=false] - Also print request/response details.
 * @returns {void}
 */
function printEntry(entry, raw = false) {
  // toISOString() throws a RangeError on an invalid date, so check first.
  const stamp = new Date(entry.timestamp);
  const time = Number.isNaN(stamp.getTime()) ? "unknown" : stamp.toISOString();
  const usage = entry.usage ?? {};
  const totalCost = entry.cost?.totalCost;
  const cost = typeof totalCost === "number" ? totalCost.toFixed(6) : "?";

  console.log(
    [
      "",
      `Trace: ${entry.traceId}`,
      `Time: ${time}`,
      `Provider: ${entry.provider}`,
      `Model: ${entry.model}`,
      `Latency: ${entry.latencyMs}ms`,
      `Tokens: ${usage.inputTokens ?? "?"} in / ${usage.outputTokens ?? "?"} out (${usage.totalTokens ?? "?"} total)`,
      `Cost: $${cost}`,
    ].join("\n"),
  );

  if (raw) {
    const request = entry.request ?? {};
    const response = entry.response ?? {};
    // NOTE(review): headers are printed verbatim; if the recorder does not
    // redact credentials (Authorization / API-key headers) they will end up
    // in the terminal. Confirm against the code that writes the history.
    console.log(
      [
        "",
        "\u2500\u2500 Request \u2500\u2500",
        `${request.method} ${request.url}`,
        `Headers: ${JSON.stringify(request.headers, null, 4)}`,
        `Body: ${JSON.stringify(request.body, null, 4)}`,
        "",
        "\u2500\u2500 Response \u2500\u2500",
        `Status: ${response.status}`,
        `Headers: ${JSON.stringify(response.headers, null, 4)}`,
        `Body: ${JSON.stringify(response.body, null, 4)}`,
      ].join("\n"),
    );
  }
  console.log(` ${"\u2500".repeat(60)}`);
}
2309
+
2310
+ // src/cli.ts
2311
// Version reported by `--version` and in the help banner.
// Must match the published package version (@elsium-ai/cli 0.1.6); the
// previous hard-coded "0.1.0" was stale. Keep in sync with package.json on release.
var VERSION = "0.1.6";
// Top-level usage text printed for `--help`, for no command, and after an unknown command.
var HELP = `
ElsiumAI CLI v${VERSION}

Usage: elsium <command> [options]

Commands:
init [name] Scaffold a new ElsiumAI project
dev Start development server with hot reload
eval [file] Run evaluation suite
cost Show cost report from last run
trace [id] Inspect trace from last run
xray Inspect LLM calls (X-Ray mode)
prompt Manage prompt versions

Options:
--help, -h Show this help message
--version, -v Show version

Examples:
elsium init my-ai-app
elsium dev
elsium eval ./evals/suite.ts
elsium cost
elsium trace trc_abc123
elsium xray --last 5
elsium prompt list
`;
2339
/**
 * CLI entry point: reads the command from `process.argv` and runs the
 * matching command handler with the remaining arguments.
 * Prints help (exit 0) when no command or a help flag is given, prints the
 * version (exit 0) for a version flag, and prints help with exit status 1
 * for an unknown command.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const command = args[0];

  const wantsHelp = !command || command === "--help" || command === "-h";
  if (wantsHelp) {
    console.log(HELP);
    process.exit(0);
  }

  const wantsVersion = command === "--version" || command === "-v";
  if (wantsVersion) {
    console.log(VERSION);
    process.exit(0);
  }

  // A Map rather than an object literal, so names such as "constructor"
  // never resolve to an inherited property.
  const handlers = new Map([
    ["init", initCommand],
    ["dev", devCommand],
    ["eval", evalCommand],
    ["cost", costCommand],
    ["trace", traceCommand],
    ["xray", xrayCommand],
    ["prompt", promptCommand],
  ]);

  const run = handlers.get(command);
  if (run) {
    await run(args.slice(1));
    return;
  }

  console.error(`Unknown command: ${command}`);
  console.log(HELP);
  process.exit(1);
}
2378
// Run the CLI; any rejection that reaches this point is reported as a single
// "Error:" line and turned into a non-zero exit status.
main().catch((failure) => {
  console.error("Error:", failure instanceof Error ? failure.message : String(failure));
  process.exit(1);
});