@artemiskit/cli 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,9 @@ import {
12
12
  type StressMetrics,
13
13
  type StressRequestResult,
14
14
  createAdapter,
15
+ estimateCost,
15
16
  getGitInfo,
17
+ getModelPricing,
16
18
  parseScenarioFile,
17
19
  } from '@artemiskit/core';
18
20
  import { generateJSONReport, generateStressHTMLReport } from '@artemiskit/reports';
@@ -195,7 +197,11 @@ export function stressCommand(): Command {
195
197
  console.log();
196
198
 
197
199
  // Calculate stats
198
- const metrics = calculateMetrics(results, endTime.getTime() - startTime.getTime());
200
+ const metrics = calculateMetrics(
201
+ results,
202
+ endTime.getTime() - startTime.getTime(),
203
+ resolvedConfig.model
204
+ );
199
205
 
200
206
  // Build redaction metadata if enabled
201
207
  let redactionInfo: ManifestRedactionInfo | undefined;
@@ -256,9 +262,24 @@ export function stressCommand(): Command {
256
262
  duration: endTime.getTime() - startTime.getTime(),
257
263
  avgLatency: metrics.avg_latency_ms,
258
264
  p50Latency: metrics.p50_latency_ms,
265
+ p90Latency: metrics.p90_latency_ms,
259
266
  p95Latency: metrics.p95_latency_ms,
260
267
  p99Latency: metrics.p99_latency_ms,
261
268
  throughput: metrics.requests_per_second,
269
+ tokens: metrics.tokens
270
+ ? {
271
+ total: metrics.tokens.total_tokens,
272
+ prompt: metrics.tokens.total_prompt_tokens,
273
+ completion: metrics.tokens.total_completion_tokens,
274
+ avgPerRequest: metrics.tokens.avg_tokens_per_request,
275
+ }
276
+ : undefined,
277
+ cost: metrics.cost
278
+ ? {
279
+ totalUsd: metrics.cost.estimated_total_usd,
280
+ model: metrics.cost.model,
281
+ }
282
+ : undefined,
262
283
  };
263
284
  console.log(renderStressSummaryPanel(summaryData));
264
285
 
@@ -318,6 +339,7 @@ interface StressTestOptions {
318
339
  client: {
319
340
  generate: (req: { prompt: string; model?: string; temperature?: number }) => Promise<{
320
341
  text: string;
342
+ tokens?: { prompt: number; completion: number; total: number };
321
343
  }>;
322
344
  };
323
345
  model?: string;
@@ -359,7 +381,7 @@ async function runStressTest(options: StressTestOptions): Promise<StressRequestR
359
381
  active++;
360
382
 
361
383
  try {
362
- await client.generate({
384
+ const response = await client.generate({
363
385
  prompt,
364
386
  model,
365
387
  temperature,
@@ -369,6 +391,7 @@ async function runStressTest(options: StressTestOptions): Promise<StressRequestR
369
391
  success: true,
370
392
  latencyMs: Date.now() - requestStart,
371
393
  timestamp: requestStart,
394
+ tokens: response.tokens,
372
395
  });
373
396
  } catch (error) {
374
397
  results.push({
@@ -415,7 +438,11 @@ async function runStressTest(options: StressTestOptions): Promise<StressRequestR
415
438
  return results;
416
439
  }
417
440
 
418
- function calculateMetrics(results: StressRequestResult[], durationMs: number): StressMetrics {
441
+ function calculateMetrics(
442
+ results: StressRequestResult[],
443
+ durationMs: number,
444
+ model?: string
445
+ ): StressMetrics {
419
446
  const successful = results.filter((r) => r.success);
420
447
  const latencies = successful.map((r) => r.latencyMs).sort((a, b) => a - b);
421
448
 
@@ -431,6 +458,50 @@ function calculateMetrics(results: StressRequestResult[], durationMs: number): S
431
458
  const requestsPerSecond = durationMs > 0 ? (totalRequests / durationMs) * 1000 : 0;
432
459
  const successRate = totalRequests > 0 ? successfulRequests / totalRequests : 0;
433
460
 
461
+ // Calculate token metrics if available
462
+ const resultsWithTokens = results.filter((r) => r.tokens);
463
+ let tokens: StressMetrics['tokens'];
464
+ let cost: StressMetrics['cost'];
465
+
466
+ if (resultsWithTokens.length > 0) {
467
+ const totalPromptTokens = resultsWithTokens.reduce(
468
+ (sum, r) => sum + (r.tokens?.prompt || 0),
469
+ 0
470
+ );
471
+ const totalCompletionTokens = resultsWithTokens.reduce(
472
+ (sum, r) => sum + (r.tokens?.completion || 0),
473
+ 0
474
+ );
475
+ const totalTokens = totalPromptTokens + totalCompletionTokens;
476
+
477
+ tokens = {
478
+ total_prompt_tokens: totalPromptTokens,
479
+ total_completion_tokens: totalCompletionTokens,
480
+ total_tokens: totalTokens,
481
+ avg_tokens_per_request:
482
+ resultsWithTokens.length > 0 ? totalTokens / resultsWithTokens.length : 0,
483
+ };
484
+
485
+ // Estimate cost if model is known
486
+ if (model && totalTokens > 0) {
487
+ const costEstimate = estimateCost(totalPromptTokens, totalCompletionTokens, model);
488
+ const pricing = getModelPricing(model);
489
+
490
+ cost = {
491
+ estimated_total_usd: costEstimate.totalUsd,
492
+ breakdown: {
493
+ prompt_cost_usd: costEstimate.promptCostUsd,
494
+ completion_cost_usd: costEstimate.completionCostUsd,
495
+ },
496
+ model,
497
+ pricing: {
498
+ prompt_per_1k: pricing.promptPer1K,
499
+ completion_per_1k: pricing.completionPer1K,
500
+ },
501
+ };
502
+ }
503
+ }
504
+
434
505
  return {
435
506
  total_requests: totalRequests,
436
507
  successful_requests: successfulRequests,
@@ -444,6 +515,8 @@ function calculateMetrics(results: StressRequestResult[], durationMs: number): S
444
515
  p90_latency_ms: percentile(latencies, 90),
445
516
  p95_latency_ms: percentile(latencies, 95),
446
517
  p99_latency_ms: percentile(latencies, 99),
518
+ tokens,
519
+ cost,
447
520
  };
448
521
  }
449
522
 
@@ -13,7 +13,9 @@ const CONFIG_FILENAMES = ['artemis.config.yaml', 'artemis.config.yml', 'artemis.
13
13
  /**
14
14
  * Find and load the configuration file
15
15
  */
16
- export async function loadConfig(configPath?: string): Promise<ArtemisConfig | null> {
16
+ export async function loadConfig(
17
+ configPath?: string
18
+ ): Promise<(ArtemisConfig & { _path: string }) | null> {
17
19
  const path = configPath || findConfigFile();
18
20
 
19
21
  if (!path) {
@@ -36,7 +38,8 @@ export async function loadConfig(configPath?: string): Promise<ArtemisConfig | n
36
38
  throw new Error(`Invalid config file ${path}:\n${issues}`);
37
39
  }
38
40
 
39
- return result.data;
41
+ // Include the config file path for logging
42
+ return { ...result.data, _path: path };
40
43
  } catch (error) {
41
44
  if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
42
45
  return null;
@@ -16,6 +16,7 @@ const ProviderConfigSchema = z.object({
16
16
  resourceName: z.string().optional(),
17
17
  deploymentName: z.string().optional(),
18
18
  apiVersion: z.string().optional(),
19
+ embeddingDeploymentName: z.string().optional(),
19
20
  // Vercel AI specific
20
21
  underlyingProvider: z.enum(['openai', 'azure', 'anthropic', 'google', 'mistral']).optional(),
21
22
  });
package/src/ui/index.ts CHANGED
@@ -30,6 +30,7 @@ export {
30
30
  renderStressSummaryPanel,
31
31
  renderRedteamSummaryPanel,
32
32
  renderInfoBox,
33
+ renderFailureReason,
33
34
  } from './panels.js';
34
35
  export type { SummaryData, StressSummaryData, RedteamSummaryData } from './panels.js';
35
36
 
@@ -40,3 +41,21 @@ export type { ErrorContext } from './errors.js';
40
41
  // Live status tracking
41
42
  export { LiveTestStatus, Spinner, createSpinner } from './live-status.js';
42
43
  export type { TestStatus } from './live-status.js';
44
+
45
+ // Interactive prompts
46
+ export {
47
+ isInteractive,
48
+ promptProvider,
49
+ promptModel,
50
+ promptScenarios,
51
+ promptConfirm,
52
+ promptInput,
53
+ promptPassword,
54
+ promptSelect,
55
+ promptApiKeyIfNeeded,
56
+ getApiKeyEnvVar,
57
+ runInitWizard,
58
+ PROVIDER_CHOICES,
59
+ MODEL_CHOICES,
60
+ } from './prompts.js';
61
+ export type { InitWizardResult } from './prompts.js';
package/src/ui/panels.ts CHANGED
@@ -23,9 +23,22 @@ export interface StressSummaryData {
23
23
  duration: number;
24
24
  avgLatency: number;
25
25
  p50Latency: number;
26
+ p90Latency: number;
26
27
  p95Latency: number;
27
28
  p99Latency: number;
28
29
  throughput: number;
30
+ /** Token usage (optional) */
31
+ tokens?: {
32
+ total: number;
33
+ prompt: number;
34
+ completion: number;
35
+ avgPerRequest: number;
36
+ };
37
+ /** Cost estimation (optional) */
38
+ cost?: {
39
+ totalUsd: number;
40
+ model: string;
41
+ };
29
42
  }
30
43
 
31
44
  export interface RedteamSummaryData {
@@ -76,6 +89,32 @@ export function renderSummaryPanel(data: SummaryData): string {
76
89
  return lines.join('\n');
77
90
  }
78
91
 
/**
 * Turn a USD amount into a short human-readable string.
 *
 * - below one cent: shown in cents with four decimals (e.g. `0.5000¢`)
 * - below one dollar: shown in dollars with four decimals (e.g. `$0.2500`)
 * - otherwise: shown in dollars with two decimals (e.g. `$12.50`)
 *
 * @param costUsd - Amount in US dollars.
 * @returns The formatted amount.
 */
function formatCostDisplay(costUsd: number): string {
  // Sub-cent amounts are converted to cents so tiny costs stay legible.
  if (costUsd < 0.01) {
    const cents = costUsd * 100;
    return `${cents.toFixed(4)}¢`;
  }

  // Keep extra precision for fractions of a dollar; whole dollars get two decimals.
  const decimals = costUsd < 1 ? 4 : 2;
  return `$${costUsd.toFixed(decimals)}`;
}
104
+
/**
 * Format a token count compactly with a K/M suffix.
 *
 * - one million and above: millions with two decimals (e.g. `2.50M`)
 * - one thousand and above: thousands with one decimal (e.g. `1.5K`)
 * - otherwise: the count rounded to a whole number (e.g. `333`)
 *
 * The input may be fractional (for example an average per request), so
 * sub-thousand values are rounded rather than printed with raw float noise.
 * Values that would round up to `1000.0K` roll over to the `M` form instead.
 *
 * @param tokens - Token count; may be a non-integer average.
 * @returns The formatted count.
 */
function formatTokenCount(tokens: number): string {
  if (tokens >= 1_000_000) {
    return `${(tokens / 1_000_000).toFixed(2)}M`;
  }

  if (tokens >= 1_000) {
    const thousands = (tokens / 1_000).toFixed(1);
    // toFixed rounds 999.95+ up to "1000.0"; switch to millions at that point.
    if (thousands === '1000.0') {
      return `${(tokens / 1_000_000).toFixed(2)}M`;
    }
    return `${thousands}K`;
  }

  // Averages arrive as floats; show whole tokens only.
  const whole = Math.round(tokens);
  // A value such as 999.6 rounds up to 1000 and must use the K form.
  if (whole >= 1_000) {
    return `${(whole / 1_000).toFixed(1)}K`;
  }
  return whole.toString();
}
117
+
79
118
  /**
80
119
  * Render a stress test summary panel
81
120
  */
@@ -83,15 +122,24 @@ export function renderStressSummaryPanel(data: StressSummaryData): string {
83
122
  const width = 55;
84
123
 
85
124
  if (!isTTY) {
86
- return [
125
+ const lines = [
87
126
  '=== STRESS TEST RESULTS ===',
88
127
  `Total Requests: ${data.totalRequests}`,
89
128
  `Successful: ${data.successfulRequests} (${data.successRate.toFixed(1)}%)`,
90
129
  `Failed: ${data.failedRequests}`,
91
130
  `Duration: ${formatDuration(data.duration)}`,
92
131
  `Throughput: ${data.throughput.toFixed(1)} req/s`,
93
- `Latency: avg=${data.avgLatency.toFixed(0)}ms p50=${data.p50Latency.toFixed(0)}ms p95=${data.p95Latency.toFixed(0)}ms p99=${data.p99Latency.toFixed(0)}ms`,
94
- ].join('\n');
132
+ `Latency: avg=${data.avgLatency.toFixed(0)}ms p50=${data.p50Latency.toFixed(0)}ms p90=${data.p90Latency.toFixed(0)}ms p95=${data.p95Latency.toFixed(0)}ms p99=${data.p99Latency.toFixed(0)}ms`,
133
+ ];
134
+ if (data.tokens) {
135
+ lines.push(
136
+ `Tokens: ${formatTokenCount(data.tokens.total)} total (${formatTokenCount(data.tokens.avgPerRequest)}/req)`
137
+ );
138
+ }
139
+ if (data.cost) {
140
+ lines.push(`Estimated Cost: ${formatCostDisplay(data.cost.totalUsd)} (${data.cost.model})`);
141
+ }
142
+ return lines.join('\n');
95
143
  }
96
144
 
97
145
  const border = '═'.repeat(width - 2);
@@ -129,13 +177,59 @@ export function renderStressSummaryPanel(data: StressSummaryData): string {
129
177
  chalk.cyan('║'),
130
178
  chalk.cyan('║') +
131
179
  padText(
132
- ` p50: ${data.p50Latency.toFixed(0)}ms p95: ${data.p95Latency.toFixed(0)}ms p99: ${data.p99Latency.toFixed(0)}ms`,
180
+ ` p50: ${data.p50Latency.toFixed(0)}ms p90: ${data.p90Latency.toFixed(0)}ms p95: ${data.p95Latency.toFixed(0)}ms p99: ${data.p99Latency.toFixed(0)}ms`,
133
181
  width - 2
134
182
  ) +
135
183
  chalk.cyan('║'),
136
- chalk.cyan(`╚${border}╝`),
137
184
  ];
138
185
 
186
+ // Add token usage section if available
187
+ if (data.tokens) {
188
+ lines.push(chalk.cyan(`╠${border}╣`));
189
+ lines.push(chalk.cyan('║') + centerText(chalk.dim('Token Usage'), width - 2) + chalk.cyan('║'));
190
+ lines.push(chalk.cyan(`╠${border}╣`));
191
+ lines.push(
192
+ chalk.cyan('║') +
193
+ padText(` Total: ${chalk.bold(formatTokenCount(data.tokens.total))} tokens`, width - 2) +
194
+ chalk.cyan('║')
195
+ );
196
+ lines.push(
197
+ chalk.cyan('║') +
198
+ padText(
199
+ ` Prompt: ${formatTokenCount(data.tokens.prompt)} Completion: ${formatTokenCount(data.tokens.completion)}`,
200
+ width - 2
201
+ ) +
202
+ chalk.cyan('║')
203
+ );
204
+ lines.push(
205
+ chalk.cyan('║') +
206
+ padText(` Avg/Request: ${data.tokens.avgPerRequest.toFixed(0)} tokens`, width - 2) +
207
+ chalk.cyan('║')
208
+ );
209
+ }
210
+
211
+ // Add cost estimation section if available
212
+ if (data.cost) {
213
+ lines.push(chalk.cyan(`╠${border}╣`));
214
+ lines.push(
215
+ chalk.cyan('║') + centerText(chalk.dim('Cost Estimation'), width - 2) + chalk.cyan('║')
216
+ );
217
+ lines.push(chalk.cyan(`╠${border}╣`));
218
+ lines.push(
219
+ chalk.cyan('║') +
220
+ padText(
221
+ ` Estimated Total: ${chalk.bold(chalk.yellow(formatCostDisplay(data.cost.totalUsd)))}`,
222
+ width - 2
223
+ ) +
224
+ chalk.cyan('║')
225
+ );
226
+ lines.push(
227
+ chalk.cyan('║') + padText(` Model: ${data.cost.model}`, width - 2) + chalk.cyan('║')
228
+ );
229
+ }
230
+
231
+ lines.push(chalk.cyan(`╚${border}╝`));
232
+
139
233
  return lines.join('\n');
140
234
  }
141
235
 
@@ -214,3 +308,57 @@ export function renderInfoBox(title: string, lines: string[]): string {
214
308
 
215
309
  return result.join('\n');
216
310
  }
311
+
/**
 * Format a test case's failure reason for verbose output.
 *
 * When output is not a TTY the result is a single plain `Reason: ...` line.
 * Otherwise each line is prefixed with a red vertical bar and the text is
 * styled according to what the reason contains:
 *
 * - an inline matcher error about an unsupported expression pattern produces
 *   a three-line block naming the expression and suggesting the `response.`
 *   prefix;
 * - a "Missing required values" reason produces a short "Expected value not
 *   found" line, with the mode appended when the reason contains `(mode: x)`;
 * - anything else is printed as-is, tagged with `[matcherType]` when given.
 *
 * @param reason - Failure text to format.
 * @param options - Optional `matcherType` label (used only by the generic
 *   styled output) and `indent` width in spaces (defaults to 3).
 * @returns The formatted, possibly multi-line, string.
 */
export function renderFailureReason(
  reason: string,
  options?: {
    matcherType?: string;
    indent?: number;
  }
): string {
  const pad = ' '.repeat(options?.indent ?? 3);

  // Plain text for CI/CD
  if (!isTTY) {
    return `${pad}Reason: ${reason}`;
  }

  // Every styled line starts with the indent followed by a red bar.
  const bar = `${pad}${chalk.red('│')}`;

  const unsupportedInlineExpression =
    reason.includes('Inline matcher error') && reason.includes('Unsupported expression pattern');

  if (unsupportedInlineExpression) {
    // Pull out the offending expression, if the reason ends with one.
    const captured = reason.match(/Unsupported expression pattern: (.+)$/);
    const expression = (captured ? captured[1] : null) || 'unknown';

    return [
      `${bar} ${chalk.red.bold('Inline Matcher Error')}`,
      `${bar} ${chalk.dim('Expression not supported:')} ${chalk.yellow(expression)}`,
      `${bar} ${chalk.dim('Hint:')} Use ${chalk.cyan('response.')} prefix (e.g., ${chalk.cyan('response.startsWith("...")')})`,
    ].join('\n');
  }

  if (reason.includes('Missing required values')) {
    // Append the mode only when the reason mentions one.
    const modeCapture = reason.match(/\(mode: (\w+)\)/);
    const modeName = modeCapture ? modeCapture[1] : null;
    const modeSuffix = modeName ? chalk.dim(` (${modeName} mode)`) : '';

    return `${bar} ${chalk.dim('Expected value not found')}${modeSuffix}`;
  }

  // Generic styled failure, tagged with the matcher type when one is provided.
  const matcherType = options?.matcherType;
  return matcherType
    ? `${bar} ${chalk.dim(`[${matcherType}]`)} ${reason}`
    : `${bar} ${chalk.dim(reason)}`;
}