npm - @artemiskit/cli - Versions diffs - 0.1.8 → 0.2.0 - Mend

@artemiskit/cli 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/CHANGELOG.md +83 -0
package/bin/artemis.ts +0 -0
package/dist/index.js +70637 -33387
package/dist/src/commands/compare.d.ts.map +1 -1
package/dist/src/commands/init.d.ts.map +1 -1
package/dist/src/commands/redteam.d.ts.map +1 -1
package/dist/src/commands/run.d.ts.map +1 -1
package/dist/src/commands/stress.d.ts.map +1 -1
package/dist/src/config/loader.d.ts +3 -1
package/dist/src/config/loader.d.ts.map +1 -1
package/dist/src/config/schema.d.ts +8 -0
package/dist/src/config/schema.d.ts.map +1 -1
package/dist/src/ui/index.d.ts +3 -1
package/dist/src/ui/index.d.ts.map +1 -1
package/dist/src/ui/panels.d.ts +21 -0
package/dist/src/ui/panels.d.ts.map +1 -1
package/dist/src/ui/prompts.d.ts +92 -0
package/dist/src/ui/prompts.d.ts.map +1 -0
package/dist/src/utils/adapter.d.ts.map +1 -1
package/package.json +6 -6
package/src/commands/compare.ts +25 -0
package/src/commands/init.ts +173 -69
package/src/commands/redteam.ts +63 -10
package/src/commands/run.ts +542 -137
package/src/commands/stress.ts +76 -3
package/src/config/loader.ts +5 -2
package/src/config/schema.ts +1 -0
package/src/ui/index.ts +19 -0
package/src/ui/panels.ts +153 -5
package/src/ui/prompts.ts +749 -0
package/src/utils/adapter.ts +8 -0

package/src/commands/stress.ts CHANGED Viewed

@@ -12,7 +12,9 @@ import {
   type StressMetrics,
   type StressRequestResult,
   createAdapter,
+  estimateCost,
   getGitInfo,
+  getModelPricing,
   parseScenarioFile,
 } from '@artemiskit/core';
 import { generateJSONReport, generateStressHTMLReport } from '@artemiskit/reports';
@@ -195,7 +197,11 @@ export function stressCommand(): Command {
         console.log();
         // Calculate stats
-        const metrics = calculateMetrics(results, endTime.getTime() - startTime.getTime());
+        const metrics = calculateMetrics(
+          results,
+          endTime.getTime() - startTime.getTime(),
+          resolvedConfig.model
+        );
         // Build redaction metadata if enabled
         let redactionInfo: ManifestRedactionInfo | undefined;
@@ -256,9 +262,24 @@ export function stressCommand(): Command {
           duration: endTime.getTime() - startTime.getTime(),
           avgLatency: metrics.avg_latency_ms,
           p50Latency: metrics.p50_latency_ms,
+          p90Latency: metrics.p90_latency_ms,
           p95Latency: metrics.p95_latency_ms,
           p99Latency: metrics.p99_latency_ms,
           throughput: metrics.requests_per_second,
+          tokens: metrics.tokens
+            ? {
+                total: metrics.tokens.total_tokens,
+                prompt: metrics.tokens.total_prompt_tokens,
+                completion: metrics.tokens.total_completion_tokens,
+                avgPerRequest: metrics.tokens.avg_tokens_per_request,
+              }
+            : undefined,
+          cost: metrics.cost
+            ? {
+                totalUsd: metrics.cost.estimated_total_usd,
+                model: metrics.cost.model,
+              }
+            : undefined,
         };
         console.log(renderStressSummaryPanel(summaryData));
@@ -318,6 +339,7 @@ interface StressTestOptions {
   client: {
     generate: (req: { prompt: string; model?: string; temperature?: number }) => Promise<{
       text: string;
+      tokens?: { prompt: number; completion: number; total: number };
     }>;
   };
   model?: string;
@@ -359,7 +381,7 @@ async function runStressTest(options: StressTestOptions): Promise<StressRequestR
     active++;
     try {
-      await client.generate({
+      const response = await client.generate({
         prompt,
         model,
         temperature,
@@ -369,6 +391,7 @@ async function runStressTest(options: StressTestOptions): Promise<StressRequestR
         success: true,
         latencyMs: Date.now() - requestStart,
         timestamp: requestStart,
+        tokens: response.tokens,
       });
     } catch (error) {
       results.push({
@@ -415,7 +438,11 @@ async function runStressTest(options: StressTestOptions): Promise<StressRequestR
   return results;
 }
-function calculateMetrics(results: StressRequestResult[], durationMs: number): StressMetrics {
+function calculateMetrics(
+  results: StressRequestResult[],
+  durationMs: number,
+  model?: string
+): StressMetrics {
   const successful = results.filter((r) => r.success);
   const latencies = successful.map((r) => r.latencyMs).sort((a, b) => a - b);
@@ -431,6 +458,50 @@ function calculateMetrics(results: StressRequestResult[], durationMs: number): S
   const requestsPerSecond = durationMs > 0 ? (totalRequests / durationMs) * 1000 : 0;
   const successRate = totalRequests > 0 ? successfulRequests / totalRequests : 0;
+  // Calculate token metrics if available
+  const resultsWithTokens = results.filter((r) => r.tokens);
+  let tokens: StressMetrics['tokens'];
+  let cost: StressMetrics['cost'];
+  if (resultsWithTokens.length > 0) {
+    const totalPromptTokens = resultsWithTokens.reduce(
+      (sum, r) => sum + (r.tokens?.prompt || 0),
+      0
+    );
+    const totalCompletionTokens = resultsWithTokens.reduce(
+      (sum, r) => sum + (r.tokens?.completion || 0),
+      0
+    );
+    const totalTokens = totalPromptTokens + totalCompletionTokens;
+    tokens = {
+      total_prompt_tokens: totalPromptTokens,
+      total_completion_tokens: totalCompletionTokens,
+      total_tokens: totalTokens,
+      avg_tokens_per_request:
+        resultsWithTokens.length > 0 ? totalTokens / resultsWithTokens.length : 0,
+    };
+    // Estimate cost if model is known
+    if (model && totalTokens > 0) {
+      const costEstimate = estimateCost(totalPromptTokens, totalCompletionTokens, model);
+      const pricing = getModelPricing(model);
+      cost = {
+        estimated_total_usd: costEstimate.totalUsd,
+        breakdown: {
+          prompt_cost_usd: costEstimate.promptCostUsd,
+          completion_cost_usd: costEstimate.completionCostUsd,
+        },
+        model,
+        pricing: {
+          prompt_per_1k: pricing.promptPer1K,
+          completion_per_1k: pricing.completionPer1K,
+        },
+      };
+    }
+  }
   return {
     total_requests: totalRequests,
     successful_requests: successfulRequests,
@@ -444,6 +515,8 @@ function calculateMetrics(results: StressRequestResult[], durationMs: number): S
     p90_latency_ms: percentile(latencies, 90),
     p95_latency_ms: percentile(latencies, 95),
     p99_latency_ms: percentile(latencies, 99),
+    tokens,
+    cost,
   };
 }

package/src/config/loader.ts CHANGED Viewed

@@ -13,7 +13,9 @@ const CONFIG_FILENAMES = ['artemis.config.yaml', 'artemis.config.yml', 'artemis.
 /**
  * Find and load the configuration file
  */
-export async function loadConfig(configPath?: string): Promise<ArtemisConfig | null> {
+export async function loadConfig(
+  configPath?: string
+): Promise<(ArtemisConfig & { _path: string }) | null> {
   const path = configPath || findConfigFile();
   if (!path) {
@@ -36,7 +38,8 @@ export async function loadConfig(configPath?: string): Promise<ArtemisConfig | n
       throw new Error(`Invalid config file ${path}:\n${issues}`);
     }
-    return result.data;
+    // Include the config file path for logging
+    return { ...result.data, _path: path };
   } catch (error) {
     if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
       return null;

package/src/config/schema.ts CHANGED Viewed

@@ -16,6 +16,7 @@ const ProviderConfigSchema = z.object({
   resourceName: z.string().optional(),
   deploymentName: z.string().optional(),
   apiVersion: z.string().optional(),
+  embeddingDeploymentName: z.string().optional(),
   // Vercel AI specific
   underlyingProvider: z.enum(['openai', 'azure', 'anthropic', 'google', 'mistral']).optional(),
 });

package/src/ui/index.ts CHANGED Viewed

@@ -30,6 +30,7 @@ export {
   renderStressSummaryPanel,
   renderRedteamSummaryPanel,
   renderInfoBox,
+  renderFailureReason,
 } from './panels.js';
 export type { SummaryData, StressSummaryData, RedteamSummaryData } from './panels.js';
@@ -40,3 +41,21 @@ export type { ErrorContext } from './errors.js';
 // Live status tracking
 export { LiveTestStatus, Spinner, createSpinner } from './live-status.js';
 export type { TestStatus } from './live-status.js';
+// Interactive prompts
+export {
+  isInteractive,
+  promptProvider,
+  promptModel,
+  promptScenarios,
+  promptConfirm,
+  promptInput,
+  promptPassword,
+  promptSelect,
+  promptApiKeyIfNeeded,
+  getApiKeyEnvVar,
+  runInitWizard,
+  PROVIDER_CHOICES,
+  MODEL_CHOICES,
+} from './prompts.js';
+export type { InitWizardResult } from './prompts.js';

package/src/ui/panels.ts CHANGED Viewed

@@ -23,9 +23,22 @@ export interface StressSummaryData {
   duration: number;
   avgLatency: number;
   p50Latency: number;
+  p90Latency: number;
   p95Latency: number;
   p99Latency: number;
   throughput: number;
+  /** Token usage (optional) */
+  tokens?: {
+    total: number;
+    prompt: number;
+    completion: number;
+    avgPerRequest: number;
+  };
+  /** Cost estimation (optional) */
+  cost?: {
+    totalUsd: number;
+    model: string;
+  };
 }
 export interface RedteamSummaryData {
@@ -76,6 +89,32 @@ export function renderSummaryPanel(data: SummaryData): string {
   return lines.join('\n');
 }
+/**
+ * Turn a USD amount into a short human-readable cost string.
+ *
+ * Amounts under one cent are shown in cents with four decimals, amounts
+ * under one dollar in dollars with four decimals, and everything else in
+ * dollars with two decimals.
+ *
+ * @param costUsd - Cost in US dollars.
+ * @returns The formatted cost, suffixed with "¢" or prefixed with "$".
+ */
+function formatCostDisplay(costUsd: number): string {
+  const isSubCent = costUsd < 0.01;
+  if (isSubCent) {
+    const cents = costUsd * 100;
+    return cents.toFixed(4) + '¢';
+  }
+  // Small dollar amounts keep extra precision so they do not collapse to $0.00.
+  const decimals = costUsd < 1 ? 4 : 2;
+  return '$' + costUsd.toFixed(decimals);
+}
+/**
+ * Format a token count for display with a K/M suffix.
+ *
+ * The unit thresholds account for rounding, so fractional inputs (such as a
+ * per-request average) print as whole tokens, and values that would round up
+ * across a unit boundary move to the next unit instead of printing "1000"
+ * or "1000.0K".
+ *
+ * @param tokens - Token count; may be fractional.
+ * @returns A whole number below 1K, one decimal with "K", or two decimals with "M".
+ */
+function formatTokenCount(tokens: number): string {
+  // 999_950 is the smallest value that toFixed(1) could render as "1000.0K".
+  if (tokens >= 999_950) {
+    return `${(tokens / 1_000_000).toFixed(2)}M`;
+  }
+  // 999.5 is the smallest value that Math.round would turn into 1000.
+  if (tokens >= 999.5) {
+    return `${(tokens / 1_000).toFixed(1)}K`;
+  }
+  return Math.round(tokens).toString();
+}
 /**
  * Render a stress test summary panel
  */
@@ -83,15 +122,24 @@ export function renderStressSummaryPanel(data: StressSummaryData): string {
   const width = 55;
   if (!isTTY) {
-    return [
+    const lines = [
       '=== STRESS TEST RESULTS ===',
       `Total Requests: ${data.totalRequests}`,
       `Successful: ${data.successfulRequests} (${data.successRate.toFixed(1)}%)`,
       `Failed: ${data.failedRequests}`,
       `Duration: ${formatDuration(data.duration)}`,
       `Throughput: ${data.throughput.toFixed(1)} req/s`,
-      `Latency: avg=${data.avgLatency.toFixed(0)}ms p50=${data.p50Latency.toFixed(0)}ms p95=${data.p95Latency.toFixed(0)}ms p99=${data.p99Latency.toFixed(0)}ms`,
-    ].join('\n');
+      `Latency: avg=${data.avgLatency.toFixed(0)}ms p50=${data.p50Latency.toFixed(0)}ms p90=${data.p90Latency.toFixed(0)}ms p95=${data.p95Latency.toFixed(0)}ms p99=${data.p99Latency.toFixed(0)}ms`,
+    ];
+    if (data.tokens) {
+      lines.push(
+        `Tokens: ${formatTokenCount(data.tokens.total)} total (${formatTokenCount(data.tokens.avgPerRequest)}/req)`
+      );
+    }
+    if (data.cost) {
+      lines.push(`Estimated Cost: ${formatCostDisplay(data.cost.totalUsd)} (${data.cost.model})`);
+    }
+    return lines.join('\n');
   }
   const border = '═'.repeat(width - 2);
@@ -129,13 +177,59 @@ export function renderStressSummaryPanel(data: StressSummaryData): string {
       chalk.cyan('║'),
     chalk.cyan('║') +
       padText(
-        `  p50: ${data.p50Latency.toFixed(0)}ms  p95: ${data.p95Latency.toFixed(0)}ms  p99: ${data.p99Latency.toFixed(0)}ms`,
+        `  p50: ${data.p50Latency.toFixed(0)}ms  p90: ${data.p90Latency.toFixed(0)}ms  p95: ${data.p95Latency.toFixed(0)}ms  p99: ${data.p99Latency.toFixed(0)}ms`,
         width - 2
       ) +
       chalk.cyan('║'),
-    chalk.cyan(`╚${border}╝`),
   ];
+  // Add token usage section if available
+  if (data.tokens) {
+    lines.push(chalk.cyan(`╠${border}╣`));
+    lines.push(chalk.cyan('║') + centerText(chalk.dim('Token Usage'), width - 2) + chalk.cyan('║'));
+    lines.push(chalk.cyan(`╠${border}╣`));
+    lines.push(
+      chalk.cyan('║') +
+        padText(`  Total: ${chalk.bold(formatTokenCount(data.tokens.total))} tokens`, width - 2) +
+        chalk.cyan('║')
+    );
+    lines.push(
+      chalk.cyan('║') +
+        padText(
+          `  Prompt: ${formatTokenCount(data.tokens.prompt)}  Completion: ${formatTokenCount(data.tokens.completion)}`,
+          width - 2
+        ) +
+        chalk.cyan('║')
+    );
+    lines.push(
+      chalk.cyan('║') +
+        padText(`  Avg/Request: ${data.tokens.avgPerRequest.toFixed(0)} tokens`, width - 2) +
+        chalk.cyan('║')
+    );
+  }
+  // Add cost estimation section if available
+  if (data.cost) {
+    lines.push(chalk.cyan(`╠${border}╣`));
+    lines.push(
+      chalk.cyan('║') + centerText(chalk.dim('Cost Estimation'), width - 2) + chalk.cyan('║')
+    );
+    lines.push(chalk.cyan(`╠${border}╣`));
+    lines.push(
+      chalk.cyan('║') +
+        padText(
+          `  Estimated Total: ${chalk.bold(chalk.yellow(formatCostDisplay(data.cost.totalUsd)))}`,
+          width - 2
+        ) +
+        chalk.cyan('║')
+    );
+    lines.push(
+      chalk.cyan('║') + padText(`  Model: ${data.cost.model}`, width - 2) + chalk.cyan('║')
+    );
+  }
+  lines.push(chalk.cyan(`╚${border}╝`));
   return lines.join('\n');
 }
@@ -214,3 +308,57 @@ export function renderInfoBox(title: string, lines: string[]): string {
   return result.join('\n');
 }
+/**
+ * Build the display text explaining why a test case failed.
+ *
+ * When the output is not a TTY a single plain "Reason: ..." line is
+ * returned. Otherwise the reason is inspected for known message fragments
+ * and rendered behind a red gutter bar:
+ *  - inline matcher errors caused by an unsupported expression get a
+ *    three-line block with the offending expression and a hint;
+ *  - "Missing required values" reasons are condensed to one line, with the
+ *    mode appended when the reason contains "(mode: ...)";
+ *  - anything else is shown as-is, tagged with the matcher type if given.
+ *
+ * @param reason - Failure message to render.
+ * @param options - Optional matcher type tag and indent width (default 3 spaces).
+ * @returns The formatted text, possibly spanning several lines.
+ */
+export function renderFailureReason(
+  reason: string,
+  options?: {
+    matcherType?: string;
+    indent?: number;
+  }
+): string {
+  const pad = ' '.repeat(options?.indent ?? 3);
+  if (!isTTY) {
+    // Plain text for CI/CD
+    return `${pad}Reason: ${reason}`;
+  }
+  // Every styled line starts with the same indented red bar.
+  const gutter = `${pad}${chalk.red('│')}`;
+  const inlineExpressionFailure =
+    reason.includes('Inline matcher error') && reason.includes('Unsupported expression pattern');
+  if (inlineExpressionFailure) {
+    // Pull out the expression text that follows the marker, if present.
+    const captured = /Unsupported expression pattern: (.+)$/.exec(reason);
+    const expression = captured?.[1] || 'unknown';
+    return [
+      `${gutter} ${chalk.red.bold('Inline Matcher Error')}`,
+      `${gutter} ${chalk.dim('Expression not supported:')} ${chalk.yellow(expression)}`,
+      `${gutter} ${chalk.dim('Hint:')} Use ${chalk.cyan('response.')} prefix (e.g., ${chalk.cyan('response.startsWith("...")')})`,
+    ].join('\n');
+  }
+  if (reason.includes('Missing required values')) {
+    // Append the mode only when the reason carries a "(mode: x)" marker.
+    const captured = /\(mode: (\w+)\)/.exec(reason);
+    const modeSuffix = captured ? chalk.dim(` (${captured[1]} mode)`) : '';
+    return `${gutter} ${chalk.dim('Expected value not found')}${modeSuffix}`;
+  }
+  // Generic styled failure
+  const label = options?.matcherType;
+  return label
+    ? `${gutter} ${chalk.dim(`[${label}]`)} ${reason}`
+    : `${gutter} ${chalk.dim(reason)}`;
+}