npm - @artemiskit/cli - Versions diffs - 0.1.4 → 0.1.6 - Mend

@artemiskit/cli 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/CHANGELOG.md +24 -0
package/README.md +1 -0
package/dist/index.js +19129 -20009
package/dist/src/commands/compare.d.ts.map +1 -1
package/dist/src/commands/history.d.ts.map +1 -1
package/dist/src/commands/init.d.ts.map +1 -1
package/dist/src/commands/redteam.d.ts.map +1 -1
package/dist/src/commands/report.d.ts.map +1 -1
package/dist/src/commands/run.d.ts.map +1 -1
package/dist/src/commands/stress.d.ts.map +1 -1
package/dist/src/ui/colors.d.ts +44 -0
package/dist/src/ui/colors.d.ts.map +1 -0
package/dist/src/ui/errors.d.ts +39 -0
package/dist/src/ui/errors.d.ts.map +1 -0
package/dist/src/ui/index.d.ts +16 -0
package/dist/src/ui/index.d.ts.map +1 -0
package/dist/src/ui/live-status.d.ts +82 -0
package/dist/src/ui/live-status.d.ts.map +1 -0
package/dist/src/ui/panels.d.ts +49 -0
package/dist/src/ui/panels.d.ts.map +1 -0
package/dist/src/ui/progress.d.ts +60 -0
package/dist/src/ui/progress.d.ts.map +1 -0
package/dist/src/ui/utils.d.ts +42 -0
package/dist/src/ui/utils.d.ts.map +1 -0
package/package.json +6 -6
package/src/__tests__/helpers/index.ts +6 -0
package/src/__tests__/helpers/mock-adapter.ts +90 -0
package/src/__tests__/helpers/test-utils.ts +205 -0
package/src/__tests__/integration/compare-command.test.ts +236 -0
package/src/__tests__/integration/config.test.ts +125 -0
package/src/__tests__/integration/history-command.test.ts +251 -0
package/src/__tests__/integration/init-command.test.ts +177 -0
package/src/__tests__/integration/report-command.test.ts +245 -0
package/src/__tests__/integration/ui.test.ts +230 -0
package/src/commands/compare.ts +158 -49
package/src/commands/history.ts +131 -30
package/src/commands/init.ts +181 -21
package/src/commands/redteam.ts +118 -75
package/src/commands/report.ts +29 -14
package/src/commands/run.ts +86 -66
package/src/commands/stress.ts +61 -63
package/src/ui/colors.ts +62 -0
package/src/ui/errors.ts +248 -0
package/src/ui/index.ts +42 -0
package/src/ui/live-status.ts +259 -0
package/src/ui/panels.ts +216 -0
package/src/ui/progress.ts +139 -0
package/src/ui/utils.ts +88 -0

package/src/__tests__/helpers/test-utils.ts ADDED Viewed

@@ -0,0 +1,205 @@
+/**
+ * Test utilities for CLI integration tests
+ */
+import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+/**
+ * Creates a uniquely named temporary directory for test isolation.
+ *
+ * The directory is created under the OS temp directory with `mkdtemp`, which
+ * chooses the random suffix and creates the directory in a single step, so
+ * an existing directory is never silently reused.
+ *
+ * @param prefix - Leading part of the directory name; `-` plus a random
+ *   suffix is appended to it.
+ * @returns Path of the newly created directory.
+ */
+export async function createTestDir(prefix = 'artemis-test'): Promise<string> {
+  return mkdtemp(join(tmpdir(), `${prefix}-`));
+}
+/**
+ * Recursively removes a test directory on a best-effort basis.
+ *
+ * @param testDir - Directory to delete.
+ * @returns Resolves once removal has been attempted; removal failures are
+ *   discarded rather than propagated.
+ */
+export async function cleanupTestDir(testDir: string): Promise<void> {
+  // Cleanup must never fail a test, so any rejection from rm is dropped.
+  await rm(testDir, { recursive: true, force: true }).catch(() => undefined);
+}
+/**
+ * Writes a scenario to `<dir>/scenarios/<name>.yaml`, creating the
+ * `scenarios` folder first when it does not exist yet.
+ *
+ * @param dir - Directory that holds (or will hold) the `scenarios` folder.
+ * @param name - File name without the `.yaml` extension.
+ * @param content - Text written to the file as-is.
+ * @returns Path of the written scenario file.
+ */
+export async function createScenarioFile(
+  dir: string,
+  name: string,
+  content: string
+): Promise<string> {
+  const folder = join(dir, 'scenarios');
+  const target = join(folder, name + '.yaml');
+  await mkdir(folder, { recursive: true });
+  await writeFile(target, content);
+  return target;
+}
+/**
+ * Writes `config` to `<dir>/artemis.config.yaml` using a minimal YAML emitter.
+ *
+ * Emission rules, per top-level entry:
+ * - a non-array object with at least one defined property becomes a
+ *   one-level block mapping whose values are JSON-encoded;
+ * - an object with no defined properties becomes `key: {}` (a bare `key:`
+ *   would read back as null);
+ * - everything else, arrays included, becomes `key: <JSON>` on one line;
+ *   arrays are kept out of the mapping branch because `typeof [] ===
+ *   'object'` would otherwise turn them into mappings with numeric keys;
+ * - entries whose value is `undefined` are omitted, at both levels.
+ *
+ * @param dir - Directory the config file is written into.
+ * @param config - Configuration values to serialise.
+ * @returns Path of the written config file.
+ */
+export async function createConfigFile(
+  dir: string,
+  config: Record<string, unknown>
+): Promise<string> {
+  const filePath = join(dir, 'artemis.config.yaml');
+  const lines: string[] = [];
+  for (const [key, value] of Object.entries(config)) {
+    if (value === undefined) {
+      continue;
+    }
+    const isMapping = typeof value === 'object' && value !== null && !Array.isArray(value);
+    const nested = isMapping
+      ? Object.entries(value as Record<string, unknown>).filter(([, v]) => v !== undefined)
+      : [];
+    if (nested.length === 0) {
+      // Scalars, arrays and empty mappings all fit on a single line.
+      lines.push(`${key}: ${isMapping ? '{}' : JSON.stringify(value)}`);
+      continue;
+    }
+    lines.push(`${key}:`);
+    for (const [k, v] of nested) {
+      lines.push(`  ${k}: ${JSON.stringify(v)}`);
+    }
+  }
+  await writeFile(filePath, lines.join('\n'));
+  return filePath;
+}
+/**
+ * Sample scenario templates for testing.
+ *
+ * Each value is a YAML document kept as a string; every template begins and
+ * ends with a newline because of how the template literals are laid out.
+ *
+ * - `simple`: one case with a `contains` expectation for "hello".
+ * - `multiCase`: three cases, each with a `contains` expectation.
+ * - `withProvider`: one case plus top-level `provider` and `model` fields.
+ * - `exactMatch`: one case with an `exact` expectation of "hello world".
+ * - `regexMatch`: one case with a `regex` expectation; the template-literal
+ *   escapes yield the YAML text `pattern: "\\d+"`.
+ * - `failing`: one case with an `exact` expectation of
+ *   "this will not match".
+ */
+export const scenarioTemplates = {
+  simple: `
+name: simple-test
+description: A simple test scenario
+cases:
+  - id: test-1
+    prompt: "Say hello"
+    expected:
+      type: contains
+      values:
+        - "hello"
+      mode: any
+`,
+  multiCase: `
+name: multi-case-test
+description: Multiple test cases
+cases:
+  - id: case-1
+    prompt: "What is 2+2?"
+    expected:
+      type: contains
+      values:
+        - "4"
+      mode: any
+  - id: case-2
+    prompt: "What is the capital of France?"
+    expected:
+      type: contains
+      values:
+        - "Paris"
+      mode: any
+  - id: case-3
+    prompt: "Say hello"
+    expected:
+      type: contains
+      values:
+        - "hello"
+      mode: any
+`,
+  withProvider: `
+name: provider-test
+description: Test with provider config
+provider: openai
+model: gpt-4o-mini
+cases:
+  - id: test-1
+    prompt: "Hello"
+    expected:
+      type: contains
+      values:
+        - "hello"
+      mode: any
+`,
+  exactMatch: `
+name: exact-match-test
+description: Test exact matching
+cases:
+  - id: exact-1
+    prompt: "Return exactly: hello world"
+    expected:
+      type: exact
+      value: "hello world"
+`,
+  regexMatch: `
+name: regex-test
+description: Test regex matching
+cases:
+  - id: regex-1
+    prompt: "Return a number"
+    expected:
+      type: regex
+      pattern: "\\\\d+"
+`,
+  failing: `
+name: failing-test
+description: A test that should fail
+cases:
+  - id: will-fail
+    prompt: "Say hello"
+    expected:
+      type: exact
+      value: "this will not match"
+`,
+};
+/**
+ * Captures `console.log` and `console.error` output during test execution.
+ *
+ * `start()` swaps both console methods for functions that buffer their
+ * arguments (each converted with `String` and joined by a single space);
+ * `stop()` puts the real methods back and returns what was buffered.
+ */
+export class OutputCapture {
+  // Real console methods; defined only while a capture is active.
+  private originalLog: typeof console.log | undefined;
+  private originalError: typeof console.error | undefined;
+  private logs: string[] = [];
+  private errors: string[] = [];
+  /**
+   * Clears the buffers and begins capturing. When a capture is already
+   * active only the buffers are cleared, so the saved real console methods
+   * are never overwritten by the capturing ones.
+   */
+  start(): void {
+    this.logs = [];
+    this.errors = [];
+    if (this.originalLog !== undefined || this.originalError !== undefined) {
+      return;
+    }
+    this.originalLog = console.log;
+    this.originalError = console.error;
+    console.log = (...args: unknown[]) => {
+      this.logs.push(args.map(String).join(' '));
+    };
+    console.error = (...args: unknown[]) => {
+      this.errors.push(args.map(String).join(' '));
+    };
+  }
+  /**
+   * Restores the real console methods and returns the buffered lines.
+   * Calling it without an active capture leaves the console untouched.
+   *
+   * @returns The captured `console.log` and `console.error` lines.
+   */
+  stop(): { logs: string[]; errors: string[] } {
+    if (this.originalLog !== undefined) {
+      console.log = this.originalLog;
+      this.originalLog = undefined;
+    }
+    if (this.originalError !== undefined) {
+      console.error = this.originalError;
+      this.originalError = undefined;
+    }
+    return { logs: this.logs, errors: this.errors };
+  }
+  /** @returns Captured `console.log` lines joined with newlines. */
+  getOutput(): string {
+    return this.logs.join('\n');
+  }
+  /** @returns Captured `console.error` lines joined with newlines. */
+  getErrors(): string {
+    return this.errors.join('\n');
+  }
+}

package/src/__tests__/integration/compare-command.test.ts ADDED Viewed

@@ -0,0 +1,236 @@
+/**
+ * Integration tests for compare command
+ */
+import { describe, expect, it, beforeEach, afterEach } from 'bun:test';
+import { mkdir, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { createTestDir, cleanupTestDir } from '../helpers/test-utils.js';
+import { createStorage } from '../../utils/storage.js';
+describe('Compare Command', () => {
+  /** Metric fields stored in every run manifest written by this suite. */
+  type RunMetrics = {
+    success_rate: number;
+    passed_cases: number;
+    failed_cases: number;
+    total_tokens: number;
+    median_latency_ms: number;
+  };
+  const PROJECT = 'test-project';
+  let testDir: string;
+  let originalCwd: string;
+  /** Builds a run manifest for the fixed project/scenario used by every test here. */
+  const buildManifest = (runId: string, metrics: RunMetrics, startTime: Date = new Date()) => ({
+    run_id: runId,
+    project: PROJECT,
+    config: { scenario: 'test-scenario' },
+    start_time: startTime.toISOString(),
+    metrics,
+    cases: [],
+  });
+  /** Writes a manifest to `<testDir>/artemis-runs/test-project/<run_id>.json`. */
+  const saveManifest = async (manifest: ReturnType<typeof buildManifest>): Promise<void> => {
+    await writeFile(
+      join(testDir, 'artemis-runs', PROJECT, `${manifest.run_id}.json`),
+      JSON.stringify(manifest)
+    );
+  };
+  /** Opens local storage rooted at `<testDir>/artemis-runs`. */
+  const openStorage = () =>
+    createStorage({
+      fileConfig: {
+        storage: { type: 'local', basePath: join(testDir, 'artemis-runs') },
+      },
+    });
+  beforeEach(async () => {
+    testDir = await createTestDir('compare-test');
+    originalCwd = process.cwd();
+    process.chdir(testDir);
+    // Create storage directory
+    await mkdir(join(testDir, 'artemis-runs', PROJECT), { recursive: true });
+  });
+  afterEach(async () => {
+    // Leave the temp directory before deleting it.
+    process.chdir(originalCwd);
+    await cleanupTestDir(testDir);
+  });
+  describe('storage comparison', () => {
+    it('should compare two runs and calculate deltas', async () => {
+      await saveManifest(
+        buildManifest(
+          'baseline-001',
+          {
+            success_rate: 0.8,
+            passed_cases: 4,
+            failed_cases: 1,
+            total_tokens: 500,
+            median_latency_ms: 200,
+          },
+          new Date('2026-01-15T10:00:00Z')
+        )
+      );
+      await saveManifest(
+        buildManifest(
+          'current-001',
+          {
+            success_rate: 1.0,
+            passed_cases: 5,
+            failed_cases: 0,
+            total_tokens: 450,
+            median_latency_ms: 150,
+          },
+          new Date('2026-01-16T10:00:00Z')
+        )
+      );
+      const comparison = await openStorage().compare!('baseline-001', 'current-001');
+      expect(comparison.baseline.metrics.success_rate).toBe(0.8);
+      expect(comparison.current.metrics.success_rate).toBe(1.0);
+      expect(comparison.delta.successRate).toBeCloseTo(0.2, 5); // 1.0 - 0.8
+      expect(comparison.delta.latency).toBe(-50); // 150 - 200 (improved)
+      expect(comparison.delta.tokens).toBe(-50); // 450 - 500 (reduced)
+    });
+    it('should detect regression when success rate drops', async () => {
+      await saveManifest(
+        buildManifest(
+          'baseline-002',
+          {
+            success_rate: 1.0,
+            passed_cases: 5,
+            failed_cases: 0,
+            total_tokens: 500,
+            median_latency_ms: 200,
+          },
+          new Date('2026-01-15T10:00:00Z')
+        )
+      );
+      await saveManifest(
+        buildManifest(
+          'current-002',
+          {
+            success_rate: 0.6,
+            passed_cases: 3,
+            failed_cases: 2,
+            total_tokens: 600,
+            median_latency_ms: 300,
+          },
+          new Date('2026-01-16T10:00:00Z')
+        )
+      );
+      const comparison = await openStorage().compare!('baseline-002', 'current-002');
+      // Success rate dropped by 0.4 (40%)
+      expect(comparison.delta.successRate).toBeCloseTo(-0.4, 5);
+      // This would be a regression (threshold typically 5%)
+      const threshold = 0.05;
+      const hasRegression = comparison.delta.successRate < -threshold;
+      expect(hasRegression).toBe(true);
+    });
+    it('should handle identical runs', async () => {
+      await saveManifest(
+        buildManifest('same-001', {
+          success_rate: 0.9,
+          passed_cases: 9,
+          failed_cases: 1,
+          total_tokens: 1000,
+          median_latency_ms: 250,
+        })
+      );
+      const comparison = await openStorage().compare!('same-001', 'same-001');
+      expect(comparison.delta.successRate).toBe(0);
+      expect(comparison.delta.latency).toBe(0);
+      expect(comparison.delta.tokens).toBe(0);
+    });
+    it('should throw error for non-existent baseline', async () => {
+      await saveManifest(
+        buildManifest('exists-001', {
+          success_rate: 1.0,
+          passed_cases: 5,
+          failed_cases: 0,
+          total_tokens: 500,
+          median_latency_ms: 200,
+        })
+      );
+      const storage = openStorage();
+      await expect(storage.compare!('non-existent', 'exists-001')).rejects.toThrow();
+    });
+    it('should throw error for non-existent current', async () => {
+      await saveManifest(
+        buildManifest('exists-002', {
+          success_rate: 1.0,
+          passed_cases: 5,
+          failed_cases: 0,
+          total_tokens: 500,
+          median_latency_ms: 200,
+        })
+      );
+      const storage = openStorage();
+      await expect(storage.compare!('exists-002', 'non-existent')).rejects.toThrow();
+    });
+  });
+});

package/src/__tests__/integration/config.test.ts ADDED Viewed

@@ -0,0 +1,125 @@
+/**
+ * Integration tests for CLI configuration loading
+ */
+import { describe, expect, it, beforeEach, afterEach } from 'bun:test';
+import { writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { loadConfig } from '../../config/loader.js';
+import { createTestDir, cleanupTestDir } from '../helpers/test-utils.js';
+describe('Config Loader', () => {
+  let testDir: string;
+  let originalCwd: string;
+  /** Joins lines into a YAML document that begins and ends with a newline. */
+  const yamlDoc = (...lines: string[]): string => ['', ...lines, ''].join('\n');
+  /** Writes `text` to `<testDir>/<fileName>`. */
+  const writeInTestDir = (fileName: string, text: string): Promise<void> =>
+    writeFile(join(testDir, fileName), text);
+  beforeEach(async () => {
+    testDir = await createTestDir('config-test');
+    originalCwd = process.cwd();
+    process.chdir(testDir);
+  });
+  afterEach(async () => {
+    process.chdir(originalCwd);
+    await cleanupTestDir(testDir);
+  });
+  describe('loadConfig', () => {
+    it('should return null when no config file exists', async () => {
+      expect(await loadConfig()).toBeNull();
+    });
+    it('should load artemis.config.yaml from current directory', async () => {
+      await writeInTestDir(
+        'artemis.config.yaml',
+        yamlDoc('provider: openai', 'model: gpt-4o-mini', 'project: test-project')
+      );
+      const loaded = await loadConfig();
+      expect(loaded).toBeDefined();
+      expect(loaded?.provider).toBe('openai');
+      expect(loaded?.model).toBe('gpt-4o-mini');
+      expect(loaded?.project).toBe('test-project');
+    });
+    it('should load artemis.config.yml (yml extension)', async () => {
+      await writeInTestDir(
+        'artemis.config.yml',
+        yamlDoc('provider: azure-openai', 'model: gpt-4')
+      );
+      const loaded = await loadConfig();
+      expect(loaded).toBeDefined();
+      expect(loaded?.provider).toBe('azure-openai');
+    });
+    it('should load config from explicit path', async () => {
+      const explicitPath = join(testDir, 'custom-config.yaml');
+      await writeFile(explicitPath, yamlDoc('provider: anthropic', 'model: claude-3-sonnet'));
+      const loaded = await loadConfig(explicitPath);
+      expect(loaded).toBeDefined();
+      expect(loaded?.provider).toBe('anthropic');
+      expect(loaded?.model).toBe('claude-3-sonnet');
+    });
+    it('should load nested azure configuration', async () => {
+      await writeInTestDir(
+        'artemis.config.yaml',
+        yamlDoc(
+          'provider: azure-openai',
+          'model: gpt-4',
+          'providers:',
+          '  azure-openai:',
+          '    resourceName: my-resource',
+          '    deploymentName: my-deployment',
+          '    apiVersion: "2024-02-15-preview"'
+        )
+      );
+      const loaded = await loadConfig();
+      expect(loaded).toBeDefined();
+      expect(loaded?.providers?.['azure-openai']?.resourceName).toBe('my-resource');
+      expect(loaded?.providers?.['azure-openai']?.deploymentName).toBe('my-deployment');
+    });
+    it('should load storage configuration', async () => {
+      await writeInTestDir(
+        'artemis.config.yaml',
+        yamlDoc('provider: openai', 'storage:', '  type: local', '  basePath: ./my-runs')
+      );
+      const loaded = await loadConfig();
+      expect(loaded).toBeDefined();
+      expect(loaded?.storage?.type).toBe('local');
+      expect(loaded?.storage?.basePath).toBe('./my-runs');
+    });
+    it('should throw error for invalid YAML', async () => {
+      // Mis-indented key plus an unclosed flow sequence.
+      await writeInTestDir(
+        'artemis.config.yaml',
+        yamlDoc('provider: openai', '  model: gpt-4  # invalid indentation', 'invalid: [unclosed')
+      );
+      await expect(loadConfig()).rejects.toThrow();
+    });
+    it('should prefer .yaml over .yml when both exist', async () => {
+      await writeInTestDir('artemis.config.yaml', 'provider: openai');
+      await writeInTestDir('artemis.config.yml', 'provider: anthropic');
+      const loaded = await loadConfig();
+      expect(loaded?.provider).toBe('openai');
+    });
+  });
+});