npm - ai-xray - Versions diffs - 1.2.0 → 2.0.0 - Mend

ai-xray 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/src/client.ts ADDED Viewed

@@ -0,0 +1,203 @@
+import * as fs from 'fs';
+import * as http from 'http';
+import * as https from 'https';
+import * as os from 'os';
+import * as path from 'path';
+import { requestJson } from './utils/http';
+export interface ProviderConfig { // Connection settings for one chat-completions provider.
+    baseUrl: string; // API root; `/chat/completions` is appended to it when a request is sent.
+    apiKey?: string; // When set, sent as a `Bearer` Authorization header.
+    model: string; // Model identifier; also the fallback for ChatResponse.model when the reply omits one.
+}
+interface ConfigFile { // Shape expected of the parsed `.ai-xray.json` file in the home directory.
+    providers?: Record<string, ProviderConfig>; // Provider settings keyed by the name passed to loadConfig().
+}
+/**
+ * Resolve the provider settings to use for a run.
+ *
+ * The environment variables `AI_XRAY_BASE_URL`, `AI_XRAY_API_KEY` and
+ * `AI_XRAY_MODEL` supply the baseline, with built-in defaults for the base
+ * URL and the model. When `providerName` names an entry in the config file,
+ * that entry's non-empty fields take precedence over the baseline. A name
+ * with no matching entry silently falls back to the baseline.
+ *
+ * @param providerName Optional key into the config file's `providers` map.
+ * @returns The merged provider configuration.
+ */
+export function loadConfig(providerName?: string): ProviderConfig {
+    const fromEnv: ProviderConfig = {
+        baseUrl: process.env.AI_XRAY_BASE_URL || 'https://api.openai.com/v1',
+        apiKey: process.env.AI_XRAY_API_KEY,
+        model: process.env.AI_XRAY_MODEL || 'gpt-4o',
+    };
+    if (!providerName) {
+        return fromEnv;
+    }
+    const entry = loadConfigFile()?.providers?.[providerName];
+    if (!entry) {
+        return fromEnv;
+    }
+    // Empty or missing fields in the file entry inherit the environment value.
+    return {
+        baseUrl: entry.baseUrl || fromEnv.baseUrl,
+        apiKey: entry.apiKey || fromEnv.apiKey,
+        model: entry.model || fromEnv.model,
+    };
+}
+/**
+ * Read and parse `.ai-xray.json` from the user's home directory.
+ *
+ * @returns The parsed content, or `null` when the file is absent, unreadable,
+ *          or not valid JSON.
+ */
+function loadConfigFile(): ConfigFile | null {
+    const location = path.join(os.homedir(), '.ai-xray.json');
+    try {
+        if (!fs.existsSync(location)) {
+            return null;
+        }
+        return JSON.parse(fs.readFileSync(location, 'utf-8')) as ConfigFile;
+    } catch {
+        // A broken config file is treated the same as a missing one.
+        return null;
+    }
+}
+export interface ChatMessage { // One message of a chat-completions conversation.
+    role: 'system' | 'user' | 'assistant'; // Who authored the message.
+    content: string | Array<{ type: string; [key: string]: unknown }>; // Plain text, or a list of typed parts (each part carries a `type` plus arbitrary extra keys).
+}
+export interface ChatRequest { // Request payload; chat() serialises it unchanged with JSON.stringify.
+    model: string;
+    messages: ChatMessage[];
+    max_tokens?: number;
+    temperature?: number;
+    stream?: boolean; // chatStream() always overrides this to `true`.
+    response_format?: { type: string };
+    tools?: unknown[]; // Not validated here; forwarded as given.
+}
+export interface ChatResponse { // Normalised result returned by chat() and chatStream().
+    id: string; // Reply id; chat() substitutes '' when the body has none.
+    model: string; // Model named in the reply; chat() falls back to the configured model.
+    choices: Array<{ // chat() substitutes [] when the body has no choices.
+        message: { role: string; content: string };
+        finish_reason: string;
+    }>;
+    usage?: { // Passed through from the reply body; may be absent.
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+    };
+    headers: Record<string, string>; // HTTP response headers of the request.
+    latency_ms: number; // Elapsed time measured with performance.now() around the request; not rounded.
+}
+/**
+ * Send a non-streaming chat-completions request and normalise the reply.
+ *
+ * The request is POSTed as JSON to `<baseUrl>/chat/completions`; a trailing
+ * slash on `baseUrl` is tolerated. Missing `id`, `model` and `choices` fields
+ * in the reply are replaced by `''`, the configured model and `[]`.
+ *
+ * @param config Provider endpoint, optional API key and default model.
+ * @param request Payload to serialise as the request body.
+ * @returns The normalised reply together with response headers and latency.
+ * @throws Error prefixed with `Chat request failed:` when the transport
+ *         fails or the reply has no body.
+ */
+export async function chat(
+    config: ProviderConfig,
+    request: ChatRequest
+): Promise<ChatResponse> {
+    const startTime = performance.now();
+    // Strip trailing slashes so the final path never contains `//`.
+    const url = `${config.baseUrl.replace(/\/+$/, '')}/chat/completions`;
+    const headers: Record<string, string> = {
+        'Content-Type': 'application/json',
+        'Accept': 'application/json',
+    };
+    if (config.apiKey) {
+        headers['Authorization'] = `Bearer ${config.apiKey}`;
+    }
+    try {
+        const { response, data } = await requestJson(url, {
+            method: 'POST',
+            headers,
+            body: JSON.stringify(request),
+        });
+        const latencyMs = performance.now() - startTime;
+        if (data == null) {
+            // Fail with a readable reason instead of a property-access TypeError.
+            throw new Error('empty response body');
+        }
+        // Narrow once; every field is treated as optional and defaulted below.
+        const payload = data as Partial<ChatResponse>;
+        return {
+            id: payload.id || '',
+            model: payload.model || config.model,
+            choices: payload.choices || [],
+            usage: payload.usage,
+            headers: response.headers,
+            latency_ms: latencyMs,
+        };
+    } catch (error) {
+        const reason = error instanceof Error ? error.message : String(error);
+        throw new Error(`Chat request failed: ${reason}`);
+    }
+}
+// chatStream() has a parameter named `request`, which hides the module-level
+// request() helper inside its body. The helper is reached through this alias.
+const sendRawRequest = request;
+/**
+ * Issue a chat-completions request with `stream: true`.
+ *
+ * Server-sent events are not parsed yet: the response body is drained and
+ * discarded, `onChunk` is never invoked, and the returned `choices` is always
+ * empty. Only the response headers and the latency carry information.
+ *
+ * @param config Provider endpoint, optional API key and default model.
+ * @param request Payload to send; its `stream` flag is forced to `true`.
+ * @param onChunk Reserved for streamed content; currently not called.
+ * @returns A placeholder response holding headers and latency.
+ * @throws Whatever the underlying HTTP request rejects with.
+ */
+export async function chatStream(
+    config: ProviderConfig,
+    request: ChatRequest,
+    onChunk: (content: string) => void
+): Promise<ChatResponse> {
+    const startTime = performance.now();
+    // Strip trailing slashes so the final path never contains `//`.
+    const url = `${config.baseUrl.replace(/\/+$/, '')}/chat/completions`;
+    const headers: Record<string, string> = {
+        'Content-Type': 'application/json',
+        'Accept': 'text/event-stream',
+    };
+    if (config.apiKey) {
+        headers['Authorization'] = `Bearer ${config.apiKey}`;
+    }
+    const { response } = await sendRawRequest(url, {
+        method: 'POST',
+        headers,
+        body: JSON.stringify({ ...request, stream: true }),
+    });
+    const latencyMs = performance.now() - startTime;
+    return {
+        id: '',
+        model: config.model,
+        choices: [],
+        headers: response.headers,
+        latency_ms: latencyMs,
+    };
+}
+/**
+ * Minimal HTTP(S) client used by chatStream().
+ *
+ * The transport and the default port follow the URL scheme, so plain `http:`
+ * endpoints work as well as `https:` ones. The response body is drained but
+ * not returned; `data` is always `undefined`.
+ *
+ * @param urlString Absolute request URL.
+ * @param options Method (default `GET`), headers and body to send.
+ * @returns The response headers, with multi-valued headers joined by `, `.
+ */
+function request(
+    urlString: string,
+    options: {
+        method?: string;
+        headers?: Record<string, string>;
+        body?: string;
+    }
+): Promise<{ response: { headers: Record<string, string> }; data?: unknown }> {
+    return new Promise((resolve, reject) => {
+        const url = new URL(urlString);
+        const secure = url.protocol === 'https:';
+        const requestOptions: http.RequestOptions = {
+            hostname: url.hostname,
+            port: url.port || (secure ? 443 : 80),
+            path: url.pathname + url.search,
+            method: options.method || 'GET',
+            headers: options.headers || {},
+        };
+        const onResponse = (res: http.IncomingMessage): void => {
+            res.on('error', reject);
+            res.on('end', () => {
+                const headers: Record<string, string> = {};
+                for (const [key, value] of Object.entries(res.headers)) {
+                    if (typeof value === 'string') {
+                        headers[key] = value;
+                    } else if (Array.isArray(value)) {
+                        headers[key] = value.join(', ');
+                    }
+                }
+                resolve({
+                    response: { headers },
+                    data: undefined,
+                });
+            });
+            // Nothing consumes the body, but it must be drained for 'end' to fire.
+            res.resume();
+        };
+        const req = secure
+            ? https.request(requestOptions, onResponse)
+            : http.request(requestOptions, onResponse);
+        req.on('error', reject);
+        req.write(options.body || '');
+        req.end();
+    });
+}

package/src/commands/bench.ts ADDED Viewed

@@ -0,0 +1,99 @@
+import { ProviderConfig, chat } from '../client';
+import { Timer, mean, median, p95 } from '../utils/timer';
+export interface BenchStats { // Summary of one measured quantity across benchmark rounds.
+    mean: number; // Result of mean() over the per-round values.
+    median: number; // Result of median() over the per-round values.
+    p95: number; // Result of p95() over the per-round values.
+}
+export interface BenchResult { // Aggregate outcome returned by bench().
+    rounds: number; // Round count reported by bench(); 5 when none is given.
+    stats: {
+        ttft_ms: BenchStats; // Time-to-first-token figures, rounded to whole milliseconds.
+        total_ms: BenchStats; // Whole-request durations, rounded to whole milliseconds.
+        tokens_per_second: BenchStats; // Per-round throughput, reported to two decimals; rounds without tokens count as 0.
+        output_tokens: { mean: number; total: number }; // Rounded mean and sum of the per-round token counts.
+    };
+}
+/**
+ * Time one fixed-prompt chat request.
+ *
+ * chat() resolves only after the complete reply has arrived, so the
+ * first-token time reported here is effectively the total time. A failed
+ * request still yields its elapsed time, with zero tokens.
+ *
+ * @param config Provider to benchmark.
+ * @returns Rounded timings in milliseconds and the number of output tokens.
+ */
+async function runSingleBench(config: ProviderConfig): Promise<{
+    ttft_ms: number;
+    total_ms: number;
+    tokens: number;
+}> {
+    const timer = new Timer();
+    timer.start();
+    try {
+        const response = await chat(config, {
+            model: config.model,
+            messages: [{ role: 'user', content: 'Write a haiku about coding.' }],
+            max_tokens: 100,
+        });
+        const firstTokenTime = timer.elapsed();
+        const reply = response.choices[0]?.message?.content;
+        // Fallback estimate when the provider reports no usage: whitespace-separated
+        // words. Empty fragments are dropped so a blank reply counts as zero.
+        const wordCount = typeof reply === 'string'
+            ? reply.split(/\s+/).filter(Boolean).length
+            : 0;
+        return {
+            ttft_ms: Math.round(firstTokenTime),
+            total_ms: Math.round(timer.elapsed()),
+            tokens: response.usage?.completion_tokens || wordCount,
+        };
+    } catch {
+        const elapsedMs = Math.round(timer.elapsed());
+        return {
+            ttft_ms: elapsedMs,
+            total_ms: elapsedMs,
+            tokens: 0,
+        };
+    }
+}
+/**
+ * Run the single-request benchmark several times and summarise the results.
+ *
+ * Rounds run one after another so they do not compete with each other.
+ *
+ * @param config Provider to benchmark.
+ * @param options `rounds` is the number of requests to issue. Values that are
+ *        missing, non-finite or below 1 fall back to 5; fractions are floored.
+ * @returns Mean, median and p95 for latency and throughput, plus token totals.
+ */
+export async function bench(
+    config: ProviderConfig,
+    options?: { rounds?: number }
+): Promise<BenchResult> {
+    const requested = options?.rounds;
+    const rounds = typeof requested === 'number' && Number.isFinite(requested) && requested >= 1
+        ? Math.floor(requested)
+        : 5;
+    const results: Array<{ ttft_ms: number; total_ms: number; tokens: number }> = [];
+    for (let i = 0; i < rounds; i++) {
+        results.push(await runSingleBench(config));
+    }
+    const ttftValues = results.map(r => r.ttft_ms);
+    const totalValues = results.map(r => r.total_ms);
+    // A duration that rounds to 0 ms would make the rate infinite; count it as 0.
+    const tpsValues = results.map(r =>
+        r.tokens > 0 && r.total_ms > 0 ? r.tokens / (r.total_ms / 1000) : 0);
+    const tokenValues = results.map(r => r.tokens);
+    const wholeMs = (values: number[]): BenchStats => ({
+        mean: Math.round(mean(values)),
+        median: Math.round(median(values)),
+        p95: Math.round(p95(values)),
+    });
+    return {
+        rounds,
+        stats: {
+            ttft_ms: wholeMs(ttftValues),
+            total_ms: wholeMs(totalValues),
+            tokens_per_second: {
+                mean: parseFloat(mean(tpsValues).toFixed(2)),
+                median: parseFloat(median(tpsValues).toFixed(2)),
+                p95: parseFloat(p95(tpsValues).toFixed(2)),
+            },
+            output_tokens: {
+                mean: Math.round(mean(tokenValues)),
+                total: tokenValues.reduce((a, b) => a + b, 0),
+            },
+        },
+    };
+}

package/src/commands/compare.ts ADDED Viewed

@@ -0,0 +1,76 @@
+import { ProviderConfig, loadConfig, chat } from '../client';
+import { bench } from './bench';
+export interface CompareResultItem { // One provider's row in a comparison.
+    provider: string; // Provider name as passed to compare().
+    model: string; // Model from the provider's loaded config; '' when the run failed.
+    ttft_ms: number; // Estimate (30% of the total time), not a measurement; 0 when the run failed.
+    total_ms: number; // Rounded duration of the request in milliseconds; 0 when the run failed.
+    tokens: number; // Output token count, or a word-count estimate when usage is absent; 0 when the run failed.
+    error?: string; // Message of the error that made the run fail.
+}
+export interface CompareResult { // Outcome returned by compare().
+    prompt: string; // Prompt that was sent to every provider.
+    results: CompareResultItem[]; // One row per requested provider, in the order given.
+}
+/**
+ * Send one prompt to a named provider and time the reply.
+ *
+ * Never rejects: any failure is reported through the `error` field of the
+ * returned row, with the timings and the token count set to zero.
+ *
+ * @param providerName Key used to load the provider's configuration.
+ * @param prompt User message to send.
+ * @returns The provider's result row.
+ */
+async function runProviderBench(
+    providerName: string,
+    prompt: string
+): Promise<CompareResultItem> {
+    try {
+        const config = loadConfig(providerName);
+        const startTime = performance.now();
+        const response = await chat(config, {
+            model: config.model,
+            messages: [{ role: 'user', content: prompt }],
+            max_tokens: 50,
+        });
+        const totalMs = performance.now() - startTime;
+        // The request is not streamed, so first-token latency cannot be observed;
+        // it is approximated as a fixed 30% share of the total time.
+        const ttftMs = Math.round(totalMs * 0.3);
+        const reply = response.choices[0]?.message?.content;
+        // Fallback estimate when usage is absent: whitespace-separated words, with
+        // empty fragments dropped so a blank reply counts as zero.
+        const wordCount = typeof reply === 'string'
+            ? reply.split(/\s+/).filter(Boolean).length
+            : 0;
+        return {
+            provider: providerName,
+            model: config.model,
+            ttft_ms: ttftMs,
+            total_ms: Math.round(totalMs),
+            tokens: response.usage?.completion_tokens || wordCount,
+        };
+    } catch (error) {
+        return {
+            provider: providerName,
+            model: '',
+            ttft_ms: 0,
+            total_ms: 0,
+            tokens: 0,
+            error: error instanceof Error ? error.message : String(error),
+        };
+    }
+}
+/**
+ * Send the same prompt to several providers at once and collect one result
+ * row per provider.
+ *
+ * @param providers Provider names to query.
+ * @param options `prompt` overrides the default haiku prompt. `rounds` is
+ *        accepted but not read by this function.
+ * @returns The prompt used and the rows, in the order of `providers`.
+ */
+export async function compare(
+    providers: string[],
+    options?: { prompt?: string; rounds?: number }
+): Promise<CompareResult> {
+    const prompt = options?.prompt || 'Write a haiku about coding.';
+    // Start every provider's request before awaiting any of them.
+    const pending: Array<Promise<CompareResultItem>> = [];
+    for (const name of providers) {
+        pending.push(runProviderBench(name, prompt));
+    }
+    const results: CompareResultItem[] = [...(await Promise.all(pending))];
+    return { prompt, results };
+}

package/src/commands/id.ts ADDED Viewed

@@ -0,0 +1,139 @@
+import { ProviderConfig, chat } from '../client';
+export interface IdResult { // Identity report produced by identify().
+    self_reported: { // Answers parsed from the model's own replies; null when not obtained.
+        model: string | null;
+        cutoff: string | null; // `YYYY-MM` text extracted from the reply.
+        context_window: number | null; // Token count extracted from the reply.
+    };
+    api_reported: { // Facts taken from the API response rather than from the model's text.
+        model: string | null; // `model` field of the first response.
+        organization: string | null; // Value of the `openai-organization` response header.
+    };
+    fingerprint: {
+        provider: string; // Provider guessed from the base URL; 'unknown' when nothing matches.
+        confidence: number; // Score between 0 and 1 derived from how many fields were obtained.
+    };
+}
+/**
+ * Take the first line of a reply as the model identifier.
+ *
+ * @param content Raw reply text.
+ * @returns The trimmed first line when it has between 1 and 199 characters,
+ *          otherwise `null`.
+ */
+function extractModelName(content: string): string | null {
+    const [head = ''] = content.trim().split('\n');
+    const candidate = head.trim();
+    const plausible = candidate.length >= 1 && candidate.length <= 199;
+    return plausible ? candidate : null;
+}
+/**
+ * Find the first `YYYY-MM` date in a reply.
+ *
+ * Only years 1900-2099 and months 01-12 are accepted, and the match may not
+ * be part of a longer digit run, so identifiers such as `12345-67` are skipped.
+ *
+ * @param content Raw reply text.
+ * @returns The matched `YYYY-MM` string, or `null` when none is present.
+ */
+function extractCutoff(content: string): string | null {
+    const match = content.match(/(?<!\d)(?:19|20)\d{2}-(?:0[1-9]|1[0-2])(?!\d)/);
+    return match ? match[0] : null;
+}
+/**
+ * Find a plausible context-window size in a reply.
+ *
+ * Every number in the text is considered in order. Thousands separators
+ * (`128,000`) and an attached `k` or `m` suffix (`128k`, `1M`) are understood,
+ * with `k` meaning 1,000 and `m` meaning 1,000,000.
+ *
+ * @param content Raw reply text.
+ * @returns The first value between 4,000 and 2,000,000 inclusive, or `null`.
+ */
+function extractContextWindow(content: string): number | null {
+    // Group 1: digits, optionally comma-grouped or with a decimal part.
+    // Group 2: `k` or `m` directly attached and not the start of a longer word.
+    const numberPattern = /(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)([kKmM](?![a-zA-Z]))?/g;
+    for (const match of content.matchAll(numberPattern)) {
+        const base = Number((match[1] ?? '').replace(/,/g, ''));
+        const suffix = match[2]?.toLowerCase();
+        const scale = suffix === 'k' ? 1000 : suffix === 'm' ? 1000000 : 1;
+        const tokens = Math.round(base * scale);
+        // Reasonable context windows are between 4K and 2M.
+        if (tokens >= 4000 && tokens <= 2000000) {
+            return tokens;
+        }
+    }
+    return null;
+}
+/**
+ * Guess the provider from markers in the base URL.
+ *
+ * @param baseUrl Provider base URL; matching is case-insensitive.
+ * @returns One of `azure`, `anthropic`, `google`, `ollama`, `groq`, `openai`,
+ *          or `unknown` when no marker is found.
+ */
+function detectProvider(baseUrl: string): string {
+    const url = baseUrl.toLowerCase();
+    // Order matters. OpenAI-compatible endpoints of other vendors often contain
+    // `openai` in the host or path (`*.openai.azure.com`, `api.groq.com/openai/v1`),
+    // so the vendor-specific markers are tested first and `openai` last.
+    const markers = ['azure', 'anthropic', 'google', 'ollama', 'groq', 'openai'];
+    return markers.find((marker) => url.includes(marker)) ?? 'unknown';
+}
+/** Ask the configured model a single one-turn question. */
+function askOnce(
+    config: ProviderConfig,
+    question: string,
+    maxTokens: number
+): ReturnType<typeof chat> {
+    return chat(config, {
+        model: config.model,
+        messages: [{ role: 'user', content: question }],
+        max_tokens: maxTokens,
+    });
+}
+/**
+ * Build an identity report for the configured model.
+ *
+ * Three questions are asked in sequence (model name, knowledge cutoff,
+ * context window). The first failing request stops the sequence, and whatever
+ * was gathered up to that point is returned. The confidence score always
+ * reflects the fields actually obtained: 0.3 each for the self-reported
+ * model, cutoff and context window, plus 0.1 for the API-reported model.
+ *
+ * @param config Provider to interrogate.
+ * @returns The report; never rejects.
+ */
+export async function identify(config: ProviderConfig): Promise<IdResult> {
+    const result: IdResult = {
+        self_reported: {
+            model: null,
+            cutoff: null,
+            context_window: null,
+        },
+        api_reported: {
+            model: null,
+            organization: null,
+        },
+        fingerprint: {
+            provider: detectProvider(config.baseUrl),
+            confidence: 0,
+        },
+    };
+    try {
+        // 1. Model name, plus what the API reports alongside that reply.
+        const modelResponse = await askOnce(
+            config,
+            'What model are you? Reply with only the model identifier.',
+            50
+        );
+        const modelText = modelResponse.choices[0]?.message?.content;
+        if (modelText) {
+            result.self_reported.model = extractModelName(modelText);
+        }
+        result.api_reported.model = modelResponse.model;
+        const organization = modelResponse.headers['openai-organization'];
+        if (organization) {
+            result.api_reported.organization = organization;
+        }
+        // 2. Knowledge cutoff.
+        const cutoffResponse = await askOnce(
+            config,
+            'What is your knowledge cutoff date? Reply YYYY-MM only.',
+            20
+        );
+        const cutoffText = cutoffResponse.choices[0]?.message?.content;
+        if (cutoffText) {
+            result.self_reported.cutoff = extractCutoff(cutoffText);
+        }
+        // 3. Context window.
+        const contextResponse = await askOnce(
+            config,
+            'What is your maximum context window in tokens? Reply with only the number.',
+            20
+        );
+        const contextText = contextResponse.choices[0]?.message?.content;
+        if (contextText) {
+            result.self_reported.context_window = extractContextWindow(contextText);
+        }
+    } catch {
+        // Keep whatever was collected before the failure.
+    }
+    // Scored in whole tenths so the result is exact (0.9, not 0.8999999999999999).
+    let tenths = 0;
+    if (result.self_reported.model) tenths += 3;
+    if (result.self_reported.cutoff) tenths += 3;
+    if (result.self_reported.context_window) tenths += 3;
+    if (result.api_reported.model) tenths += 1;
+    result.fingerprint.confidence = Math.min(10, tenths) / 10;
+    return result;
+}

package/src/commands/ping.ts ADDED Viewed

@@ -0,0 +1,55 @@
+import { ProviderConfig, chat } from '../client';
+export interface PingResult { // Outcome returned by ping().
+    reachable: boolean; // True when the chat request completed without throwing.
+    latency_ms: number; // Rounded elapsed milliseconds, reported for failures too.
+    model: string | null; // Model named in the response; null on failure.
+    rate_limit: { // Read from rate-limit response headers; null when absent or on failure.
+        remaining: number | null;
+        reset_at: string | null; // Header value as received, not parsed.
+    };
+    error?: string; // Message of the error that made the request fail.
+}
+/**
+ * Check that the provider answers a minimal chat request.
+ *
+ * Rate-limit details are read from the `x-ratelimit-remaining` and
+ * `x-ratelimit-reset` response headers, falling back to the `-requests`
+ * suffixed variants that OpenAI sends. A header that is absent or not numeric
+ * yields `null`.
+ *
+ * @param config Provider to contact.
+ * @returns Reachability, latency and rate-limit details; never rejects.
+ */
+export async function ping(config: ProviderConfig): Promise<PingResult> {
+    const startTime = performance.now();
+    try {
+        const response = await chat(config, {
+            model: config.model,
+            messages: [{ role: 'user', content: 'hi' }],
+            max_tokens: 1,
+        });
+        const latencyMs = performance.now() - startTime;
+        const headers = response.headers;
+        const remainingRaw = headers['x-ratelimit-remaining']
+            || headers['x-ratelimit-remaining-requests'];
+        const parsedRemaining = remainingRaw ? parseInt(remainingRaw, 10) : NaN;
+        const resetAt = headers['x-ratelimit-reset']
+            || headers['x-ratelimit-reset-requests']
+            || null;
+        return {
+            reachable: true,
+            latency_ms: Math.round(latencyMs),
+            model: response.model || null,
+            rate_limit: {
+                // A non-numeric header must not surface as NaN.
+                remaining: Number.isNaN(parsedRemaining) ? null : parsedRemaining,
+                reset_at: resetAt,
+            },
+        };
+    } catch (error) {
+        const latencyMs = performance.now() - startTime;
+        return {
+            reachable: false,
+            latency_ms: Math.round(latencyMs),
+            model: null,
+            rate_limit: {
+                remaining: null,
+                reset_at: null,
+            },
+            error: error instanceof Error ? error.message : String(error),
+        };
+    }
+}

package/src/commands/probe.ts ADDED Viewed

@@ -0,0 +1,136 @@
+import { ProviderConfig, chat } from '../client';
+import { Timer } from '../utils/timer';
+export interface ProbeResult { // Outcome returned by probe().
+    capabilities: Record<string, boolean>; // Capability name mapped to the verdict of its probe request.
+    probe_duration_ms: number; // Rounded elapsed milliseconds for the whole probe run.
+}
+/** Run one capability check; a thrown error means the capability is absent. */
+async function attemptProbe(check: () => Promise<boolean>): Promise<boolean> {
+    try {
+        return await check();
+    } catch {
+        return false;
+    }
+}
+/**
+ * Probe which optional chat features the provider accepts.
+ *
+ * Six requests are sent one after another. A request that throws marks its
+ * capability as unsupported and the remaining probes still run.
+ *
+ * @param config Provider to probe.
+ * @returns The capability map and the total time taken.
+ */
+export async function probe(config: ProviderConfig): Promise<ProbeResult> {
+    const timer = new Timer();
+    timer.start();
+    // Every probe targets the configured model; only the other fields vary.
+    const ask = (fields: Omit<Parameters<typeof chat>[1], 'model'>) =>
+        chat(config, { model: config.model, ...fields });
+    const capabilities: Record<string, boolean> = {
+        json_mode: false,
+        function_calling: false,
+        vision: false,
+        streaming: false,
+        system_prompt: false,
+        temperature_control: false,
+    };
+    // JSON mode: supported when the request is accepted. OpenAI-style APIs reject
+    // `json_object` unless the conversation mentions JSON, so the prompt does.
+    capabilities.json_mode = await attemptProbe(async () => {
+        await ask({
+            messages: [{ role: 'user', content: 'Say hello as a JSON object.' }],
+            max_tokens: 10,
+            response_format: { type: 'json_object' },
+        });
+        return true;
+    });
+    // Function calling: supported when the reply message carries `tool_calls`.
+    capabilities.function_calling = await attemptProbe(async () => {
+        const response = await ask({
+            messages: [{ role: 'user', content: 'What is 2+2?' }],
+            max_tokens: 50,
+            tools: [{
+                type: 'function',
+                function: {
+                    name: 'add',
+                    description: 'Add two numbers',
+                    parameters: {
+                        type: 'object',
+                        properties: {
+                            a: { type: 'number' },
+                            b: { type: 'number' }
+                        },
+                        required: ['a', 'b']
+                    }
+                }
+            }],
+        });
+        const message = response.choices[0]?.message;
+        return !!message && 'tool_calls' in message;
+    });
+    // Vision: supported when a 1x1 PNG plus a question yields non-empty content.
+    capabilities.vision = await attemptProbe(async () => {
+        const tinyImage = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
+        const response = await ask({
+            messages: [{
+                role: 'user',
+                content: [
+                    { type: 'text', text: 'What color is this image?' },
+                    { type: 'image_url', image_url: { url: tinyImage } }
+                ]
+            }],
+            max_tokens: 20,
+        });
+        return response.choices.length > 0 && !!response.choices[0]?.message?.content;
+    });
+    // Streaming: the event stream is not parsed here; a request with
+    // `stream: true` that does not fail is taken as likely support.
+    capabilities.streaming = await attemptProbe(async () => {
+        await ask({
+            messages: [{ role: 'user', content: 'Hi' }],
+            max_tokens: 5,
+            stream: true,
+        });
+        return true;
+    });
+    // System prompt: supported when a conversation with a system message gets a choice.
+    capabilities.system_prompt = await attemptProbe(async () => {
+        const response = await ask({
+            messages: [
+                { role: 'system', content: 'You are a helpful assistant.' },
+                { role: 'user', content: 'Hi' }
+            ],
+            max_tokens: 10,
+        });
+        return response.choices.length > 0;
+    });
+    // Temperature: supported when `temperature: 0` is accepted and a choice returns.
+    capabilities.temperature_control = await attemptProbe(async () => {
+        const response = await ask({
+            messages: [{ role: 'user', content: 'Say one word' }],
+            max_tokens: 5,
+            temperature: 0,
+        });
+        return response.choices.length > 0;
+    });
+    timer.stop();
+    return {
+        capabilities,
+        probe_duration_ms: Math.round(timer.elapsed()),
+    };
+}