ai-xray 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/client.ts ADDED
@@ -0,0 +1,203 @@
1
+ import * as fs from 'fs';
2
+ import * as path from 'path';
3
+ import * as os from 'os';
4
+ import { requestJson } from './utils/http';
5
+
6
+ export interface ProviderConfig { // Connection settings for one chat-completions endpoint.
7
+ baseUrl: string; // API root; chat() appends "/chat/completions" to it.
8
+ apiKey?: string; // When set, sent as "Authorization: Bearer <apiKey>"; no auth header otherwise.
9
+ model: string; // Model identifier; chat() also uses it when the response carries no model.
10
+ }
11
+
12
+ interface ConfigFile { // Shape assumed for the JSON parsed by loadConfigFile() (not validated against it).
13
+ providers?: Record<string, ProviderConfig>; // Named provider entries; loadConfig(providerName) looks up by key.
14
+ }
15
+
16
/**
 * Resolve the provider settings to use for requests.
 *
 * Environment variables (`AI_XRAY_BASE_URL`, `AI_XRAY_API_KEY`,
 * `AI_XRAY_MODEL`) form the baseline, with built-in defaults for the base URL
 * and model. When `providerName` names an entry in the config file, that
 * entry's non-empty fields take precedence and any empty or missing field
 * falls back to the environment value.
 *
 * @param providerName - Optional key into the config file's `providers` map.
 * @returns The merged configuration. An unknown provider name silently yields
 *          the environment-based configuration.
 */
export function loadConfig(providerName?: string): ProviderConfig {
  const fromEnv: ProviderConfig = {
    baseUrl: process.env.AI_XRAY_BASE_URL || 'https://api.openai.com/v1',
    apiKey: process.env.AI_XRAY_API_KEY,
    model: process.env.AI_XRAY_MODEL || 'gpt-4o',
  };

  // No provider requested: environment settings are all there is.
  if (!providerName) {
    return fromEnv;
  }

  // Provider requested but absent from the config file: same fallback.
  const entry = loadConfigFile()?.providers?.[providerName];
  if (!entry) {
    return fromEnv;
  }

  return {
    baseUrl: entry.baseUrl || fromEnv.baseUrl,
    apiKey: entry.apiKey || fromEnv.apiKey,
    model: entry.model || fromEnv.model,
  };
}
42
+
43
/**
 * Read and parse the per-user config file at `~/.ai-xray.json`.
 *
 * Loading is best-effort: a missing file, an unreadable file, or invalid JSON
 * all result in `null` rather than an error.
 *
 * @returns The parsed JSON object, or `null` when the file cannot be used or
 *          its top-level value is not a plain JSON object.
 */
function loadConfigFile(): ConfigFile | null {
  const configPath = path.join(os.homedir(), '.ai-xray.json');

  let parsed: unknown;
  try {
    // Read directly instead of checking existence first: a missing file throws
    // and is handled below, and there is no window between check and read.
    parsed = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
  } catch {
    return null;
  }

  // JSON such as `[]`, `"text"` or `null` parses fine but is not a config.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  return parsed as ConfigFile;
}
55
+
56
+ export interface ChatMessage { // One entry of the `messages` array sent in a chat request.
57
+ role: 'system' | 'user' | 'assistant'; // Who the message is attributed to.
58
+ content: string | Array<{ type: string; [key: string]: unknown }>; // Plain text, or typed parts (probe() sends 'text' and 'image_url' parts).
59
+ }
60
+
61
+ export interface ChatRequest { // Request body; chat() serializes it as-is with JSON.stringify.
62
+ model: string; // Model identifier to run the request against.
63
+ messages: ChatMessage[]; // Conversation to send.
64
+ max_tokens?: number; // Optional cap on generated tokens.
65
+ temperature?: number; // Optional sampling temperature.
66
+ stream?: boolean; // Optional streaming flag; chatStream() always sends true.
67
+ response_format?: { type: string }; // Optional output format; probe() sends { type: 'json_object' }.
68
+ tools?: unknown[]; // Optional tool definitions; passed through without validation.
69
+ }
70
+
71
+ export interface ChatResponse { // Result returned by chat() and chatStream().
72
+ id: string; // Response id; chat() substitutes '' when the body has none.
73
+ model: string; // Model named in the response body; chat() falls back to config.model.
74
+ choices: Array<{ // Completion choices; chat() substitutes [] when the body has none.
75
+ message: { role: string; content: string }; // Generated message.
76
+ finish_reason: string; // Why generation stopped, as reported by the API.
77
+ }>;
78
+ usage?: { // Token accounting, copied from the response body when present.
79
+ prompt_tokens: number;
80
+ completion_tokens: number;
81
+ total_tokens: number;
82
+ };
83
+ headers: Record<string, string>; // HTTP response headers.
84
+ latency_ms: number; // performance.now() delta measured around the request, in milliseconds.
85
+ }
86
+
87
/**
 * Send a single (non-streaming) chat-completions request.
 *
 * @param config - Endpoint settings; `apiKey`, when present, is sent as a
 *                 Bearer token.
 * @param request - Request body, serialized verbatim as JSON.
 * @returns The parsed response plus HTTP headers and the measured latency.
 *          Missing `id`, `model` and `choices` fields are defaulted to `''`,
 *          `config.model` and `[]` respectively.
 * @throws Error with a `Chat request failed: ...` message when the request
 *         fails or the response body is not a JSON object.
 */
export async function chat(
  config: ProviderConfig,
  request: ChatRequest
): Promise<ChatResponse> {
  const startTime = performance.now();

  // Tolerate a trailing slash on baseUrl so the path never becomes
  // "//chat/completions".
  const url = `${config.baseUrl.replace(/\/+$/, '')}/chat/completions`;
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
  };

  if (config.apiKey) {
    headers['Authorization'] = `Bearer ${config.apiKey}`;
  }

  try {
    // NOTE(review): whether requestJson rejects on non-2xx statuses is not
    // visible from here — confirm, since callers treat "no error" as success.
    const { response, data } = await requestJson(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(request),
    });

    const latencyMs = performance.now() - startTime;

    if (typeof data !== 'object' || data === null) {
      throw new Error('response body is not a JSON object');
    }
    const payload = data as Partial<ChatResponse>;

    return {
      id: payload.id || '',
      model: payload.model || config.model,
      choices: payload.choices || [],
      usage: payload.usage,
      headers: response.headers,
      latency_ms: latencyMs,
    };
  } catch (error) {
    // Anything can be thrown, so only read `.message` from real Error objects.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Chat request failed: ${reason}`);
  }
}
125
+
126
/**
 * Send a chat-completions request with `stream: true` and deliver the
 * generated text incrementally.
 *
 * The response is read as server-sent events: every `data:` line is parsed as
 * JSON and the text found in `choices[0].delta.content` is passed to
 * `onChunk` and accumulated. `data: [DONE]` and non-JSON lines are ignored.
 *
 * @param config - Endpoint settings; `apiKey`, when present, is sent as a
 *                 Bearer token.
 * @param request - Request body; `stream` is forced to `true`.
 * @param onChunk - Called once per non-empty text fragment, in arrival order.
 * @returns The assembled response. `choices` holds one entry with the
 *          concatenated text when the stream carried any choice data, and is
 *          empty otherwise.
 * @throws Error when the connection fails, `onChunk` throws, or the server
 *         answers with an HTTP status of 400 or above.
 */
export async function chatStream(
  config: ProviderConfig,
  request: ChatRequest,
  onChunk: (content: string) => void
): Promise<ChatResponse> {
  type StreamEvent = {
    id?: string;
    model?: string;
    usage?: ChatResponse['usage'];
    choices?: Array<{
      delta?: { role?: string; content?: string | null };
      finish_reason?: string | null;
    }>;
  };

  const startTime = performance.now();

  // Tolerate a trailing slash on baseUrl.
  const url = `${config.baseUrl.replace(/\/+$/, '')}/chat/completions`;
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    'Accept': 'text/event-stream',
  };

  if (config.apiKey) {
    headers['Authorization'] = `Bearer ${config.apiKey}`;
  }

  let id = '';
  let model = '';
  let role = 'assistant';
  let content = '';
  let finishReason = '';
  let sawChoice = false;
  let usage: ChatResponse['usage'];

  const handleLine = (line: string): void => {
    if (!line.startsWith('data:')) return; // blank separators, comments, other fields
    const payload = line.slice('data:'.length).trim();
    if (payload === '' || payload === '[DONE]') return;

    let parsed: unknown;
    try {
      parsed = JSON.parse(payload);
    } catch {
      return; // skip malformed events rather than abort the whole stream
    }
    if (typeof parsed !== 'object' || parsed === null) return;

    const event = parsed as StreamEvent;
    if (event.id) id = event.id;
    if (event.model) model = event.model;
    if (event.usage) usage = event.usage;

    const choice = event.choices?.[0];
    if (!choice) return;
    sawChoice = true;
    if (choice.delta?.role) role = choice.delta.role;
    if (choice.finish_reason) finishReason = choice.finish_reason;

    const piece = choice.delta?.content;
    if (typeof piece === 'string' && piece.length > 0) {
      content += piece;
      onChunk(piece);
    }
  };

  // Network chunks do not align with line boundaries, so keep the unfinished
  // tail until the rest of the line arrives.
  let pending = '';
  const handleText = (text: string): void => {
    pending += text;
    const lines = pending.split(/\r?\n/);
    pending = lines.pop() ?? '';
    for (const line of lines) handleLine(line);
  };

  // The helper cannot be named `request`: the `request` parameter above would
  // shadow it and the call would target the request body object instead.
  const { response, data } = await sendStreamingRequest(
    url,
    {
      method: 'POST',
      headers,
      body: JSON.stringify({ ...request, stream: true }),
    },
    handleText
  );

  if (response.status >= 400) {
    const detail = typeof data === 'string' ? data.slice(0, 200) : '';
    throw new Error(`Chat stream request failed: HTTP ${response.status} ${detail}`.trim());
  }

  if (pending !== '') handleLine(pending); // final line without trailing newline

  const latencyMs = performance.now() - startTime;

  return {
    id,
    model: model || config.model,
    choices: sawChoice
      ? [{ message: { role, content }, finish_reason: finishReason }]
      : [],
    usage,
    headers: response.headers,
    latency_ms: latencyMs,
  };
}

/**
 * Perform one HTTP(S) request and collect the response.
 *
 * @param urlString - Absolute URL; `http:` URLs use the `http` module, all
 *                    others use `https`.
 * @param options - Method (default `GET`), headers and optional body.
 * @param onData - Optional callback invoked with each decoded chunk of the
 *                 response body as it arrives.
 * @returns Flattened response headers, the HTTP status code, and the whole
 *          response body as a string in `data`.
 */
function sendStreamingRequest(
  urlString: string,
  options: {
    method?: string;
    headers?: Record<string, string>;
    body?: string;
  },
  onData?: (text: string) => void
): Promise<{ response: { headers: Record<string, string>; status: number }; data?: unknown }> {
  return new Promise((resolve, reject) => {
    const url = new URL(urlString);
    const plainHttp = url.protocol === 'http:';
    // Loaded lazily, as before; local endpoints are commonly plain http.
    const transport = require(plainHttp ? 'http' : 'https');

    const req = transport.request({
      hostname: url.hostname,
      port: url.port || (plainHttp ? 80 : 443),
      path: url.pathname + url.search,
      method: options.method || 'GET',
      headers: options.headers || {},
    }, (res: any) => {
      // Decode as UTF-8 at the stream level so multi-byte characters split
      // across chunks are not corrupted.
      res.setEncoding('utf8');
      let body = '';
      res.on('data', (chunk: string) => {
        body += chunk;
        if (!onData) return;
        try {
          onData(chunk);
        } catch (err) {
          // Surface callback failures through the promise instead of an
          // uncaught exception inside the event handler.
          req.destroy();
          reject(err);
        }
      });
      res.on('error', reject);
      res.on('end', () => {
        const headers: Record<string, string> = {};
        for (const [key, value] of Object.entries(res.headers)) {
          if (typeof value === 'string') {
            headers[key] = value;
          } else if (Array.isArray(value)) {
            headers[key] = value.join(', ');
          }
        }
        resolve({
          response: { headers, status: typeof res.statusCode === 'number' ? res.statusCode : 0 },
          data: body,
        });
      });
    });

    req.on('error', reject);
    req.end(options.body || '');
  });
}
@@ -0,0 +1,99 @@
1
+ import { ProviderConfig, chat } from '../client';
2
+ import { Timer, mean, median, p95 } from '../utils/timer';
3
+
4
+ export interface BenchStats { // Summary of one metric across all benchmark rounds.
5
+ mean: number; // Result of mean() over the per-round values.
6
+ median: number; // Result of median() over the per-round values.
7
+ p95: number; // Result of p95() over the per-round values.
8
+ }
9
+
10
+ export interface BenchResult { // Value returned by bench().
11
+ rounds: number; // Round count reported by bench().
12
+ stats: {
13
+ ttft_ms: BenchStats; // Per-round ttft_ms values, rounded to whole milliseconds.
14
+ total_ms: BenchStats; // Per-round total request time, rounded to whole milliseconds.
15
+ tokens_per_second: BenchStats; // tokens / (total_ms / 1000) per round; 0 for rounds with no tokens.
16
+ output_tokens: { mean: number; total: number }; // Rounded mean and sum of per-round token counts.
17
+ };
18
+ }
19
+
20
/**
 * Run one benchmark round: a single fixed-prompt chat request.
 *
 * The request is not streamed, so the first token cannot be timed separately;
 * `ttft_ms` and `total_ms` both report the full request duration.
 *
 * @param config - Endpoint settings for the request.
 * @returns Rounded timings in milliseconds and the output token count. The
 *          count comes from `usage.completion_tokens` when it is positive and
 *          is otherwise estimated as the number of whitespace-separated words
 *          in the reply. A failed request yields `tokens: 0` with the time
 *          elapsed until the failure.
 */
async function runSingleBench(config: ProviderConfig): Promise<{
  ttft_ms: number;
  total_ms: number;
  tokens: number;
}> {
  const timer = new Timer();
  timer.start();

  try {
    const response = await chat(config, {
      model: config.model,
      messages: [{ role: 'user', content: 'Write a haiku about coding.' }],
      max_tokens: 100,
    });

    const elapsedMs = Math.round(timer.elapsed());

    const reported = response.usage?.completion_tokens;
    let tokens: number;
    if (typeof reported === 'number' && reported > 0) {
      tokens = reported;
    } else {
      // Drop empty fragments so an empty reply counts as 0 words, not 1, and
      // leading/trailing whitespace does not inflate the estimate.
      const text = response.choices[0]?.message?.content ?? '';
      tokens = text.split(/\s+/).filter((word) => word.length > 0).length;
    }

    return { ttft_ms: elapsedMs, total_ms: elapsedMs, tokens };
  } catch {
    // Best-effort: a failed round is reported with zero tokens, not thrown.
    const elapsedMs = Math.round(timer.elapsed());
    return { ttft_ms: elapsedMs, total_ms: elapsedMs, tokens: 0 };
  }
}
55
+
56
/**
 * Benchmark a provider by running several sequential rounds and summarizing
 * the timings and token throughput.
 *
 * @param config - Endpoint settings used for every round.
 * @param options - `rounds`: number of rounds to run. Values that are not a
 *                  finite number of at least 1 fall back to 5; fractional
 *                  values are rounded down.
 * @returns Mean, median and p95 for each metric. Failed rounds are included
 *          with zero tokens and zero throughput.
 */
export async function bench(
  config: ProviderConfig,
  options?: { rounds?: number }
): Promise<BenchResult> {
  const requested = options?.rounds;
  // Guard against negative, fractional or NaN round counts, which would
  // otherwise run zero or extra rounds and report a misleading `rounds` value.
  const rounds =
    typeof requested === 'number' && Number.isFinite(requested) && requested >= 1
      ? Math.floor(requested)
      : 5;

  const results: Array<{ ttft_ms: number; total_ms: number; tokens: number }> = [];

  // Rounds run one after another so they do not compete with each other.
  for (let i = 0; i < rounds; i++) {
    results.push(await runSingleBench(config));
  }

  const ttftValues = results.map(r => r.ttft_ms);
  const totalValues = results.map(r => r.total_ms);
  // total_ms is rounded, so it can be 0; avoid dividing by zero (Infinity).
  const tpsValues = results.map(r =>
    r.tokens > 0 && r.total_ms > 0 ? r.tokens / (r.total_ms / 1000) : 0
  );
  const tokenValues = results.map(r => r.tokens);

  const totalTokens = tokenValues.reduce((a, b) => a + b, 0);

  const summarizeMs = (values: number[]): BenchStats => ({
    mean: Math.round(mean(values)),
    median: Math.round(median(values)),
    p95: Math.round(p95(values)),
  });

  return {
    rounds,
    stats: {
      ttft_ms: summarizeMs(ttftValues),
      total_ms: summarizeMs(totalValues),
      tokens_per_second: {
        mean: parseFloat(mean(tpsValues).toFixed(2)),
        median: parseFloat(median(tpsValues).toFixed(2)),
        p95: parseFloat(p95(tpsValues).toFixed(2)),
      },
      output_tokens: {
        mean: Math.round(mean(tokenValues)),
        total: totalTokens,
      },
    },
  };
}
@@ -0,0 +1,76 @@
1
+ import { ProviderConfig, loadConfig, chat } from '../client';
2
+ import { bench } from './bench';
3
+
4
+ export interface CompareResultItem { // Outcome of one provider's request in compare().
5
+ provider: string; // Provider name as passed to compare().
6
+ model: string; // Model from the resolved config; '' when the run failed.
7
+ ttft_ms: number; // Estimated as 30% of total_ms (not measured); 0 on failure.
8
+ total_ms: number; // Rounded request duration in milliseconds; 0 on failure.
9
+ tokens: number; // Output token count (reported or word-count estimate); 0 on failure.
10
+ error?: string; // Failure message; present only when the run failed.
11
+ }
12
+
13
+ export interface CompareResult { // Value returned by compare().
14
+ prompt: string; // Prompt that was sent to every provider.
15
+ results: CompareResultItem[]; // One item per requested provider, in the order given.
16
+ }
17
+
18
/**
 * Send one prompt to a named provider and report timing and output size.
 *
 * @param providerName - Provider key resolved through `loadConfig`.
 * @param prompt - User message to send.
 * @returns Timing and token figures for the request. `ttft_ms` is a rough
 *          estimate (30% of the total time), not a measurement, because the
 *          request is not streamed. On failure the numeric fields are 0,
 *          `model` is `''` and `error` carries the failure message; this
 *          function does not throw.
 */
async function runProviderBench(
  providerName: string,
  prompt: string
): Promise<CompareResultItem> {
  try {
    const config = loadConfig(providerName);

    const startTime = performance.now();
    const response = await chat(config, {
      model: config.model,
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 50,
    });
    const totalMs = performance.now() - startTime;

    const reported = response.usage?.completion_tokens;
    let tokens: number;
    if (typeof reported === 'number' && reported > 0) {
      tokens = reported;
    } else {
      // Drop empty fragments so an empty reply counts as 0 words, not 1.
      const text = response.choices[0]?.message?.content ?? '';
      tokens = text.split(/\s+/).filter((word) => word.length > 0).length;
    }

    return {
      provider: providerName,
      model: config.model,
      ttft_ms: Math.round(totalMs * 0.3), // rough estimate, see above
      total_ms: Math.round(totalMs),
      tokens,
    };
  } catch (error) {
    return {
      provider: providerName,
      model: '',
      ttft_ms: 0,
      total_ms: 0,
      tokens: 0,
      // Anything can be thrown, so only read `.message` from real Errors.
      error: error instanceof Error ? error.message : String(error),
    };
  }
}
57
+
58
/**
 * Send the same prompt to several providers concurrently and collect one
 * result per provider, in the order the providers were given.
 *
 * @param providers - Provider names to query.
 * @param options - `prompt`: text to send (an empty or missing prompt uses the
 *                  default haiku prompt). `rounds` is accepted but not used by
 *                  this function.
 * @returns The prompt that was used and the per-provider results.
 */
export async function compare(
  providers: string[],
  options?: { prompt?: string; rounds?: number }
): Promise<CompareResult> {
  const prompt = options?.prompt || 'Write a haiku about coding.';

  // All providers are queried in parallel; Promise.all keeps input order.
  const results: CompareResultItem[] = await Promise.all(
    providers.map((name) => runProviderBench(name, prompt))
  );

  return { prompt, results };
}
@@ -0,0 +1,139 @@
1
+ import { ProviderConfig, chat } from '../client';
2
+
3
+ export interface IdResult { // Value returned by identify().
4
+ self_reported: { // What the model answered about itself; null when no usable answer was obtained.
5
+ model: string | null; // First line of the model's answer to "What model are you?".
6
+ cutoff: string | null; // YYYY-MM pattern found in the model's knowledge-cutoff answer.
7
+ context_window: number | null; // Token count parsed from the model's context-window answer.
8
+ };
9
+ api_reported: { // What the API response itself exposed.
10
+ model: string | null; // `model` field of the first chat response.
11
+ organization: string | null; // Value of the 'openai-organization' response header, when present.
12
+ };
13
+ fingerprint: {
14
+ provider: string; // Provider guessed from substrings of the base URL; 'unknown' if none match.
15
+ confidence: number; // Heuristic score, at most 1, based on which fields above were obtained.
16
+ };
17
+ }
18
+
19
/**
 * Pull a model identifier out of a free-text reply by taking its first line.
 *
 * @param content - Raw reply text.
 * @returns The trimmed first line when it is between 1 and 199 characters
 *          long, otherwise `null`.
 */
function extractModelName(content: string): string | null {
  const trimmed = content.trim();
  const lineEnd = trimmed.indexOf('\n');
  const candidate = (lineEnd === -1 ? trimmed : trimmed.slice(0, lineEnd)).trim();

  if (candidate.length === 0 || candidate.length >= 200) {
    return null;
  }
  return candidate;
}
28
+
29
/**
 * Find a knowledge-cutoff date in `YYYY-MM` form inside a free-text reply.
 *
 * @param content - Raw reply text.
 * @returns The first `YYYY-MM` occurrence whose month is 01-12 and which is
 *          not part of a longer digit run, or `null` when there is none.
 *          A full date such as `2024-06-15` yields `2024-06`.
 */
function extractCutoff(content: string): string | null {
  // Digit guards on both sides stop matches inside longer numbers
  // ("12345-67"); the month alternation rejects values such as "2023-13".
  const match = /(?<!\d)\d{4}-(?:0[1-9]|1[0-2])(?!\d)/.exec(content);
  return match ? match[0] : null;
}
34
+
35
/**
 * Find a plausible context-window size (in tokens) inside a free-text reply.
 *
 * Recognized forms include plain integers (`8192`), comma-grouped integers
 * (`128,000`) and `k` / `M` shorthand (`128k`, `1.5M`), where `k` means 1,000
 * and `M` means 1,000,000.
 *
 * @param content - Raw reply text.
 * @returns The first number in the text that falls between 4,000 and
 *          2,000,000 inclusive, or `null` when no number does.
 */
function extractContextWindow(content: string): number | null {
  const minWindow = 4000;
  const maxWindow = 2000000;

  // Group 1: the number (comma-grouped or plain, optional decimals).
  // Group 2: an optional k/M multiplier, accepted only when it is not the
  // first letter of a longer word (so "8192 max" is not read as 8192M).
  const numberPattern = /(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)(?!\d)(?:\s*([km])(?![a-z]))?/gi;

  let match: RegExpExecArray | null;
  while ((match = numberPattern.exec(content)) !== null) {
    const base = parseFloat(match[1].replace(/,/g, ''));
    const suffix = match[2] ? match[2].toLowerCase() : '';
    const scale = suffix === 'k' ? 1000 : suffix === 'm' ? 1000000 : 1;
    const value = Math.round(base * scale);

    // Skip unrelated numbers (version digits, years, ...) and keep looking.
    if (value >= minWindow && value <= maxWindow) {
      return value;
    }
  }
  return null;
}
47
+
48
/**
 * Guess the provider from recognizable substrings of the base URL.
 *
 * @param baseUrl - Endpoint URL; matching is case-insensitive.
 * @returns One of `azure`, `openai`, `anthropic`, `google`, `ollama`, `groq`,
 *          or `unknown` when nothing matches.
 */
function detectProvider(baseUrl: string): string {
  const url = baseUrl.toLowerCase();

  // Order matters: Azure OpenAI endpoints (*.openai.azure.com) also contain
  // "openai", so "azure" has to be tested before it.
  const knownProviders = ['azure', 'openai', 'anthropic', 'google', 'ollama', 'groq'];

  for (const provider of knownProviders) {
    if (url.includes(provider)) {
      return provider;
    }
  }
  return 'unknown';
}
58
+
59
/**
 * Collect identity hints about the model behind an endpoint.
 *
 * Three questions are asked in sequence (model name, knowledge cutoff,
 * context window) and the answers are parsed into `self_reported`. The first
 * response also supplies `api_reported.model` and, when the
 * `openai-organization` header is present, `api_reported.organization`.
 *
 * Collection is best-effort: the first failing request stops further
 * questions, and whatever was gathered up to that point is returned.
 *
 * @param config - Endpoint settings used for every question.
 * @returns The gathered hints. `fingerprint.confidence` is always derived
 *          from the fields actually obtained: 0.3 each for self-reported
 *          model, cutoff and context window, plus 0.1 for an API-reported
 *          model, capped at 1. Nothing obtained means 0.
 */
export async function identify(config: ProviderConfig): Promise<IdResult> {
  const result: IdResult = {
    self_reported: {
      model: null,
      cutoff: null,
      context_window: null,
    },
    api_reported: {
      model: null,
      organization: null,
    },
    fingerprint: {
      provider: detectProvider(config.baseUrl),
      confidence: 0,
    },
  };

  const ask = (question: string, maxTokens: number) =>
    chat(config, {
      model: config.model,
      messages: [{ role: 'user', content: question }],
      max_tokens: maxTokens,
    });

  try {
    // 1. Model name, plus everything the first response exposes by itself.
    const modelResponse = await ask('What model are you? Reply with only the model identifier.', 50);
    const modelAnswer = modelResponse.choices[0]?.message?.content;
    if (modelAnswer) {
      result.self_reported.model = extractModelName(modelAnswer);
    }
    result.api_reported.model = modelResponse.model;

    // Read the header right away so a later failure cannot lose it.
    const organization = modelResponse.headers['openai-organization'];
    if (organization) {
      result.api_reported.organization = organization;
    }

    // 2. Knowledge cutoff
    const cutoffResponse = await ask('What is your knowledge cutoff date? Reply YYYY-MM only.', 20);
    const cutoffAnswer = cutoffResponse.choices[0]?.message?.content;
    if (cutoffAnswer) {
      result.self_reported.cutoff = extractCutoff(cutoffAnswer);
    }

    // 3. Context window
    const contextResponse = await ask(
      'What is your maximum context window in tokens? Reply with only the number.',
      20
    );
    const contextAnswer = contextResponse.choices[0]?.message?.content;
    if (contextAnswer) {
      result.self_reported.context_window = extractContextWindow(contextAnswer);
    }
  } catch {
    // Deliberate best-effort: keep the partial results gathered so far.
  }

  // Score after the try block so partial results are reflected too, instead
  // of leaving a placeholder value in place when a request failed.
  let confidenceScore = 0;
  if (result.self_reported.model) confidenceScore += 0.3;
  if (result.self_reported.cutoff) confidenceScore += 0.3;
  if (result.self_reported.context_window) confidenceScore += 0.3;
  if (result.api_reported.model) confidenceScore += 0.1;

  result.fingerprint.confidence = Math.min(1, confidenceScore);

  return result;
}
@@ -0,0 +1,55 @@
1
+ import { ProviderConfig, chat } from '../client';
2
+
3
+ export interface PingResult { // Value returned by ping().
4
+ reachable: boolean; // true when the chat request completed without throwing.
5
+ latency_ms: number; // Rounded time until the response or the failure, in milliseconds.
6
+ model: string | null; // Model from the response; null on failure.
7
+ rate_limit: { // Taken from rate-limit response headers when present.
8
+ remaining: number | null; // Remaining quota parsed as an integer; null when unavailable.
9
+ reset_at: string | null; // Reset value exactly as the header reported it; null when unavailable.
10
+ };
11
+ error?: string; // Failure message; present only when reachable is false.
12
+ }
13
+
14
/**
 * Check that an endpoint answers a minimal chat request.
 *
 * @param config - Endpoint settings for the request.
 * @returns Reachability, rounded latency, the responding model and any
 *          rate-limit information found in the response headers. The generic
 *          `x-ratelimit-remaining` / `x-ratelimit-reset` headers are preferred,
 *          with the `-requests` variants as a fallback. This function does not
 *          throw: a failed request yields `reachable: false` with `error` set.
 */
export async function ping(config: ProviderConfig): Promise<PingResult> {
  const startTime = performance.now();

  try {
    const response = await chat(config, {
      model: config.model,
      messages: [{ role: 'user', content: 'hi' }],
      max_tokens: 1,
    });

    const latencyMs = performance.now() - startTime;

    const headers = response.headers;
    const remainingRaw =
      headers['x-ratelimit-remaining'] || headers['x-ratelimit-remaining-requests'];
    const parsedRemaining = remainingRaw ? parseInt(remainingRaw, 10) : NaN;
    // A header that is not a number must not surface as NaN.
    const remaining = Number.isNaN(parsedRemaining) ? null : parsedRemaining;
    const resetAt =
      headers['x-ratelimit-reset'] || headers['x-ratelimit-reset-requests'] || null;

    return {
      reachable: true,
      latency_ms: Math.round(latencyMs),
      model: response.model || null,
      rate_limit: {
        remaining,
        reset_at: resetAt,
      },
    };
  } catch (error) {
    const latencyMs = performance.now() - startTime;
    return {
      reachable: false,
      latency_ms: Math.round(latencyMs),
      model: null,
      rate_limit: {
        remaining: null,
        reset_at: null,
      },
      // Anything can be thrown, so only read `.message` from real Errors.
      error: error instanceof Error ? error.message : String(error),
    };
  }
}
@@ -0,0 +1,136 @@
1
+ import { ProviderConfig, chat } from '../client';
2
+ import { Timer } from '../utils/timer';
3
+
4
+ export interface ProbeResult { // Value returned by probe().
5
+ capabilities: Record<string, boolean>; // Capability name -> whether its probe request indicated support.
6
+ probe_duration_ms: number; // Rounded Timer.elapsed() reading taken after all probes ran.
7
+ }
8
+
9
/**
 * Probe an endpoint for optional chat-completions features.
 *
 * Six small requests are sent one after another, each exercising one feature.
 * A probe that throws marks its capability as unsupported; it never aborts
 * the remaining probes.
 *
 * @param config - Endpoint settings used for every probe request.
 * @returns A map with the keys `json_mode`, `function_calling`, `vision`,
 *          `streaming`, `system_prompt` and `temperature_control`, plus the
 *          elapsed time for the whole run.
 */
export async function probe(config: ProviderConfig): Promise<ProbeResult> {
  type ProbeRequest = Parameters<typeof chat>[1];
  type ProbeResponse = Awaited<ReturnType<typeof chat>>;

  const timer = new Timer();
  timer.start();

  // Send one probe request; any thrown error means "not supported".
  const check = async (
    request: Omit<ProbeRequest, 'model'>,
    supported: (response: ProbeResponse) => boolean
  ): Promise<boolean> => {
    try {
      return supported(await chat(config, { model: config.model, ...request }));
    } catch {
      return false;
    }
  };

  const capabilities: Record<string, boolean> = {
    json_mode: false,
    function_calling: false,
    vision: false,
    streaming: false,
    system_prompt: false,
    temperature_control: false,
  };

  // JSON mode: supported when the request is accepted at all. The prompt
  // mentions JSON because OpenAI rejects `json_object` requests whose
  // messages never mention it, which would read as a false "unsupported".
  capabilities.json_mode = await check(
    {
      messages: [{ role: 'user', content: 'Say hello. Respond with a JSON object.' }],
      max_tokens: 10,
      response_format: { type: 'json_object' },
    },
    () => true
  );

  // Function calling: supported when the reply actually carries tool calls.
  // A present-but-null `tool_calls` field does not count.
  capabilities.function_calling = await check(
    {
      messages: [{ role: 'user', content: 'What is 2+2?' }],
      max_tokens: 50,
      tools: [{
        type: 'function',
        function: {
          name: 'add',
          description: 'Add two numbers',
          parameters: {
            type: 'object',
            properties: {
              a: { type: 'number' },
              b: { type: 'number' }
            },
            required: ['a', 'b']
          }
        }
      }],
    },
    (response) => {
      const message: unknown = response.choices[0]?.message;
      const toolCalls =
        typeof message === 'object' && message !== null
          ? (message as { tool_calls?: unknown }).tool_calls
          : undefined;
      return Array.isArray(toolCalls) && toolCalls.length > 0;
    }
  );

  // Vision: send a minimal 1x1 white PNG and expect some text back.
  const tinyImage = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
  capabilities.vision = await check(
    {
      messages: [{
        role: 'user',
        content: [
          { type: 'text', text: 'What color is this image?' },
          { type: 'image_url', image_url: { url: tinyImage } }
        ]
      }],
      max_tokens: 20,
    },
    (response) => response.choices.length > 0 && !!response.choices[0].message?.content
  );

  // Streaming: only checks that a `stream: true` request does not fail.
  // NOTE(review): chat() is the non-streaming client; how it copes with an
  // event-stream body depends on requestJson, which is not visible here.
  capabilities.streaming = await check(
    {
      messages: [{ role: 'user', content: 'Hi' }],
      max_tokens: 5,
      stream: true,
    },
    () => true
  );

  // System prompt: supported when a request with a system message is answered.
  capabilities.system_prompt = await check(
    {
      messages: [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: 'Hi' }
      ],
      max_tokens: 10,
    },
    (response) => response.choices.length > 0
  );

  // Temperature: supported when a request with `temperature: 0` is answered.
  capabilities.temperature_control = await check(
    {
      messages: [{ role: 'user', content: 'Say one word' }],
      max_tokens: 5,
      temperature: 0,
    },
    (response) => response.choices.length > 0
  );

  timer.stop();

  return {
    capabilities,
    probe_duration_ms: Math.round(timer.elapsed()),
  };
}