@vidda/llm-watcher 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ import { DatabaseSync } from 'node:sqlite';
2
+ import path from 'node:path';
3
+ import fs from 'node:fs';
4
+ import os from 'node:os';
5
+ import { fileURLToPath } from 'node:url';
6
// Directory containing this module (ES modules have no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Shared connection: set by getDb() after a successful initialization, cleared by closeDb().
let db = null;

/**
 * Returns the shared SQLite connection, opening and initializing it on first use.
 *
 * The database file is resolved in this order: the `dbPath` argument, the
 * `DB_PATH` environment variable, then `~/.promptglass/promptglass.db`.
 * Once a connection is cached, later calls return it and ignore `dbPath`.
 *
 * @param {string} [dbPath] - Optional path of the database file to open.
 * @returns {DatabaseSync} The open connection with pragmas applied and schema created.
 * @throws Rethrows whatever error interrupted directory creation, opening,
 *   pragma setup, or schema creation (after logging it).
 */
export function getDb(dbPath) {
    if (db) {
        return db;
    }
    const defaultPath = path.join(os.homedir(), '.promptglass', 'promptglass.db');
    const resolvedPath = dbPath ?? process.env.DB_PATH ?? defaultPath;
    let connection = null;
    try {
        // `recursive: true` is a no-op when the directory already exists.
        fs.mkdirSync(path.dirname(resolvedPath), { recursive: true });
        connection = new DatabaseSync(resolvedPath);
        connection.exec('PRAGMA journal_mode = WAL');
        connection.exec('PRAGMA foreign_keys = ON');
        initSchema(connection);
    }
    catch (error) {
        console.error(`Failed to initialize database with error ${error}`);
        // Release the handle so a failed setup neither leaks it nor gets cached.
        try {
            connection?.close();
        }
        catch {
            // Closing is best-effort; the initialization error is the one to report.
        }
        throw error;
    }
    // Cache only a fully initialized connection so a failed attempt can be retried.
    db = connection;
    return db;
}
32
/**
 * Creates the `requests` and `settings` tables when they do not exist yet.
 * Both statements run inside a single transaction that is rolled back if
 * either one fails.
 *
 * @param {DatabaseSync} db - Open connection to create the schema on.
 * @throws Rethrows the error that aborted schema creation.
 */
function initSchema(db) {
    db.exec('BEGIN');
    try {
        db.prepare(`
            CREATE TABLE IF NOT EXISTS requests (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                mode TEXT NOT NULL CHECK(mode IN ('chat', 'observe', 'benchmark')),
                created_at DATETIME DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
                context_hash TEXT,
                request_body TEXT,
                response_body TEXT,
                metrics TEXT
            );
        `).run();
        db.prepare(`
            CREATE TABLE IF NOT EXISTS settings (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL
            );
        `).run();
        db.exec('COMMIT');
    }
    catch (error) {
        try {
            db.exec('ROLLBACK');
        }
        catch {
            // A failing rollback must not hide the error that caused it.
        }
        throw error;
    }
}
59
/**
 * Inserts a row into `requests`, storing the bodies and metrics as JSON text.
 *
 * @param {string} mode - Value for the `mode` column.
 * @param {*} requestBody - Serialized with JSON.stringify.
 * @param {*} [responseBody] - Serialized when truthy, otherwise stored as NULL.
 * @param {*} [metrics] - Serialized when truthy, otherwise stored as NULL.
 * @param {string} [contextHash] - Stored as NULL when falsy.
 * @returns {number|bigint} The `lastInsertRowid` reported for the insert.
 */
export function saveRequest(mode, requestBody, responseBody, metrics, contextHash) {
    const connection = getDb();
    const insert = connection.prepare(`
        INSERT INTO requests (mode, request_body, response_body, metrics, context_hash)
        VALUES (?, ?, ?, ?, ?)
    `);
    const requestJson = JSON.stringify(requestBody);
    const responseJson = responseBody ? JSON.stringify(responseBody) : null;
    const metricsJson = metrics ? JSON.stringify(metrics) : null;
    const { lastInsertRowid } = insert.run(mode, requestJson, responseJson, metricsJson, contextHash || null);
    return lastInsertRowid;
}
67
/**
 * Overwrites the stored response body of a request and, when metrics are
 * supplied, its metrics as well.
 *
 * @param {number} id - Row id in `requests`.
 * @param {*} responseBody - Serialized with JSON.stringify.
 * @param {*} [metrics] - When truthy, serialized and written to `metrics`.
 */
export function updateRequest(id, responseBody, metrics) {
    const connection = getDb();
    if (!metrics) {
        // Without metrics only the response column is touched.
        const responseOnly = connection.prepare(`
            UPDATE requests
            SET response_body = ?
            WHERE id = ?
        `);
        responseOnly.run(JSON.stringify(responseBody), id);
        return;
    }
    const responseAndMetrics = connection.prepare(`
        UPDATE requests
        SET response_body = ?, metrics = ?
        WHERE id = ?
    `);
    responseAndMetrics.run(JSON.stringify(responseBody), JSON.stringify(metrics), id);
}
84
/**
 * Lists stored requests, newest `created_at` first, with JSON columns parsed.
 *
 * @param {number} [limit=50] - Maximum number of rows to return.
 * @param {number} [offset=0] - Number of rows to skip.
 * @returns {object[]} Records with camelCase keys; `responseBody` and
 *   `metrics` are `undefined` when their column is empty.
 */
export function getRequests(limit = 50, offset = 0) {
    const statement = getDb().prepare(`
        SELECT * FROM requests ORDER BY created_at DESC LIMIT ? OFFSET ?
    `);
    const records = [];
    for (const row of statement.all(limit, offset)) {
        records.push({
            id: row.id,
            mode: row.mode,
            createdAt: row.created_at,
            contextHash: row.context_hash,
            requestBody: JSON.parse(row.request_body),
            responseBody: row.response_body ? JSON.parse(row.response_body) : undefined,
            metrics: row.metrics ? JSON.parse(row.metrics) : undefined,
        });
    }
    return records;
}
99
/**
 * Loads a single stored request with its JSON columns parsed.
 *
 * @param {number} id - Row id in `requests`.
 * @returns {object|null} The record with camelCase keys, or `null` when no row matches.
 */
export function getRequestById(id) {
    const record = getDb().prepare('SELECT * FROM requests WHERE id = ?').get(id);
    if (!record) {
        return null;
    }
    const {
        mode,
        created_at: createdAt,
        context_hash: contextHash,
        request_body: requestJson,
        response_body: responseJson,
        metrics: metricsJson,
    } = record;
    return {
        id: record.id,
        mode,
        createdAt,
        contextHash,
        requestBody: JSON.parse(requestJson),
        responseBody: responseJson ? JSON.parse(responseJson) : undefined,
        metrics: metricsJson ? JSON.parse(metricsJson) : undefined,
    };
}
114
/**
 * Deletes the request row with the given id, if there is one.
 *
 * @param {number} id - Row id in `requests`.
 */
export function deleteRequest(id) {
    const removeById = getDb().prepare('DELETE FROM requests WHERE id = ?');
    removeById.run(id);
}
118
/**
 * Deletes every request whose id is in `ids` with a single statement.
 * An empty list returns before the database is touched.
 *
 * @param {number[]} ids - Row ids to delete.
 */
export function deleteRequests(ids) {
    if (ids.length === 0) {
        return;
    }
    const connection = getDb();
    // One bound parameter per id keeps the values out of the SQL text.
    const placeholders = Array.from(ids, () => '?').join(',');
    const removeMany = connection.prepare(`DELETE FROM requests WHERE id IN (${placeholders})`);
    removeMany.run(...ids);
}
125
/**
 * Stores a setting, replacing any value already saved under the same key.
 *
 * @param {string} key - Setting name (primary key of `settings`).
 * @param {string} value - Value to store.
 */
export function saveSetting(key, value) {
    const upsert = getDb().prepare('INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)');
    upsert.run(key, value);
}
129
/**
 * Reads a setting value.
 *
 * @param {string} key - Setting name.
 * @returns {string|null} The stored value, or `null` when the key is absent.
 */
export function getSetting(key) {
    const match = getDb().prepare('SELECT value FROM settings WHERE key = ?').get(key);
    if (!match) {
        return null;
    }
    return match.value;
}
134
/**
 * Closes the shared connection, if one is open, and clears the cached
 * handle so the next getDb() call opens a new one.
 */
export function closeDb() {
    if (!db) {
        return;
    }
    db.close();
    db = null;
}
140
// Settings key holding the JSON-encoded array of proxy profiles.
const PROFILES_KEY = 'PROXY_PROFILES';
// Settings key holding the id of the currently active proxy profile.
const ACTIVE_PROFILE_KEY = 'ACTIVE_PROXY_ID';

/**
 * Builds a profile id from the current time (base 36) followed by six random
 * base-36 characters.
 *
 * @returns {string} A lowercase alphanumeric identifier.
 */
function generateId() {
    // The base-36 fraction can be shorter than six digits (it is empty for 0),
    // so pad it to keep every id the same width.
    const randomPart = Math.random().toString(36).slice(2, 8).padEnd(6, '0');
    return Date.now().toString(36) + randomPart;
}
145
/**
 * Returns the stored proxy profiles.
 *
 * When no valid profile array is stored, a legacy target URL (from the
 * `TARGET_URL` setting or environment variable) is converted into a single
 * "Default" profile, which is saved and marked active.
 *
 * @returns {object[]} The profile list; empty when nothing is stored and no
 *   legacy URL exists.
 */
export function getProxyProfiles() {
    const stored = getSetting(PROFILES_KEY);
    if (stored) {
        let candidate;
        try {
            candidate = JSON.parse(stored);
        }
        catch {
            // Unparseable value: treat it as absent and fall through to migration.
            candidate = undefined;
        }
        if (Array.isArray(candidate)) {
            return candidate;
        }
    }
    // Migration: if legacy single settings exist, convert to first profile
    const legacyUrl = getSetting('TARGET_URL') || process.env.TARGET_URL;
    const legacyKey = getSetting('TARGET_API_KEY') || process.env.TARGET_API_KEY;
    if (!legacyUrl) {
        return [];
    }
    const migrated = {
        id: generateId(),
        name: 'Default',
        targetUrl: legacyUrl,
        targetApiKey: legacyKey || '',
    };
    saveSetting(PROFILES_KEY, JSON.stringify([migrated]));
    saveSetting(ACTIVE_PROFILE_KEY, migrated.id);
    return [migrated];
}
173
/**
 * Persists the complete profile list as JSON, replacing the stored list.
 *
 * @param {object[]} profiles - Profiles to store.
 */
export function saveProxyProfiles(profiles) {
    const serialized = JSON.stringify(profiles);
    saveSetting(PROFILES_KEY, serialized);
}
176
/**
 * Returns the profile marked active, falling back to the first profile when
 * the stored active id is missing or matches nothing.
 *
 * @returns {object|null} The active profile, or `null` when there are no profiles.
 */
export function getActiveProxyProfile() {
    const available = getProxyProfiles();
    if (available.length === 0) {
        return null;
    }
    const activeId = getSetting(ACTIVE_PROFILE_KEY);
    const selected = activeId
        ? available.find((candidate) => candidate.id === activeId)
        : undefined;
    return selected || available[0];
}
188
/**
 * Records which profile is active. The id is stored as given; it is not
 * checked against the stored profiles.
 *
 * @param {string} id - Profile id to mark active.
 */
export function setActiveProxyProfile(id) {
    const activeProfileId = id;
    saveSetting(ACTIVE_PROFILE_KEY, activeProfileId);
}
191
/**
 * Creates a profile with a generated id, appends it to the stored list and,
 * when it is the only profile, marks it active.
 *
 * @param {string} name
 * @param {string} targetUrl
 * @param {string} targetApiKey
 * @param {string} [model]
 * @param {number} [inputTokenPrice]
 * @param {number} [outputTokenPrice]
 * @param {number} [cachedInputTokenPrice]
 * @returns {object} The newly created profile.
 */
export function createProxyProfile(name, targetUrl, targetApiKey, model, inputTokenPrice, outputTokenPrice, cachedInputTokenPrice) {
    const stored = getProxyProfiles();
    const created = {
        id: generateId(),
        name,
        targetUrl,
        targetApiKey,
        model,
        inputTokenPrice,
        outputTokenPrice,
        cachedInputTokenPrice,
    };
    stored.push(created);
    saveProxyProfiles(stored);
    // If first profile, auto-activate it
    const isOnlyProfile = stored.length === 1;
    if (isOnlyProfile) {
        setActiveProxyProfile(created.id);
    }
    return created;
}
202
/**
 * Applies a partial update to a stored profile and saves the list.
 *
 * The profile id is never changed, even if `updates` contains an `id`:
 * the active-profile setting refers to profiles by id, so rewriting it
 * would leave that reference pointing at nothing.
 *
 * @param {string} id - Id of the profile to update.
 * @param {object} updates - Fields to overwrite on the profile.
 * @returns {object|null} The updated profile, or `null` when no profile has that id.
 */
export function updateProxyProfile(id, updates) {
    const profiles = getProxyProfiles();
    const idx = profiles.findIndex((p) => p.id === id);
    if (idx === -1) {
        return null;
    }
    const current = profiles[idx];
    // Spread order matters: the trailing `id` wins over anything in `updates`.
    profiles[idx] = { ...current, ...updates, id: current.id };
    saveProxyProfiles(profiles);
    return profiles[idx];
}
211
/**
 * Removes a profile from the stored list. When the removed profile was the
 * active one, the first remaining profile becomes active, or the active id
 * is set to an empty string when none remain.
 *
 * @param {string} id - Id of the profile to delete.
 */
export function deleteProxyProfile(id) {
    const remaining = getProxyProfiles().filter((candidate) => candidate.id !== id);
    saveProxyProfiles(remaining);
    // If deleted the active one, reassign
    if (getSetting(ACTIVE_PROFILE_KEY) !== id) {
        return;
    }
    const nextActiveId = remaining.length > 0 ? remaining[0].id : '';
    saveSetting(ACTIVE_PROFILE_KEY, nextActiveId);
}
@@ -0,0 +1,25 @@
1
+ import { Metrics } from '../../shared/types.js';
2
/** Result of splitting an upstream response into a client branch and a metrics branch. */
export interface ProcessedStream {
    /** Branch of the upstream body intended for the client. */
    clientStream: ReadableStream<Uint8Array>;
    /** Settles with the collected metrics and the rebuilt response body once the stream has been read. */
    metricsPromise: Promise<{ metrics: Metrics; responseBody: any }>;
}
9
+ /**
10
+ * Extracts a normalized token breakdown from an upstream usage payload.
11
+ * Reads values directly from the response body's usage object (no inference).
12
+ * Compatible with OpenAI Chat Completions, OpenAI Responses API, and Anthropic-style usage.
13
+ */
14
+ export declare function extractTokenBreakdown(usage: any): {
15
+ promptTokens?: number;
16
+ completionTokens?: number;
17
+ totalTokens?: number;
18
+ cachedTokens?: number;
19
+ reasoningTokens?: number;
20
+ outputTextTokens?: number;
21
+ };
22
+ /**
23
+ * Splits an LLM SSE stream: one untouched for the client, one for metrics/capture.
24
+ */
25
+ export declare function processStream(response: Response, requestStartTime: number, requestBody?: any, onChunk?: (body: any) => void): ProcessedStream;
@@ -0,0 +1,250 @@
1
+ import { EventSourceParserStream } from 'eventsource-parser/stream';
2
/**
 * Extracts a normalized token breakdown from an upstream usage payload.
 *
 * Values are read from the usage object itself. The only derived field is
 * `totalTokens`, which falls back to prompt + completion when the payload
 * has no usable `total_tokens`. Compatible with OpenAI Chat Completions,
 * OpenAI Responses API, and Anthropic-style usage.
 *
 * @param {*} usage - The `usage` object of a response body.
 * @returns {{promptTokens?: number, completionTokens?: number, totalTokens?: number,
 *   cachedTokens?: number, reasoningTokens?: number, outputTextTokens?: number}}
 *   Each field is a finite number or `undefined`; `{}` when `usage` is not an object.
 */
export function extractTokenBreakdown(usage) {
    if (!usage || typeof usage !== 'object') {
        return {};
    }
    // Coerce before any arithmetic: counts that arrive as strings would
    // otherwise be concatenated when the total is derived.
    const toCount = (value) => {
        if (value == null) {
            return undefined;
        }
        const numeric = Number(value);
        return Number.isFinite(numeric) ? numeric : undefined;
    };
    const promptDetails = usage.prompt_tokens_details || usage.input_tokens_details || {};
    const completionDetails = usage.completion_tokens_details || usage.output_tokens_details || {};
    const promptTokens = toCount(usage.prompt_tokens ?? usage.input_tokens);
    const completionTokens = toCount(usage.completion_tokens ?? usage.output_tokens);
    const derivedTotal = promptTokens !== undefined && completionTokens !== undefined
        ? promptTokens + completionTokens
        : undefined;
    return {
        promptTokens,
        completionTokens,
        totalTokens: toCount(usage.total_tokens) ?? derivedTotal,
        cachedTokens: toCount(promptDetails.cached_tokens ?? usage.cache_read_input_tokens),
        reasoningTokens: toCount(completionDetails.reasoning_tokens),
        outputTextTokens: toCount(completionDetails.text_tokens),
    };
}
28
/** Converts a streamed tool-call delta into the shape kept on the rebuilt message. */
function toStoredToolCall(tc) {
    return {
        index: tc.index ?? 0,
        id: tc.id || '',
        type: tc.type || 'function',
        function: {
            name: tc.function?.name || '',
            arguments: tc.function?.arguments || '',
        },
    };
}

/** Replaces `choice.delta` with an initial `choice.message` built from that delta. */
function seedChatMessage(choice, deltaToolCalls) {
    choice.message = {
        role: choice.delta.role || 'assistant',
        content: choice.delta.content || '',
    };
    if (choice.delta.reasoning_content) {
        choice.message.reasoning_content = choice.delta.reasoning_content;
    }
    if (deltaToolCalls.length > 0) {
        choice.message.tool_calls = deltaToolCalls.map(toStoredToolCall);
    }
    delete choice.delta;
}

/** Pulls the text, reasoning text and tool-call deltas out of one parsed event. */
function readEventDelta(eventType, data) {
    if (eventType === 'response.output_text.delta') {
        return { content: data.delta || '', reasoningContent: '', deltaToolCalls: [] };
    }
    if (eventType === 'response.reasoning_summary_text.delta') {
        // Some models send summary or reasoning in separate delta events
        return { content: '', reasoningContent: data.delta || '', deltaToolCalls: [] };
    }
    // Fallback to Chat Completions format
    const delta = data.choices?.[0]?.delta || data.delta || {};
    return {
        content: delta.content || '',
        // Keep reasoning_content separate from content to preserve the distinction
        reasoningContent: delta.reasoning_content || '',
        deltaToolCalls: Array.isArray(delta.tool_calls) ? delta.tool_calls : [],
    };
}

/** Appends Responses API output text to the rebuilt body, creating the message item if needed. */
function appendOutputText(body, text) {
    const messageItem = body.output?.find((item) => item.type === 'message');
    if (messageItem) {
        messageItem.content += text;
        return;
    }
    body.output = body.output || [];
    body.output.push({ type: 'message', role: 'assistant', content: text });
}

/** Appends Responses API reasoning-summary text, creating the reasoning item if needed. */
function appendReasoningSummary(body, text) {
    const reasoningItem = body.output?.find((item) => item.type === 'reasoning');
    if (reasoningItem) {
        reasoningItem.summary[0].text += text;
        return;
    }
    body.output = body.output || [];
    body.output.push({
        type: 'reasoning',
        role: 'assistant',
        summary: [{ type: 'summary_text', text }],
    });
}

/** Folds one Chat Completions chunk into the already-started rebuilt body. */
function mergeChatChunk(body, data, content, reasoningContent, deltaToolCalls) {
    const incoming = data.choices?.[0];
    if (!body.choices?.[0]?.message && incoming?.delta) {
        // The stream opened with a chunk that carried no choice (for example
        // an empty `choices` array), so the message starts here instead.
        if (!Array.isArray(body.choices)) {
            body.choices = [];
        }
        body.choices[0] = { ...body.choices[0], ...incoming };
        seedChatMessage(body.choices[0], deltaToolCalls);
        return;
    }
    const msg = body.choices?.[0]?.message;
    if (!msg) {
        return;
    }
    if (content) {
        msg.content += content;
    }
    if (reasoningContent) {
        msg.reasoning_content = (msg.reasoning_content || '') + reasoningContent;
    }
    if (deltaToolCalls.length > 0) {
        msg.tool_calls = msg.tool_calls || [];
        for (const tc of deltaToolCalls) {
            const existing = msg.tool_calls.find((t) => t.index === (tc.index ?? 0));
            if (!existing) {
                msg.tool_calls.push(toStoredToolCall(tc));
                continue;
            }
            if (tc.id) {
                existing.id = tc.id;
            }
            if (tc.type) {
                existing.type = tc.type;
            }
            if (tc.function?.name) {
                existing.function.name = tc.function.name;
            }
            if (tc.function?.arguments) {
                existing.function.arguments += tc.function.arguments;
            }
        }
    }
    // Later chunks carry the finish reason; keep it on the rebuilt choice.
    if (incoming?.finish_reason != null) {
        body.choices[0].finish_reason = incoming.finish_reason;
    }
}

/**
 * Splits an LLM SSE stream: one untouched for the client, one for metrics/capture.
 *
 * The internal branch is decoded and parsed as server-sent events. Every
 * JSON event updates timing counters and a rebuilt, non-streaming-style
 * response body (Chat Completions chunks and Responses API events are
 * recognized). Events that fail to parse or process are skipped.
 *
 * @param {Response} response - Upstream response whose body is an SSE stream.
 * @param {number} requestStartTime - `performance.now()` timestamp taken when the request started.
 * @param {*} [requestBody] - Original request; its `messages` are used to
 *   estimate prompt tokens when the stream reports none.
 * @param {(body: *) => void} [onChunk] - Called after each processed event
 *   with the response body rebuilt so far.
 * @returns {{clientStream: ReadableStream, metricsPromise: Promise<{metrics: object, responseBody: *}>}}
 * @throws {TypeError} When `response` has no body.
 */
export function processStream(response, requestStartTime, requestBody, onChunk) {
    if (!response.body) {
        throw new TypeError('processStream requires a response with a readable body');
    }
    // Split the stream: one for the client, one for our internal processing
    const [clientStream, internalStream] = response.body.tee();
    const metricsPromise = (async () => {
        let firstTokenTime = null;
        let lastTokenTime = requestStartTime;
        const interTokenLatencies = [];
        let tokenCount = 0;
        let fullResponseBody = null;
        let usageTokenCount = null;
        let promptTokenCount = null;
        const parserStream = internalStream
            .pipeThrough(new TextDecoderStream())
            .pipeThrough(new EventSourceParserStream());
        for await (const event of parserStream) {
            if (event.data === '[DONE]') {
                continue;
            }
            try {
                const data = JSON.parse(event.data);
                const now = performance.now();
                const eventType = event.event;
                // Chat Completions usage is often in the last chunk or every chunk with stream_options
                const usage = data.usage || data.statistics || data.stats;
                // Responses API events nest usage under `response` (response.completed);
                // it feeds the counters only, the stored body keeps its own shape.
                const countedUsage = usage || data.response?.usage;
                if (countedUsage) {
                    if (countedUsage.completion_tokens) {
                        usageTokenCount = countedUsage.completion_tokens;
                    }
                    if (countedUsage.prompt_tokens) {
                        promptTokenCount = countedUsage.prompt_tokens;
                    }
                    if (countedUsage.input_tokens) {
                        promptTokenCount = countedUsage.input_tokens; // Anthropic/others
                    }
                    if (countedUsage.output_tokens) {
                        usageTokenCount = countedUsage.output_tokens;
                    }
                }
                const { content, reasoningContent, deltaToolCalls } = readEventDelta(eventType, data);
                if (content || reasoningContent || deltaToolCalls.length > 0) {
                    tokenCount++;
                    if (firstTokenTime === null) {
                        firstTokenTime = now;
                    }
                    else {
                        interTokenLatencies.push(now - lastTokenTime);
                    }
                    lastTokenTime = now;
                }
                // Reconstruct the full response body
                if (!fullResponseBody) {
                    fullResponseBody = { ...data };
                    // Initialize for Chat Completions
                    const choice = fullResponseBody.choices?.[0];
                    if (choice?.delta) {
                        seedChatMessage(choice, deltaToolCalls);
                    }
                    // Initialize for Responses API
                    if (eventType?.startsWith('response.')) {
                        fullResponseBody.output = fullResponseBody.output || [];
                        if (eventType === 'response.output_text.delta' && content) {
                            fullResponseBody.output.push({
                                type: 'message',
                                role: 'assistant',
                                content,
                            });
                        }
                        else if (eventType === 'response.reasoning_summary_text.delta' && reasoningContent) {
                            fullResponseBody.output.push({
                                type: 'reasoning',
                                role: 'assistant',
                                summary: [{ type: 'summary_text', text: reasoningContent }],
                            });
                        }
                    }
                }
                else {
                    // Update existing response body
                    if (eventType === 'response.output_text.delta') {
                        appendOutputText(fullResponseBody, content);
                    }
                    else if (eventType === 'response.reasoning_summary_text.delta') {
                        appendReasoningSummary(fullResponseBody, reasoningContent);
                    }
                    else if (eventType === 'response.completed') {
                        // Take the final response object as the ground truth
                        fullResponseBody = { ...data };
                    }
                    else {
                        mergeChatChunk(fullResponseBody, data, content, reasoningContent, deltaToolCalls);
                    }
                    // If we got more usage info later
                    if (usage) {
                        fullResponseBody.usage = { ...fullResponseBody.usage, ...usage };
                    }
                }
                if (onChunk && fullResponseBody) {
                    onChunk(fullResponseBody);
                }
            }
            catch (e) {
                // Best effort: a chunk that cannot be parsed or merged is skipped
                // so the rest of the stream still contributes to the capture.
            }
        }
        const now = performance.now();
        const totalLatency = now - requestStartTime;
        // Without any token event, the whole request counts as time-to-first-token.
        const ttft = firstTokenTime ? firstTokenTime - requestStartTime : totalLatency;
        const generationTimeMs = totalLatency - ttft;
        // Prefer the provider-reported completion count over the number of delta events.
        const finalTokenCount = usageTokenCount ?? tokenCount;
        // Estimate prompt token count if not provided by response
        if (promptTokenCount === null && requestBody?.messages) {
            const promptText = JSON.stringify(requestBody.messages);
            promptTokenCount = Math.ceil(promptText.length / 4); // Basic estimation
        }
        const prefillSpeed = (promptTokenCount && ttft > 0) ? (promptTokenCount / (ttft / 1000)) : 0;
        return {
            metrics: {
                ttft,
                totalLatency,
                tokensPerSecond: generationTimeMs > 0 ? (finalTokenCount / (generationTimeMs / 1000)) : 0,
                promptPrefillSpeed: prefillSpeed,
                tokenCount: finalTokenCount,
                interTokenLatencies,
                completedAt: new Date().toISOString(),
            },
            responseBody: fullResponseBody,
        };
    })();
    return { clientStream, metricsPromise };
}
@@ -0,0 +1,33 @@
1
/** Timing and throughput figures collected for one streamed response. */
export interface Metrics {
    /** Time to first token (ms). */
    ttft: number;
    /** Total request-response time (ms). */
    totalLatency: number;
    /** Tokens generated per second after the first token. */
    tokensPerSecond: number;
    /** Prompt tokens per second, measured over the time to first token. */
    promptPrefillSpeed: number;
    /** Number of tokens counted for the response. */
    tokenCount: number;
    /** Gaps between consecutive tokens (ms). */
    interTokenLatencies: number[];
    /** ISO 8601 timestamp taken when processing finished. */
    completedAt: string;
}
10
/** Modes a stored request can be recorded under. */
export type RequestMode =
    | 'chat'
    | 'observe'
    | 'benchmark';
11
/** A captured request as returned to callers, with JSON columns already parsed. */
export interface StoredRequest {
    /** Row id. */
    id: number;
    /** Mode the request was recorded under. */
    mode: RequestMode;
    /** Creation timestamp of the row. */
    createdAt: string;
    /** Optional hash associated with the request context. */
    contextHash: string | null;
    /** Parsed request payload. */
    requestBody: any;
    /** Parsed response payload; absent when none was stored. */
    responseBody?: any;
    /** Parsed metrics; absent when none were stored. */
    metrics?: Metrics;
}
20
/** Target endpoint configuration: a URL and the API key used with it. */
export interface Settings {
    /** URL of the upstream target. */
    targetUrl: string;
    /** API key for the upstream target. */
    targetApiKey: string;
}
24
/** A named upstream target with optional model and pricing information. */
export interface ProxyProfile {
    /** Generated identifier of the profile. */
    id: string;
    /** Display name. */
    name: string;
    /** URL of the upstream target. */
    targetUrl: string;
    /** API key for the upstream target. */
    targetApiKey: string;
    /** Optional model name. */
    model?: string;
    /** USD per 1M input tokens. */
    inputTokenPrice?: number;
    /** USD per 1M output tokens. */
    outputTokenPrice?: number;
    /** USD per 1M cached input tokens. */
    cachedInputTokenPrice?: number;
}
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "@ngvuhuy/promptglass-server",
3
+ "version": "1.0.0",
4
+ "description": "",
5
+ "main": "index.ts",
6
+ "type": "module",
7
+ "scripts": {
8
+ "dev": "tsx watch index.ts",
9
+ "build": "tsc",
10
+ "start": "node dist/index.js"
11
+ },
12
+ "keywords": [],
13
+ "author": "",
14
+ "license": "ISC",
15
+ "packageManager": "pnpm@10.28.2",
16
+ "dependencies": {
17
+ "axios": "^1.13.6",
18
+ "cors": "^2.8.6",
19
+ "dotenv": "^17.3.1",
20
+ "eventsource-parser": "^3.0.6",
21
+ "express": "^5.2.1",
22
+ "zod": "^4.3.6"
23
+ },
24
+ "devDependencies": {
25
+ "@types/cors": "^2.8.19",
26
+ "@types/express": "^5.0.6",
27
+ "@types/node": "^25.5.0",
28
+ "tsx": "^4.21.0",
29
+ "typescript": "^5.9.3"
30
+ }
31
+ }
@@ -0,0 +1,37 @@
1
/** Timing and throughput figures collected for one streamed response. */
export interface Metrics {
    /** Time to first token (ms). */
    ttft: number;
    /** Total request-response time (ms). */
    totalLatency: number;
    /** Tokens generated per second. */
    tokensPerSecond: number;
    /** Tokens per second reading context window. */
    promptPrefillSpeed: number;
    /** Number of tokens counted for the response. */
    tokenCount: number;
    /** Milliseconds between tokens. */
    interTokenLatencies: number[];
    /** Timestamp string recorded on completion. */
    completedAt: string;
}
10
+
11
/** Modes a stored request can be recorded under. */
export type RequestMode =
    | 'chat'
    | 'observe'
    | 'benchmark';
12
+
13
/** A captured request as returned to callers, with JSON columns already parsed. */
export interface StoredRequest {
    /** Row id. */
    id: number;
    /** Mode the request was recorded under. */
    mode: RequestMode;
    /** Creation timestamp of the row. */
    createdAt: string;
    /** Optional hash associated with the request context. */
    contextHash: string | null;
    /** Parsed request payload. */
    requestBody: any;
    /** Parsed response payload; absent when none was stored. */
    responseBody?: any;
    /** Parsed metrics; absent when none were stored. */
    metrics?: Metrics;
}
22
+
23
/** Target endpoint configuration: a URL and the API key used with it. */
export interface Settings {
    /** URL of the upstream target. */
    targetUrl: string;
    /** API key for the upstream target. */
    targetApiKey: string;
}
27
+
28
/** A named upstream target with optional model and pricing information. */
export interface ProxyProfile {
    /** Generated identifier of the profile. */
    id: string;
    /** Display name. */
    name: string;
    /** URL of the upstream target. */
    targetUrl: string;
    /** API key for the upstream target. */
    targetApiKey: string;
    /** Optional model name. */
    model?: string;
    /** USD per 1M input tokens. */
    inputTokenPrice?: number;
    /** USD per 1M output tokens. */
    outputTokenPrice?: number;
    /** USD per 1M cached input tokens. */
    cachedInputTokenPrice?: number;
}