@vidda/llm-watcher 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -0
- package/cli.js +35 -0
- package/client/dist/assets/geist-cyrillic-ext-wght-normal-DjL33-gN.woff2 +0 -0
- package/client/dist/assets/geist-cyrillic-wght-normal-BEAKL7Jp.woff2 +0 -0
- package/client/dist/assets/geist-latin-ext-wght-normal-DC-KSUi6.woff2 +0 -0
- package/client/dist/assets/geist-latin-wght-normal-BgDaEnEv.woff2 +0 -0
- package/client/dist/assets/geist-vietnamese-wght-normal-6IgcOCM7.woff2 +0 -0
- package/client/dist/assets/index-BCCqRaer.js +16 -0
- package/client/dist/assets/index-Bs1XvUcJ.css +2 -0
- package/client/dist/frankenstein.txt +7741 -0
- package/client/dist/index.html +23 -0
- package/client/package.json +47 -0
- package/package.json +40 -0
- package/server/dist/server/index.d.ts +3 -0
- package/server/dist/server/index.js +53 -0
- package/server/dist/server/routes/dashboard.d.ts +3 -0
- package/server/dist/server/routes/dashboard.js +205 -0
- package/server/dist/server/routes/proxy.d.ts +3 -0
- package/server/dist/server/routes/proxy.js +135 -0
- package/server/dist/server/services/storage.d.ts +19 -0
- package/server/dist/server/services/storage.js +225 -0
- package/server/dist/server/services/stream.d.ts +25 -0
- package/server/dist/server/services/stream.js +250 -0
- package/server/dist/shared/types.d.ts +33 -0
- package/server/dist/shared/types.js +2 -0
- package/server/package.json +31 -0
- package/shared/types.ts +37 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import { DatabaseSync } from 'node:sqlite';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import fs from 'node:fs';
|
|
4
|
+
import os from 'node:os';
|
|
5
|
+
import { fileURLToPath } from 'node:url';
|
|
6
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
let db = null;
|
|
8
|
+
/**
 * Returns the shared SQLite connection, opening and initialising it on first use.
 *
 * The database location is resolved in this order: the `dbPath` argument,
 * the `DB_PATH` environment variable, then `~/.promptglass/promptglass.db`.
 * Once a connection has been cached, `dbPath` is ignored and the cached
 * connection is returned.
 *
 * @param {string} [dbPath] - Optional path of the database file to open.
 * @returns {DatabaseSync} The open, schema-initialised connection.
 * @throws Rethrows any error raised while creating the directory, opening the
 *   database, applying the PRAGMAs or creating the schema.
 */
export function getDb(dbPath) {
    if (db) {
        return db;
    }
    // Default to ~/.promptglass/promptglass.db
    const defaultPath = path.join(os.homedir(), '.promptglass', 'promptglass.db');
    const resolvedPath = dbPath ?? process.env.DB_PATH ?? defaultPath;
    // Work on a local handle so a failed initialisation never leaves a
    // half-configured connection cached in the module-level `db`.
    let connection = null;
    try {
        // With `recursive: true` this is a no-op when the directory already exists.
        fs.mkdirSync(path.dirname(resolvedPath), { recursive: true });
        connection = new DatabaseSync(resolvedPath);
        connection.exec('PRAGMA journal_mode = WAL');
        connection.exec('PRAGMA foreign_keys = ON');
        initSchema(connection);
    }
    catch (error) {
        console.error(`Failed to initialize database with error ${error}`);
        try {
            connection?.close();
        }
        catch {
            // Closing is best-effort; the initialisation error is the one to report.
        }
        throw error;
    }
    db = connection;
    return db;
}
|
|
32
|
+
/**
 * Creates the `requests` and `settings` tables if they do not exist yet.
 *
 * Both statements run between `BEGIN` and `COMMIT`; if any step throws,
 * `ROLLBACK` is issued and the error is rethrown.
 *
 * @param {DatabaseSync} db - Connection to create the tables on.
 */
function initSchema(db) {
    const tableDefinitions = [
        `
    CREATE TABLE IF NOT EXISTS requests (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      mode TEXT NOT NULL CHECK(mode IN ('chat', 'observe', 'benchmark')),
      created_at DATETIME DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
      context_hash TEXT,
      request_body TEXT,
      response_body TEXT,
      metrics TEXT
    );
  `,
        `
    CREATE TABLE IF NOT EXISTS settings (
      key TEXT PRIMARY KEY,
      value TEXT NOT NULL
    );
  `,
    ];
    db.exec('BEGIN');
    try {
        for (const ddl of tableDefinitions) {
            db.prepare(ddl).run();
        }
        db.exec('COMMIT');
    }
    catch (error) {
        db.exec('ROLLBACK');
        throw error;
    }
}
|
|
59
|
+
/**
 * Inserts a new row into `requests` and returns its row id.
 *
 * `requestBody` is always JSON-serialised; `responseBody` and `metrics` are
 * serialised only when truthy and stored as NULL otherwise. A falsy
 * `contextHash` is stored as NULL.
 *
 * @param {string} mode - Value for the `mode` column.
 * @param {*} requestBody - Request payload to serialise.
 * @param {*} [responseBody] - Response payload, if already known.
 * @param {*} [metrics] - Metrics object, if already known.
 * @param {string} [contextHash] - Optional context hash.
 * @returns {number|bigint} The `lastInsertRowid` reported by the insert.
 */
export function saveRequest(mode, requestBody, responseBody, metrics, contextHash) {
    const insert = getDb().prepare(`
    INSERT INTO requests (mode, request_body, response_body, metrics, context_hash)
    VALUES (?, ?, ?, ?, ?)
  `);
    const serializedRequest = JSON.stringify(requestBody);
    const serializedResponse = responseBody ? JSON.stringify(responseBody) : null;
    const serializedMetrics = metrics ? JSON.stringify(metrics) : null;
    const { lastInsertRowid } = insert.run(mode, serializedRequest, serializedResponse, serializedMetrics, contextHash || null);
    return lastInsertRowid;
}
|
|
67
|
+
/**
 * Overwrites the stored response of an existing request row.
 *
 * When `metrics` is truthy the `metrics` column is updated as well;
 * otherwise only `response_body` is touched.
 *
 * @param {number} id - Row id of the request to update.
 * @param {*} responseBody - Response payload to serialise and store.
 * @param {*} [metrics] - Metrics object to serialise and store.
 */
export function updateRequest(id, responseBody, metrics) {
    const connection = getDb();
    if (!metrics) {
        const responseOnly = connection.prepare(`
      UPDATE requests
      SET response_body = ?
      WHERE id = ?
    `);
        responseOnly.run(JSON.stringify(responseBody), id);
        return;
    }
    const responseAndMetrics = connection.prepare(`
      UPDATE requests
      SET response_body = ?, metrics = ?
      WHERE id = ?
    `);
    responseAndMetrics.run(JSON.stringify(responseBody), JSON.stringify(metrics), id);
}
|
|
84
|
+
/**
 * Returns one page of stored requests, newest `created_at` first.
 *
 * JSON columns are parsed back into objects; empty `response_body` and
 * `metrics` columns become `undefined`.
 *
 * @param {number} [limit=50] - Maximum number of rows to return.
 * @param {number} [offset=0] - Number of rows to skip.
 * @returns {Array<object>} Requests with camelCase keys.
 */
export function getRequests(limit = 50, offset = 0) {
    const statement = getDb().prepare(`
    SELECT * FROM requests ORDER BY created_at DESC LIMIT ? OFFSET ?
  `);
    const parseIfPresent = (text) => (text ? JSON.parse(text) : undefined);
    const requests = [];
    for (const record of statement.all(limit, offset)) {
        requests.push({
            id: record.id,
            mode: record.mode,
            createdAt: record.created_at,
            contextHash: record.context_hash,
            requestBody: JSON.parse(record.request_body),
            responseBody: parseIfPresent(record.response_body),
            metrics: parseIfPresent(record.metrics),
        });
    }
    return requests;
}
|
|
99
|
+
/**
 * Looks up a single stored request by row id.
 *
 * @param {number} id - Row id to fetch.
 * @returns {object|null} The request with parsed JSON columns, or `null`
 *   when no row has that id. Empty `response_body` / `metrics` columns are
 *   returned as `undefined`.
 */
export function getRequestById(id) {
    const record = getDb().prepare('SELECT * FROM requests WHERE id = ?').get(id);
    if (!record) {
        return null;
    }
    const { response_body: rawResponse, metrics: rawMetrics } = record;
    const requestBody = JSON.parse(record.request_body);
    const responseBody = rawResponse ? JSON.parse(rawResponse) : undefined;
    const metrics = rawMetrics ? JSON.parse(rawMetrics) : undefined;
    return {
        id: record.id,
        mode: record.mode,
        createdAt: record.created_at,
        contextHash: record.context_hash,
        requestBody,
        responseBody,
        metrics,
    };
}
|
|
114
|
+
/**
 * Deletes the request row with the given id, if any.
 *
 * @param {number} id - Row id to delete.
 */
export function deleteRequest(id) {
    const removeById = getDb().prepare('DELETE FROM requests WHERE id = ?');
    removeById.run(id);
}
|
|
118
|
+
/**
 * Deletes every request whose id appears in `ids` with a single statement.
 * Does nothing when `ids` is empty.
 *
 * @param {number[]} ids - Row ids to delete.
 */
export function deleteRequests(ids) {
    if (ids.length === 0) {
        return;
    }
    const connection = getDb();
    // One bound `?` per id keeps the values out of the SQL text.
    const placeholderList = Array.from({ length: ids.length }, () => '?').join(',');
    const removeMany = connection.prepare(`DELETE FROM requests WHERE id IN (${placeholderList})`);
    removeMany.run(...ids);
}
|
|
125
|
+
/**
 * Stores a setting, replacing any existing value for the same key.
 *
 * @param {string} key - Setting name.
 * @param {string} value - Setting value.
 */
export function saveSetting(key, value) {
    const upsert = getDb().prepare('INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)');
    upsert.run(key, value);
}
|
|
129
|
+
/**
 * Reads a setting value.
 *
 * @param {string} key - Setting name.
 * @returns {string|null} The stored value, or `null` when the key is absent.
 */
export function getSetting(key) {
    const lookup = getDb().prepare('SELECT value FROM settings WHERE key = ?');
    const record = lookup.get(key);
    if (!record) {
        return null;
    }
    return record.value;
}
|
|
134
|
+
/**
 * Closes the cached connection, if one is open, and clears the cache so the
 * next `getDb()` call opens a new one.
 */
export function closeDb() {
    if (!db) {
        return;
    }
    db.close();
    db = null;
}
|
|
140
|
+
const PROFILES_KEY = 'PROXY_PROFILES';
|
|
141
|
+
const ACTIVE_PROFILE_KEY = 'ACTIVE_PROXY_ID';
|
|
142
|
+
/**
 * Builds a profile id from the current time (base 36) followed by a
 * six-character base-36 random suffix.
 *
 * `Math.random().toString(36)` can produce fewer than six fractional digits
 * (for example `0.5` renders as `"0.i"`), so the suffix is right-padded with
 * `'0'` to keep every id the same width.
 *
 * @returns {string} The generated id. Not cryptographically random.
 */
function generateId() {
    const timePart = Date.now().toString(36);
    const randomPart = Math.random().toString(36).slice(2, 8).padEnd(6, '0');
    return timePart + randomPart;
}
|
|
145
|
+
/**
 * Returns the stored proxy profiles.
 *
 * If the stored value is missing, unparsable or not an array, this falls back
 * to a one-time migration: a legacy target URL (the `TARGET_URL` setting or
 * environment variable) is turned into a single "Default" profile, which is
 * persisted and marked active.
 *
 * @returns {Array<object>} The profiles, or an empty array when there are
 *   none and no legacy URL exists.
 */
export function getProxyProfiles() {
    const stored = getSetting(PROFILES_KEY);
    if (stored) {
        try {
            const candidate = JSON.parse(stored);
            if (Array.isArray(candidate)) {
                return candidate;
            }
        }
        catch {
            // fall through to migration
        }
    }
    // Migration: if legacy single settings exist, convert to first profile
    const legacyUrl = getSetting('TARGET_URL') || process.env.TARGET_URL;
    const legacyKey = getSetting('TARGET_API_KEY') || process.env.TARGET_API_KEY;
    if (!legacyUrl) {
        return [];
    }
    const migrated = {
        id: generateId(),
        name: 'Default',
        targetUrl: legacyUrl,
        targetApiKey: legacyKey || '',
    };
    const migratedList = [migrated];
    saveSetting(PROFILES_KEY, JSON.stringify(migratedList));
    saveSetting(ACTIVE_PROFILE_KEY, migrated.id);
    return [migrated];
}
|
|
173
|
+
/**
 * Persists the full profile list as JSON under the profiles setting key.
 *
 * @param {Array<object>} profiles - Profiles to store.
 */
export function saveProxyProfiles(profiles) {
    const serialized = JSON.stringify(profiles);
    saveSetting(PROFILES_KEY, serialized);
}
|
|
176
|
+
/**
 * Returns the profile currently marked active.
 *
 * Falls back to the first profile when no active id is stored or the stored
 * id matches no profile.
 *
 * @returns {object|null} The active profile, or `null` when there are no profiles.
 */
export function getActiveProxyProfile() {
    const profiles = getProxyProfiles();
    if (profiles.length === 0) {
        return null;
    }
    const activeId = getSetting(ACTIVE_PROFILE_KEY);
    const match = activeId
        ? profiles.find((candidate) => candidate.id === activeId)
        : undefined;
    return match ? match : profiles[0];
}
|
|
188
|
+
/**
 * Records `id` as the active profile id. The id is stored as given; it is
 * not checked against the existing profiles.
 *
 * @param {string} id - Profile id to mark active.
 */
export function setActiveProxyProfile(id) {
    const activeProfileId = id;
    saveSetting(ACTIVE_PROFILE_KEY, activeProfileId);
}
|
|
191
|
+
/**
 * Creates a profile with a freshly generated id, appends it to the stored
 * list and returns it. The first profile ever created is also made active.
 *
 * @param {string} name - Display name.
 * @param {string} targetUrl - Upstream URL.
 * @param {string} targetApiKey - Upstream API key.
 * @param {string} [model] - Optional model name.
 * @param {number} [inputTokenPrice] - Optional input token price.
 * @param {number} [outputTokenPrice] - Optional output token price.
 * @param {number} [cachedInputTokenPrice] - Optional cached input token price.
 * @returns {object} The newly created profile.
 */
export function createProxyProfile(name, targetUrl, targetApiKey, model, inputTokenPrice, outputTokenPrice, cachedInputTokenPrice) {
    const existing = getProxyProfiles();
    const created = {
        id: generateId(),
        name,
        targetUrl,
        targetApiKey,
        model,
        inputTokenPrice,
        outputTokenPrice,
        cachedInputTokenPrice,
    };
    existing.push(created);
    saveProxyProfiles(existing);
    // If first profile, auto-activate it
    const isOnlyProfile = existing.length === 1;
    if (isOnlyProfile) {
        setActiveProxyProfile(created.id);
    }
    return created;
}
|
|
202
|
+
/**
 * Applies a partial update to the profile with the given id and persists the
 * whole list.
 *
 * The profile's `id` is never changed: an `id` key inside `updates` is
 * ignored, so the stored active-profile id cannot be orphaned by an update.
 *
 * @param {string} id - Id of the profile to update.
 * @param {object} updates - Fields to merge over the existing profile.
 * @returns {object|null} The updated profile, or `null` when no profile has
 *   that id (nothing is written in that case).
 */
export function updateProxyProfile(id, updates) {
    const profiles = getProxyProfiles();
    const idx = profiles.findIndex((p) => p.id === id);
    if (idx === -1) {
        return null;
    }
    const current = profiles[idx];
    // `id` goes last so it wins over any `id` carried in `updates`.
    const updated = { ...current, ...updates, id: current.id };
    profiles[idx] = updated;
    saveProxyProfiles(profiles);
    return updated;
}
|
|
211
|
+
/**
 * Removes the profile with the given id and persists the remaining list.
 *
 * If the removed profile was the active one, the first remaining profile
 * becomes active; when none remain, the active id is reset to an empty string.
 *
 * @param {string} id - Id of the profile to delete.
 */
export function deleteProxyProfile(id) {
    const remaining = getProxyProfiles().filter((candidate) => candidate.id !== id);
    saveProxyProfiles(remaining);
    // If deleted the active one, reassign
    const wasActive = getSetting(ACTIVE_PROFILE_KEY) === id;
    if (!wasActive) {
        return;
    }
    if (remaining.length > 0) {
        setActiveProxyProfile(remaining[0].id);
        return;
    }
    saveSetting(ACTIVE_PROFILE_KEY, '');
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Metrics } from '../../shared/types.js';
|
|
2
|
+
export interface ProcessedStream {
|
|
3
|
+
clientStream: ReadableStream<Uint8Array>;
|
|
4
|
+
metricsPromise: Promise<{
|
|
5
|
+
metrics: Metrics;
|
|
6
|
+
responseBody: any;
|
|
7
|
+
}>;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Extracts a normalized token breakdown from an upstream usage payload.
|
|
11
|
+
* Reads values directly from the response body's usage object (no inference).
|
|
12
|
+
* Compatible with OpenAI Chat Completions, OpenAI Responses API, and Anthropic-style usage.
|
|
13
|
+
*/
|
|
14
|
+
export declare function extractTokenBreakdown(usage: any): {
|
|
15
|
+
promptTokens?: number;
|
|
16
|
+
completionTokens?: number;
|
|
17
|
+
totalTokens?: number;
|
|
18
|
+
cachedTokens?: number;
|
|
19
|
+
reasoningTokens?: number;
|
|
20
|
+
outputTextTokens?: number;
|
|
21
|
+
};
|
|
22
|
+
/**
|
|
23
|
+
* Splits an LLM SSE stream: one untouched for the client, one for metrics/capture.
|
|
24
|
+
*/
|
|
25
|
+
export declare function processStream(response: Response, requestStartTime: number, requestBody?: any, onChunk?: (body: any) => void): ProcessedStream;
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import { EventSourceParserStream } from 'eventsource-parser/stream';
|
|
2
|
+
/**
 * Normalises an upstream `usage` object into a camelCase token breakdown.
 *
 * Values are read from the payload as-is. The only derived value is
 * `totalTokens`, which is the sum of prompt and completion tokens when the
 * payload has no `total_tokens` but has both parts. Both the
 * `prompt_*` / `completion_*` and the `input_*` / `output_*` field names are
 * recognised, as is `cache_read_input_tokens` for cached tokens.
 *
 * @param {*} usage - Usage payload from a response body.
 * @returns {object} An object with `promptTokens`, `completionTokens`,
 *   `totalTokens`, `cachedTokens`, `reasoningTokens` and `outputTextTokens`
 *   (each a number or `undefined`), or `{}` when `usage` is not an object.
 */
export function extractTokenBreakdown(usage) {
    const isUsableObject = Boolean(usage) && typeof usage === 'object';
    if (!isUsableObject) {
        return {};
    }
    // Missing values stay `undefined`; present ones are coerced to numbers.
    const numberOrUndefined = (value) => (value != null ? Number(value) : undefined);
    const inputDetails = usage.prompt_tokens_details || usage.input_tokens_details || {};
    const outputDetails = usage.completion_tokens_details || usage.output_tokens_details || {};
    const prompt = usage.prompt_tokens ?? usage.input_tokens;
    const completion = usage.completion_tokens ?? usage.output_tokens;
    let total = usage.total_tokens;
    if (total == null) {
        total = prompt != null && completion != null ? prompt + completion : undefined;
    }
    return {
        promptTokens: numberOrUndefined(prompt),
        completionTokens: numberOrUndefined(completion),
        totalTokens: numberOrUndefined(total),
        cachedTokens: numberOrUndefined(inputDetails.cached_tokens ?? usage.cache_read_input_tokens),
        reasoningTokens: numberOrUndefined(outputDetails.reasoning_tokens),
        outputTextTokens: numberOrUndefined(outputDetails.text_tokens),
    };
}
|
|
28
|
+
/**
 * Builds the initial accumulated tool-call entry from a streamed tool-call delta.
 */
function toToolCallEntry(tc) {
    return {
        index: tc.index ?? 0,
        id: tc.id || '',
        type: tc.type || 'function',
        function: {
            name: tc.function?.name || '',
            arguments: tc.function?.arguments || '',
        },
    };
}
/**
 * Returns the accumulated Chat Completions message on `body`, creating an
 * empty one from `chunk` when the first parsed chunk carried no
 * `choices[0].delta` (for example an empty `choices` array). Returns
 * `undefined` when `chunk` has no Chat Completions delta either.
 */
function ensureChatMessage(body, chunk) {
    const existing = body.choices?.[0]?.message;
    if (existing) {
        return existing;
    }
    const chunkChoice = chunk.choices?.[0];
    if (!chunkChoice?.delta) {
        return undefined;
    }
    const { delta, ...choiceFields } = chunkChoice;
    const message = { role: delta.role || 'assistant', content: '' };
    const otherChoices = Array.isArray(body.choices) ? body.choices.slice(1) : [];
    body.choices = [{ ...body.choices?.[0], ...choiceFields, message }, ...otherChoices];
    return message;
}
/**
 * Splits an LLM SSE stream: one untouched for the client, one for metrics/capture.
 *
 * The internal copy is parsed event by event to time the stream and to rebuild
 * a single response body from the deltas. Events whose data is not valid JSON,
 * or whose handling throws, are skipped.
 *
 * @param {Response} response - Upstream response whose `body` is the SSE stream.
 * @param {number} requestStartTime - Start timestamp; it is compared against
 *   `performance.now()`, so it should come from the same clock.
 * @param {*} [requestBody] - Original request; when the stream reports no
 *   prompt token count, `messages` is used for a rough estimate
 *   (serialised length / 4).
 * @param {(body: any) => void} [onChunk] - Called with the accumulated body
 *   after each parsed event.
 * @returns {{clientStream: ReadableStream, metricsPromise: Promise<{metrics: object, responseBody: any}>}}
 *   The pass-through stream and a promise that settles once the internal copy
 *   has been fully read.
 */
export function processStream(response, requestStartTime, requestBody, onChunk) {
    // Split the stream: one for the client, one for our internal processing
    const [clientStream, internalStream] = response.body.tee();
    const metricsPromise = (async () => {
        let firstTokenTime = null;
        let lastTokenTime = requestStartTime;
        const interTokenLatencies = [];
        let tokenCount = 0;
        let fullResponseBody = null;
        const parserStream = internalStream
            .pipeThrough(new TextDecoderStream())
            .pipeThrough(new EventSourceParserStream());
        let usageTokenCount = null;
        let promptTokenCount = null;
        // Web ReadableStreams are async-iterable in Node
        for await (const event of parserStream) {
            if (event.data === '[DONE]') {
                continue;
            }
            try {
                const data = JSON.parse(event.data);
                const now = performance.now();
                const eventType = event.event;
                // Check for usage info in the chunk (OpenAI spec and others).
                // Chat Completions usage is often in the last chunk or every chunk with stream_options.
                // Responses API usage is nested under `response` in the response.completed event.
                const usage = data.usage || data.response?.usage || data.statistics || data.stats;
                if (usage) {
                    if (usage.completion_tokens) {
                        usageTokenCount = usage.completion_tokens;
                    }
                    if (usage.prompt_tokens) {
                        promptTokenCount = usage.prompt_tokens;
                    }
                    if (usage.input_tokens) {
                        promptTokenCount = usage.input_tokens; // Anthropic/others
                    }
                    if (usage.output_tokens) {
                        usageTokenCount = usage.output_tokens;
                    }
                }
                // Extract content based on API type
                let content = '';
                let reasoningContent = '';
                let deltaToolCalls = [];
                if (eventType === 'response.output_text.delta') {
                    content = data.delta || '';
                }
                else if (eventType === 'response.reasoning_summary_text.delta') {
                    // Some models send summary or reasoning in separate delta events
                    reasoningContent = data.delta || '';
                }
                else {
                    // Fallback to Chat Completions format
                    const delta = data.choices?.[0]?.delta || data.delta || {};
                    // Keep reasoning_content separate from content to preserve the distinction
                    reasoningContent = delta.reasoning_content || '';
                    content = delta.content || '';
                    deltaToolCalls = Array.isArray(delta.tool_calls) ? delta.tool_calls : [];
                }
                // Every event that carries some output counts as one "token" for timing purposes.
                if (content || reasoningContent || deltaToolCalls.length > 0) {
                    tokenCount++;
                    if (firstTokenTime === null) {
                        firstTokenTime = now;
                    }
                    else {
                        interTokenLatencies.push(now - lastTokenTime);
                    }
                    lastTokenTime = now;
                }
                // Reconstruct the full response body
                if (!fullResponseBody) {
                    fullResponseBody = { ...data };
                    // Initialize for Chat Completions
                    const choice = fullResponseBody.choices?.[0];
                    if (choice?.delta) {
                        choice.message = {
                            role: choice.delta.role || 'assistant',
                            content: choice.delta.content || '',
                        };
                        if (choice.delta.reasoning_content) {
                            choice.message.reasoning_content = choice.delta.reasoning_content;
                        }
                        if (deltaToolCalls.length > 0) {
                            choice.message.tool_calls = deltaToolCalls.map(toToolCallEntry);
                        }
                        delete choice.delta;
                    }
                    // Initialize for Responses API
                    if (eventType?.startsWith('response.')) {
                        fullResponseBody.output = fullResponseBody.output || [];
                        if (eventType === 'response.output_text.delta' && content) {
                            fullResponseBody.output.push({
                                type: 'message',
                                role: 'assistant',
                                content: content,
                            });
                        }
                        else if (eventType === 'response.reasoning_summary_text.delta' && reasoningContent) {
                            fullResponseBody.output.push({
                                type: 'reasoning',
                                role: 'assistant',
                                summary: [{ type: 'summary_text', text: reasoningContent }],
                            });
                        }
                    }
                }
                else if (eventType === 'response.output_text.delta') {
                    // Update existing response body
                    const outputItem = fullResponseBody.output?.find((i) => i.type === 'message');
                    if (outputItem) {
                        outputItem.content += content;
                    }
                    else {
                        fullResponseBody.output = fullResponseBody.output || [];
                        fullResponseBody.output.push({
                            type: 'message',
                            role: 'assistant',
                            content: content,
                        });
                    }
                }
                else if (eventType === 'response.reasoning_summary_text.delta') {
                    const reasoningItem = fullResponseBody.output?.find((i) => i.type === 'reasoning');
                    if (reasoningItem) {
                        reasoningItem.summary[0].text += reasoningContent;
                    }
                    else {
                        fullResponseBody.output = fullResponseBody.output || [];
                        fullResponseBody.output.push({
                            type: 'reasoning',
                            role: 'assistant',
                            summary: [{ type: 'summary_text', text: reasoningContent }],
                        });
                    }
                }
                else if (eventType === 'response.completed') {
                    // Take the final response object as the ground truth
                    fullResponseBody = { ...data };
                }
                else {
                    // Chat Completions delta: the message is created here if the
                    // first chunk had no choice to initialise it from.
                    const msg = ensureChatMessage(fullResponseBody, data);
                    if (msg) {
                        if (content) {
                            msg.content += content;
                        }
                        if (reasoningContent) {
                            msg.reasoning_content = (msg.reasoning_content || '') + reasoningContent;
                        }
                        if (deltaToolCalls.length > 0) {
                            msg.tool_calls = msg.tool_calls || [];
                            for (const tc of deltaToolCalls) {
                                const existing = msg.tool_calls.find((t) => t.index === (tc.index ?? 0));
                                if (existing) {
                                    if (tc.id) {
                                        existing.id = tc.id;
                                    }
                                    if (tc.type) {
                                        existing.type = tc.type;
                                    }
                                    if (tc.function?.name) {
                                        existing.function.name = tc.function.name;
                                    }
                                    if (tc.function?.arguments) {
                                        // Arguments arrive as string fragments to concatenate.
                                        existing.function.arguments += tc.function.arguments;
                                    }
                                }
                                else {
                                    msg.tool_calls.push(toToolCallEntry(tc));
                                }
                            }
                        }
                    }
                }
                // Keep the latest usage info on the accumulated body, at top level
                if (usage) {
                    fullResponseBody.usage = { ...fullResponseBody.usage, ...usage };
                }
                if (onChunk && fullResponseBody) {
                    onChunk(fullResponseBody);
                }
            }
            catch (e) {
                // Ignore malformed chunks
            }
        }
        const now = performance.now();
        const totalLatency = now - requestStartTime;
        // With no output at all, time-to-first-token falls back to the total latency.
        const ttft = firstTokenTime ? firstTokenTime - requestStartTime : totalLatency;
        const generationTimeMs = totalLatency - ttft;
        // Prefer the upstream-reported completion count over the number of content events.
        const finalTokenCount = usageTokenCount ?? tokenCount;
        // Estimate prompt token count if not provided by response
        if (promptTokenCount === null && requestBody?.messages) {
            const promptText = JSON.stringify(requestBody.messages);
            promptTokenCount = Math.ceil(promptText.length / 4); // Basic estimation
        }
        const prefillSpeed = (promptTokenCount && ttft > 0) ? (promptTokenCount / (ttft / 1000)) : 0;
        return {
            metrics: {
                ttft,
                totalLatency,
                tokensPerSecond: generationTimeMs > 0 ? (finalTokenCount / (generationTimeMs / 1000)) : 0,
                promptPrefillSpeed: prefillSpeed,
                tokenCount: finalTokenCount,
                interTokenLatencies,
                completedAt: new Date().toISOString(),
            },
            responseBody: fullResponseBody,
        };
    })();
    return { clientStream, metricsPromise };
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
export interface Metrics {
|
|
2
|
+
ttft: number;
|
|
3
|
+
totalLatency: number;
|
|
4
|
+
tokensPerSecond: number;
|
|
5
|
+
promptPrefillSpeed: number;
|
|
6
|
+
tokenCount: number;
|
|
7
|
+
interTokenLatencies: number[];
|
|
8
|
+
completedAt: string;
|
|
9
|
+
}
|
|
10
|
+
export type RequestMode = 'chat' | 'observe' | 'benchmark';
|
|
11
|
+
export interface StoredRequest {
|
|
12
|
+
id: number;
|
|
13
|
+
mode: RequestMode;
|
|
14
|
+
createdAt: string;
|
|
15
|
+
contextHash: string | null;
|
|
16
|
+
requestBody: any;
|
|
17
|
+
responseBody?: any;
|
|
18
|
+
metrics?: Metrics;
|
|
19
|
+
}
|
|
20
|
+
export interface Settings {
|
|
21
|
+
targetUrl: string;
|
|
22
|
+
targetApiKey: string;
|
|
23
|
+
}
|
|
24
|
+
export interface ProxyProfile {
|
|
25
|
+
id: string;
|
|
26
|
+
name: string;
|
|
27
|
+
targetUrl: string;
|
|
28
|
+
targetApiKey: string;
|
|
29
|
+
model?: string;
|
|
30
|
+
inputTokenPrice?: number;
|
|
31
|
+
outputTokenPrice?: number;
|
|
32
|
+
cachedInputTokenPrice?: number;
|
|
33
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ngvuhuy/promptglass-server",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "",
|
|
5
|
+
"main": "index.ts",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"dev": "tsx watch index.ts",
|
|
9
|
+
"build": "tsc",
|
|
10
|
+
"start": "node dist/index.js"
|
|
11
|
+
},
|
|
12
|
+
"keywords": [],
|
|
13
|
+
"author": "",
|
|
14
|
+
"license": "ISC",
|
|
15
|
+
"packageManager": "pnpm@10.28.2",
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"axios": "^1.13.6",
|
|
18
|
+
"cors": "^2.8.6",
|
|
19
|
+
"dotenv": "^17.3.1",
|
|
20
|
+
"eventsource-parser": "^3.0.6",
|
|
21
|
+
"express": "^5.2.1",
|
|
22
|
+
"zod": "^4.3.6"
|
|
23
|
+
},
|
|
24
|
+
"devDependencies": {
|
|
25
|
+
"@types/cors": "^2.8.19",
|
|
26
|
+
"@types/express": "^5.0.6",
|
|
27
|
+
"@types/node": "^25.5.0",
|
|
28
|
+
"tsx": "^4.21.0",
|
|
29
|
+
"typescript": "^5.9.3"
|
|
30
|
+
}
|
|
31
|
+
}
|
package/shared/types.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
export interface Metrics {
|
|
2
|
+
ttft: number; // Time to first token (ms)
|
|
3
|
+
totalLatency: number; // Total request-response time (ms)
|
|
4
|
+
tokensPerSecond: number;
|
|
5
|
+
promptPrefillSpeed: number; // Tokens per second reading context window
|
|
6
|
+
tokenCount: number;
|
|
7
|
+
interTokenLatencies: number[]; // ms between tokens
|
|
8
|
+
completedAt: string;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
export type RequestMode = 'chat' | 'observe' | 'benchmark';
|
|
12
|
+
|
|
13
|
+
export interface StoredRequest {
|
|
14
|
+
id: number;
|
|
15
|
+
mode: RequestMode;
|
|
16
|
+
createdAt: string;
|
|
17
|
+
contextHash: string | null;
|
|
18
|
+
requestBody: any;
|
|
19
|
+
responseBody?: any;
|
|
20
|
+
metrics?: Metrics;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export interface Settings {
|
|
24
|
+
targetUrl: string;
|
|
25
|
+
targetApiKey: string;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface ProxyProfile {
|
|
29
|
+
id: string;
|
|
30
|
+
name: string;
|
|
31
|
+
targetUrl: string;
|
|
32
|
+
targetApiKey: string;
|
|
33
|
+
model?: string;
|
|
34
|
+
inputTokenPrice?: number; // USD per 1M input tokens
|
|
35
|
+
outputTokenPrice?: number; // USD per 1M output tokens
|
|
36
|
+
cachedInputTokenPrice?: number; // USD per 1M cached input tokens
|
|
37
|
+
}
|