@relayplane/proxy 0.2.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/streaming.ts DELETED
@@ -1,331 +0,0 @@
1
- /**
2
- * Streaming Support for RelayPlane Proxy
3
- *
4
- * Provides SSE (Server-Sent Events) streaming for LLM responses
5
- * and real-time updates.
6
- *
7
- * @packageDocumentation
8
- */
9
-
10
- import type { ServerResponse } from 'node:http';
11
-
/**
 * One Server-Sent Events frame, in the shape accepted by `SSEWriter.write`.
 */
export interface SSEMessage {
  /** Emitted as the `event:` field when set to a non-empty string. */
  event?: string;
  /**
   * Payload for the `data:` field(s). Strings are sent verbatim; any other
   * value is JSON-serialized by the writer.
   */
  data: unknown;
  /** Emitted as the `id:` field when set to a non-empty string. */
  id?: string;
  /** Emitted as the `retry:` field whenever it is not `undefined`. */
  retry?: number;
}
21
-
/**
 * Writes Server-Sent Events (SSE) frames to a Node.js HTTP response.
 *
 * Constructing a writer immediately sends a 200 response with the SSE
 * headers (including `Access-Control-Allow-Origin: *`) and subscribes to the
 * response's `close` event so that later writes are refused once the
 * connection is gone.
 */
export class SSEWriter {
  /** Every line terminator an SSE parser recognises: CRLF, lone CR, lone LF. */
  private static readonly LINE_BREAK = /\r\n|\r|\n/;

  private readonly response: ServerResponse;
  private closed = false;

  /**
   * @param response - Response to stream on. Its status line and headers are
   *   written here, so the caller must not have sent headers already.
   */
  constructor(response: ServerResponse) {
    this.response = response;

    // Set SSE headers
    response.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'Access-Control-Allow-Origin': '*',
    });

    // Handle client disconnect
    response.on('close', () => {
      this.closed = true;
    });
  }

  /**
   * Write one SSE frame.
   *
   * `event` and `id` are emitted only when non-empty and have any CR/LF
   * removed, so a value can never start a new field. `data` is sent verbatim
   * when it is a string and JSON-serialized otherwise; values that
   * `JSON.stringify` cannot represent (e.g. `undefined`) are sent as `null`.
   * Multi-line data is split on CRLF, CR or LF and every line gets its own
   * `data:` prefix.
   *
   * @returns `true` if the frame was handed to the response, `false` if the
   *   stream is closed or the write threw.
   * @throws TypeError if `data` cannot be serialized by `JSON.stringify`
   *   (for example a circular structure).
   */
  write(message: SSEMessage): boolean {
    if (!this.canWrite()) return false;

    const lines: string[] = [];

    if (message.event) {
      lines.push(`event: ${SSEWriter.singleLine(message.event)}`);
    }

    if (message.id) {
      lines.push(`id: ${SSEWriter.singleLine(message.id)}`);
    }

    if (message.retry !== undefined) {
      lines.push(`retry: ${message.retry}`);
    }

    // JSON.stringify returns undefined (not a string) for undefined, functions
    // and symbols; fall back to a literal that clients can still JSON-parse.
    const payload =
      typeof message.data === 'string'
        ? message.data
        : (JSON.stringify(message.data) ?? 'null');

    for (const line of payload.split(SSEWriter.LINE_BREAK)) {
      lines.push(`data: ${line}`);
    }

    // A blank line terminates the frame.
    return this.send(`${lines.join('\n')}\n\n`);
  }

  /**
   * Write a data-only message (convenience method)
   */
  writeData(data: unknown): boolean {
    return this.write({ data });
  }

  /**
   * Send a comment frame (used as a keep-alive). Each line of `text` is
   * prefixed with `: ` so embedded line breaks stay inside the comment.
   *
   * @returns `true` if written, `false` if the stream is closed or the write threw.
   */
  comment(text: string): boolean {
    if (!this.canWrite()) return false;

    const body = text
      .split(SSEWriter.LINE_BREAK)
      .map((line) => `: ${line}`)
      .join('\n');
    return this.send(`${body}\n\n`);
  }

  /**
   * Close the stream: sends a final `[DONE]` data frame, ends the response
   * and marks the writer closed. Does nothing if already closed.
   */
  close(): void {
    if (this.closed) return;

    this.write({ data: '[DONE]' });
    this.response.end();
    this.closed = true;
  }

  /**
   * Check if stream is still open
   */
  isOpen(): boolean {
    return !this.closed;
  }

  /**
   * True when a write may be attempted. A response that was ended or
   * destroyed by other code is detected here, because writing to it would
   * not throw synchronously and so would never reach the catch in `send`.
   */
  private canWrite(): boolean {
    if (this.closed) return false;
    if (this.response.writableEnded || this.response.destroyed) {
      this.closed = true;
      return false;
    }
    return true;
  }

  /** Hands a finished frame to the response; a throwing write closes the writer. */
  private send(frame: string): boolean {
    try {
      this.response.write(frame);
      return true;
    } catch {
      this.closed = true;
      return false;
    }
  }

  /** Removes CR/LF so a field value cannot spill onto a new line. */
  private static singleLine(value: string): string {
    return value.replace(/[\r\n]/g, '');
  }
}
126
-
/**
 * Factory for `SSEWriter`.
 *
 * Equivalent to `new SSEWriter(response)`; the SSE response headers are
 * therefore sent as part of this call.
 *
 * @param response - HTTP response that will carry the event stream.
 * @returns A writer bound to `response`.
 */
export function createSSEWriter(response: ServerResponse): SSEWriter {
  const writer = new SSEWriter(response);
  return writer;
}
133
-
/**
 * Extracts the payload of an SSE `data:` line.
 *
 * Accepts both `data: value` and `data:value` and tolerates a trailing CR
 * left over from CRLF line endings.
 *
 * @returns The payload text, or `undefined` when the line is not a data line.
 */
function extractSseData(line: string): string | undefined {
  const text = line.endsWith('\r') ? line.slice(0, -1) : line;
  if (!text.startsWith('data:')) return undefined;

  const value = text.slice('data:'.length);
  // A single leading space after the colon is part of the field syntax.
  return value.startsWith(' ') ? value.slice(1) : value;
}

/**
 * POSTs `request` to a provider and relays its SSE stream to `writer`.
 *
 * Every `data:` line whose payload is valid JSON is appended to `chunks`,
 * reported through `onChunk` and forwarded to the client as a data frame.
 * `[DONE]` markers and payloads that are not valid JSON are skipped.
 *
 * Failure handling:
 * - non-2xx status, missing body, or any thrown error: `onError` is called,
 *   an `error` event is written to the client and the writer is closed;
 * - client disconnect (a forwarded write returns `false`): reading stops and
 *   the function returns without closing the writer.
 * In every case where reading stops early the upstream reader is cancelled
 * so the provider connection is not left streaming.
 *
 * An exception thrown by `onChunk` is treated like any other stream error
 * rather than being silently ignored.
 *
 * @param providerUrl - Provider endpoint to POST to.
 * @param request - Request body; serialized with `JSON.stringify`.
 * @param headers - Request headers; `Accept: text/event-stream` is added.
 * @param writer - Destination for the relayed frames.
 * @param callbacks - Optional hooks: `onChunk` per parsed chunk, `onComplete`
 *   with all chunks after a full stream, `onError` on failure.
 * @returns `success` (true only when the stream was relayed to the end), the
 *   parsed `chunks`, and `ttftMs`, the time from the start of the call to the
 *   first body chunk (undefined if none arrived).
 */
export async function streamProviderResponse(
  providerUrl: string,
  request: unknown,
  headers: Record<string, string>,
  writer: SSEWriter,
  callbacks?: {
    onChunk?: (chunk: unknown) => void;
    onComplete?: (fullResponse: unknown) => void;
    onError?: (error: Error) => void;
  }
): Promise<{ success: boolean; chunks: unknown[]; ttftMs?: number }> {
  const chunks: unknown[] = [];
  let ttftMs: number | undefined;
  const startTime = Date.now();

  // Handles one line of the provider stream. Returns false only when the
  // client can no longer be written to.
  const forwardLine = (line: string): boolean => {
    const payload = extractSseData(line);
    if (payload === undefined || payload === '' || payload === '[DONE]') {
      return true;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(payload);
    } catch {
      // Invalid JSON, skip
      return true;
    }

    chunks.push(parsed);
    callbacks?.onChunk?.(parsed);
    return writer.write({ data: parsed });
  };

  try {
    const response = await fetch(providerUrl, {
      method: 'POST',
      headers: {
        ...headers,
        'Accept': 'text/event-stream',
      },
      body: JSON.stringify(request),
    });

    if (!response.ok) {
      const error = new Error(`Provider returned ${response.status}`);
      callbacks?.onError?.(error);
      writer.write({
        event: 'error',
        data: { error: { message: error.message, status: response.status } },
      });
      writer.close();
      // Release the unread error body so the connection can be reused or closed.
      await response.body?.cancel().catch(() => undefined);
      return { success: false, chunks };
    }

    if (!response.body) {
      const error = new Error('No response body');
      callbacks?.onError?.(error);
      writer.write({
        event: 'error',
        data: { error: { message: error.message } },
      });
      writer.close();
      return { success: false, chunks };
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let drained = false;

    try {
      for (;;) {
        const { done, value } = await reader.read();

        if (done) break;

        if (ttftMs === undefined) {
          ttftMs = Date.now() - startTime;
        }

        buffer += decoder.decode(value, { stream: true });

        // Everything up to the last newline is complete; keep the rest buffered.
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? '';

        for (const line of lines) {
          if (!forwardLine(line)) {
            // Client disconnected
            return { success: false, chunks, ttftMs };
          }
        }
      }
      drained = true;
    } finally {
      if (!drained) {
        // Early exit or read error: stop the provider from streaming into the void.
        await reader.cancel().catch(() => undefined);
      }
    }

    // Flush bytes the decoder was still holding, then handle a final
    // unterminated line. The write result is ignored here: the stream is
    // complete either way.
    buffer += decoder.decode();
    forwardLine(buffer);

    callbacks?.onComplete?.(chunks);
    writer.close();

    return { success: true, chunks, ttftMs };
  } catch (error) {
    callbacks?.onError?.(error instanceof Error ? error : new Error(String(error)));
    writer.write({
      event: 'error',
      data: { error: { message: error instanceof Error ? error.message : 'Stream error' } },
    });
    writer.close();
    return { success: false, chunks, ttftMs };
  }
}
253
-
/** True for any non-null value whose `typeof` is `'object'` (arrays included). */
function isObjectLike(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Aggregate streaming chunks into a complete response.
 *
 * Chunks that are not objects are ignored. From each remaining chunk:
 * - `model`: the last non-empty string wins;
 * - `choices[0].delta.content`: string values are concatenated in order;
 * - `choices[0].finish_reason`: the last non-empty string wins;
 * - `usage`: taken when `prompt_tokens` and `completion_tokens` are both
 *   numbers (last such chunk wins). `total_tokens` is used only if it is a
 *   number; otherwise it is computed as prompt + completion.
 * Only the first choice of each chunk is considered, and a malformed choice
 * (e.g. `null`) is skipped instead of throwing.
 *
 * @param chunks - Parsed stream chunks, in arrival order.
 * @returns The concatenated content plus whichever metadata was found.
 */
export function aggregateStreamingResponse(chunks: unknown[]): {
  content: string;
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  model?: string;
  finish_reason?: string;
} {
  let content = '';
  let usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number } | undefined;
  let model: string | undefined;
  let finish_reason: string | undefined;

  for (const chunk of chunks) {
    if (!isObjectLike(chunk)) continue;

    // Extract model
    if (typeof chunk.model === 'string' && chunk.model !== '') {
      model = chunk.model;
    }

    // Extract content from the first choice
    const choice: unknown = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
    if (isObjectLike(choice)) {
      // Delta content (streaming)
      const delta = choice.delta;
      if (isObjectLike(delta) && typeof delta.content === 'string') {
        content += delta.content;
      }

      // Finish reason
      const reason = choice.finish_reason;
      if (typeof reason === 'string' && reason !== '') {
        finish_reason = reason;
      }
    }

    // Extract usage (usually in last chunk)
    const reported = chunk.usage;
    if (isObjectLike(reported)) {
      const { prompt_tokens, completion_tokens, total_tokens } = reported;
      if (typeof prompt_tokens === 'number' && typeof completion_tokens === 'number') {
        usage = {
          prompt_tokens,
          completion_tokens,
          total_tokens:
            typeof total_tokens === 'number' ? total_tokens : prompt_tokens + completion_tokens,
        };
      }
    }
  }

  return { content, usage, model, finish_reason };
}
314
-
/**
 * Periodically sends a `ping` comment on `writer` so a long-running stream
 * is not idle.
 *
 * On every tick the writer is checked first: once it reports that it is no
 * longer open the interval clears itself and nothing more is sent.
 *
 * @param writer - Stream to keep alive.
 * @param intervalMs - Delay between pings in milliseconds (default 15000).
 * @returns A function that stops the pings.
 */
export function startKeepAlive(
  writer: SSEWriter,
  intervalMs = 15000
): () => void {
  const stop = (): void => clearInterval(timer);

  const tick = (): void => {
    if (writer.isOpen()) {
      writer.comment('ping');
      return;
    }
    stop();
  };

  const timer = setInterval(tick, intervalMs);
  return stop;
}
package/src/telemetry.ts DELETED
@@ -1,343 +0,0 @@
1
- /**
2
- * RelayPlane Proxy Telemetry
3
- *
4
- * Anonymized telemetry collection for improving model routing.
5
- *
6
- * What we collect (exact schema):
7
- * - device_id: anonymous random ID
8
- * - task_type: inferred from token patterns, NOT prompt content
9
- * - model: which model was used
10
- * - tokens_in/out: token counts
11
- * - latency_ms: response time
12
- * - success: whether request succeeded
13
- * - cost_usd: estimated cost
14
- *
15
- * What we NEVER collect:
16
- * - Prompts or responses
17
- * - File paths or contents
18
- * - Anything that could identify you or your project
19
- *
20
- * @packageDocumentation
21
- */
22
-
23
- import * as fs from 'fs';
24
- import * as path from 'path';
25
- import { getDeviceId, isTelemetryEnabled, getConfigDir } from './config.js';
26
-
/**
 * Shape of one recorded telemetry event (matches PITCH-v2.md).
 *
 * `device_id` and `timestamp` are filled in by `recordTelemetry`; callers
 * supply the remaining fields.
 */
export interface TelemetryEvent {
  /** Anonymous identifier of the device, as returned by `getDeviceId()`. */
  device_id: string;

  /** Task category inferred from token patterns, not from prompt content. */
  task_type: string;

  /** Name of the model that handled the request. */
  model: string;

  /** Number of input tokens. */
  tokens_in: number;

  /** Number of output tokens. */
  tokens_out: number;

  /** How long the request took, in milliseconds. */
  latency_ms: number;

  /** `true` when the request succeeded. */
  success: boolean;

  /** Estimated cost of the request in USD. */
  cost_usd: number;

  /** When the event was recorded, as an ISO 8601 string. */
  timestamp: string;
}
58
-
/**
 * Location of the local telemetry log: `telemetry.jsonl` inside the config
 * directory. Events are stored one JSON document per line. The path is
 * resolved once, when this module is loaded.
 */
const TELEMETRY_FILE = path.join(getConfigDir(), 'telemetry.jsonl');

// Runtime switches, both off by default.
let auditMode = false;
let offlineMode = false;

// Events captured in memory while audit mode is on.
let auditBuffer: TelemetryEvent[] = [];
68
-
/**
 * Classifies a request from its token counts and tool usage only; prompt
 * content is never inspected.
 *
 * Rules are checked in the order listed and the first match wins:
 * tool use, long context (> 10000 input tokens), generation (output/input
 * ratio > 5), classification (ratio < 0.3 and < 100 output tokens), quick
 * task (both counts < 500), code review (> 2000 in and > 500 out), content
 * generation (> 1000 out). Anything else is `'general'`.
 *
 * @param inputTokens - Input token count.
 * @param outputTokens - Output token count.
 * @param model - Model name; accepted but not used by the current rules.
 * @param hasTools - Whether the request used tools.
 * @returns The task type label.
 */
export function inferTaskType(
  inputTokens: number,
  outputTokens: number,
  model: string,
  hasTools: boolean = false
): string {
  // Output tokens per input token; the divisor is floored at 1 so an empty
  // prompt does not divide by zero.
  const outputPerInput = outputTokens / Math.max(inputTokens, 1);

  const rules: ReadonlyArray<readonly [boolean, string]> = [
    [hasTools, 'tool_use'],
    [inputTokens > 10000, 'long_context'],
    [outputPerInput > 5, 'generation'],
    [outputPerInput < 0.3 && outputTokens < 100, 'classification'],
    [inputTokens < 500 && outputTokens < 500, 'quick_task'],
    [inputTokens > 2000 && outputTokens > 500, 'code_review'],
    [outputTokens > 1000, 'content_generation'],
  ];

  for (const [matched, label] of rules) {
    if (matched) return label;
  }
  return 'general';
}
112
-
/**
 * Hard-coded price table in USD per 1M tokens, keyed by exact model name.
 * The `default` entry is used for any model that is not listed.
 * NOTE(review): these are a static snapshot (the original comment dates them
 * to 2024) — verify against current provider price lists before relying on them.
 */
const MODEL_PRICING: Record<string, { input: number; output: number }> = {
  // Anthropic
  'claude-opus-4-20250514': { input: 15.0, output: 75.0 },
  'claude-sonnet-4-20250514': { input: 3.0, output: 15.0 },
  'claude-3-5-sonnet-20241022': { input: 3.0, output: 15.0 },
  'claude-3-5-sonnet-20240620': { input: 3.0, output: 15.0 },
  'claude-3-5-haiku-20241022': { input: 0.8, output: 4.0 },
  'claude-3-opus-20240229': { input: 15.0, output: 75.0 },
  'claude-3-sonnet-20240229': { input: 3.0, output: 15.0 },
  'claude-3-haiku-20240307': { input: 0.25, output: 1.25 },

  // OpenAI
  'gpt-4o': { input: 2.5, output: 10.0 },
  'gpt-4o-mini': { input: 0.15, output: 0.60 },
  'gpt-4-turbo': { input: 10.0, output: 30.0 },
  'gpt-4': { input: 30.0, output: 60.0 },
  'gpt-3.5-turbo': { input: 0.5, output: 1.5 },

  // Default for unknown models
  'default': { input: 1.0, output: 3.0 },
};

/**
 * Estimate the cost of a request from its model and token counts.
 *
 * Only the table's own entries count as known models: a name such as
 * `constructor` or `__proto__`, which a plain object lookup would resolve
 * through the prototype chain, falls back to the default price like any
 * other unknown model.
 *
 * @param model - Exact model name to price.
 * @param inputTokens - Number of input tokens.
 * @param outputTokens - Number of output tokens.
 * @returns Estimated cost in USD, rounded to 4 decimal places (so very small
 *   requests can round to 0).
 */
export function estimateCost(model: string, inputTokens: number, outputTokens: number): number {
  const isListed = Object.prototype.hasOwnProperty.call(MODEL_PRICING, model);
  const pricing = MODEL_PRICING[isListed ? model : 'default'];

  const inputCost = (inputTokens / 1_000_000) * pricing.input;
  const outputCost = (outputTokens / 1_000_000) * pricing.output;
  return Math.round((inputCost + outputCost) * 10000) / 10000; // Round to 4 decimal places
}
145
-
/**
 * Turns audit mode on or off.
 *
 * While audit mode is on, `recordTelemetry` keeps each event in the
 * in-memory audit buffer and prints it to the console instead of appending
 * it to the telemetry file.
 *
 * @param enabled - New audit-mode state.
 */
export function setAuditMode(enabled: boolean): void {
  const next = enabled;
  auditMode = next;
}
152
-
/**
 * Reports the current audit-mode state.
 *
 * @returns `true` while audit mode is on.
 */
export function isAuditMode(): boolean {
  const current = auditMode;
  return current;
}
159
-
/**
 * Turns offline mode on or off. Offline mode is meant to disable every
 * network call except the LLM requests themselves.
 * NOTE(review): this function only stores the flag; whatever enforces it
 * lives outside the code shown here — confirm against the callers of
 * `isOfflineMode`.
 *
 * @param enabled - New offline-mode state.
 */
export function setOfflineMode(enabled: boolean): void {
  const next = enabled;
  offlineMode = next;
}
166
-
/**
 * Reports the current offline-mode state.
 *
 * @returns `true` while offline mode is on.
 */
export function isOfflineMode(): boolean {
  const current = offlineMode;
  return current;
}
173
-
/**
 * Returns the events captured so far in audit mode.
 *
 * @returns A shallow copy of the buffer, so adding to or removing from the
 *   returned array does not affect the stored events.
 */
export function getAuditBuffer(): TelemetryEvent[] {
  const snapshot = auditBuffer.slice();
  return snapshot;
}
180
-
/**
 * Discards every event captured in audit mode by replacing the buffer with
 * a new empty array.
 */
export function clearAuditBuffer(): void {
  const empty: TelemetryEvent[] = [];
  auditBuffer = empty;
}
187
-
/**
 * Record a telemetry event.
 *
 * The event is completed with the device ID and the current time, then:
 * - in audit mode it is added to the in-memory audit buffer and printed to
 *   the console, and nothing is written to disk;
 * - otherwise, if telemetry is enabled, it is appended as one JSON line to
 *   the local telemetry file, creating the file's directory if needed;
 * - if telemetry is disabled and audit mode is off, nothing happens.
 *
 * File-system errors are swallowed on purpose: telemetry must never break
 * the proxy.
 *
 * @param event - Event fields other than `device_id` and `timestamp`.
 */
export function recordTelemetry(event: Omit<TelemetryEvent, 'device_id' | 'timestamp'>): void {
  if (!auditMode && !isTelemetryEnabled()) {
    return; // Telemetry disabled and not in audit mode
  }

  const fullEvent: TelemetryEvent = {
    ...event,
    device_id: getDeviceId(),
    timestamp: new Date().toISOString(),
  };

  if (auditMode) {
    // In audit mode, buffer events and print them
    auditBuffer.push(fullEvent);
    console.log('\n📊 [TELEMETRY AUDIT] The following data would be collected:');
    console.log(JSON.stringify(fullEvent, null, 2));
    console.log('');
    return;
  }

  // From here on audit mode is off, so the guard above has already
  // established that telemetry is enabled.

  // Store locally (append to JSONL file)
  try {
    // Create the directory the file actually lives in; with `recursive` this
    // is a no-op when it already exists, so no separate existence check.
    fs.mkdirSync(path.dirname(TELEMETRY_FILE), { recursive: true });
    fs.appendFileSync(TELEMETRY_FILE, JSON.stringify(fullEvent) + '\n');
  } catch {
    // Silently fail - telemetry should never break the proxy
  }
}
227
-
/**
 * Read the locally stored telemetry events.
 *
 * The telemetry file holds one JSON document per line. Blank lines, lines
 * that are not valid JSON (for example a partially written last line) and
 * lines whose JSON is not an object are skipped individually, so one bad
 * line does not hide the rest of the history.
 *
 * @returns The parsed events in file order, or `[]` when the file is missing
 *   or cannot be read. Field types are not validated.
 */
export function getLocalTelemetry(): TelemetryEvent[] {
  let raw: string;
  try {
    raw = fs.readFileSync(TELEMETRY_FILE, 'utf-8');
  } catch {
    // Missing or unreadable file: treat as "no telemetry yet".
    return [];
  }

  const events: TelemetryEvent[] = [];
  for (const line of raw.split('\n')) {
    if (!line.trim()) continue;

    try {
      const parsed: unknown = JSON.parse(line);
      if (typeof parsed === 'object' && parsed !== null) {
        events.push(parsed as TelemetryEvent);
      }
    } catch {
      // Corrupt line: skip it and keep the remaining events.
    }
  }
  return events;
}
246
-
/**
 * Summarize the locally stored telemetry.
 *
 * Groups are accumulated in Maps and only converted to plain objects at the
 * end, so a model or task type named like an `Object.prototype` member
 * (`__proto__`, `constructor`, ...) gets its own bucket instead of touching
 * the prototype.
 *
 * @returns
 * - `totalEvents`: number of stored events;
 * - `totalCost`: sum of `cost_usd`, rounded to 2 decimal places;
 * - `byModel` / `byTaskType`: event count and unrounded cost per group;
 * - `successRate`: fraction of successful events, 0 when there are none.
 */
export function getTelemetryStats(): {
  totalEvents: number;
  totalCost: number;
  byModel: Record<string, { count: number; cost: number }>;
  byTaskType: Record<string, { count: number; cost: number }>;
  successRate: number;
} {
  const events = getLocalTelemetry();

  const byModel = new Map<string, { count: number; cost: number }>();
  const byTaskType = new Map<string, { count: number; cost: number }>();
  let totalCost = 0;
  let successCount = 0;

  // Adds one event's cost to the group named `key`, creating it on first use.
  const tally = (
    groups: Map<string, { count: number; cost: number }>,
    key: string,
    cost: number
  ): void => {
    // Keys come from parsed file data; coerce them the way a property key would be.
    const name = String(key);
    const group = groups.get(name) ?? { count: 0, cost: 0 };
    group.count += 1;
    group.cost += cost;
    groups.set(name, group);
  };

  for (const event of events) {
    totalCost += event.cost_usd;
    if (event.success) successCount++;

    tally(byModel, event.model, event.cost_usd);
    tally(byTaskType, event.task_type, event.cost_usd);
  }

  return {
    totalEvents: events.length,
    totalCost: Math.round(totalCost * 100) / 100,
    byModel: Object.fromEntries(byModel),
    byTaskType: Object.fromEntries(byTaskType),
    successRate: events.length > 0 ? successCount / events.length : 0,
  };
}
289
-
/**
 * Deletes the local telemetry file if it exists.
 *
 * Any file-system error is ignored, so this never throws.
 */
export function clearTelemetry(): void {
  try {
    const present = fs.existsSync(TELEMETRY_FILE);
    if (!present) return;

    fs.unlinkSync(TELEMETRY_FILE);
  } catch {
    // Silently fail
  }
}
302
-
/**
 * Tells callers where the local telemetry log is stored.
 *
 * @returns The path of the telemetry file, as resolved when this module was loaded.
 */
export function getTelemetryPath(): string {
  const location: string = TELEMETRY_FILE;
  return location;
}
309
-
310
- /**
311
- * Print the telemetry disclosure banner with console.log: what is collected, what is never collected, how to verify (--audit / --offline), how to opt out, and where to learn more.
312
- */
313
- export function printTelemetryDisclosure(): void {
314
- console.log(`
315
- ╭─────────────────────────────────────────────────────────────────────╮
316
- │ 📊 TELEMETRY DISCLOSURE │
317
- ╰─────────────────────────────────────────────────────────────────────╯
318
-
319
- RelayPlane collects anonymous telemetry to improve model routing.
320
-
321
- What we collect:
322
- • Anonymous device ID (random, not fingerprintable)
323
- • Task type (inferred from token patterns, NOT your prompts)
324
- • Model used, token counts, latency, success/failure
325
- • Estimated cost
326
-
327
- What we NEVER collect:
328
- • Your prompts or model responses
329
- • File paths or contents
330
- • Anything that could identify you or your project
331
-
332
- How to verify:
333
- • Run with --audit to see exact payloads before they're sent
334
- • Run with --offline to disable all telemetry transmission
335
- • Full source code: https://github.com/RelayPlane/proxy
336
-
337
- To opt out completely:
338
- $ relayplane-proxy telemetry off
339
-
340
- Learn more: https://relayplane.com/privacy
341
-
342
- `);
343
- }
package/tsconfig.json DELETED
@@ -1,19 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "ES2022",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
6
- "declaration": true,
7
- "declarationMap": true,
8
- "sourceMap": true,
9
- "outDir": "./dist",
10
- "rootDir": "./src",
11
- "strict": true,
12
- "esModuleInterop": true,
13
- "skipLibCheck": true,
14
- "forceConsistentCasingInFileNames": true,
15
- "resolveJsonModule": true
16
- },
17
- "include": ["src/**/*"],
18
- "exclude": ["node_modules", "dist"]
19
- }
package/vitest.config.ts DELETED
@@ -1,21 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
/**
 * Vitest configuration for the proxy package: Node environment, global test
 * APIs, tests discovered under `__tests__/`, V8 coverage.
 */
export default defineConfig({
  test: {
    environment: 'node',
    globals: true,
    include: ['__tests__/**/*.test.ts'],
    testTimeout: 10000,

    // Use a single forked worker so tests run sequentially and do not
    // compete for the same ports.
    pool: 'forks',
    poolOptions: {
      forks: {
        singleFork: true,
      },
    },

    coverage: {
      provider: 'v8',
      reporter: ['text', 'json', 'html'],
    },
  },
});