@sentienguard/apm 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/openai.js ADDED
@@ -0,0 +1,520 @@
1
+ /**
2
+ * OpenAI Instrumentation Module
3
+ *
4
+ * Wraps OpenAI client to track:
5
+ * - API calls (chat completions, embeddings, images, etc.)
6
+ * - Latency per operation type
7
+ * - Token usage (prompt + completion)
8
+ * - Estimated costs
9
+ * - Error rates
10
+ *
11
+ * Usage:
12
+ * import { instrumentOpenAI } from '@sentienguard/apm';
13
+ * import OpenAI from 'openai';
14
+ *
15
+ * const openai = new OpenAI({ apiKey: '...' });
16
+ * instrumentOpenAI(openai);
17
+ */
18
+
19
+ import config, { debug, warn } from './config.js';
20
+ import { getAggregator } from './aggregator.js';
21
+
22
// Pricing per 1K tokens (as of 2024, update as needed).
// For models that are not token-billed the `prompt` field is overloaded:
// DALL-E rates are per image, whisper-1 is per minute of audio, and the
// tts models are per 1K characters — see the matching instrument* wrappers.
const PRICING = {
  'gpt-4o': { prompt: 0.005, completion: 0.015 },
  'gpt-4o-mini': { prompt: 0.00015, completion: 0.0006 },
  'gpt-4-turbo': { prompt: 0.01, completion: 0.03 },
  'gpt-4': { prompt: 0.03, completion: 0.06 },
  'gpt-3.5-turbo': { prompt: 0.0005, completion: 0.0015 },
  'text-embedding-3-small': { prompt: 0.00002, completion: 0 },
  'text-embedding-3-large': { prompt: 0.00013, completion: 0 },
  'text-embedding-ada-002': { prompt: 0.0001, completion: 0 },
  'dall-e-3': { prompt: 0.04, completion: 0 }, // per image, not tokens
  'dall-e-2': { prompt: 0.02, completion: 0 },
  'whisper-1': { prompt: 0.006, completion: 0 }, // per minute
  'tts-1': { prompt: 0.015, completion: 0 }, // per 1K chars
  'tts-1-hd': { prompt: 0.03, completion: 0 },
};

// Fallback rates used by calculateCost for models missing from PRICING.
const DEFAULT_PRICING = { prompt: 0.01, completion: 0.03 };

// Last client handed to instrumentOpenAI; cleared by stopOpenAIInstrumentation.
let originalClient = null;
43
+
44
/**
 * Estimate the USD cost of one API call from its token counts.
 *
 * Looks the model up in PRICING (falling back to DEFAULT_PRICING for
 * unknown models) and applies the per-1K-token rates to each side.
 *
 * @param {string} model - normalized model name (a PRICING key)
 * @param {number} promptTokens - tokens consumed by the prompt
 * @param {number} completionTokens - tokens produced by the completion
 * @returns {number} estimated cost in USD
 */
function calculateCost(model, promptTokens, completionTokens) {
  const rates = PRICING[model] ?? DEFAULT_PRICING;
  const costForPrompt = (promptTokens / 1000) * rates.prompt;
  const costForCompletion = (completionTokens / 1000) * rates.completion;
  return costForPrompt + costForCompletion;
}
53
+
54
/**
 * Collapse a concrete model identifier (possibly a dated variant such as
 * "gpt-4-0125-preview") onto the canonical name used as a PRICING key.
 * Unrecognized models are returned unchanged; a falsy model yields 'unknown'.
 *
 * @param {string} model - raw model string from the request params
 * @returns {string} canonical model name
 */
function normalizeModel(model) {
  if (!model) return 'unknown';

  // Ordered longest-prefix-first so 'gpt-4o-mini' wins over 'gpt-4o',
  // and 'gpt-4-turbo' wins over 'gpt-4'.
  const canonicalPrefixes = [
    'gpt-4o-mini',
    'gpt-4o',
    'gpt-4-turbo',
    'gpt-4',
    'gpt-3.5-turbo',
    'text-embedding-3-small',
    'text-embedding-3-large',
  ];
  for (const prefix of canonicalPrefixes) {
    if (model.startsWith(prefix)) return prefix;
  }

  // Ada embeddings map onto the fully-qualified pricing key.
  if (model.startsWith('text-embedding-ada')) return 'text-embedding-ada-002';

  return model;
}
72
+
73
/**
 * Generic instrumentation wrapper: replaces obj[methodName] with an async
 * wrapper that times the call, derives metrics via extractMetrics, and
 * records them on the aggregator. The wrapper is pass-through — it returns
 * or rethrows exactly what the original method produced.
 *
 * Fixes over the previous version:
 *  - the success-path debug log called metrics.cost.toFixed(6), which threw
 *    a TypeError when extractMetrics omitted `cost` (recording already
 *    defended with `|| 0`, the log did not);
 *  - any exception thrown by extractMetrics or the aggregator escaped the
 *    `finally` block and replaced the wrapped call's result/error. Telemetry
 *    failures are now logged and swallowed.
 *
 * @param {object} obj - object holding the method to wrap
 * @param {string} methodName - name of the (async) method on obj
 * @param {string} operationType - label recorded as `operation`
 * @param {(args, result, error) => object} extractMetrics - derives
 *   { model, promptTokens, completionTokens, totalTokens, cost }
 */
function wrapMethod(obj, methodName, operationType, extractMetrics) {
  const original = obj[methodName];
  if (typeof original !== 'function') return;

  obj[methodName] = async function (...args) {
    // Respect runtime config: plain pass-through when disabled.
    if (!config.openai?.enabled) {
      return original.apply(this, args);
    }

    const startTime = process.hrtime.bigint();
    let error = null;
    let result = null;

    try {
      result = await original.apply(this, args);
      return result;
    } catch (err) {
      error = err;
      throw err;
    } finally {
      // Telemetry must never alter the outcome of the wrapped call.
      try {
        const latencyNs = process.hrtime.bigint() - startTime;
        const latencyMs = Number(latencyNs) / 1e6;

        // Extract metrics from args and result; tolerate partial objects.
        const metrics = extractMetrics(args, result, error) || {};
        const totalTokens = metrics.totalTokens || 0;
        const cost = metrics.cost || 0;

        const aggregator = getAggregator();
        aggregator.recordOpenAIOperation({
          operation: operationType,
          model: metrics.model,
          latency: latencyMs,
          promptTokens: metrics.promptTokens || 0,
          completionTokens: metrics.completionTokens || 0,
          totalTokens,
          cost,
          error: error ? (error.message || 'Unknown error') : null,
          statusCode: error?.status || (result ? 200 : null)
        });

        if (error) {
          warn(`OpenAI ${operationType} failed: ${error.message}`);
        } else {
          debug(`OpenAI ${operationType} completed in ${latencyMs.toFixed(2)}ms (${totalTokens} tokens, $${cost.toFixed(6)})`);
        }
      } catch (telemetryErr) {
        warn(`OpenAI ${operationType} telemetry failed: ${telemetryErr.message}`);
      }
    }
  };

  debug(`Wrapped OpenAI method: ${methodName}`);
}
125
+
126
/**
 * Instrument OpenAI chat completions (client.chat.completions.create).
 *
 * Patches the method in place to record latency, token usage and estimated
 * cost per call. The wrapper is pass-through: callers see exactly the
 * SDK's result or error.
 *
 * Fix: a failed call is no longer recorded with statusCode 200 when the
 * thrown error carries no `.status` (e.g. a network failure); it is
 * recorded as null, matching wrapMethod's behavior.
 *
 * @param {OpenAI} client - OpenAI client instance to patch
 */
function instrumentChatCompletions(client) {
  if (!client.chat?.completions?.create) {
    debug('OpenAI chat.completions.create not found');
    return;
  }

  const original = client.chat.completions.create.bind(client.chat.completions);

  client.chat.completions.create = async function (params, options) {
    if (!config.openai?.enabled) {
      return original(params, options);
    }

    const startTime = process.hrtime.bigint();
    const model = normalizeModel(params.model);
    let error = null;
    let result = null;

    try {
      result = await original(params, options);
      return result;
    } catch (err) {
      error = err;
      throw err;
    } finally {
      const latencyNs = process.hrtime.bigint() - startTime;
      const latencyMs = Number(latencyNs) / 1e6;

      // Extract token usage. NOTE(review): for streaming requests
      // (params.stream) the resolved value is a stream without `usage`,
      // so tokens/cost are recorded as 0 — confirm whether streams need
      // dedicated handling.
      const usage = result?.usage || {};
      const promptTokens = usage.prompt_tokens || 0;
      const completionTokens = usage.completion_tokens || 0;
      const totalTokens = usage.total_tokens || promptTokens + completionTokens;
      const cost = calculateCost(model, promptTokens, completionTokens);

      const aggregator = getAggregator();
      aggregator.recordOpenAIOperation({
        operation: 'chat.completions',
        model,
        latency: latencyMs,
        promptTokens,
        completionTokens,
        totalTokens,
        cost,
        error: error ? (error.message || 'Unknown error') : null,
        // Only successful calls imply HTTP 200; errors without a status
        // are recorded as null rather than a fake 200.
        statusCode: error ? (error.status ?? null) : 200
      });

      if (error) {
        warn(`OpenAI chat.completions failed: ${error.message}`);
      } else {
        debug(`OpenAI chat.completions [${model}] completed in ${latencyMs.toFixed(2)}ms (${totalTokens} tokens, $${cost.toFixed(6)})`);
      }
    }
  };

  debug('Instrumented OpenAI chat.completions.create');
}
187
+
188
/**
 * Instrument OpenAI embeddings (client.embeddings.create).
 *
 * Patches the method in place to record latency, prompt-token usage and
 * estimated cost per call. Embeddings have no completion side, so
 * completionTokens is always 0. The wrapper is pass-through.
 *
 * Fix: a failed call is no longer recorded with statusCode 200 when the
 * thrown error carries no `.status`; it is recorded as null, matching
 * wrapMethod's behavior.
 *
 * @param {OpenAI} client - OpenAI client instance to patch
 */
function instrumentEmbeddings(client) {
  if (!client.embeddings?.create) {
    debug('OpenAI embeddings.create not found');
    return;
  }

  const original = client.embeddings.create.bind(client.embeddings);

  client.embeddings.create = async function (params, options) {
    if (!config.openai?.enabled) {
      return original(params, options);
    }

    const startTime = process.hrtime.bigint();
    const model = normalizeModel(params.model);
    let error = null;
    let result = null;

    try {
      result = await original(params, options);
      return result;
    } catch (err) {
      error = err;
      throw err;
    } finally {
      const latencyNs = process.hrtime.bigint() - startTime;
      const latencyMs = Number(latencyNs) / 1e6;

      // Embeddings only consume prompt tokens.
      const usage = result?.usage || {};
      const promptTokens = usage.prompt_tokens || 0;
      const totalTokens = usage.total_tokens || promptTokens;
      const cost = calculateCost(model, promptTokens, 0);

      const aggregator = getAggregator();
      aggregator.recordOpenAIOperation({
        operation: 'embeddings',
        model,
        latency: latencyMs,
        promptTokens,
        completionTokens: 0,
        totalTokens,
        cost,
        error: error ? (error.message || 'Unknown error') : null,
        // Only successful calls imply HTTP 200; errors without a status
        // are recorded as null rather than a fake 200.
        statusCode: error ? (error.status ?? null) : 200
      });

      if (error) {
        warn(`OpenAI embeddings failed: ${error.message}`);
      } else {
        debug(`OpenAI embeddings [${model}] completed in ${latencyMs.toFixed(2)}ms (${totalTokens} tokens, $${cost.toFixed(6)})`);
      }
    }
  };

  debug('Instrumented OpenAI embeddings.create');
}
247
+
248
/**
 * Instrument OpenAI image generation (client.images.generate, DALL-E).
 *
 * DALL-E is billed per generated image rather than per token, so the cost
 * estimate is PRICING[model].prompt * params.n (default 1 image).
 * NOTE(review): the flat per-image rate ignores size/quality tiers, so
 * this is an estimate only. The wrapper is pass-through.
 *
 * Fix: a failed call is no longer recorded with statusCode 200 when the
 * thrown error carries no `.status`; it is recorded as null, matching
 * wrapMethod's behavior.
 *
 * @param {OpenAI} client - OpenAI client instance to patch
 */
function instrumentImages(client) {
  if (!client.images?.generate) {
    debug('OpenAI images.generate not found');
    return;
  }

  const original = client.images.generate.bind(client.images);

  client.images.generate = async function (params, options) {
    if (!config.openai?.enabled) {
      return original(params, options);
    }

    const startTime = process.hrtime.bigint();
    // The SDK defaults the model to dall-e-2 when omitted.
    const model = normalizeModel(params.model || 'dall-e-2');
    const numImages = params.n || 1;
    let error = null;
    let result = null;

    try {
      result = await original(params, options);
      return result;
    } catch (err) {
      error = err;
      throw err;
    } finally {
      const latencyNs = process.hrtime.bigint() - startTime;
      const latencyMs = Number(latencyNs) / 1e6;

      // DALL-E pricing is per image, not tokens.
      const pricing = PRICING[model] || { prompt: 0.02 };
      const cost = pricing.prompt * numImages;

      const aggregator = getAggregator();
      aggregator.recordOpenAIOperation({
        operation: 'images.generate',
        model,
        latency: latencyMs,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: 0,
        cost,
        error: error ? (error.message || 'Unknown error') : null,
        // Only successful calls imply HTTP 200; errors without a status
        // are recorded as null rather than a fake 200.
        statusCode: error ? (error.status ?? null) : 200,
        metadata: { numImages }
      });

      if (error) {
        warn(`OpenAI images.generate failed: ${error.message}`);
      } else {
        debug(`OpenAI images.generate [${model}] completed in ${latencyMs.toFixed(2)}ms (${numImages} images, $${cost.toFixed(4)})`);
      }
    }
  };

  debug('Instrumented OpenAI images.generate');
}
308
+
309
/**
 * Instrument OpenAI audio endpoints: transcriptions (Whisper) and
 * text-to-speech. Each existing endpoint on the client is patched in place
 * with a pass-through async wrapper that records latency and, for TTS,
 * an estimated character-based cost on the aggregator. Either endpoint is
 * skipped silently if absent from the client.
 *
 * NOTE(review): both wrappers record `statusCode: error?.status || 200`,
 * which reports 200 for a failed call whose error has no `.status`
 * (e.g. a network failure) — confirm whether this should be null to match
 * wrapMethod's `(result ? 200 : null)` convention.
 *
 * @param {OpenAI} client - OpenAI client instance to patch
 */
function instrumentAudio(client) {
  // Transcriptions (Whisper)
  if (client.audio?.transcriptions?.create) {
    // Bind now so the patched method can delegate to the original.
    const originalTranscribe = client.audio.transcriptions.create.bind(client.audio.transcriptions);

    client.audio.transcriptions.create = async function (params, options) {
      // Respect runtime config: plain pass-through when disabled.
      if (!config.openai?.enabled) {
        return originalTranscribe(params, options);
      }

      const startTime = process.hrtime.bigint();
      const model = normalizeModel(params.model || 'whisper-1');
      let error = null;
      let result = null;

      try {
        result = await originalTranscribe(params, options);
        return result;
      } catch (err) {
        error = err;
        throw err;
      } finally {
        const latencyNs = process.hrtime.bigint() - startTime;
        const latencyMs = Number(latencyNs) / 1e6;

        // Whisper pricing is per minute of audio.
        // We don't know the exact duration without parsing the file,
        // so the cost is always recorded as 0 for transcriptions.
        const cost = 0; // Would need file duration

        const aggregator = getAggregator();
        aggregator.recordOpenAIOperation({
          operation: 'audio.transcriptions',
          model,
          latency: latencyMs,
          promptTokens: 0,
          completionTokens: 0,
          totalTokens: 0,
          cost,
          error: error ? (error.message || 'Unknown error') : null,
          statusCode: error?.status || 200
        });

        if (error) {
          warn(`OpenAI audio.transcriptions failed: ${error.message}`);
        } else {
          debug(`OpenAI audio.transcriptions [${model}] completed in ${latencyMs.toFixed(2)}ms`);
        }
      }
    };

    debug('Instrumented OpenAI audio.transcriptions.create');
  }

  // Text-to-Speech
  if (client.audio?.speech?.create) {
    const originalSpeech = client.audio.speech.create.bind(client.audio.speech);

    client.audio.speech.create = async function (params, options) {
      if (!config.openai?.enabled) {
        return originalSpeech(params, options);
      }

      const startTime = process.hrtime.bigint();
      const model = normalizeModel(params.model || 'tts-1');
      // TTS is billed on input text length; captured up front for cost + metadata.
      const inputLength = params.input?.length || 0;
      let error = null;
      let result = null;

      try {
        result = await originalSpeech(params, options);
        return result;
      } catch (err) {
        error = err;
        throw err;
      } finally {
        const latencyNs = process.hrtime.bigint() - startTime;
        const latencyMs = Number(latencyNs) / 1e6;

        // TTS pricing is per 1K characters (PRICING overloads `prompt`
        // as the per-1K-chars rate for tts models).
        const pricing = PRICING[model] || { prompt: 0.015 };
        const cost = (inputLength / 1000) * pricing.prompt;

        const aggregator = getAggregator();
        aggregator.recordOpenAIOperation({
          operation: 'audio.speech',
          model,
          latency: latencyMs,
          promptTokens: 0,
          completionTokens: 0,
          totalTokens: 0,
          cost,
          error: error ? (error.message || 'Unknown error') : null,
          statusCode: error?.status || 200,
          metadata: { inputLength }
        });

        if (error) {
          warn(`OpenAI audio.speech failed: ${error.message}`);
        } else {
          debug(`OpenAI audio.speech [${model}] completed in ${latencyMs.toFixed(2)}ms (${inputLength} chars, $${cost.toFixed(6)})`);
        }
      }
    };

    debug('Instrumented OpenAI audio.speech.create');
  }
}
419
+
420
/**
 * Instrument OpenAI moderations (client.moderations.create).
 *
 * Patches the method in place to record latency per call. The moderations
 * endpoint is free, so cost and all token counts are recorded as 0.
 * The wrapper is pass-through.
 *
 * Fix: a failed call is no longer recorded with statusCode 200 when the
 * thrown error carries no `.status`; it is recorded as null, matching
 * wrapMethod's behavior.
 *
 * @param {OpenAI} client - OpenAI client instance to patch
 */
function instrumentModerations(client) {
  if (!client.moderations?.create) {
    debug('OpenAI moderations.create not found');
    return;
  }

  const original = client.moderations.create.bind(client.moderations);

  client.moderations.create = async function (params, options) {
    if (!config.openai?.enabled) {
      return original(params, options);
    }

    const startTime = process.hrtime.bigint();
    const model = normalizeModel(params.model || 'text-moderation-latest');
    let error = null;
    let result = null;

    try {
      result = await original(params, options);
      return result;
    } catch (err) {
      error = err;
      throw err;
    } finally {
      const latencyNs = process.hrtime.bigint() - startTime;
      const latencyMs = Number(latencyNs) / 1e6;

      // Moderations are free — record latency only.
      const aggregator = getAggregator();
      aggregator.recordOpenAIOperation({
        operation: 'moderations',
        model,
        latency: latencyMs,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: 0,
        cost: 0,
        error: error ? (error.message || 'Unknown error') : null,
        // Only successful calls imply HTTP 200; errors without a status
        // are recorded as null rather than a fake 200.
        statusCode: error ? (error.status ?? null) : 200
      });

      if (error) {
        warn(`OpenAI moderations failed: ${error.message}`);
      } else {
        debug(`OpenAI moderations completed in ${latencyMs.toFixed(2)}ms`);
      }
    }
  };

  debug('Instrumented OpenAI moderations.create');
}
475
+
476
/**
 * Attach APM instrumentation to an OpenAI client instance.
 *
 * Patches the chat-completions, embeddings, images, audio and moderations
 * endpoints in place on the given client. A no-op (with a log message)
 * when called without a client or when disabled via config. Any failure
 * while patching is caught and logged, never thrown to the caller.
 *
 * @param {OpenAI} client - OpenAI client instance
 */
export function instrumentOpenAI(client) {
  if (!client) {
    warn('instrumentOpenAI called with no client');
    return;
  }

  if (!config.openai?.enabled) {
    debug('OpenAI instrumentation disabled via config');
    return;
  }

  try {
    originalClient = client;

    // Patch every supported API surface, in a fixed order.
    const instrumenters = [
      instrumentChatCompletions,
      instrumentEmbeddings,
      instrumentImages,
      instrumentAudio,
      instrumentModerations,
    ];
    for (const applyInstrumentation of instrumenters) {
      applyInstrumentation(client);
    }

    debug('OpenAI instrumentation attached successfully');
  } catch (err) {
    warn(`Failed to instrument OpenAI: ${err.message}`);
  }
}
507
+
508
/**
 * Detach OpenAI instrumentation bookkeeping by dropping the reference to
 * the instrumented client.
 * Note: this does not restore the original (patched) methods — the client
 * instance should be discarded rather than reused.
 */
export function stopOpenAIInstrumentation() {
  originalClient = null;
  debug('OpenAI instrumentation stopped');
}
516
+
517
// Default export mirrors the named exports for consumers using
// `import apm from '...'` style imports.
export default {
  instrumentOpenAI,
  stopOpenAIInstrumentation
};
package/src/transport.js CHANGED
@@ -120,7 +120,7 @@ async function flush() {
120
120
  // Get and reset metrics
121
121
  const payload = aggregator.flush();
122
122
 
123
- debug(`Flushing ${payload.requests.length} request metrics, ${payload.dependencies.length} dependency metrics`);
123
+ debug(`Flushing ${payload.requests.length} request metrics, ${payload.dependencies.length} dependency metrics, ${payload.openai?.length || 0} OpenAI metrics`);
124
124
 
125
125
  try {
126
126
  const startTime = Date.now();