openlayer 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +9 -8
- package/dist/index.js +106 -39
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -7,6 +7,10 @@ import { Stream } from 'openai/streaming';
|
|
|
7
7
|
*/
|
|
8
8
|
export interface StreamingData {
|
|
9
9
|
[columnName: string]: any;
|
|
10
|
+
/**
|
|
11
|
+
* The total estimated cost of the chat completion in USD. Optional.
|
|
12
|
+
*/
|
|
13
|
+
cost?: number;
|
|
10
14
|
/**
|
|
11
15
|
* The latency of the chat completion in milliseconds. Optional.
|
|
12
16
|
*/
|
|
@@ -15,10 +19,6 @@ export interface StreamingData {
|
|
|
15
19
|
* The output string generated by the chat completion.
|
|
16
20
|
*/
|
|
17
21
|
output: string;
|
|
18
|
-
/**
|
|
19
|
-
* The full prompt history for the chat completion.
|
|
20
|
-
*/
|
|
21
|
-
prompt?: ChatCompletionMessageParam[];
|
|
22
22
|
/**
|
|
23
23
|
* A timestamp representing when the chat completion occurred. Optional.
|
|
24
24
|
*/
|
|
@@ -57,9 +57,9 @@ interface StreamingDataConfig {
|
|
|
57
57
|
*/
|
|
58
58
|
outputColumnName: string | null;
|
|
59
59
|
/**
|
|
60
|
-
* The
|
|
60
|
+
* The full prompt history for the chat completion.
|
|
61
61
|
*/
|
|
62
|
-
|
|
62
|
+
prompt?: ChatCompletionMessageParam[];
|
|
63
63
|
/**
|
|
64
64
|
* The name of the column that stores timestamp data. Can be null.
|
|
65
65
|
*/
|
|
@@ -195,6 +195,7 @@ export declare class OpenAIMonitor {
|
|
|
195
195
|
* @param {OpenAIMonitorConstructorProps} props - The configuration properties for the OpenAI and Openlayer clients.
|
|
196
196
|
*/
|
|
197
197
|
constructor({ openAiApiKey, openlayerApiKey, openlayerProjectName, openlayerInferencePipelineName, openlayerServerUrl, }: OpenAIMonitorConstructorProps);
|
|
198
|
+
private cost;
|
|
198
199
|
private formatChatCompletionInput;
|
|
199
200
|
/**
|
|
200
201
|
* Creates a chat completion using the OpenAI client and streams the result to Openlayer.
|
|
@@ -203,7 +204,7 @@ export declare class OpenAIMonitor {
|
|
|
203
204
|
* @returns {Promise<ChatCompletion | Stream<ChatCompletionChunk>>} Promise of a ChatCompletion or a Stream
|
|
204
205
|
* @throws {Error} Throws an error if monitoring is not active or if no output is received from OpenAI.
|
|
205
206
|
*/
|
|
206
|
-
createChatCompletion: (body: ChatCompletionCreateParams, options?: RequestOptions) => Promise<ChatCompletion | Stream<ChatCompletionChunk>>;
|
|
207
|
+
createChatCompletion: (body: ChatCompletionCreateParams, options?: RequestOptions, additionalLogs?: StreamingData) => Promise<ChatCompletion | Stream<ChatCompletionChunk>>;
|
|
207
208
|
/**
|
|
208
209
|
* Creates a completion using the OpenAI client and streams the result to Openlayer.
|
|
209
210
|
* @param {CompletionCreateParams} body - The parameters for creating a completion.
|
|
@@ -211,7 +212,7 @@ export declare class OpenAIMonitor {
|
|
|
211
212
|
* @returns {Promise<Completion | Stream<Completion>>} Promise that resolves to a Completion or a Stream.
|
|
212
213
|
* @throws {Error} Throws an error if monitoring is not active or if no prompt is provided.
|
|
213
214
|
*/
|
|
214
|
-
createCompletion: (body: CompletionCreateParams, options?: RequestOptions) => Promise<Completion | Stream<Completion>>;
|
|
215
|
+
createCompletion: (body: CompletionCreateParams, options?: RequestOptions, additionalLogs?: StreamingData) => Promise<Completion | Stream<Completion>>;
|
|
215
216
|
/**
|
|
216
217
|
* Starts monitoring for the OpenAI Monitor instance. If monitoring is already active, a warning is logged.
|
|
217
218
|
*/
|
package/dist/index.js
CHANGED
|
@@ -20,6 +20,64 @@ exports.OpenAIMonitor = exports.OpenlayerClient = void 0;
|
|
|
20
20
|
const openai_1 = require("openai");
|
|
21
21
|
const uuid_1 = require("uuid");
|
|
22
22
|
const request_1 = require("./utils/request");
|
|
23
|
+
const OpenAIPricing = {
|
|
24
|
+
'babbage-002': {
|
|
25
|
+
input: 0.0004,
|
|
26
|
+
output: 0.0004,
|
|
27
|
+
},
|
|
28
|
+
'davinci-002': {
|
|
29
|
+
input: 0.002,
|
|
30
|
+
output: 0.002,
|
|
31
|
+
},
|
|
32
|
+
'gpt-3.5-turbo': {
|
|
33
|
+
input: 0.003,
|
|
34
|
+
output: 0.006,
|
|
35
|
+
},
|
|
36
|
+
'gpt-3.5-turbo-0301': {
|
|
37
|
+
input: 0.0015,
|
|
38
|
+
output: 0.002,
|
|
39
|
+
},
|
|
40
|
+
'gpt-3.5-turbo-0613': {
|
|
41
|
+
input: 0.0015,
|
|
42
|
+
output: 0.002,
|
|
43
|
+
},
|
|
44
|
+
'gpt-3.5-turbo-1106': {
|
|
45
|
+
input: 0.001,
|
|
46
|
+
output: 0.002,
|
|
47
|
+
},
|
|
48
|
+
'gpt-3.5-turbo-16k-0613': {
|
|
49
|
+
input: 0.003,
|
|
50
|
+
output: 0.004,
|
|
51
|
+
},
|
|
52
|
+
'gpt-3.5-turbo-instruct': {
|
|
53
|
+
input: 0.0015,
|
|
54
|
+
output: 0.002,
|
|
55
|
+
},
|
|
56
|
+
'gpt-4': {
|
|
57
|
+
input: 0.03,
|
|
58
|
+
output: 0.06,
|
|
59
|
+
},
|
|
60
|
+
'gpt-4-0314': {
|
|
61
|
+
input: 0.03,
|
|
62
|
+
output: 0.06,
|
|
63
|
+
},
|
|
64
|
+
'gpt-4-1106-preview': {
|
|
65
|
+
input: 0.01,
|
|
66
|
+
output: 0.03,
|
|
67
|
+
},
|
|
68
|
+
'gpt-4-1106-vision-preview': {
|
|
69
|
+
input: 0.01,
|
|
70
|
+
output: 0.03,
|
|
71
|
+
},
|
|
72
|
+
'gpt-4-32k': {
|
|
73
|
+
input: 0.06,
|
|
74
|
+
output: 0.12,
|
|
75
|
+
},
|
|
76
|
+
'gpt-4-32k-0314': {
|
|
77
|
+
input: 0.06,
|
|
78
|
+
output: 0.12,
|
|
79
|
+
},
|
|
80
|
+
};
|
|
23
81
|
class OpenlayerClient {
|
|
24
82
|
/**
|
|
25
83
|
* Constructs an OpenlayerClient instance.
|
|
@@ -33,7 +91,6 @@ class OpenlayerClient {
|
|
|
33
91
|
latencyColumnName: 'latency',
|
|
34
92
|
numOfTokenColumnName: 'tokens',
|
|
35
93
|
outputColumnName: 'output',
|
|
36
|
-
promptColumnName: 'prompt',
|
|
37
94
|
timestampColumnName: 'timestamp',
|
|
38
95
|
};
|
|
39
96
|
this.openlayerServerUrl = 'https://api.openlayer.com/v1';
|
|
@@ -228,6 +285,18 @@ class OpenAIMonitor {
|
|
|
228
285
|
constructor({ openAiApiKey, openlayerApiKey, openlayerProjectName, openlayerInferencePipelineName, openlayerServerUrl, }) {
|
|
229
286
|
this.openlayerInferencePipelineName = 'production';
|
|
230
287
|
this.monitoringOn = false;
|
|
288
|
+
this.cost = (model, inputTokens, outputTokens) => {
|
|
289
|
+
const pricing = OpenAIPricing[model];
|
|
290
|
+
const inputCost = typeof pricing === 'undefined'
|
|
291
|
+
? undefined
|
|
292
|
+
: (inputTokens / 1000) * pricing.input;
|
|
293
|
+
const outputCost = typeof pricing === 'undefined'
|
|
294
|
+
? undefined
|
|
295
|
+
: (outputTokens / 1000) * pricing.output;
|
|
296
|
+
return typeof pricing === 'undefined'
|
|
297
|
+
? undefined
|
|
298
|
+
: (inputCost !== null && inputCost !== void 0 ? inputCost : 0) + (outputCost !== null && outputCost !== void 0 ? outputCost : 0);
|
|
299
|
+
};
|
|
231
300
|
this.formatChatCompletionInput = (messages) => messages.map(({ content, role }, i) => (role === 'user'
|
|
232
301
|
? `{{ message_${i} }}`
|
|
233
302
|
: content));
|
|
@@ -238,9 +307,9 @@ class OpenAIMonitor {
|
|
|
238
307
|
* @returns {Promise<ChatCompletion | Stream<ChatCompletionChunk>>} Promise of a ChatCompletion or a Stream
|
|
239
308
|
* @throws {Error} Throws an error if monitoring is not active or if no output is received from OpenAI.
|
|
240
309
|
*/
|
|
241
|
-
this.createChatCompletion = (body, options) => __awaiter(this, void 0, void 0, function* () {
|
|
310
|
+
this.createChatCompletion = (body, options, additionalLogs) => __awaiter(this, void 0, void 0, function* () {
|
|
242
311
|
var _a, e_1, _b, _c;
|
|
243
|
-
var _d, _e;
|
|
312
|
+
var _d, _e, _f, _g, _h, _j, _k;
|
|
244
313
|
if (!this.monitoringOn) {
|
|
245
314
|
throw new Error('Monitoring is not active.');
|
|
246
315
|
}
|
|
@@ -249,7 +318,7 @@ class OpenAIMonitor {
|
|
|
249
318
|
// Start a timer to measure latency
|
|
250
319
|
const startTime = Date.now();
|
|
251
320
|
// Accumulate output for streamed responses
|
|
252
|
-
let
|
|
321
|
+
let streamedOutput = '';
|
|
253
322
|
const response = yield this.openAIClient.chat.completions.create(body, options);
|
|
254
323
|
const prompt = this.formatChatCompletionInput(body.messages);
|
|
255
324
|
const inputVariableNames = prompt
|
|
@@ -259,28 +328,30 @@ class OpenAIMonitor {
|
|
|
259
328
|
.filter(({ role }) => role === 'user')
|
|
260
329
|
.map(({ content }) => content);
|
|
261
330
|
const inputVariablesMap = inputVariableNames.reduce((acc, name, i) => (Object.assign(Object.assign({}, acc), { [name]: inputVariables[i] })), {});
|
|
262
|
-
const config = Object.assign(Object.assign({}, this.openlayerClient.defaultConfig), { inputVariableNames
|
|
331
|
+
const config = Object.assign(Object.assign({}, this.openlayerClient.defaultConfig), { inputVariableNames,
|
|
332
|
+
prompt });
|
|
263
333
|
if (body.stream) {
|
|
264
334
|
const streamedResponse = response;
|
|
265
335
|
try {
|
|
266
|
-
for (var
|
|
336
|
+
for (var _l = true, streamedResponse_1 = __asyncValues(streamedResponse), streamedResponse_1_1; streamedResponse_1_1 = yield streamedResponse_1.next(), _a = streamedResponse_1_1.done, !_a; _l = true) {
|
|
267
337
|
_c = streamedResponse_1_1.value;
|
|
268
|
-
|
|
338
|
+
_l = false;
|
|
269
339
|
const chunk = _c;
|
|
270
340
|
// Process each chunk - for example, accumulate input data
|
|
271
|
-
|
|
341
|
+
const chunkOutput = (_d = chunk.choices[0].delta.content) !== null && _d !== void 0 ? _d : '';
|
|
342
|
+
streamedOutput += chunkOutput;
|
|
272
343
|
}
|
|
273
344
|
}
|
|
274
345
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
275
346
|
finally {
|
|
276
347
|
try {
|
|
277
|
-
if (!
|
|
348
|
+
if (!_l && !_a && (_b = streamedResponse_1.return)) yield _b.call(streamedResponse_1);
|
|
278
349
|
}
|
|
279
350
|
finally { if (e_1) throw e_1.error; }
|
|
280
351
|
}
|
|
281
352
|
const endTime = Date.now();
|
|
282
353
|
const latency = endTime - startTime;
|
|
283
|
-
this.openlayerClient.streamData(Object.assign({ latency, output:
|
|
354
|
+
this.openlayerClient.streamData(Object.assign(Object.assign({ latency, output: streamedOutput, timestamp: startTime }, inputVariablesMap), additionalLogs), config, inferencePipeline.id);
|
|
284
355
|
}
|
|
285
356
|
else {
|
|
286
357
|
const nonStreamedResponse = response;
|
|
@@ -288,12 +359,16 @@ class OpenAIMonitor {
|
|
|
288
359
|
const endTime = Date.now();
|
|
289
360
|
const latency = endTime - startTime;
|
|
290
361
|
const output = nonStreamedResponse.choices[0].message.content;
|
|
362
|
+
const tokens = (_f = (_e = nonStreamedResponse.usage) === null || _e === void 0 ? void 0 : _e.total_tokens) !== null && _f !== void 0 ? _f : 0;
|
|
363
|
+
const inputTokens = (_h = (_g = nonStreamedResponse.usage) === null || _g === void 0 ? void 0 : _g.prompt_tokens) !== null && _h !== void 0 ? _h : 0;
|
|
364
|
+
const outputTokens = (_k = (_j = nonStreamedResponse.usage) === null || _j === void 0 ? void 0 : _j.completion_tokens) !== null && _k !== void 0 ? _k : 0;
|
|
365
|
+
const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
|
|
291
366
|
if (typeof output !== 'string') {
|
|
292
367
|
throw new Error('No output received from OpenAI.');
|
|
293
368
|
}
|
|
294
|
-
this.openlayerClient.streamData(Object.assign({
|
|
295
|
-
|
|
296
|
-
|
|
369
|
+
this.openlayerClient.streamData(Object.assign(Object.assign({ cost,
|
|
370
|
+
latency,
|
|
371
|
+
output, timestamp: startTime, tokens }, inputVariablesMap), additionalLogs), config, inferencePipeline.id);
|
|
297
372
|
}
|
|
298
373
|
return response;
|
|
299
374
|
});
|
|
@@ -304,9 +379,9 @@ class OpenAIMonitor {
|
|
|
304
379
|
* @returns {Promise<Completion | Stream<Completion>>} Promise that resolves to a Completion or a Stream.
|
|
305
380
|
* @throws {Error} Throws an error if monitoring is not active or if no prompt is provided.
|
|
306
381
|
*/
|
|
307
|
-
this.createCompletion = (body, options) => __awaiter(this, void 0, void 0, function* () {
|
|
308
|
-
var
|
|
309
|
-
var
|
|
382
|
+
this.createCompletion = (body, options, additionalLogs) => __awaiter(this, void 0, void 0, function* () {
|
|
383
|
+
var _m, e_2, _o, _p;
|
|
384
|
+
var _q, _r, _s, _t, _u, _v, _w, _x;
|
|
310
385
|
if (!this.monitoringOn) {
|
|
311
386
|
throw new Error('Monitoring is not active.');
|
|
312
387
|
}
|
|
@@ -318,51 +393,43 @@ class OpenAIMonitor {
|
|
|
318
393
|
// Start a timer to measure latency
|
|
319
394
|
const startTime = Date.now();
|
|
320
395
|
// Accumulate output and tokens data for streamed responses
|
|
321
|
-
let outputData = '';
|
|
322
|
-
let tokensData = 0;
|
|
396
|
+
let streamedOutput = '';
|
|
397
|
+
let streamedTokens = 0;
|
|
323
398
|
const response = yield this.openAIClient.completions.create(body, options);
|
|
324
399
|
const config = Object.assign(Object.assign({}, this.openlayerClient.defaultConfig), { inputVariableNames: ['input'] });
|
|
325
400
|
if (body.stream) {
|
|
326
401
|
const streamedResponse = response;
|
|
327
402
|
try {
|
|
328
|
-
for (var
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
const chunk =
|
|
403
|
+
for (var _y = true, streamedResponse_2 = __asyncValues(streamedResponse), streamedResponse_2_1; streamedResponse_2_1 = yield streamedResponse_2.next(), _m = streamedResponse_2_1.done, !_m; _y = true) {
|
|
404
|
+
_p = streamedResponse_2_1.value;
|
|
405
|
+
_y = false;
|
|
406
|
+
const chunk = _p;
|
|
332
407
|
// Process each chunk - for example, accumulate input data
|
|
333
|
-
|
|
334
|
-
|
|
408
|
+
streamedOutput += chunk.choices[0].text.trim();
|
|
409
|
+
streamedTokens += (_r = (_q = chunk.usage) === null || _q === void 0 ? void 0 : _q.total_tokens) !== null && _r !== void 0 ? _r : 0;
|
|
335
410
|
}
|
|
336
411
|
}
|
|
337
412
|
catch (e_2_1) { e_2 = { error: e_2_1 }; }
|
|
338
413
|
finally {
|
|
339
414
|
try {
|
|
340
|
-
if (!
|
|
415
|
+
if (!_y && !_m && (_o = streamedResponse_2.return)) yield _o.call(streamedResponse_2);
|
|
341
416
|
}
|
|
342
417
|
finally { if (e_2) throw e_2.error; }
|
|
343
418
|
}
|
|
344
419
|
const endTime = Date.now();
|
|
345
420
|
const latency = endTime - startTime;
|
|
346
|
-
this.openlayerClient.streamData({
|
|
347
|
-
input: body.prompt,
|
|
348
|
-
latency,
|
|
349
|
-
output: outputData,
|
|
350
|
-
timestamp: startTime,
|
|
351
|
-
tokens: tokensData,
|
|
352
|
-
}, config, inferencePipeline.id);
|
|
421
|
+
this.openlayerClient.streamData(Object.assign({ input: body.prompt, latency, output: streamedOutput, timestamp: startTime, tokens: streamedTokens }, additionalLogs), config, inferencePipeline.id);
|
|
353
422
|
}
|
|
354
423
|
else {
|
|
355
424
|
const nonStreamedResponse = response;
|
|
356
425
|
// Handle regular (non-streamed) response
|
|
357
426
|
const endTime = Date.now();
|
|
358
427
|
const latency = endTime - startTime;
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
tokens: (_o = (_m = nonStreamedResponse.usage) === null || _m === void 0 ? void 0 : _m.total_tokens) !== null && _o !== void 0 ? _o : 0,
|
|
365
|
-
}, config, inferencePipeline.id);
|
|
428
|
+
const tokens = (_t = (_s = nonStreamedResponse.usage) === null || _s === void 0 ? void 0 : _s.total_tokens) !== null && _t !== void 0 ? _t : 0;
|
|
429
|
+
const inputTokens = (_v = (_u = nonStreamedResponse.usage) === null || _u === void 0 ? void 0 : _u.prompt_tokens) !== null && _v !== void 0 ? _v : 0;
|
|
430
|
+
const outputTokens = (_x = (_w = nonStreamedResponse.usage) === null || _w === void 0 ? void 0 : _w.completion_tokens) !== null && _x !== void 0 ? _x : 0;
|
|
431
|
+
const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
|
|
432
|
+
this.openlayerClient.streamData(Object.assign({ cost, input: body.prompt, latency, output: nonStreamedResponse.choices[0].text, timestamp: startTime, tokens }, additionalLogs), config, inferencePipeline.id);
|
|
366
433
|
}
|
|
367
434
|
return response;
|
|
368
435
|
});
|