openlayer 0.1.25 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +0 -2
- package/dist/index.js +130 -102
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -184,7 +184,6 @@ export declare class OpenlayerClient {
|
|
|
184
184
|
* @param {StreamingData} data - The chat completion data to be streamed.
|
|
185
185
|
* @param {string} inferencePipelineId - The ID of the Openlayer inference pipeline to which data is streamed.
|
|
186
186
|
* @returns {Promise<void>} A promise that resolves when the data has been successfully streamed.
|
|
187
|
-
* @throws {Error} Throws an error if the Openlayer API key is not set or an error occurs in the streaming process.
|
|
188
187
|
*/
|
|
189
188
|
streamData: (data: StreamingData, config: StreamingDataConfig, inferencePipelineId: string) => Promise<void>;
|
|
190
189
|
}
|
|
@@ -216,7 +215,6 @@ export declare class OpenAIMonitor {
|
|
|
216
215
|
* @param {CompletionCreateParams} body - The parameters for creating a completion.
|
|
217
216
|
* @param {RequestOptions} [options] - Optional request options.
|
|
218
217
|
* @returns {Promise<Completion | Stream<Completion>>} Promise that resolves to a Completion or a Stream.
|
|
219
|
-
* @throws {Error} Throws an error if monitoring is not active or if no prompt is provided.
|
|
220
218
|
*/
|
|
221
219
|
createCompletion: (body: CompletionCreateParams, options?: RequestOptions, additionalLogs?: StreamingData) => Promise<Completion | Stream<Completion>>;
|
|
222
220
|
/**
|
package/dist/index.js
CHANGED
|
@@ -30,8 +30,12 @@ const OpenAIPricing = {
|
|
|
30
30
|
output: 0.002,
|
|
31
31
|
},
|
|
32
32
|
'gpt-3.5-turbo': {
|
|
33
|
-
input: 0.
|
|
34
|
-
output: 0.
|
|
33
|
+
input: 0.0005,
|
|
34
|
+
output: 0.0015,
|
|
35
|
+
},
|
|
36
|
+
'gpt-3.5-turbo-0125': {
|
|
37
|
+
input: 0.0005,
|
|
38
|
+
output: 0.0015,
|
|
35
39
|
},
|
|
36
40
|
'gpt-3.5-turbo-0301': {
|
|
37
41
|
input: 0.0015,
|
|
@@ -57,6 +61,10 @@ const OpenAIPricing = {
|
|
|
57
61
|
input: 0.03,
|
|
58
62
|
output: 0.06,
|
|
59
63
|
},
|
|
64
|
+
'gpt-4-0125-preview': {
|
|
65
|
+
input: 0.01,
|
|
66
|
+
output: 0.03,
|
|
67
|
+
},
|
|
60
68
|
'gpt-4-0314': {
|
|
61
69
|
input: 0.03,
|
|
62
70
|
output: 0.06,
|
|
@@ -240,12 +248,12 @@ class OpenlayerClient {
|
|
|
240
248
|
* @param {StreamingData} data - The chat completion data to be streamed.
|
|
241
249
|
* @param {string} inferencePipelineId - The ID of the Openlayer inference pipeline to which data is streamed.
|
|
242
250
|
* @returns {Promise<void>} A promise that resolves when the data has been successfully streamed.
|
|
243
|
-
* @throws {Error} Throws an error if the Openlayer API key is not set or an error occurs in the streaming process.
|
|
244
251
|
*/
|
|
245
252
|
this.streamData = (data, config, inferencePipelineId) => __awaiter(this, void 0, void 0, function* () {
|
|
246
253
|
var _c;
|
|
247
254
|
if (!this.openlayerApiKey) {
|
|
248
|
-
|
|
255
|
+
console.error('Openlayer API key are required for streaming data.');
|
|
256
|
+
return;
|
|
249
257
|
}
|
|
250
258
|
try {
|
|
251
259
|
const dataStreamEndpoint = `/inference-pipelines/${inferencePipelineId}/data-stream`;
|
|
@@ -265,13 +273,12 @@ class OpenlayerClient {
|
|
|
265
273
|
});
|
|
266
274
|
if (!response.ok) {
|
|
267
275
|
console.error('Error making POST request:', response.status);
|
|
268
|
-
|
|
276
|
+
console.error(`Error: ${response.status}`);
|
|
269
277
|
}
|
|
270
|
-
|
|
278
|
+
yield response.json();
|
|
271
279
|
}
|
|
272
280
|
catch (error) {
|
|
273
281
|
console.error('Error streaming data to Openlayer:', error);
|
|
274
|
-
throw error;
|
|
275
282
|
}
|
|
276
283
|
});
|
|
277
284
|
this.openlayerApiKey = openlayerApiKey;
|
|
@@ -279,7 +286,7 @@ class OpenlayerClient {
|
|
|
279
286
|
this.openlayerServerUrl = openlayerServerUrl;
|
|
280
287
|
}
|
|
281
288
|
if (!this.openlayerApiKey) {
|
|
282
|
-
|
|
289
|
+
console.error('Openlayer API key are required for publishing.');
|
|
283
290
|
}
|
|
284
291
|
}
|
|
285
292
|
}
|
|
@@ -319,64 +326,73 @@ class OpenAIMonitor {
|
|
|
319
326
|
var _a, e_1, _b, _c;
|
|
320
327
|
var _d, _e, _f, _g, _h, _j, _k;
|
|
321
328
|
if (!this.monitoringOn) {
|
|
322
|
-
|
|
329
|
+
console.warn('Monitoring is not active.');
|
|
323
330
|
}
|
|
324
|
-
if (typeof this.inferencePipeline === 'undefined') {
|
|
325
|
-
|
|
331
|
+
else if (typeof this.inferencePipeline === 'undefined') {
|
|
332
|
+
console.error('No inference pipeline found.');
|
|
326
333
|
}
|
|
327
334
|
// Start a timer to measure latency
|
|
328
335
|
const startTime = Date.now();
|
|
329
336
|
// Accumulate output for streamed responses
|
|
330
337
|
let streamedOutput = '';
|
|
331
338
|
const response = yield this.openAIClient.chat.completions.create(body, options);
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
339
|
+
try {
|
|
340
|
+
if (this.monitoringOn && typeof this.inferencePipeline !== 'undefined') {
|
|
341
|
+
const prompt = this.formatChatCompletionInput(body.messages);
|
|
342
|
+
const inputVariableNames = prompt
|
|
343
|
+
.filter(({ role }) => role === 'user')
|
|
344
|
+
.map(({ content }) => String(content).replace(/{{\s*|\s*}}/g, ''));
|
|
345
|
+
const inputVariables = body.messages
|
|
346
|
+
.filter(({ role }) => role === 'user')
|
|
347
|
+
.map(({ content }) => content);
|
|
348
|
+
const inputVariablesMap = inputVariableNames.reduce((acc, name, i) => (Object.assign(Object.assign({}, acc), { [name]: inputVariables[i] })), {});
|
|
349
|
+
const config = Object.assign(Object.assign({}, this.openlayerClient.defaultConfig), { inputVariableNames,
|
|
350
|
+
prompt });
|
|
351
|
+
if (body.stream) {
|
|
352
|
+
const streamedResponse = response;
|
|
353
|
+
try {
|
|
354
|
+
for (var _l = true, streamedResponse_1 = __asyncValues(streamedResponse), streamedResponse_1_1; streamedResponse_1_1 = yield streamedResponse_1.next(), _a = streamedResponse_1_1.done, !_a; _l = true) {
|
|
355
|
+
_c = streamedResponse_1_1.value;
|
|
356
|
+
_l = false;
|
|
357
|
+
const chunk = _c;
|
|
358
|
+
// Process each chunk - for example, accumulate input data
|
|
359
|
+
const chunkOutput = (_d = chunk.choices[0].delta.content) !== null && _d !== void 0 ? _d : '';
|
|
360
|
+
streamedOutput += chunkOutput;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
364
|
+
finally {
|
|
365
|
+
try {
|
|
366
|
+
if (!_l && !_a && (_b = streamedResponse_1.return)) yield _b.call(streamedResponse_1);
|
|
367
|
+
}
|
|
368
|
+
finally { if (e_1) throw e_1.error; }
|
|
369
|
+
}
|
|
370
|
+
const endTime = Date.now();
|
|
371
|
+
const latency = endTime - startTime;
|
|
372
|
+
this.openlayerClient.streamData(Object.assign(Object.assign({ latency, output: streamedOutput, timestamp: startTime }, inputVariablesMap), additionalLogs), config, this.inferencePipeline.id);
|
|
352
373
|
}
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
374
|
+
else {
|
|
375
|
+
const nonStreamedResponse = response;
|
|
376
|
+
// Handle regular (non-streamed) response
|
|
377
|
+
const endTime = Date.now();
|
|
378
|
+
const latency = endTime - startTime;
|
|
379
|
+
const output = nonStreamedResponse.choices[0].message.content;
|
|
380
|
+
const tokens = (_f = (_e = nonStreamedResponse.usage) === null || _e === void 0 ? void 0 : _e.total_tokens) !== null && _f !== void 0 ? _f : 0;
|
|
381
|
+
const inputTokens = (_h = (_g = nonStreamedResponse.usage) === null || _g === void 0 ? void 0 : _g.prompt_tokens) !== null && _h !== void 0 ? _h : 0;
|
|
382
|
+
const outputTokens = (_k = (_j = nonStreamedResponse.usage) === null || _j === void 0 ? void 0 : _j.completion_tokens) !== null && _k !== void 0 ? _k : 0;
|
|
383
|
+
const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
|
|
384
|
+
if (typeof output === 'string') {
|
|
385
|
+
this.openlayerClient.streamData(Object.assign(Object.assign({ cost,
|
|
386
|
+
latency, model: nonStreamedResponse.model, output, timestamp: startTime, tokens }, inputVariablesMap), additionalLogs), config, this.inferencePipeline.id);
|
|
387
|
+
}
|
|
388
|
+
else {
|
|
389
|
+
console.error('No output received from OpenAI.');
|
|
390
|
+
}
|
|
358
391
|
}
|
|
359
|
-
finally { if (e_1) throw e_1.error; }
|
|
360
392
|
}
|
|
361
|
-
const endTime = Date.now();
|
|
362
|
-
const latency = endTime - startTime;
|
|
363
|
-
this.openlayerClient.streamData(Object.assign(Object.assign({ latency, output: streamedOutput, timestamp: startTime }, inputVariablesMap), additionalLogs), config, this.inferencePipeline.id);
|
|
364
393
|
}
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
// Handle regular (non-streamed) response
|
|
368
|
-
const endTime = Date.now();
|
|
369
|
-
const latency = endTime - startTime;
|
|
370
|
-
const output = nonStreamedResponse.choices[0].message.content;
|
|
371
|
-
const tokens = (_f = (_e = nonStreamedResponse.usage) === null || _e === void 0 ? void 0 : _e.total_tokens) !== null && _f !== void 0 ? _f : 0;
|
|
372
|
-
const inputTokens = (_h = (_g = nonStreamedResponse.usage) === null || _g === void 0 ? void 0 : _g.prompt_tokens) !== null && _h !== void 0 ? _h : 0;
|
|
373
|
-
const outputTokens = (_k = (_j = nonStreamedResponse.usage) === null || _j === void 0 ? void 0 : _j.completion_tokens) !== null && _k !== void 0 ? _k : 0;
|
|
374
|
-
const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
|
|
375
|
-
if (typeof output !== 'string') {
|
|
376
|
-
throw new Error('No output received from OpenAI.');
|
|
377
|
-
}
|
|
378
|
-
this.openlayerClient.streamData(Object.assign(Object.assign({ cost,
|
|
379
|
-
latency, model: nonStreamedResponse.model, output, timestamp: startTime, tokens }, inputVariablesMap), additionalLogs), config, this.inferencePipeline.id);
|
|
394
|
+
catch (error) {
|
|
395
|
+
console.error(error);
|
|
380
396
|
}
|
|
381
397
|
return response;
|
|
382
398
|
});
|
|
@@ -385,19 +401,18 @@ class OpenAIMonitor {
|
|
|
385
401
|
* @param {CompletionCreateParams} body - The parameters for creating a completion.
|
|
386
402
|
* @param {RequestOptions} [options] - Optional request options.
|
|
387
403
|
* @returns {Promise<Completion | Stream<Completion>>} Promise that resolves to a Completion or a Stream.
|
|
388
|
-
* @throws {Error} Throws an error if monitoring is not active or if no prompt is provided.
|
|
389
404
|
*/
|
|
390
405
|
this.createCompletion = (body, options, additionalLogs) => __awaiter(this, void 0, void 0, function* () {
|
|
391
406
|
var _m, e_2, _o, _p;
|
|
392
407
|
var _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0, _1;
|
|
393
|
-
if (!
|
|
394
|
-
|
|
408
|
+
if (!body.prompt) {
|
|
409
|
+
console.error('No prompt provided.');
|
|
395
410
|
}
|
|
396
|
-
if (
|
|
397
|
-
|
|
411
|
+
if (!this.monitoringOn) {
|
|
412
|
+
console.warn('Monitoring is not active.');
|
|
398
413
|
}
|
|
399
|
-
if (
|
|
400
|
-
|
|
414
|
+
else if (typeof this.inferencePipeline === 'undefined') {
|
|
415
|
+
console.error('No inference pipeline found.');
|
|
401
416
|
}
|
|
402
417
|
// Start a timer to measure latency
|
|
403
418
|
const startTime = Date.now();
|
|
@@ -408,44 +423,51 @@ class OpenAIMonitor {
|
|
|
408
423
|
let streamedInputTokens = 0;
|
|
409
424
|
let streamedOutputTokens = 0;
|
|
410
425
|
const response = yield this.openAIClient.completions.create(body, options);
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
426
|
+
try {
|
|
427
|
+
if (this.monitoringOn && typeof this.inferencePipeline !== 'undefined') {
|
|
428
|
+
const config = Object.assign(Object.assign({}, this.openlayerClient.defaultConfig), { inputVariableNames: ['input'] });
|
|
429
|
+
if (body.stream) {
|
|
430
|
+
const streamedResponse = response;
|
|
431
|
+
try {
|
|
432
|
+
for (var _2 = true, streamedResponse_2 = __asyncValues(streamedResponse), streamedResponse_2_1; streamedResponse_2_1 = yield streamedResponse_2.next(), _m = streamedResponse_2_1.done, !_m; _2 = true) {
|
|
433
|
+
_p = streamedResponse_2_1.value;
|
|
434
|
+
_2 = false;
|
|
435
|
+
const chunk = _p;
|
|
436
|
+
// Process each chunk - for example, accumulate input data
|
|
437
|
+
streamedModel = chunk.model;
|
|
438
|
+
streamedOutput += chunk.choices[0].text.trim();
|
|
439
|
+
streamedTokens += (_r = (_q = chunk.usage) === null || _q === void 0 ? void 0 : _q.total_tokens) !== null && _r !== void 0 ? _r : 0;
|
|
440
|
+
streamedInputTokens += (_t = (_s = chunk.usage) === null || _s === void 0 ? void 0 : _s.prompt_tokens) !== null && _t !== void 0 ? _t : 0;
|
|
441
|
+
streamedOutputTokens += (_v = (_u = chunk.usage) === null || _u === void 0 ? void 0 : _u.completion_tokens) !== null && _v !== void 0 ? _v : 0;
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
catch (e_2_1) { e_2 = { error: e_2_1 }; }
|
|
445
|
+
finally {
|
|
446
|
+
try {
|
|
447
|
+
if (!_2 && !_m && (_o = streamedResponse_2.return)) yield _o.call(streamedResponse_2);
|
|
448
|
+
}
|
|
449
|
+
finally { if (e_2) throw e_2.error; }
|
|
450
|
+
}
|
|
451
|
+
const endTime = Date.now();
|
|
452
|
+
const latency = endTime - startTime;
|
|
453
|
+
const cost = this.cost(streamedModel, streamedInputTokens, streamedOutputTokens);
|
|
454
|
+
this.openlayerClient.streamData(Object.assign({ cost, input: body.prompt, latency, output: streamedOutput, timestamp: startTime, tokens: streamedTokens }, additionalLogs), config, this.inferencePipeline.id);
|
|
425
455
|
}
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
456
|
+
else {
|
|
457
|
+
const nonStreamedResponse = response;
|
|
458
|
+
// Handle regular (non-streamed) response
|
|
459
|
+
const endTime = Date.now();
|
|
460
|
+
const latency = endTime - startTime;
|
|
461
|
+
const tokens = (_x = (_w = nonStreamedResponse.usage) === null || _w === void 0 ? void 0 : _w.total_tokens) !== null && _x !== void 0 ? _x : 0;
|
|
462
|
+
const inputTokens = (_z = (_y = nonStreamedResponse.usage) === null || _y === void 0 ? void 0 : _y.prompt_tokens) !== null && _z !== void 0 ? _z : 0;
|
|
463
|
+
const outputTokens = (_1 = (_0 = nonStreamedResponse.usage) === null || _0 === void 0 ? void 0 : _0.completion_tokens) !== null && _1 !== void 0 ? _1 : 0;
|
|
464
|
+
const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
|
|
465
|
+
this.openlayerClient.streamData(Object.assign({ cost, input: body.prompt, latency, output: nonStreamedResponse.choices[0].text, timestamp: startTime, tokens }, additionalLogs), config, this.inferencePipeline.id);
|
|
431
466
|
}
|
|
432
|
-
finally { if (e_2) throw e_2.error; }
|
|
433
467
|
}
|
|
434
|
-
const endTime = Date.now();
|
|
435
|
-
const latency = endTime - startTime;
|
|
436
|
-
const cost = this.cost(streamedModel, streamedInputTokens, streamedOutputTokens);
|
|
437
|
-
this.openlayerClient.streamData(Object.assign({ cost, input: body.prompt, latency, output: streamedOutput, timestamp: startTime, tokens: streamedTokens }, additionalLogs), config, this.inferencePipeline.id);
|
|
438
468
|
}
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
// Handle regular (non-streamed) response
|
|
442
|
-
const endTime = Date.now();
|
|
443
|
-
const latency = endTime - startTime;
|
|
444
|
-
const tokens = (_x = (_w = nonStreamedResponse.usage) === null || _w === void 0 ? void 0 : _w.total_tokens) !== null && _x !== void 0 ? _x : 0;
|
|
445
|
-
const inputTokens = (_z = (_y = nonStreamedResponse.usage) === null || _y === void 0 ? void 0 : _y.prompt_tokens) !== null && _z !== void 0 ? _z : 0;
|
|
446
|
-
const outputTokens = (_1 = (_0 = nonStreamedResponse.usage) === null || _0 === void 0 ? void 0 : _0.completion_tokens) !== null && _1 !== void 0 ? _1 : 0;
|
|
447
|
-
const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
|
|
448
|
-
this.openlayerClient.streamData(Object.assign({ cost, input: body.prompt, latency, output: nonStreamedResponse.choices[0].text, timestamp: startTime, tokens }, additionalLogs), config, this.inferencePipeline.id);
|
|
469
|
+
catch (error) {
|
|
470
|
+
console.error(error);
|
|
449
471
|
}
|
|
450
472
|
return response;
|
|
451
473
|
});
|
|
@@ -472,13 +494,19 @@ class OpenAIMonitor {
|
|
|
472
494
|
return;
|
|
473
495
|
}
|
|
474
496
|
console.info('Starting monitor: creating or loading an Openlayer project and inference pipeline...');
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
this.
|
|
479
|
-
|
|
497
|
+
try {
|
|
498
|
+
this.monitoringOn = true;
|
|
499
|
+
this.project = yield this.openlayerClient.createProject(this.openlayerProjectName, 'llm-base');
|
|
500
|
+
if (typeof this.project !== 'undefined') {
|
|
501
|
+
this.inferencePipeline =
|
|
502
|
+
yield this.openlayerClient.createInferencePipeline(this.project.id, this.openlayerInferencePipelineName);
|
|
503
|
+
}
|
|
504
|
+
console.info('Monitor started');
|
|
505
|
+
}
|
|
506
|
+
catch (error) {
|
|
507
|
+
console.error('An error occurred while starting the monitor:', error);
|
|
508
|
+
this.stopMonitoring();
|
|
480
509
|
}
|
|
481
|
-
console.info('Monitor started');
|
|
482
510
|
});
|
|
483
511
|
}
|
|
484
512
|
/**
|