openlayer 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +15 -10
- package/dist/index.js +152 -77
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -7,6 +7,10 @@ import { Stream } from 'openai/streaming';
|
|
|
7
7
|
*/
|
|
8
8
|
export interface StreamingData {
|
|
9
9
|
[columnName: string]: any;
|
|
10
|
+
/**
|
|
11
|
+
* The total estimated cost of the chat completion in USD. Optional.
|
|
12
|
+
*/
|
|
13
|
+
cost?: number;
|
|
10
14
|
/**
|
|
11
15
|
* The latency of the chat completion in milliseconds. Optional.
|
|
12
16
|
*/
|
|
@@ -139,14 +143,6 @@ export declare class OpenlayerClient {
|
|
|
139
143
|
*/
|
|
140
144
|
constructor({ openlayerApiKey, openlayerServerUrl, }: OpenlayerClientConstructorProps);
|
|
141
145
|
private resolvedQuery;
|
|
142
|
-
/**
|
|
143
|
-
* Streams data to the Openlayer inference pipeline.
|
|
144
|
-
* @param {StreamingData} data - The chat completion data to be streamed.
|
|
145
|
-
* @param {string} inferencePipelineId - The ID of the Openlayer inference pipeline to which data is streamed.
|
|
146
|
-
* @returns {Promise<void>} A promise that resolves when the data has been successfully streamed.
|
|
147
|
-
* @throws {Error} Throws an error if the Openlayer API key is not set or an error occurs in the streaming process.
|
|
148
|
-
*/
|
|
149
|
-
streamData: (data: StreamingData, config: StreamingDataConfig, inferencePipelineId: string) => Promise<void>;
|
|
150
146
|
/**
|
|
151
147
|
* Creates a new inference pipeline in Openlayer or loads an existing one.
|
|
152
148
|
* @param {string} projectId - The ID of the project containing the inference pipeline.
|
|
@@ -179,6 +175,14 @@ export declare class OpenlayerClient {
|
|
|
179
175
|
* @throws {Error} Throws an error if the project is not found.
|
|
180
176
|
*/
|
|
181
177
|
loadProject: (name: string) => Promise<OpenlayerProject>;
|
|
178
|
+
/**
|
|
179
|
+
* Streams data to the Openlayer inference pipeline.
|
|
180
|
+
* @param {StreamingData} data - The chat completion data to be streamed.
|
|
181
|
+
* @param {string} inferencePipelineId - The ID of the Openlayer inference pipeline to which data is streamed.
|
|
182
|
+
* @returns {Promise<void>} A promise that resolves when the data has been successfully streamed.
|
|
183
|
+
* @throws {Error} Throws an error if the Openlayer API key is not set or an error occurs in the streaming process.
|
|
184
|
+
*/
|
|
185
|
+
streamData: (data: StreamingData, config: StreamingDataConfig, inferencePipelineId: string) => Promise<void>;
|
|
182
186
|
}
|
|
183
187
|
export declare class OpenAIMonitor {
|
|
184
188
|
private openlayerClient;
|
|
@@ -191,6 +195,7 @@ export declare class OpenAIMonitor {
|
|
|
191
195
|
* @param {OpenAIMonitorConstructorProps} props - The configuration properties for the OpenAI and Openlayer clients.
|
|
192
196
|
*/
|
|
193
197
|
constructor({ openAiApiKey, openlayerApiKey, openlayerProjectName, openlayerInferencePipelineName, openlayerServerUrl, }: OpenAIMonitorConstructorProps);
|
|
198
|
+
private cost;
|
|
194
199
|
private formatChatCompletionInput;
|
|
195
200
|
/**
|
|
196
201
|
* Creates a chat completion using the OpenAI client and streams the result to Openlayer.
|
|
@@ -199,7 +204,7 @@ export declare class OpenAIMonitor {
|
|
|
199
204
|
* @returns {Promise<ChatCompletion | Stream<ChatCompletionChunk>>} Promise of a ChatCompletion or a Stream
|
|
200
205
|
* @throws {Error} Throws an error if monitoring is not active or if no output is received from OpenAI.
|
|
201
206
|
*/
|
|
202
|
-
createChatCompletion: (body: ChatCompletionCreateParams, options?: RequestOptions) => Promise<ChatCompletion | Stream<ChatCompletionChunk>>;
|
|
207
|
+
createChatCompletion: (body: ChatCompletionCreateParams, options?: RequestOptions, additionalLogs?: StreamingData) => Promise<ChatCompletion | Stream<ChatCompletionChunk>>;
|
|
203
208
|
/**
|
|
204
209
|
* Creates a completion using the OpenAI client and streams the result to Openlayer.
|
|
205
210
|
* @param {CompletionCreateParams} body - The parameters for creating a completion.
|
|
@@ -207,7 +212,7 @@ export declare class OpenAIMonitor {
|
|
|
207
212
|
* @returns {Promise<Completion | Stream<Completion>>} Promise that resolves to a Completion or a Stream.
|
|
208
213
|
* @throws {Error} Throws an error if monitoring is not active or if no prompt is provided.
|
|
209
214
|
*/
|
|
210
|
-
createCompletion: (body: CompletionCreateParams, options?: RequestOptions) => Promise<Completion | Stream<Completion>>;
|
|
215
|
+
createCompletion: (body: CompletionCreateParams, options?: RequestOptions, additionalLogs?: StreamingData) => Promise<Completion | Stream<Completion>>;
|
|
211
216
|
/**
|
|
212
217
|
* Starts monitoring for the OpenAI Monitor instance. If monitoring is already active, a warning is logged.
|
|
213
218
|
*/
|
package/dist/index.js
CHANGED
|
@@ -20,6 +20,64 @@ exports.OpenAIMonitor = exports.OpenlayerClient = void 0;
|
|
|
20
20
|
const openai_1 = require("openai");
|
|
21
21
|
const uuid_1 = require("uuid");
|
|
22
22
|
const request_1 = require("./utils/request");
|
|
23
|
+
const OpenAIPricing = {
|
|
24
|
+
'babbage-002': {
|
|
25
|
+
input: 0.0004,
|
|
26
|
+
output: 0.0004,
|
|
27
|
+
},
|
|
28
|
+
'davinci-002': {
|
|
29
|
+
input: 0.002,
|
|
30
|
+
output: 0.002,
|
|
31
|
+
},
|
|
32
|
+
'gpt-3.5-turbo': {
|
|
33
|
+
input: 0.003,
|
|
34
|
+
output: 0.006,
|
|
35
|
+
},
|
|
36
|
+
'gpt-3.5-turbo-0301': {
|
|
37
|
+
input: 0.0015,
|
|
38
|
+
output: 0.002,
|
|
39
|
+
},
|
|
40
|
+
'gpt-3.5-turbo-0613': {
|
|
41
|
+
input: 0.0015,
|
|
42
|
+
output: 0.002,
|
|
43
|
+
},
|
|
44
|
+
'gpt-3.5-turbo-1106': {
|
|
45
|
+
input: 0.001,
|
|
46
|
+
output: 0.002,
|
|
47
|
+
},
|
|
48
|
+
'gpt-3.5-turbo-16k-0613': {
|
|
49
|
+
input: 0.003,
|
|
50
|
+
output: 0.004,
|
|
51
|
+
},
|
|
52
|
+
'gpt-3.5-turbo-instruct': {
|
|
53
|
+
input: 0.0015,
|
|
54
|
+
output: 0.002,
|
|
55
|
+
},
|
|
56
|
+
'gpt-4': {
|
|
57
|
+
input: 0.03,
|
|
58
|
+
output: 0.06,
|
|
59
|
+
},
|
|
60
|
+
'gpt-4-0314': {
|
|
61
|
+
input: 0.03,
|
|
62
|
+
output: 0.06,
|
|
63
|
+
},
|
|
64
|
+
'gpt-4-1106-preview': {
|
|
65
|
+
input: 0.01,
|
|
66
|
+
output: 0.03,
|
|
67
|
+
},
|
|
68
|
+
'gpt-4-1106-vision-preview': {
|
|
69
|
+
input: 0.01,
|
|
70
|
+
output: 0.03,
|
|
71
|
+
},
|
|
72
|
+
'gpt-4-32k': {
|
|
73
|
+
input: 0.06,
|
|
74
|
+
output: 0.12,
|
|
75
|
+
},
|
|
76
|
+
'gpt-4-32k-0314': {
|
|
77
|
+
input: 0.06,
|
|
78
|
+
output: 0.12,
|
|
79
|
+
},
|
|
80
|
+
};
|
|
23
81
|
class OpenlayerClient {
|
|
24
82
|
/**
|
|
25
83
|
* Constructs an OpenlayerClient instance.
|
|
@@ -38,45 +96,6 @@ class OpenlayerClient {
|
|
|
38
96
|
this.openlayerServerUrl = 'https://api.openlayer.com/v1';
|
|
39
97
|
this.version = '0.1.0a16';
|
|
40
98
|
this.resolvedQuery = (endpoint, args = {}) => (0, request_1.resolvedQuery)(this.openlayerServerUrl, endpoint, args);
|
|
41
|
-
/**
|
|
42
|
-
* Streams data to the Openlayer inference pipeline.
|
|
43
|
-
* @param {StreamingData} data - The chat completion data to be streamed.
|
|
44
|
-
* @param {string} inferencePipelineId - The ID of the Openlayer inference pipeline to which data is streamed.
|
|
45
|
-
* @returns {Promise<void>} A promise that resolves when the data has been successfully streamed.
|
|
46
|
-
* @throws {Error} Throws an error if the Openlayer API key is not set or an error occurs in the streaming process.
|
|
47
|
-
*/
|
|
48
|
-
this.streamData = (data, config, inferencePipelineId) => __awaiter(this, void 0, void 0, function* () {
|
|
49
|
-
var _a;
|
|
50
|
-
if (!this.openlayerApiKey) {
|
|
51
|
-
throw new Error('Openlayer API key are required for streaming data.');
|
|
52
|
-
}
|
|
53
|
-
try {
|
|
54
|
-
const dataStreamEndpoint = `/inference-pipelines/${inferencePipelineId}/data-stream`;
|
|
55
|
-
const dataStreamQuery = this.resolvedQuery(dataStreamEndpoint);
|
|
56
|
-
const response = yield fetch(dataStreamQuery, {
|
|
57
|
-
body: JSON.stringify({
|
|
58
|
-
config,
|
|
59
|
-
rows: [
|
|
60
|
-
Object.assign(Object.assign({}, data), { id: (0, uuid_1.v4)(), timestamp: Math.round(((_a = data.timestamp) !== null && _a !== void 0 ? _a : Date.now()) / 1000) }),
|
|
61
|
-
],
|
|
62
|
-
}),
|
|
63
|
-
headers: {
|
|
64
|
-
Authorization: `Bearer ${this.openlayerApiKey}`,
|
|
65
|
-
'Content-Type': 'application/json',
|
|
66
|
-
},
|
|
67
|
-
method: 'POST',
|
|
68
|
-
});
|
|
69
|
-
if (!response.ok) {
|
|
70
|
-
console.error('Error making POST request:', response.status);
|
|
71
|
-
throw new Error(`Error: ${response.status}`);
|
|
72
|
-
}
|
|
73
|
-
return yield response.json();
|
|
74
|
-
}
|
|
75
|
-
catch (error) {
|
|
76
|
-
console.error('Error streaming data to Openlayer:', error);
|
|
77
|
-
throw error;
|
|
78
|
-
}
|
|
79
|
-
});
|
|
80
99
|
/**
|
|
81
100
|
* Creates a new inference pipeline in Openlayer or loads an existing one.
|
|
82
101
|
* @param {string} projectId - The ID of the project containing the inference pipeline.
|
|
@@ -88,7 +107,7 @@ class OpenlayerClient {
|
|
|
88
107
|
try {
|
|
89
108
|
return yield this.loadInferencePipeline(projectId, name);
|
|
90
109
|
}
|
|
91
|
-
catch (
|
|
110
|
+
catch (_a) {
|
|
92
111
|
const createInferencePipelineEndpoint = `/projects/${projectId}/inference-pipelines`;
|
|
93
112
|
const createInferencePipelineQuery = this.resolvedQuery(createInferencePipelineEndpoint, { version: this.version });
|
|
94
113
|
const createInferencePipelineResponse = yield fetch(createInferencePipelineQuery, {
|
|
@@ -121,7 +140,7 @@ class OpenlayerClient {
|
|
|
121
140
|
try {
|
|
122
141
|
return yield this.loadProject(name);
|
|
123
142
|
}
|
|
124
|
-
catch (
|
|
143
|
+
catch (_b) {
|
|
125
144
|
const projectsEndpoint = '/projects';
|
|
126
145
|
const projectsQuery = this.resolvedQuery(projectsEndpoint);
|
|
127
146
|
const response = yield fetch(projectsQuery, {
|
|
@@ -209,6 +228,45 @@ class OpenlayerClient {
|
|
|
209
228
|
}
|
|
210
229
|
return project;
|
|
211
230
|
});
|
|
231
|
+
/**
|
|
232
|
+
* Streams data to the Openlayer inference pipeline.
|
|
233
|
+
* @param {StreamingData} data - The chat completion data to be streamed.
|
|
234
|
+
* @param {string} inferencePipelineId - The ID of the Openlayer inference pipeline to which data is streamed.
|
|
235
|
+
* @returns {Promise<void>} A promise that resolves when the data has been successfully streamed.
|
|
236
|
+
* @throws {Error} Throws an error if the Openlayer API key is not set or an error occurs in the streaming process.
|
|
237
|
+
*/
|
|
238
|
+
this.streamData = (data, config, inferencePipelineId) => __awaiter(this, void 0, void 0, function* () {
|
|
239
|
+
var _c;
|
|
240
|
+
if (!this.openlayerApiKey) {
|
|
241
|
+
throw new Error('Openlayer API key are required for streaming data.');
|
|
242
|
+
}
|
|
243
|
+
try {
|
|
244
|
+
const dataStreamEndpoint = `/inference-pipelines/${inferencePipelineId}/data-stream`;
|
|
245
|
+
const dataStreamQuery = this.resolvedQuery(dataStreamEndpoint);
|
|
246
|
+
const response = yield fetch(dataStreamQuery, {
|
|
247
|
+
body: JSON.stringify({
|
|
248
|
+
config,
|
|
249
|
+
rows: [
|
|
250
|
+
Object.assign(Object.assign({}, data), { id: (0, uuid_1.v4)(), timestamp: Math.round(((_c = data.timestamp) !== null && _c !== void 0 ? _c : Date.now()) / 1000) }),
|
|
251
|
+
],
|
|
252
|
+
}),
|
|
253
|
+
headers: {
|
|
254
|
+
Authorization: `Bearer ${this.openlayerApiKey}`,
|
|
255
|
+
'Content-Type': 'application/json',
|
|
256
|
+
},
|
|
257
|
+
method: 'POST',
|
|
258
|
+
});
|
|
259
|
+
if (!response.ok) {
|
|
260
|
+
console.error('Error making POST request:', response.status);
|
|
261
|
+
throw new Error(`Error: ${response.status}`);
|
|
262
|
+
}
|
|
263
|
+
return yield response.json();
|
|
264
|
+
}
|
|
265
|
+
catch (error) {
|
|
266
|
+
console.error('Error streaming data to Openlayer:', error);
|
|
267
|
+
throw error;
|
|
268
|
+
}
|
|
269
|
+
});
|
|
212
270
|
this.openlayerApiKey = openlayerApiKey;
|
|
213
271
|
if (openlayerServerUrl) {
|
|
214
272
|
this.openlayerServerUrl = openlayerServerUrl;
|
|
@@ -227,6 +285,18 @@ class OpenAIMonitor {
|
|
|
227
285
|
constructor({ openAiApiKey, openlayerApiKey, openlayerProjectName, openlayerInferencePipelineName, openlayerServerUrl, }) {
|
|
228
286
|
this.openlayerInferencePipelineName = 'production';
|
|
229
287
|
this.monitoringOn = false;
|
|
288
|
+
this.cost = (model, inputTokens, outputTokens) => {
|
|
289
|
+
const pricing = OpenAIPricing[model];
|
|
290
|
+
const inputCost = typeof pricing === 'undefined'
|
|
291
|
+
? undefined
|
|
292
|
+
: (inputTokens / 1000) * pricing.input;
|
|
293
|
+
const outputCost = typeof pricing === 'undefined'
|
|
294
|
+
? undefined
|
|
295
|
+
: (outputTokens / 1000) * pricing.output;
|
|
296
|
+
return typeof pricing === 'undefined'
|
|
297
|
+
? undefined
|
|
298
|
+
: (inputCost !== null && inputCost !== void 0 ? inputCost : 0) + (outputCost !== null && outputCost !== void 0 ? outputCost : 0);
|
|
299
|
+
};
|
|
230
300
|
this.formatChatCompletionInput = (messages) => messages.map(({ content, role }, i) => (role === 'user'
|
|
231
301
|
? `{{ message_${i} }}`
|
|
232
302
|
: content));
|
|
@@ -237,9 +307,9 @@ class OpenAIMonitor {
|
|
|
237
307
|
* @returns {Promise<ChatCompletion | Stream<ChatCompletionChunk>>} Promise of a ChatCompletion or a Stream
|
|
238
308
|
* @throws {Error} Throws an error if monitoring is not active or if no output is received from OpenAI.
|
|
239
309
|
*/
|
|
240
|
-
this.createChatCompletion = (body, options) => __awaiter(this, void 0, void 0, function* () {
|
|
310
|
+
this.createChatCompletion = (body, options, additionalLogs) => __awaiter(this, void 0, void 0, function* () {
|
|
241
311
|
var _a, e_1, _b, _c;
|
|
242
|
-
var _d, _e;
|
|
312
|
+
var _d, _e, _f, _g, _h, _j, _k;
|
|
243
313
|
if (!this.monitoringOn) {
|
|
244
314
|
throw new Error('Monitoring is not active.');
|
|
245
315
|
}
|
|
@@ -248,7 +318,7 @@ class OpenAIMonitor {
|
|
|
248
318
|
// Start a timer to measure latency
|
|
249
319
|
const startTime = Date.now();
|
|
250
320
|
// Accumulate output for streamed responses
|
|
251
|
-
let
|
|
321
|
+
let streamedOutput = '';
|
|
252
322
|
const response = yield this.openAIClient.chat.completions.create(body, options);
|
|
253
323
|
const prompt = this.formatChatCompletionInput(body.messages);
|
|
254
324
|
const inputVariableNames = prompt
|
|
@@ -263,24 +333,25 @@ class OpenAIMonitor {
|
|
|
263
333
|
if (body.stream) {
|
|
264
334
|
const streamedResponse = response;
|
|
265
335
|
try {
|
|
266
|
-
for (var
|
|
336
|
+
for (var _l = true, streamedResponse_1 = __asyncValues(streamedResponse), streamedResponse_1_1; streamedResponse_1_1 = yield streamedResponse_1.next(), _a = streamedResponse_1_1.done, !_a; _l = true) {
|
|
267
337
|
_c = streamedResponse_1_1.value;
|
|
268
|
-
|
|
338
|
+
_l = false;
|
|
269
339
|
const chunk = _c;
|
|
270
340
|
// Process each chunk - for example, accumulate input data
|
|
271
|
-
|
|
341
|
+
const chunkOutput = (_d = chunk.choices[0].delta.content) !== null && _d !== void 0 ? _d : '';
|
|
342
|
+
streamedOutput += chunkOutput;
|
|
272
343
|
}
|
|
273
344
|
}
|
|
274
345
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
275
346
|
finally {
|
|
276
347
|
try {
|
|
277
|
-
if (!
|
|
348
|
+
if (!_l && !_a && (_b = streamedResponse_1.return)) yield _b.call(streamedResponse_1);
|
|
278
349
|
}
|
|
279
350
|
finally { if (e_1) throw e_1.error; }
|
|
280
351
|
}
|
|
281
352
|
const endTime = Date.now();
|
|
282
353
|
const latency = endTime - startTime;
|
|
283
|
-
this.openlayerClient.streamData(Object.assign({ latency, output:
|
|
354
|
+
this.openlayerClient.streamData(Object.assign(Object.assign({ latency, output: streamedOutput, timestamp: startTime }, inputVariablesMap), additionalLogs), config, inferencePipeline.id);
|
|
284
355
|
}
|
|
285
356
|
else {
|
|
286
357
|
const nonStreamedResponse = response;
|
|
@@ -288,11 +359,16 @@ class OpenAIMonitor {
|
|
|
288
359
|
const endTime = Date.now();
|
|
289
360
|
const latency = endTime - startTime;
|
|
290
361
|
const output = nonStreamedResponse.choices[0].message.content;
|
|
362
|
+
const tokens = (_f = (_e = nonStreamedResponse.usage) === null || _e === void 0 ? void 0 : _e.total_tokens) !== null && _f !== void 0 ? _f : 0;
|
|
363
|
+
const inputTokens = (_h = (_g = nonStreamedResponse.usage) === null || _g === void 0 ? void 0 : _g.prompt_tokens) !== null && _h !== void 0 ? _h : 0;
|
|
364
|
+
const outputTokens = (_k = (_j = nonStreamedResponse.usage) === null || _j === void 0 ? void 0 : _j.completion_tokens) !== null && _k !== void 0 ? _k : 0;
|
|
365
|
+
const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
|
|
291
366
|
if (typeof output !== 'string') {
|
|
292
367
|
throw new Error('No output received from OpenAI.');
|
|
293
368
|
}
|
|
294
|
-
this.openlayerClient.streamData(Object.assign({
|
|
295
|
-
|
|
369
|
+
this.openlayerClient.streamData(Object.assign(Object.assign({ cost,
|
|
370
|
+
latency,
|
|
371
|
+
output, timestamp: startTime, tokens }, inputVariablesMap), additionalLogs), config, inferencePipeline.id);
|
|
296
372
|
}
|
|
297
373
|
return response;
|
|
298
374
|
});
|
|
@@ -303,9 +379,9 @@ class OpenAIMonitor {
|
|
|
303
379
|
* @returns {Promise<Completion | Stream<Completion>>} Promise that resolves to a Completion or a Stream.
|
|
304
380
|
* @throws {Error} Throws an error if monitoring is not active or if no prompt is provided.
|
|
305
381
|
*/
|
|
306
|
-
this.createCompletion = (body, options) => __awaiter(this, void 0, void 0, function* () {
|
|
307
|
-
var
|
|
308
|
-
var
|
|
382
|
+
this.createCompletion = (body, options, additionalLogs) => __awaiter(this, void 0, void 0, function* () {
|
|
383
|
+
var _m, e_2, _o, _p;
|
|
384
|
+
var _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0, _1;
|
|
309
385
|
if (!this.monitoringOn) {
|
|
310
386
|
throw new Error('Monitoring is not active.');
|
|
311
387
|
}
|
|
@@ -317,51 +393,50 @@ class OpenAIMonitor {
|
|
|
317
393
|
// Start a timer to measure latency
|
|
318
394
|
const startTime = Date.now();
|
|
319
395
|
// Accumulate output and tokens data for streamed responses
|
|
320
|
-
let
|
|
321
|
-
let
|
|
396
|
+
let streamedModel = body.model;
|
|
397
|
+
let streamedOutput = '';
|
|
398
|
+
let streamedTokens = 0;
|
|
399
|
+
let streamedInputTokens = 0;
|
|
400
|
+
let streamedOutputTokens = 0;
|
|
322
401
|
const response = yield this.openAIClient.completions.create(body, options);
|
|
323
402
|
const config = Object.assign(Object.assign({}, this.openlayerClient.defaultConfig), { inputVariableNames: ['input'] });
|
|
324
403
|
if (body.stream) {
|
|
325
404
|
const streamedResponse = response;
|
|
326
405
|
try {
|
|
327
|
-
for (var
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
const chunk =
|
|
406
|
+
for (var _2 = true, streamedResponse_2 = __asyncValues(streamedResponse), streamedResponse_2_1; streamedResponse_2_1 = yield streamedResponse_2.next(), _m = streamedResponse_2_1.done, !_m; _2 = true) {
|
|
407
|
+
_p = streamedResponse_2_1.value;
|
|
408
|
+
_2 = false;
|
|
409
|
+
const chunk = _p;
|
|
331
410
|
// Process each chunk - for example, accumulate input data
|
|
332
|
-
|
|
333
|
-
|
|
411
|
+
streamedModel = chunk.model;
|
|
412
|
+
streamedOutput += chunk.choices[0].text.trim();
|
|
413
|
+
streamedTokens += (_r = (_q = chunk.usage) === null || _q === void 0 ? void 0 : _q.total_tokens) !== null && _r !== void 0 ? _r : 0;
|
|
414
|
+
streamedInputTokens += (_t = (_s = chunk.usage) === null || _s === void 0 ? void 0 : _s.prompt_tokens) !== null && _t !== void 0 ? _t : 0;
|
|
415
|
+
streamedOutputTokens += (_v = (_u = chunk.usage) === null || _u === void 0 ? void 0 : _u.completion_tokens) !== null && _v !== void 0 ? _v : 0;
|
|
334
416
|
}
|
|
335
417
|
}
|
|
336
418
|
catch (e_2_1) { e_2 = { error: e_2_1 }; }
|
|
337
419
|
finally {
|
|
338
420
|
try {
|
|
339
|
-
if (!
|
|
421
|
+
if (!_2 && !_m && (_o = streamedResponse_2.return)) yield _o.call(streamedResponse_2);
|
|
340
422
|
}
|
|
341
423
|
finally { if (e_2) throw e_2.error; }
|
|
342
424
|
}
|
|
343
425
|
const endTime = Date.now();
|
|
344
426
|
const latency = endTime - startTime;
|
|
345
|
-
this.
|
|
346
|
-
|
|
347
|
-
latency,
|
|
348
|
-
output: outputData,
|
|
349
|
-
timestamp: startTime,
|
|
350
|
-
tokens: tokensData,
|
|
351
|
-
}, config, inferencePipeline.id);
|
|
427
|
+
const cost = this.cost(streamedModel, streamedInputTokens, streamedOutputTokens);
|
|
428
|
+
this.openlayerClient.streamData(Object.assign({ cost, input: body.prompt, latency, output: streamedOutput, timestamp: startTime, tokens: streamedTokens }, additionalLogs), config, inferencePipeline.id);
|
|
352
429
|
}
|
|
353
430
|
else {
|
|
354
431
|
const nonStreamedResponse = response;
|
|
355
432
|
// Handle regular (non-streamed) response
|
|
356
433
|
const endTime = Date.now();
|
|
357
434
|
const latency = endTime - startTime;
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
tokens: (_o = (_m = nonStreamedResponse.usage) === null || _m === void 0 ? void 0 : _m.total_tokens) !== null && _o !== void 0 ? _o : 0,
|
|
364
|
-
}, config, inferencePipeline.id);
|
|
435
|
+
const tokens = (_x = (_w = nonStreamedResponse.usage) === null || _w === void 0 ? void 0 : _w.total_tokens) !== null && _x !== void 0 ? _x : 0;
|
|
436
|
+
const inputTokens = (_z = (_y = nonStreamedResponse.usage) === null || _y === void 0 ? void 0 : _y.prompt_tokens) !== null && _z !== void 0 ? _z : 0;
|
|
437
|
+
const outputTokens = (_1 = (_0 = nonStreamedResponse.usage) === null || _0 === void 0 ? void 0 : _0.completion_tokens) !== null && _1 !== void 0 ? _1 : 0;
|
|
438
|
+
const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
|
|
439
|
+
this.openlayerClient.streamData(Object.assign({ cost, input: body.prompt, latency, output: nonStreamedResponse.choices[0].text, timestamp: startTime, tokens }, additionalLogs), config, inferencePipeline.id);
|
|
365
440
|
}
|
|
366
441
|
return response;
|
|
367
442
|
});
|