npm - openlayer - Versions diffs - 0.1.16 → 0.1.17 - Mend

openlayer 0.1.16 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.d.ts (CHANGED)

@@ -7,6 +7,10 @@ import { Stream } from 'openai/streaming';
  */
 export interface StreamingData {
     [columnName: string]: any;
+    /**
+     * The total estimated cost of the chat completion in USD. Optional.
+     */
+    cost?: number;
     /**
      * The latency of the chat completion in milliseconds. Optional.
      */
@@ -191,6 +195,7 @@ export declare class OpenAIMonitor {
      * @param {OpenAIMonitorConstructorProps} props - The configuration properties for the OpenAI and Openlayer clients.
      */
     constructor({ openAiApiKey, openlayerApiKey, openlayerProjectName, openlayerInferencePipelineName, openlayerServerUrl, }: OpenAIMonitorConstructorProps);
+    private cost;
     private formatChatCompletionInput;
     /**
      * Creates a chat completion using the OpenAI client and streams the result to Openlayer.
@@ -199,7 +204,7 @@ export declare class OpenAIMonitor {
      * @returns {Promise<ChatCompletion | Stream<ChatCompletionChunk>>} Promise of a ChatCompletion or a Stream
      * @throws {Error} Throws an error if monitoring is not active or if no output is received from OpenAI.
      */
-    createChatCompletion: (body: ChatCompletionCreateParams, options?: RequestOptions) => Promise<ChatCompletion | Stream<ChatCompletionChunk>>;
+    createChatCompletion: (body: ChatCompletionCreateParams, options?: RequestOptions, additionalLogs?: StreamingData) => Promise<ChatCompletion | Stream<ChatCompletionChunk>>;
     /**
      * Creates a completion using the OpenAI client and streams the result to Openlayer.
      * @param {CompletionCreateParams} body - The parameters for creating a completion.
@@ -207,7 +212,7 @@ export declare class OpenAIMonitor {
      * @returns {Promise<Completion | Stream<Completion>>} Promise that resolves to a Completion or a Stream.
      * @throws {Error} Throws an error if monitoring is not active or if no prompt is provided.
      */
-    createCompletion: (body: CompletionCreateParams, options?: RequestOptions) => Promise<Completion | Stream<Completion>>;
+    createCompletion: (body: CompletionCreateParams, options?: RequestOptions, additionalLogs?: StreamingData) => Promise<Completion | Stream<Completion>>;
     /**
      * Starts monitoring for the OpenAI Monitor instance. If monitoring is already active, a warning is logged.
      */

package/dist/index.js (CHANGED)

@@ -20,6 +20,64 @@ exports.OpenAIMonitor = exports.OpenlayerClient = void 0;
 const openai_1 = require("openai");
 const uuid_1 = require("uuid");
 const request_1 = require("./utils/request");
+const OpenAIPricing = {
+    'babbage-002': {
+        input: 0.0004,
+        output: 0.0004,
+    },
+    'davinci-002': {
+        input: 0.002,
+        output: 0.002,
+    },
+    'gpt-3.5-turbo': {
+        input: 0.003,
+        output: 0.006,
+    },
+    'gpt-3.5-turbo-0301': {
+        input: 0.0015,
+        output: 0.002,
+    },
+    'gpt-3.5-turbo-0613': {
+        input: 0.0015,
+        output: 0.002,
+    },
+    'gpt-3.5-turbo-1106': {
+        input: 0.001,
+        output: 0.002,
+    },
+    'gpt-3.5-turbo-16k-0613': {
+        input: 0.003,
+        output: 0.004,
+    },
+    'gpt-3.5-turbo-instruct': {
+        input: 0.0015,
+        output: 0.002,
+    },
+    'gpt-4': {
+        input: 0.03,
+        output: 0.06,
+    },
+    'gpt-4-0314': {
+        input: 0.03,
+        output: 0.06,
+    },
+    'gpt-4-1106-preview': {
+        input: 0.01,
+        output: 0.03,
+    },
+    'gpt-4-1106-vision-preview': {
+        input: 0.01,
+        output: 0.03,
+    },
+    'gpt-4-32k': {
+        input: 0.06,
+        output: 0.12,
+    },
+    'gpt-4-32k-0314': {
+        input: 0.06,
+        output: 0.12,
+    },
+};
 class OpenlayerClient {
     /**
      * Constructs an OpenlayerClient instance.
@@ -227,6 +285,18 @@ class OpenAIMonitor {
     constructor({ openAiApiKey, openlayerApiKey, openlayerProjectName, openlayerInferencePipelineName, openlayerServerUrl, }) {
         this.openlayerInferencePipelineName = 'production';
         this.monitoringOn = false;
+        this.cost = (model, inputTokens, outputTokens) => {
+            const pricing = OpenAIPricing[model];
+            const inputCost = typeof pricing === 'undefined'
+                ? undefined
+                : (inputTokens / 1000) * pricing.input;
+            const outputCost = typeof pricing === 'undefined'
+                ? undefined
+                : (outputTokens / 1000) * pricing.output;
+            return typeof pricing === 'undefined'
+                ? undefined
+                : (inputCost !== null && inputCost !== void 0 ? inputCost : 0) + (outputCost !== null && outputCost !== void 0 ? outputCost : 0);
+        };
         this.formatChatCompletionInput = (messages) => messages.map(({ content, role }, i) => (role === 'user'
             ? `{{ message_${i} }}`
             : content));
@@ -237,9 +307,9 @@ class OpenAIMonitor {
          * @returns {Promise<ChatCompletion | Stream<ChatCompletionChunk>>} Promise of a ChatCompletion or a Stream
          * @throws {Error} Throws an error if monitoring is not active or if no output is received from OpenAI.
          */
-        this.createChatCompletion = (body, options) => __awaiter(this, void 0, void 0, function* () {
+        this.createChatCompletion = (body, options, additionalLogs) => __awaiter(this, void 0, void 0, function* () {
             var _a, e_1, _b, _c;
-            var _d, _e;
+            var _d, _e, _f, _g, _h, _j, _k;
             if (!this.monitoringOn) {
                 throw new Error('Monitoring is not active.');
             }
@@ -248,7 +318,7 @@ class OpenAIMonitor {
             // Start a timer to measure latency
             const startTime = Date.now();
             // Accumulate output for streamed responses
-            let outputData = '';
+            let streamedOutput = '';
             const response = yield this.openAIClient.chat.completions.create(body, options);
             const prompt = this.formatChatCompletionInput(body.messages);
             const inputVariableNames = prompt
@@ -263,24 +333,25 @@ class OpenAIMonitor {
             if (body.stream) {
                 const streamedResponse = response;
                 try {
-                    for (var _f = true, streamedResponse_1 = __asyncValues(streamedResponse), streamedResponse_1_1; streamedResponse_1_1 = yield streamedResponse_1.next(), _a = streamedResponse_1_1.done, !_a; _f = true) {
+                    for (var _l = true, streamedResponse_1 = __asyncValues(streamedResponse), streamedResponse_1_1; streamedResponse_1_1 = yield streamedResponse_1.next(), _a = streamedResponse_1_1.done, !_a; _l = true) {
                         _c = streamedResponse_1_1.value;
-                        _f = false;
+                        _l = false;
                         const chunk = _c;
                         // Process each chunk - for example, accumulate input data
-                        outputData += chunk.choices[0].delta.content;
+                        const chunkOutput = (_d = chunk.choices[0].delta.content) !== null && _d !== void 0 ? _d : '';
+                        streamedOutput += chunkOutput;
                     }
                 }
                 catch (e_1_1) { e_1 = { error: e_1_1 }; }
                 finally {
                     try {
-                        if (!_f && !_a && (_b = streamedResponse_1.return)) yield _b.call(streamedResponse_1);
+                        if (!_l && !_a && (_b = streamedResponse_1.return)) yield _b.call(streamedResponse_1);
                     }
                     finally { if (e_1) throw e_1.error; }
                 }
                 const endTime = Date.now();
                 const latency = endTime - startTime;
-                this.openlayerClient.streamData(Object.assign({ latency, output: outputData, timestamp: startTime }, inputVariablesMap), config, inferencePipeline.id);
+                this.openlayerClient.streamData(Object.assign(Object.assign({ latency, output: streamedOutput, timestamp: startTime }, inputVariablesMap), additionalLogs), config, inferencePipeline.id);
             }
             else {
                 const nonStreamedResponse = response;
@@ -288,11 +359,16 @@ class OpenAIMonitor {
                 const endTime = Date.now();
                 const latency = endTime - startTime;
                 const output = nonStreamedResponse.choices[0].message.content;
+                const tokens = (_f = (_e = nonStreamedResponse.usage) === null || _e === void 0 ? void 0 : _e.total_tokens) !== null && _f !== void 0 ? _f : 0;
+                const inputTokens = (_h = (_g = nonStreamedResponse.usage) === null || _g === void 0 ? void 0 : _g.prompt_tokens) !== null && _h !== void 0 ? _h : 0;
+                const outputTokens = (_k = (_j = nonStreamedResponse.usage) === null || _j === void 0 ? void 0 : _j.completion_tokens) !== null && _k !== void 0 ? _k : 0;
+                const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
                 if (typeof output !== 'string') {
                     throw new Error('No output received from OpenAI.');
                 }
-                this.openlayerClient.streamData(Object.assign({ latency,
-                    output, timestamp: startTime, tokens: (_e = (_d = nonStreamedResponse.usage) === null || _d === void 0 ? void 0 : _d.total_tokens) !== null && _e !== void 0 ? _e : 0 }, inputVariablesMap), config, inferencePipeline.id);
+                this.openlayerClient.streamData(Object.assign(Object.assign({ cost,
+                    latency,
+                    output, timestamp: startTime, tokens }, inputVariablesMap), additionalLogs), config, inferencePipeline.id);
             }
             return response;
         });
@@ -303,9 +379,9 @@ class OpenAIMonitor {
          * @returns {Promise<Completion | Stream<Completion>>} Promise that resolves to a Completion or a Stream.
          * @throws {Error} Throws an error if monitoring is not active or if no prompt is provided.
          */
-        this.createCompletion = (body, options) => __awaiter(this, void 0, void 0, function* () {
-            var _g, e_2, _h, _j;
-            var _k, _l, _m, _o;
+        this.createCompletion = (body, options, additionalLogs) => __awaiter(this, void 0, void 0, function* () {
+            var _m, e_2, _o, _p;
+            var _q, _r, _s, _t, _u, _v, _w, _x;
             if (!this.monitoringOn) {
                 throw new Error('Monitoring is not active.');
             }
@@ -317,51 +393,43 @@ class OpenAIMonitor {
             // Start a timer to measure latency
             const startTime = Date.now();
             // Accumulate output and tokens data for streamed responses
-            let outputData = '';
-            let tokensData = 0;
+            let streamedOutput = '';
+            let streamedTokens = 0;
             const response = yield this.openAIClient.completions.create(body, options);
             const config = Object.assign(Object.assign({}, this.openlayerClient.defaultConfig), { inputVariableNames: ['input'] });
             if (body.stream) {
                 const streamedResponse = response;
                 try {
-                    for (var _p = true, streamedResponse_2 = __asyncValues(streamedResponse), streamedResponse_2_1; streamedResponse_2_1 = yield streamedResponse_2.next(), _g = streamedResponse_2_1.done, !_g; _p = true) {
-                        _j = streamedResponse_2_1.value;
-                        _p = false;
-                        const chunk = _j;
+                    for (var _y = true, streamedResponse_2 = __asyncValues(streamedResponse), streamedResponse_2_1; streamedResponse_2_1 = yield streamedResponse_2.next(), _m = streamedResponse_2_1.done, !_m; _y = true) {
+                        _p = streamedResponse_2_1.value;
+                        _y = false;
+                        const chunk = _p;
                         // Process each chunk - for example, accumulate input data
-                        outputData += chunk.choices[0].text.trim();
-                        tokensData += (_l = (_k = chunk.usage) === null || _k === void 0 ? void 0 : _k.total_tokens) !== null && _l !== void 0 ? _l : 0;
+                        streamedOutput += chunk.choices[0].text.trim();
+                        streamedTokens += (_r = (_q = chunk.usage) === null || _q === void 0 ? void 0 : _q.total_tokens) !== null && _r !== void 0 ? _r : 0;
                     }
                 }
                 catch (e_2_1) { e_2 = { error: e_2_1 }; }
                 finally {
                     try {
-                        if (!_p && !_g && (_h = streamedResponse_2.return)) yield _h.call(streamedResponse_2);
+                        if (!_y && !_m && (_o = streamedResponse_2.return)) yield _o.call(streamedResponse_2);
                     }
                     finally { if (e_2) throw e_2.error; }
                 }
                 const endTime = Date.now();
                 const latency = endTime - startTime;
-                this.openlayerClient.streamData({
-                    input: body.prompt,
-                    latency,
-                    output: outputData,
-                    timestamp: startTime,
-                    tokens: tokensData,
-                }, config, inferencePipeline.id);
+                this.openlayerClient.streamData(Object.assign({ input: body.prompt, latency, output: streamedOutput, timestamp: startTime, tokens: streamedTokens }, additionalLogs), config, inferencePipeline.id);
             }
             else {
                 const nonStreamedResponse = response;
                 // Handle regular (non-streamed) response
                 const endTime = Date.now();
                 const latency = endTime - startTime;
-                this.openlayerClient.streamData({
-                    input: body.prompt,
-                    latency,
-                    output: nonStreamedResponse.choices[0].text,
-                    timestamp: startTime,
-                    tokens: (_o = (_m = nonStreamedResponse.usage) === null || _m === void 0 ? void 0 : _m.total_tokens) !== null && _o !== void 0 ? _o : 0,
-                }, config, inferencePipeline.id);
+                const tokens = (_t = (_s = nonStreamedResponse.usage) === null || _s === void 0 ? void 0 : _s.total_tokens) !== null && _t !== void 0 ? _t : 0;
+                const inputTokens = (_v = (_u = nonStreamedResponse.usage) === null || _u === void 0 ? void 0 : _u.prompt_tokens) !== null && _v !== void 0 ? _v : 0;
+                const outputTokens = (_x = (_w = nonStreamedResponse.usage) === null || _w === void 0 ? void 0 : _w.completion_tokens) !== null && _x !== void 0 ? _x : 0;
+                const cost = this.cost(nonStreamedResponse.model, inputTokens, outputTokens);
+                this.openlayerClient.streamData(Object.assign({ cost, input: body.prompt, latency, output: nonStreamedResponse.choices[0].text, timestamp: startTime, tokens }, additionalLogs), config, inferencePipeline.id);
             }
             return response;
         });

package/package.json (CHANGED)

@@ -1,6 +1,6 @@
 {
   "name": "openlayer",
-  "version": "0.1.16",
+  "version": "0.1.17",
   "description": "The Openlayer TypeScript client",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",