npm - @crewdle/mist-connector-openai - Versions diffs - 1.0.22 → 1.0.24 - Mend

@crewdle/mist-connector-openai 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/models/OpenAIGenerativeAIWorkerConnector.js +163 -9
package/dist/types/models/OpenAIGenerativeAIWorkerConnector.d.ts +25 -0
package/package.json +2 -2

package/dist/models/OpenAIGenerativeAIWorkerConnector.js CHANGED

@@ -70,16 +70,17 @@ export class OpenAIGenerativeAIWorkerConnector {
             const response = await this.client.audio.transcriptions.create({
                 model: options.model.id,
                 file,
-                response_format: parameters.responseFormat || 'json',
+                response_format: this.transcriptionFormat(options.model.id, parameters.responseFormat),
                 language: parameters.language,
                 timestamp_granularities: parameters.timestampGranularities ? [parameters.timestampGranularities] : undefined,
             });
             console.log('OpenAIGenerativeAIWorkerConnector.processJob audio transcription response');
+            const transcriptionUsage = this.transcriptionUsage(response);
             return {
                 type: "prompt" /* GenerativeAIJobType.Prompt */,
                 output: (!parameters.responseFormat || parameters.responseFormat === 'json') ? response.text : JSON.stringify(response),
-                inputTokens: response.usage?.type === 'tokens' ? response.usage.input_tokens : 0,
-                outputTokens: response.usage?.type === 'tokens' ? response.usage.output_tokens : 0,
+                inputTokens: transcriptionUsage.inputTokens,
+                outputTokens: transcriptionUsage.outputTokens,
             };
         }
         if (options.model.taskType === GenerativeAITaskType.ImageGeneration) {
@@ -138,6 +139,7 @@ export class OpenAIGenerativeAIWorkerConnector {
         const reasoning = this.getReasoning(parameters, options.model.id);
         let inputTokens = 0;
         let outputTokens = 0;
+        const inputBuckets = { base: 0, cached: 0 };
         let output = '';
         let resultFile;
         let partial = '';
@@ -178,6 +180,11 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
             partial = '';
             inputTokens += response.usage?.input_tokens ?? 0;
             outputTokens += response.usage?.output_tokens ?? 0;
+            {
+                const segment = this.usageBuckets(response.usage);
+                inputBuckets.base += segment.input.base;
+                inputBuckets.cached += segment.input.cached;
+            }
             const promises = [];
             for (const content of response.output) {
                 if (content.type === 'message') {
@@ -185,6 +192,19 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
                         if (message.type === 'output_text') {
                             output += message.text;
                             partial += message.text;
+                            if (message.annotations && message.annotations.length > 0) {
+                                for (const annotation of message.annotations) {
+                                    if (annotation.type === 'container_file_citation') {
+                                        const mimeType = this.getMimeType(annotation.filename);
+                                        const file = await this.client.containers.files.content.retrieve(annotation.file_id, {
+                                            container_id: annotation.container_id,
+                                        });
+                                        console.log('OpenAIGenerativeAIWorkerConnector.processJob file', mimeType);
+                                        const buffer = await file.arrayBuffer();
+                                        resultFile = `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`;
+                                    }
+                                }
+                            }
                         }
                     }
                 }
@@ -219,9 +239,55 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
                 resultFile,
                 inputTokens,
                 outputTokens,
+                tokenBuckets: { input: inputBuckets, output: { base: outputTokens } },
             };
         }
     }
+    /**
+     * Splits an OpenAI usage object into billing buckets. Cached prompt tokens are reported inside
+     * the input token total, so they are split out for billing to rate via the model's tokenRates.
+     * Image models (gpt-image) also detail text vs image input tokens; the text share is bucketed so
+     * docs whose base input SKU is the image price can rate it (e.g. gpt-image-2 `text: 0.625`).
+     * Unrated buckets bill at base, so this is a no-op for plain text models.
+     * @param usage - Usage object from the response; when null/undefined every bucket is 0.
+     * @returns `{ input: { base, cached, text, audio }, output: { base } }`.
+     */
+    usageBuckets(usage) {
+        const total = usage?.input_tokens ?? 0;
+        const details = usage?.input_tokens_details ?? {}; // no details -> cached/text/audio all default to 0
+        const cached = details.cached_tokens ?? 0;
+        const text = details.text_tokens ?? 0;
+        const audio = details.audio_tokens ?? 0; // audio share is split out the same way as the text share
+        return { input: { base: Math.max(0, total - cached - text - audio), cached, text, audio }, output: { base: usage?.output_tokens ?? 0 } }; // base = total minus the split-out shares; cached can overlap the modality details, hence the clamp at 0
+    }
+    /**
+     * Chooses the transcription response_format: 'verbose_json' when `modelId` starts with "whisper" and `requested`
+     * is unset or 'json', otherwise `requested` (falling back to 'json'). whisper-1 reports no token usage; verbose_json
+     * carries the audio duration the platform bills on and still carries `text`, so json callers see the same output.
+     */
+    transcriptionFormat(modelId, requested) {
+        if (modelId.startsWith('whisper') && (!requested || requested === 'json')) {
+            return 'verbose_json'; // only the default/json case is upgraded; other explicit formats pass through below
+        }
+        return requested || 'json'; // explicit format wins; 'json' when none was requested
+    }
+    /**
+     * Derives billable token counts from a transcription response. Token-billed models
+     * (gpt-4o-transcribe*) report usage.type === 'tokens' and are passed through. whisper-1 reports
+     * no token usage — the audio duration (usage.type === 'duration', or verbose_json's `duration`)
+     * is billed at 1,000 output tokens per minute, so whisper's catalog outputPrice is USD per 1,000
+     * minutes of audio (mirrors the cloudlet proxy's metering). Returns `{ inputTokens, outputTokens }`.
+     */
+    transcriptionUsage(response) {
+        const usage = response?.usage;
+        if (usage?.type === 'tokens') {
+            return { inputTokens: usage.input_tokens ?? 0, outputTokens: usage.output_tokens ?? 0 }; // missing counts become 0
+        }
+        const seconds = usage?.type === 'duration'
+            ? (usage.seconds ?? 0)
+            : (typeof response?.duration === 'number' ? response.duration : 0); // absent or non-numeric duration bills as 0
+        return { inputTokens: 0, outputTokens: Math.round((seconds / 60) * 1000) }; // seconds -> minutes -> 1,000 tokens per minute, rounded
+    }
     async *processJobStream(parameters, options) {
         if (!this.client) {
             throw new Error('Client not initialized');
@@ -248,7 +314,10 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
                 throw new Error('No file found');
             }
             console.log('OpenAIGenerativeAIWorkerConnector.processJobStream audio transcription');
-            if (!parameters.responseFormat || parameters.responseFormat === 'json') {
+            // whisper-1 does not support streaming transcription — it always takes
+            // the non-streaming path below (with verbose_json so duration is billed).
+            const isWhisper = options.model.id.startsWith('whisper');
+            if ((!parameters.responseFormat || parameters.responseFormat === 'json') && !isWhisper) {
                 const stream = await this.client.audio.transcriptions.create({
                     model: options.model.id,
                     file,
@@ -270,15 +339,16 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
                 const response = await this.client.audio.transcriptions.create({
                     model: options.model.id,
                     file,
-                    response_format: parameters.responseFormat ? parameters.responseFormat : 'json',
+                    response_format: this.transcriptionFormat(options.model.id, parameters.responseFormat),
                     language: parameters.language,
                     timestamp_granularities: parameters.timestampGranularities ? [parameters.timestampGranularities] : undefined,
                 });
+                const transcriptionUsage = this.transcriptionUsage(response);
                 yield {
                     type: "prompt" /* GenerativeAIJobType.Prompt */,
-                    output: JSON.stringify(response),
-                    inputTokens: 0,
-                    outputTokens: 0,
+                    output: (!parameters.responseFormat || parameters.responseFormat === 'json') ? response.text : JSON.stringify(response),
+                    inputTokens: transcriptionUsage.inputTokens,
+                    outputTokens: transcriptionUsage.outputTokens,
                 };
             }
             return;
@@ -392,12 +462,31 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
                         outputTokens: 0,
                     };
                 }
+                if (chunk.type === 'response.output_text.annotation.added') {
+                    const annotation = chunk.annotation;
+                    if (annotation.type === 'container_file_citation') {
+                        const mimeType = this.getMimeType(annotation.filename);
+                        const file = await this.client.containers.files.content.retrieve(annotation.file_id, {
+                            container_id: annotation.container_id,
+                        });
+                        console.log('OpenAIGenerativeAIWorkerConnector.processJobStream file', mimeType);
+                        const buffer = await file.arrayBuffer();
+                        yield {
+                            type: "prompt" /* GenerativeAIJobType.Prompt */,
+                            output: '',
+                            resultFile: `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`,
+                            inputTokens: 0,
+                            outputTokens: 0,
+                        };
+                    }
+                }
                 if (chunk.type === 'response.completed') {
                     yield {
                         type: "prompt" /* GenerativeAIJobType.Prompt */,
                         output: '',
                         inputTokens: chunk.response.usage?.input_tokens ?? 0,
                         outputTokens: chunk.response.usage?.output_tokens ?? 0,
+                        tokenBuckets: this.usageBuckets(chunk.response.usage),
                     };
                 }
                 if (chunk.type === 'response.incomplete') {
@@ -407,6 +496,7 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
                         output: '',
                         inputTokens: chunk.response.usage?.input_tokens ?? 0,
                         outputTokens: chunk.response.usage?.output_tokens ?? 0,
+                        tokenBuckets: this.usageBuckets(chunk.response.usage),
                     };
                     if (!parameters.privacy && chunk.response.incomplete_details?.reason === 'max_output_tokens') {
                         if (continuationCount < MAX_CONTINUATIONS) {
@@ -534,7 +624,7 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
             if (c.type === 'file') {
                 return {
                     type: 'input_file',
-                    filename: c.file.filename,
+                    filename: c.file.filename ?? `${Math.random().toString(36).substring(2, 15)}.pdf`,
                     file_data: c.file.file_data,
                 };
             }
@@ -639,4 +729,68 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
         }
         return undefined;
     }
+    getMimeType(filename) { // Maps a filename's extension to a MIME type string; unmatched extensions yield 'application/octet-stream'.
+        const extension = filename.split('.').pop(); // text after the last '.'; the whole name when it contains no dot
+        if (extension === 'pdf') { // NOTE(review): every comparison is case-sensitive, so e.g. "REPORT.PDF" falls through to the default
+            return 'application/pdf';
+        }
+        if (extension === 'jpg' || extension === 'jpeg') {
+            return 'image/jpeg';
+        }
+        if (extension === 'png') {
+            return 'image/png';
+        }
+        if (extension === 'gif') {
+            return 'image/gif';
+        }
+        if (extension === 'webp') {
+            return 'image/webp';
+        }
+        if (extension === 'svg') {
+            return 'image/svg+xml';
+        }
+        if (extension === 'txt') {
+            return 'text/plain';
+        }
+        if (extension === 'html') {
+            return 'text/html';
+        }
+        if (extension === 'css') {
+            return 'text/css';
+        }
+        if (extension === 'js') {
+            return 'application/javascript';
+        }
+        if (extension === 'json') {
+            return 'application/json';
+        }
+        if (extension === 'xml') {
+            return 'application/xml';
+        }
+        if (extension === 'csv') {
+            return 'text/csv';
+        }
+        if (extension === 'tsv') {
+            return 'text/tab-separated-values';
+        }
+        if (extension === 'docx') {
+            return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
+        }
+        if (extension === 'doc') {
+            return 'application/msword';
+        }
+        if (extension === 'xls') {
+            return 'application/vnd.ms-excel';
+        }
+        if (extension === 'xlsx') {
+            return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
+        }
+        if (extension === 'pptx') {
+            return 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
+        }
+        if (extension === 'ppt') {
+            return 'application/vnd.ms-powerpoint';
+        }
+        return 'application/octet-stream'; // generic binary fallback for any extension not listed above
+    }
 }

package/dist/types/models/OpenAIGenerativeAIWorkerConnector.d.ts CHANGED

@@ -8,6 +8,30 @@ export declare class OpenAIGenerativeAIWorkerConnector implements IGenerativeAIW
     close(): Promise<void>;
     getEngineType(): GenerativeAIEngineType;
     processJob(parameters: GenerativeAIWorkerConnectorParameters, options?: IGenerativeAIWorkerOptions): Promise<IGenerativeAIWorkerConnectorPromptResult>;
+    /**
+     * OpenAI reports cached prompt tokens inside the input token total; split
+     * them out so billing can rate the cached share via the model's tokenRates.
+     * Image models (gpt-image) also detail text vs image input tokens — bucket
+     * the text share so docs whose base input SKU is the image price can rate
+     * it (e.g. gpt-image-2 `text: 0.625`). Unrated buckets bill at base, so
+     * this is a no-op for plain text models. cached can overlap the modality
+     * details, hence the clamp.
+     */
+    private usageBuckets;
+    /**
+     * whisper-1 reports no token usage; verbose_json carries the audio duration
+     * the platform bills on, so it replaces whisper's default/json format
+     * (verbose_json still carries `text`, so json callers see the same output).
+     */
+    private transcriptionFormat;
+    /**
+     * Token-billed transcription models (gpt-4o-transcribe*) report
+     * usage.type === 'tokens'. whisper-1 reports no token usage — bill the
+     * audio duration (usage.type === 'duration', or verbose_json's `duration`)
+     * at 1,000 output tokens per minute, so whisper's catalog outputPrice is
+     * USD per 1,000 minutes of audio (mirrors the cloudlet proxy's metering).
+     */
+    private transcriptionUsage;
     processJobStream(parameters: GenerativeAIWorkerConnectorParameters, options?: IGenerativeAIWorkerOptions): AsyncGenerator<IGenerativeAIWorkerConnectorPromptResult>;
     private processToolCall;
     private getMessages;
@@ -15,4 +39,5 @@ export declare class OpenAIGenerativeAIWorkerConnector implements IGenerativeAIW
     private getTools;
     private getReasoning;
     private getResponseFormat;
+    getMimeType(filename: string): string;
 }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@crewdle/mist-connector-openai",
-  "version": "1.0.22",
+  "version": "1.0.24",
   "description": "",
   "main": "dist/index.js",
   "types": "dist/types/index.d.ts",
@@ -15,7 +15,7 @@
     "dist/"
   ],
   "devDependencies": {
-    "@crewdle/web-sdk-types": "^1.0.55",
+    "@crewdle/web-sdk-types": "^1.0.58",
     "@types/node": "^22.13.9",
     "typescript": "^5.8.2"
   },