npm - @crewdle/mist-connector-openai - Versions diffs - 1.0.21 → 1.0.23 - Mend

@crewdle/mist-connector-openai 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/models/OpenAIGenerativeAIWorkerConnector.js +246 -16
package/dist/types/models/OpenAIGenerativeAIWorkerConnector.d.ts +20 -0
package/package.json +2 -2

package/dist/models/OpenAIGenerativeAIWorkerConnector.js CHANGED Viewed

@@ -70,16 +70,17 @@ export class OpenAIGenerativeAIWorkerConnector {
             const response = await this.client.audio.transcriptions.create({
                 model: options.model.id,
                 file,
-                response_format: parameters.responseFormat || 'json',
+                response_format: this.transcriptionFormat(options.model.id, parameters.responseFormat),
                 language: parameters.language,
                 timestamp_granularities: parameters.timestampGranularities ? [parameters.timestampGranularities] : undefined,
             });
             console.log('OpenAIGenerativeAIWorkerConnector.processJob audio transcription response');
+            const transcriptionUsage = this.transcriptionUsage(response);
             return {
                 type: "prompt" /* GenerativeAIJobType.Prompt */,
                 output: (!parameters.responseFormat || parameters.responseFormat === 'json') ? response.text : JSON.stringify(response),
-                inputTokens: response.usage?.type === 'tokens' ? response.usage.input_tokens : 0,
-                outputTokens: response.usage?.type === 'tokens' ? response.usage.output_tokens : 0,
+                inputTokens: transcriptionUsage.inputTokens,
+                outputTokens: transcriptionUsage.outputTokens,
             };
         }
         if (options.model.taskType === GenerativeAITaskType.ImageGeneration) {
@@ -138,29 +139,72 @@ export class OpenAIGenerativeAIWorkerConnector {
         const reasoning = this.getReasoning(parameters, options.model.id);
         let inputTokens = 0;
         let outputTokens = 0;
+        const inputBuckets = { base: 0, cached: 0 };
         let output = '';
         let resultFile;
+        let partial = '';
+        let responseId;
+        let continuationCount = 0;
+        const MAX_CONTINUATIONS = 5;
         while (true) {
             console.log('OpenAIGenerativeAIWorkerConnector.processJob', options.model.id);
             const response = await this.client.responses.create({
                 model: options.model.id,
-                input: messages,
-                max_output_tokens: parameters.maxTokens,
-                temperature: parameters.temperature,
+                input: responseId
+                    ? [
+                        {
+                            role: 'developer',
+                            content: [
+                                {
+                                    type: "input_text",
+                                    text: `Continue exactly after the following already-emitted text (do NOT repeat any of it).
+ANCHOR START:
+${partial.slice(-400)}
+ANCHOR END.
+Only output the missing remainder. Do not restart or re-open tags already emitted.`,
+                                },
+                            ],
+                        },
+                    ]
+                    : messages,
+                max_output_tokens: !options.model.id.startsWith('gpt-5') ? parameters.maxTokens : Math.max(parameters.maxTokens ?? 0, 30000),
+                temperature: !options.model.id.startsWith('gpt-5') ? parameters.temperature : undefined,
                 text: responseFormat,
                 tools,
+                previous_response_id: responseId,
                 reasoning,
                 store: parameters.privacy === true ? false : true,
             });
             console.log('OpenAIGenerativeAIWorkerConnector.processJob response');
+            responseId = undefined;
+            partial = '';
             inputTokens += response.usage?.input_tokens ?? 0;
             outputTokens += response.usage?.output_tokens ?? 0;
+            {
+                const segment = this.usageBuckets(response.usage);
+                inputBuckets.base += segment.input.base;
+                inputBuckets.cached += segment.input.cached;
+            }
             const promises = [];
             for (const content of response.output) {
                 if (content.type === 'message') {
                     for (const message of content.content) {
                         if (message.type === 'output_text') {
                             output += message.text;
+                            partial += message.text;
+                            if (message.annotations && message.annotations.length > 0) {
+                                for (const annotation of message.annotations) {
+                                    if (annotation.type === 'container_file_citation') {
+                                        const mimeType = this.getMimeType(annotation.filename);
+                                        const file = await this.client.containers.files.content.retrieve(annotation.file_id, {
+                                            container_id: annotation.container_id,
+                                        });
+                                        console.log('OpenAIGenerativeAIWorkerConnector.processJob file', mimeType);
+                                        const buffer = await file.arrayBuffer();
+                                        resultFile = `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`;
+                                    }
+                                }
+                            }
                         }
                     }
                 }
@@ -171,8 +215,22 @@ export class OpenAIGenerativeAIWorkerConnector {
                     promises.push(this.processToolCall(parameters, messages, content.name, content.call_id, content.arguments));
                 }
             }
+            if (response.status === 'incomplete') {
+                console.log('OpenAIGenerativeAIWorkerConnector.processJob response.incomplete', response.incomplete_details?.reason, parameters.maxTokens);
+                if (response.incomplete_details?.reason === 'max_output_tokens') {
+                    if (continuationCount < MAX_CONTINUATIONS) {
+                        responseId = response.id;
+                        continuationCount++;
+                        continue;
+                    }
+                    else {
+                        console.log('OpenAIGenerativeAIWorkerConnector.processJob max continuations reached');
+                    }
+                }
+            }
             if (promises.length > 0) {
                 await Promise.all(promises);
+                output += '\n\n';
                 continue;
             }
             return {
@@ -181,9 +239,47 @@ export class OpenAIGenerativeAIWorkerConnector {
                 resultFile,
                 inputTokens,
                 outputTokens,
+                tokenBuckets: { input: inputBuckets, output: { base: outputTokens } },
             };
         }
     }
+    /**
+     * OpenAI reports cached prompt tokens inside the input token total; split
+     * them out so billing can rate the cached share via the model's tokenRates.
+     */
+    usageBuckets(usage) {
+        const total = usage?.input_tokens ?? 0;
+        const cached = usage?.input_tokens_details?.cached_tokens ?? 0;
+        return { input: { base: total - cached, cached }, output: { base: usage?.output_tokens ?? 0 } };
+    }
+    /**
+     * whisper-1 reports no token usage; verbose_json carries the audio duration
+     * the platform bills on, so it replaces whisper's default/json format
+     * (verbose_json still carries `text`, so json callers see the same output).
+     */
+    transcriptionFormat(modelId, requested) {
+        if (modelId.startsWith('whisper') && (!requested || requested === 'json')) {
+            return 'verbose_json';
+        }
+        return requested || 'json';
+    }
+    /**
+     * Token-billed transcription models (gpt-4o-transcribe*) report
+     * usage.type === 'tokens'. whisper-1 reports no token usage — bill the
+     * audio duration (usage.type === 'duration', or verbose_json's `duration`)
+     * at 1,000 output tokens per minute, so whisper's catalog outputPrice is
+     * USD per 1,000 minutes of audio (mirrors the cloudlet proxy's metering).
+     */
+    transcriptionUsage(response) {
+        const usage = response?.usage;
+        if (usage?.type === 'tokens') {
+            return { inputTokens: usage.input_tokens ?? 0, outputTokens: usage.output_tokens ?? 0 };
+        }
+        const seconds = usage?.type === 'duration'
+            ? (usage.seconds ?? 0)
+            : (typeof response?.duration === 'number' ? response.duration : 0);
+        return { inputTokens: 0, outputTokens: Math.round((seconds / 60) * 1000) };
+    }
     async *processJobStream(parameters, options) {
         if (!this.client) {
             throw new Error('Client not initialized');
@@ -210,7 +306,10 @@ export class OpenAIGenerativeAIWorkerConnector {
                 throw new Error('No file found');
             }
             console.log('OpenAIGenerativeAIWorkerConnector.processJobStream audio transcription');
-            if (!parameters.responseFormat || parameters.responseFormat === 'json') {
+            // whisper-1 does not support streaming transcription — it always takes
+            // the non-streaming path below (with verbose_json so duration is billed).
+            const isWhisper = options.model.id.startsWith('whisper');
+            if ((!parameters.responseFormat || parameters.responseFormat === 'json') && !isWhisper) {
                 const stream = await this.client.audio.transcriptions.create({
                     model: options.model.id,
                     file,
@@ -232,15 +331,16 @@ export class OpenAIGenerativeAIWorkerConnector {
                 const response = await this.client.audio.transcriptions.create({
                     model: options.model.id,
                     file,
-                    response_format: parameters.responseFormat ? parameters.responseFormat : 'json',
+                    response_format: this.transcriptionFormat(options.model.id, parameters.responseFormat),
                     language: parameters.language,
                     timestamp_granularities: parameters.timestampGranularities ? [parameters.timestampGranularities] : undefined,
                 });
+                const transcriptionUsage = this.transcriptionUsage(response);
                 yield {
                     type: "prompt" /* GenerativeAIJobType.Prompt */,
-                    output: JSON.stringify(response),
-                    inputTokens: 0,
-                    outputTokens: 0,
+                    output: (!parameters.responseFormat || parameters.responseFormat === 'json') ? response.text : JSON.stringify(response),
+                    inputTokens: transcriptionUsage.inputTokens,
+                    outputTokens: transcriptionUsage.outputTokens,
                 };
             }
             return;
@@ -302,20 +402,43 @@ export class OpenAIGenerativeAIWorkerConnector {
         const messages = this.getMessages(parameters);
         const reasoning = this.getReasoning(parameters, options.model.id);
         let firstChunk = true;
+        let responseId;
+        let partial = '';
+        let continuationCount = 0;
+        const MAX_CONTINUATIONS = 5;
         while (true) {
             console.log('OpenAIGenerativeAIWorkerConnector.processJobStream', options.model.id);
             const stream = await this.client.responses.create({
                 model: options.model.id,
-                input: messages,
-                max_output_tokens: parameters.maxTokens,
-                temperature: parameters.temperature,
+                input: responseId
+                    ? [
+                        {
+                            role: 'developer',
+                            content: [
+                                {
+                                    type: "input_text",
+                                    text: `Continue exactly after the following already-emitted text (do NOT repeat any of it).
+ANCHOR START:
+${partial.slice(-400)}
+ANCHOR END.
+Only output the missing remainder. Do not restart or re-open tags already emitted.`,
+                                },
+                            ],
+                        },
+                    ]
+                    : messages,
+                max_output_tokens: !options.model.id.startsWith('gpt-5') ? parameters.maxTokens : Math.max(parameters.maxTokens ?? 0, 30000),
+                temperature: !options.model.id.startsWith('gpt-5') ? parameters.temperature : undefined,
                 text: responseFormat,
                 tools,
+                previous_response_id: responseId,
                 reasoning,
                 stream: true,
                 store: parameters.privacy === true ? false : true,
             });
             console.log('OpenAIGenerativeAIWorkerConnector.processJobStream response');
+            responseId = undefined;
+            partial = '';
             const promises = [];
             for await (const chunk of stream) {
                 if (chunk.type === 'response.output_text.delta') {
@@ -323,6 +446,7 @@ export class OpenAIGenerativeAIWorkerConnector {
                         console.log('OpenAIGenerativeAIWorkerConnector.processJobStream first chunk');
                         firstChunk = false;
                     }
+                    partial += chunk.delta;
                     yield {
                         type: "prompt" /* GenerativeAIJobType.Prompt */,
                         output: chunk.delta,
@@ -330,13 +454,51 @@ export class OpenAIGenerativeAIWorkerConnector {
                         outputTokens: 0,
                     };
                 }
+                if (chunk.type === 'response.output_text.annotation.added') {
+                    const annotation = chunk.annotation;
+                    if (annotation.type === 'container_file_citation') {
+                        const mimeType = this.getMimeType(annotation.filename);
+                        const file = await this.client.containers.files.content.retrieve(annotation.file_id, {
+                            container_id: annotation.container_id,
+                        });
+                        console.log('OpenAIGenerativeAIWorkerConnector.processJobStream file', mimeType);
+                        const buffer = await file.arrayBuffer();
+                        yield {
+                            type: "prompt" /* GenerativeAIJobType.Prompt */,
+                            output: '',
+                            resultFile: `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`,
+                            inputTokens: 0,
+                            outputTokens: 0,
+                        };
+                    }
+                }
                 if (chunk.type === 'response.completed') {
                     yield {
                         type: "prompt" /* GenerativeAIJobType.Prompt */,
                         output: '',
                         inputTokens: chunk.response.usage?.input_tokens ?? 0,
                         outputTokens: chunk.response.usage?.output_tokens ?? 0,
+                        tokenBuckets: this.usageBuckets(chunk.response.usage),
+                    };
+                }
+                if (chunk.type === 'response.incomplete') {
+                    console.log('OpenAIGenerativeAIWorkerConnector.processJobStream response.incomplete', chunk.response.incomplete_details?.reason, parameters.maxTokens);
+                    yield {
+                        type: "prompt" /* GenerativeAIJobType.Prompt */,
+                        output: '',
+                        inputTokens: chunk.response.usage?.input_tokens ?? 0,
+                        outputTokens: chunk.response.usage?.output_tokens ?? 0,
+                        tokenBuckets: this.usageBuckets(chunk.response.usage),
                     };
+                    if (!parameters.privacy && chunk.response.incomplete_details?.reason === 'max_output_tokens') {
+                        if (continuationCount < MAX_CONTINUATIONS) {
+                            responseId = chunk.response.id;
+                            continuationCount++;
+                        }
+                        else {
+                            console.log('OpenAIGenerativeAIWorkerConnector.processJobStream max continuations reached');
+                        }
+                    }
                 }
                 if (chunk.type === 'response.output_item.done') {
                     if (chunk.item.type === 'function_call') {
@@ -353,6 +515,10 @@ export class OpenAIGenerativeAIWorkerConnector {
                     }
                 }
             }
+            if (responseId) {
+                console.log('OpenAIGenerativeAIWorkerConnector.processJobStream needContinue');
+                continue;
+            }
             if (promises.length > 0) {
                 await Promise.all(promises);
                 yield {
@@ -426,7 +592,7 @@ export class OpenAIGenerativeAIWorkerConnector {
             for (const message of parameters.history) {
                 messages.push({
                     role: message.source === 'human' ? 'user' : 'assistant',
-                    content: message.message,
+                    content: this.getInnerMessages(message.message),
                 });
             }
         }
@@ -450,7 +616,7 @@ export class OpenAIGenerativeAIWorkerConnector {
             if (c.type === 'file') {
                 return {
                     type: 'input_file',
-                    filename: c.file.filename,
+                    filename: c.file.filename ?? `${Math.random().toString(36).substring(2, 15)}.pdf`,
                     file_data: c.file.file_data,
                 };
             }
@@ -555,4 +721,68 @@ export class OpenAIGenerativeAIWorkerConnector {
         }
         return undefined;
     }
+    getMimeType(filename) {
+        const extension = filename.split('.').pop();
+        if (extension === 'pdf') {
+            return 'application/pdf';
+        }
+        if (extension === 'jpg' || extension === 'jpeg') {
+            return 'image/jpeg';
+        }
+        if (extension === 'png') {
+            return 'image/png';
+        }
+        if (extension === 'gif') {
+            return 'image/gif';
+        }
+        if (extension === 'webp') {
+            return 'image/webp';
+        }
+        if (extension === 'svg') {
+            return 'image/svg+xml';
+        }
+        if (extension === 'txt') {
+            return 'text/plain';
+        }
+        if (extension === 'html') {
+            return 'text/html';
+        }
+        if (extension === 'css') {
+            return 'text/css';
+        }
+        if (extension === 'js') {
+            return 'application/javascript';
+        }
+        if (extension === 'json') {
+            return 'application/json';
+        }
+        if (extension === 'xml') {
+            return 'application/xml';
+        }
+        if (extension === 'csv') {
+            return 'text/csv';
+        }
+        if (extension === 'tsv') {
+            return 'text/tab-separated-values';
+        }
+        if (extension === 'docx') {
+            return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
+        }
+        if (extension === 'doc') {
+            return 'application/msword';
+        }
+        if (extension === 'xls') {
+            return 'application/vnd.ms-excel';
+        }
+        if (extension === 'xlsx') {
+            return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
+        }
+        if (extension === 'pptx') {
+            return 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
+        }
+        if (extension === 'ppt') {
+            return 'application/vnd.ms-powerpoint';
+        }
+        return 'application/octet-stream';
+    }
 }

package/dist/types/models/OpenAIGenerativeAIWorkerConnector.d.ts CHANGED Viewed

@@ -8,6 +8,25 @@ export declare class OpenAIGenerativeAIWorkerConnector implements IGenerativeAIW
     close(): Promise<void>;
     getEngineType(): GenerativeAIEngineType;
     processJob(parameters: GenerativeAIWorkerConnectorParameters, options?: IGenerativeAIWorkerOptions): Promise<IGenerativeAIWorkerConnectorPromptResult>;
+    /**
+     * OpenAI reports cached prompt tokens inside the input token total; split
+     * them out so billing can rate the cached share via the model's tokenRates.
+     */
+    private usageBuckets;
+    /**
+     * whisper-1 reports no token usage; verbose_json carries the audio duration
+     * the platform bills on, so it replaces whisper's default/json format
+     * (verbose_json still carries `text`, so json callers see the same output).
+     */
+    private transcriptionFormat;
+    /**
+     * Token-billed transcription models (gpt-4o-transcribe*) report
+     * usage.type === 'tokens'. whisper-1 reports no token usage — bill the
+     * audio duration (usage.type === 'duration', or verbose_json's `duration`)
+     * at 1,000 output tokens per minute, so whisper's catalog outputPrice is
+     * USD per 1,000 minutes of audio (mirrors the cloudlet proxy's metering).
+     */
+    private transcriptionUsage;
     processJobStream(parameters: GenerativeAIWorkerConnectorParameters, options?: IGenerativeAIWorkerOptions): AsyncGenerator<IGenerativeAIWorkerConnectorPromptResult>;
     private processToolCall;
     private getMessages;
@@ -15,4 +34,5 @@ export declare class OpenAIGenerativeAIWorkerConnector implements IGenerativeAIW
     private getTools;
     private getReasoning;
     private getResponseFormat;
+    getMimeType(filename: string): string;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@crewdle/mist-connector-openai",
-  "version": "1.0.21",
+  "version": "1.0.23",
   "description": "",
   "main": "dist/index.js",
   "types": "dist/types/index.d.ts",
@@ -15,7 +15,7 @@
     "dist/"
   ],
   "devDependencies": {
-    "@crewdle/web-sdk-types": "^1.0.54",
+    "@crewdle/web-sdk-types": "^1.0.58",
     "@types/node": "^22.13.9",
     "typescript": "^5.8.2"
   },