@crewdle/mist-connector-openai 1.0.22 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -70,16 +70,17 @@ export class OpenAIGenerativeAIWorkerConnector {
|
|
|
70
70
|
const response = await this.client.audio.transcriptions.create({
|
|
71
71
|
model: options.model.id,
|
|
72
72
|
file,
|
|
73
|
-
response_format: parameters.responseFormat
|
|
73
|
+
response_format: this.transcriptionFormat(options.model.id, parameters.responseFormat),
|
|
74
74
|
language: parameters.language,
|
|
75
75
|
timestamp_granularities: parameters.timestampGranularities ? [parameters.timestampGranularities] : undefined,
|
|
76
76
|
});
|
|
77
77
|
console.log('OpenAIGenerativeAIWorkerConnector.processJob audio transcription response');
|
|
78
|
+
const transcriptionUsage = this.transcriptionUsage(response);
|
|
78
79
|
return {
|
|
79
80
|
type: "prompt" /* GenerativeAIJobType.Prompt */,
|
|
80
81
|
output: (!parameters.responseFormat || parameters.responseFormat === 'json') ? response.text : JSON.stringify(response),
|
|
81
|
-
inputTokens:
|
|
82
|
-
outputTokens:
|
|
82
|
+
inputTokens: transcriptionUsage.inputTokens,
|
|
83
|
+
outputTokens: transcriptionUsage.outputTokens,
|
|
83
84
|
};
|
|
84
85
|
}
|
|
85
86
|
if (options.model.taskType === GenerativeAITaskType.ImageGeneration) {
|
|
@@ -138,6 +139,7 @@ export class OpenAIGenerativeAIWorkerConnector {
|
|
|
138
139
|
const reasoning = this.getReasoning(parameters, options.model.id);
|
|
139
140
|
let inputTokens = 0;
|
|
140
141
|
let outputTokens = 0;
|
|
142
|
+
const inputBuckets = { base: 0, cached: 0 };
|
|
141
143
|
let output = '';
|
|
142
144
|
let resultFile;
|
|
143
145
|
let partial = '';
|
|
@@ -178,6 +180,11 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
178
180
|
partial = '';
|
|
179
181
|
inputTokens += response.usage?.input_tokens ?? 0;
|
|
180
182
|
outputTokens += response.usage?.output_tokens ?? 0;
|
|
183
|
+
{
|
|
184
|
+
const segment = this.usageBuckets(response.usage);
|
|
185
|
+
inputBuckets.base += segment.input.base;
|
|
186
|
+
inputBuckets.cached += segment.input.cached;
|
|
187
|
+
}
|
|
181
188
|
const promises = [];
|
|
182
189
|
for (const content of response.output) {
|
|
183
190
|
if (content.type === 'message') {
|
|
@@ -185,6 +192,19 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
185
192
|
if (message.type === 'output_text') {
|
|
186
193
|
output += message.text;
|
|
187
194
|
partial += message.text;
|
|
195
|
+
if (message.annotations && message.annotations.length > 0) {
|
|
196
|
+
for (const annotation of message.annotations) {
|
|
197
|
+
if (annotation.type === 'container_file_citation') {
|
|
198
|
+
const mimeType = this.getMimeType(annotation.filename);
|
|
199
|
+
const file = await this.client.containers.files.content.retrieve(annotation.file_id, {
|
|
200
|
+
container_id: annotation.container_id,
|
|
201
|
+
});
|
|
202
|
+
console.log('OpenAIGenerativeAIWorkerConnector.processJob file', mimeType);
|
|
203
|
+
const buffer = await file.arrayBuffer();
|
|
204
|
+
resultFile = `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
188
208
|
}
|
|
189
209
|
}
|
|
190
210
|
}
|
|
@@ -219,9 +239,47 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
219
239
|
resultFile,
|
|
220
240
|
inputTokens,
|
|
221
241
|
outputTokens,
|
|
242
|
+
tokenBuckets: { input: inputBuckets, output: { base: outputTokens } },
|
|
222
243
|
};
|
|
223
244
|
}
|
|
224
245
|
}
|
|
246
|
+
/**
|
|
247
|
+
* OpenAI reports cached prompt tokens inside the input token total; split
|
|
248
|
+
* them out so billing can rate the cached share via the model's tokenRates.
|
|
249
|
+
*/
|
|
250
|
+
usageBuckets(usage) {
|
|
251
|
+
const total = usage?.input_tokens ?? 0;
|
|
252
|
+
const cached = usage?.input_tokens_details?.cached_tokens ?? 0;
|
|
253
|
+
return { input: { base: total - cached, cached }, output: { base: usage?.output_tokens ?? 0 } };
|
|
254
|
+
}
|
|
255
|
+
/**
|
|
256
|
+
* whisper-1 reports no token usage; verbose_json carries the audio duration
|
|
257
|
+
* the platform bills on, so it replaces whisper's default/json format
|
|
258
|
+
* (verbose_json still carries `text`, so json callers see the same output).
|
|
259
|
+
*/
|
|
260
|
+
transcriptionFormat(modelId, requested) {
|
|
261
|
+
if (modelId.startsWith('whisper') && (!requested || requested === 'json')) {
|
|
262
|
+
return 'verbose_json';
|
|
263
|
+
}
|
|
264
|
+
return requested || 'json';
|
|
265
|
+
}
|
|
266
|
+
/**
|
|
267
|
+
* Token-billed transcription models (gpt-4o-transcribe*) report
|
|
268
|
+
* usage.type === 'tokens'. whisper-1 reports no token usage — bill the
|
|
269
|
+
* audio duration (usage.type === 'duration', or verbose_json's `duration`)
|
|
270
|
+
* at 1,000 output tokens per minute, so whisper's catalog outputPrice is
|
|
271
|
+
* USD per 1,000 minutes of audio (mirrors the cloudlet proxy's metering).
|
|
272
|
+
*/
|
|
273
|
+
transcriptionUsage(response) {
|
|
274
|
+
const usage = response?.usage;
|
|
275
|
+
if (usage?.type === 'tokens') {
|
|
276
|
+
return { inputTokens: usage.input_tokens ?? 0, outputTokens: usage.output_tokens ?? 0 };
|
|
277
|
+
}
|
|
278
|
+
const seconds = usage?.type === 'duration'
|
|
279
|
+
? (usage.seconds ?? 0)
|
|
280
|
+
: (typeof response?.duration === 'number' ? response.duration : 0);
|
|
281
|
+
return { inputTokens: 0, outputTokens: Math.round((seconds / 60) * 1000) };
|
|
282
|
+
}
|
|
225
283
|
async *processJobStream(parameters, options) {
|
|
226
284
|
if (!this.client) {
|
|
227
285
|
throw new Error('Client not initialized');
|
|
@@ -248,7 +306,10 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
248
306
|
throw new Error('No file found');
|
|
249
307
|
}
|
|
250
308
|
console.log('OpenAIGenerativeAIWorkerConnector.processJobStream audio transcription');
|
|
251
|
-
|
|
309
|
+
// whisper-1 does not support streaming transcription — it always takes
|
|
310
|
+
// the non-streaming path below (with verbose_json so duration is billed).
|
|
311
|
+
const isWhisper = options.model.id.startsWith('whisper');
|
|
312
|
+
if ((!parameters.responseFormat || parameters.responseFormat === 'json') && !isWhisper) {
|
|
252
313
|
const stream = await this.client.audio.transcriptions.create({
|
|
253
314
|
model: options.model.id,
|
|
254
315
|
file,
|
|
@@ -270,15 +331,16 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
270
331
|
const response = await this.client.audio.transcriptions.create({
|
|
271
332
|
model: options.model.id,
|
|
272
333
|
file,
|
|
273
|
-
response_format:
|
|
334
|
+
response_format: this.transcriptionFormat(options.model.id, parameters.responseFormat),
|
|
274
335
|
language: parameters.language,
|
|
275
336
|
timestamp_granularities: parameters.timestampGranularities ? [parameters.timestampGranularities] : undefined,
|
|
276
337
|
});
|
|
338
|
+
const transcriptionUsage = this.transcriptionUsage(response);
|
|
277
339
|
yield {
|
|
278
340
|
type: "prompt" /* GenerativeAIJobType.Prompt */,
|
|
279
|
-
output: JSON.stringify(response),
|
|
280
|
-
inputTokens:
|
|
281
|
-
outputTokens:
|
|
341
|
+
output: (!parameters.responseFormat || parameters.responseFormat === 'json') ? response.text : JSON.stringify(response),
|
|
342
|
+
inputTokens: transcriptionUsage.inputTokens,
|
|
343
|
+
outputTokens: transcriptionUsage.outputTokens,
|
|
282
344
|
};
|
|
283
345
|
}
|
|
284
346
|
return;
|
|
@@ -392,12 +454,31 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
392
454
|
outputTokens: 0,
|
|
393
455
|
};
|
|
394
456
|
}
|
|
457
|
+
if (chunk.type === 'response.output_text.annotation.added') {
|
|
458
|
+
const annotation = chunk.annotation;
|
|
459
|
+
if (annotation.type === 'container_file_citation') {
|
|
460
|
+
const mimeType = this.getMimeType(annotation.filename);
|
|
461
|
+
const file = await this.client.containers.files.content.retrieve(annotation.file_id, {
|
|
462
|
+
container_id: annotation.container_id,
|
|
463
|
+
});
|
|
464
|
+
console.log('OpenAIGenerativeAIWorkerConnector.processJobStream file', mimeType);
|
|
465
|
+
const buffer = await file.arrayBuffer();
|
|
466
|
+
yield {
|
|
467
|
+
type: "prompt" /* GenerativeAIJobType.Prompt */,
|
|
468
|
+
output: '',
|
|
469
|
+
resultFile: `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`,
|
|
470
|
+
inputTokens: 0,
|
|
471
|
+
outputTokens: 0,
|
|
472
|
+
};
|
|
473
|
+
}
|
|
474
|
+
}
|
|
395
475
|
if (chunk.type === 'response.completed') {
|
|
396
476
|
yield {
|
|
397
477
|
type: "prompt" /* GenerativeAIJobType.Prompt */,
|
|
398
478
|
output: '',
|
|
399
479
|
inputTokens: chunk.response.usage?.input_tokens ?? 0,
|
|
400
480
|
outputTokens: chunk.response.usage?.output_tokens ?? 0,
|
|
481
|
+
tokenBuckets: this.usageBuckets(chunk.response.usage),
|
|
401
482
|
};
|
|
402
483
|
}
|
|
403
484
|
if (chunk.type === 'response.incomplete') {
|
|
@@ -407,6 +488,7 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
407
488
|
output: '',
|
|
408
489
|
inputTokens: chunk.response.usage?.input_tokens ?? 0,
|
|
409
490
|
outputTokens: chunk.response.usage?.output_tokens ?? 0,
|
|
491
|
+
tokenBuckets: this.usageBuckets(chunk.response.usage),
|
|
410
492
|
};
|
|
411
493
|
if (!parameters.privacy && chunk.response.incomplete_details?.reason === 'max_output_tokens') {
|
|
412
494
|
if (continuationCount < MAX_CONTINUATIONS) {
|
|
@@ -534,7 +616,7 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
534
616
|
if (c.type === 'file') {
|
|
535
617
|
return {
|
|
536
618
|
type: 'input_file',
|
|
537
|
-
filename: c.file.filename,
|
|
619
|
+
filename: c.file.filename ?? `${Math.random().toString(36).substring(2, 15)}.pdf`,
|
|
538
620
|
file_data: c.file.file_data,
|
|
539
621
|
};
|
|
540
622
|
}
|
|
@@ -639,4 +721,68 @@ Only output the missing remainder. Do not restart or re-open tags already emitte
|
|
|
639
721
|
}
|
|
640
722
|
return undefined;
|
|
641
723
|
}
|
|
724
|
+
getMimeType(filename) {
|
|
725
|
+
const extension = filename.split('.').pop();
|
|
726
|
+
if (extension === 'pdf') {
|
|
727
|
+
return 'application/pdf';
|
|
728
|
+
}
|
|
729
|
+
if (extension === 'jpg' || extension === 'jpeg') {
|
|
730
|
+
return 'image/jpeg';
|
|
731
|
+
}
|
|
732
|
+
if (extension === 'png') {
|
|
733
|
+
return 'image/png';
|
|
734
|
+
}
|
|
735
|
+
if (extension === 'gif') {
|
|
736
|
+
return 'image/gif';
|
|
737
|
+
}
|
|
738
|
+
if (extension === 'webp') {
|
|
739
|
+
return 'image/webp';
|
|
740
|
+
}
|
|
741
|
+
if (extension === 'svg') {
|
|
742
|
+
return 'image/svg+xml';
|
|
743
|
+
}
|
|
744
|
+
if (extension === 'txt') {
|
|
745
|
+
return 'text/plain';
|
|
746
|
+
}
|
|
747
|
+
if (extension === 'html') {
|
|
748
|
+
return 'text/html';
|
|
749
|
+
}
|
|
750
|
+
if (extension === 'css') {
|
|
751
|
+
return 'text/css';
|
|
752
|
+
}
|
|
753
|
+
if (extension === 'js') {
|
|
754
|
+
return 'application/javascript';
|
|
755
|
+
}
|
|
756
|
+
if (extension === 'json') {
|
|
757
|
+
return 'application/json';
|
|
758
|
+
}
|
|
759
|
+
if (extension === 'xml') {
|
|
760
|
+
return 'application/xml';
|
|
761
|
+
}
|
|
762
|
+
if (extension === 'csv') {
|
|
763
|
+
return 'text/csv';
|
|
764
|
+
}
|
|
765
|
+
if (extension === 'tsv') {
|
|
766
|
+
return 'text/tab-separated-values';
|
|
767
|
+
}
|
|
768
|
+
if (extension === 'docx') {
|
|
769
|
+
return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
|
770
|
+
}
|
|
771
|
+
if (extension === 'doc') {
|
|
772
|
+
return 'application/msword';
|
|
773
|
+
}
|
|
774
|
+
if (extension === 'xls') {
|
|
775
|
+
return 'application/vnd.ms-excel';
|
|
776
|
+
}
|
|
777
|
+
if (extension === 'xlsx') {
|
|
778
|
+
return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
|
|
779
|
+
}
|
|
780
|
+
if (extension === 'pptx') {
|
|
781
|
+
return 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
|
|
782
|
+
}
|
|
783
|
+
if (extension === 'ppt') {
|
|
784
|
+
return 'application/vnd.ms-powerpoint';
|
|
785
|
+
}
|
|
786
|
+
return 'application/octet-stream';
|
|
787
|
+
}
|
|
642
788
|
}
|
|
@@ -8,6 +8,25 @@ export declare class OpenAIGenerativeAIWorkerConnector implements IGenerativeAIW
|
|
|
8
8
|
close(): Promise<void>;
|
|
9
9
|
getEngineType(): GenerativeAIEngineType;
|
|
10
10
|
processJob(parameters: GenerativeAIWorkerConnectorParameters, options?: IGenerativeAIWorkerOptions): Promise<IGenerativeAIWorkerConnectorPromptResult>;
|
|
11
|
+
/**
|
|
12
|
+
* OpenAI reports cached prompt tokens inside the input token total; split
|
|
13
|
+
* them out so billing can rate the cached share via the model's tokenRates.
|
|
14
|
+
*/
|
|
15
|
+
private usageBuckets;
|
|
16
|
+
/**
|
|
17
|
+
* whisper-1 reports no token usage; verbose_json carries the audio duration
|
|
18
|
+
* the platform bills on, so it replaces whisper's default/json format
|
|
19
|
+
* (verbose_json still carries `text`, so json callers see the same output).
|
|
20
|
+
*/
|
|
21
|
+
private transcriptionFormat;
|
|
22
|
+
/**
|
|
23
|
+
* Token-billed transcription models (gpt-4o-transcribe*) report
|
|
24
|
+
* usage.type === 'tokens'. whisper-1 reports no token usage — bill the
|
|
25
|
+
* audio duration (usage.type === 'duration', or verbose_json's `duration`)
|
|
26
|
+
* at 1,000 output tokens per minute, so whisper's catalog outputPrice is
|
|
27
|
+
* USD per 1,000 minutes of audio (mirrors the cloudlet proxy's metering).
|
|
28
|
+
*/
|
|
29
|
+
private transcriptionUsage;
|
|
11
30
|
processJobStream(parameters: GenerativeAIWorkerConnectorParameters, options?: IGenerativeAIWorkerOptions): AsyncGenerator<IGenerativeAIWorkerConnectorPromptResult>;
|
|
12
31
|
private processToolCall;
|
|
13
32
|
private getMessages;
|
|
@@ -15,4 +34,5 @@ export declare class OpenAIGenerativeAIWorkerConnector implements IGenerativeAIW
|
|
|
15
34
|
private getTools;
|
|
16
35
|
private getReasoning;
|
|
17
36
|
private getResponseFormat;
|
|
37
|
+
getMimeType(filename: string): string;
|
|
18
38
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crewdle/mist-connector-openai",
|
|
3
|
-
"version": "1.0.22",
|
|
3
|
+
"version": "1.0.23",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/types/index.d.ts",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"dist/"
|
|
16
16
|
],
|
|
17
17
|
"devDependencies": {
|
|
18
|
-
"@crewdle/web-sdk-types": "^1.0.
|
|
18
|
+
"@crewdle/web-sdk-types": "^1.0.58",
|
|
19
19
|
"@types/node": "^22.13.9",
|
|
20
20
|
"typescript": "^5.8.2"
|
|
21
21
|
},
|