@crewdle/mist-connector-openai 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,16 +70,17 @@ export class OpenAIGenerativeAIWorkerConnector {
70
70
  const response = await this.client.audio.transcriptions.create({
71
71
  model: options.model.id,
72
72
  file,
73
- response_format: parameters.responseFormat || 'json',
73
+ response_format: this.transcriptionFormat(options.model.id, parameters.responseFormat),
74
74
  language: parameters.language,
75
75
  timestamp_granularities: parameters.timestampGranularities ? [parameters.timestampGranularities] : undefined,
76
76
  });
77
77
  console.log('OpenAIGenerativeAIWorkerConnector.processJob audio transcription response');
78
+ const transcriptionUsage = this.transcriptionUsage(response);
78
79
  return {
79
80
  type: "prompt" /* GenerativeAIJobType.Prompt */,
80
81
  output: (!parameters.responseFormat || parameters.responseFormat === 'json') ? response.text : JSON.stringify(response),
81
- inputTokens: response.usage?.type === 'tokens' ? response.usage.input_tokens : 0,
82
- outputTokens: response.usage?.type === 'tokens' ? response.usage.output_tokens : 0,
82
+ inputTokens: transcriptionUsage.inputTokens,
83
+ outputTokens: transcriptionUsage.outputTokens,
83
84
  };
84
85
  }
85
86
  if (options.model.taskType === GenerativeAITaskType.ImageGeneration) {
@@ -138,29 +139,72 @@ export class OpenAIGenerativeAIWorkerConnector {
138
139
  const reasoning = this.getReasoning(parameters, options.model.id);
139
140
  let inputTokens = 0;
140
141
  let outputTokens = 0;
142
+ const inputBuckets = { base: 0, cached: 0 };
141
143
  let output = '';
142
144
  let resultFile;
145
+ let partial = '';
146
+ let responseId;
147
+ let continuationCount = 0;
148
+ const MAX_CONTINUATIONS = 5;
143
149
  while (true) {
144
150
  console.log('OpenAIGenerativeAIWorkerConnector.processJob', options.model.id);
145
151
  const response = await this.client.responses.create({
146
152
  model: options.model.id,
147
- input: messages,
148
- max_output_tokens: parameters.maxTokens,
149
- temperature: parameters.temperature,
153
+ input: responseId
154
+ ? [
155
+ {
156
+ role: 'developer',
157
+ content: [
158
+ {
159
+ type: "input_text",
160
+ text: `Continue exactly after the following already-emitted text (do NOT repeat any of it).
161
+ ANCHOR START:
162
+ ${partial.slice(-400)}
163
+ ANCHOR END.
164
+ Only output the missing remainder. Do not restart or re-open tags already emitted.`,
165
+ },
166
+ ],
167
+ },
168
+ ]
169
+ : messages,
170
+ max_output_tokens: !options.model.id.startsWith('gpt-5') ? parameters.maxTokens : Math.max(parameters.maxTokens ?? 0, 30000),
171
+ temperature: !options.model.id.startsWith('gpt-5') ? parameters.temperature : undefined,
150
172
  text: responseFormat,
151
173
  tools,
174
+ previous_response_id: responseId,
152
175
  reasoning,
153
176
  store: parameters.privacy === true ? false : true,
154
177
  });
155
178
  console.log('OpenAIGenerativeAIWorkerConnector.processJob response');
179
+ responseId = undefined;
180
+ partial = '';
156
181
  inputTokens += response.usage?.input_tokens ?? 0;
157
182
  outputTokens += response.usage?.output_tokens ?? 0;
183
+ {
184
+ const segment = this.usageBuckets(response.usage);
185
+ inputBuckets.base += segment.input.base;
186
+ inputBuckets.cached += segment.input.cached;
187
+ }
158
188
  const promises = [];
159
189
  for (const content of response.output) {
160
190
  if (content.type === 'message') {
161
191
  for (const message of content.content) {
162
192
  if (message.type === 'output_text') {
163
193
  output += message.text;
194
+ partial += message.text;
195
+ if (message.annotations && message.annotations.length > 0) {
196
+ for (const annotation of message.annotations) {
197
+ if (annotation.type === 'container_file_citation') {
198
+ const mimeType = this.getMimeType(annotation.filename);
199
+ const file = await this.client.containers.files.content.retrieve(annotation.file_id, {
200
+ container_id: annotation.container_id,
201
+ });
202
+ console.log('OpenAIGenerativeAIWorkerConnector.processJob file', mimeType);
203
+ const buffer = await file.arrayBuffer();
204
+ resultFile = `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`;
205
+ }
206
+ }
207
+ }
164
208
  }
165
209
  }
166
210
  }
@@ -171,8 +215,22 @@ export class OpenAIGenerativeAIWorkerConnector {
171
215
  promises.push(this.processToolCall(parameters, messages, content.name, content.call_id, content.arguments));
172
216
  }
173
217
  }
218
+ if (response.status === 'incomplete') {
219
+ console.log('OpenAIGenerativeAIWorkerConnector.processJob response.incomplete', response.incomplete_details?.reason, parameters.maxTokens);
220
+ if (response.incomplete_details?.reason === 'max_output_tokens') {
221
+ if (continuationCount < MAX_CONTINUATIONS) {
222
+ responseId = response.id;
223
+ continuationCount++;
224
+ continue;
225
+ }
226
+ else {
227
+ console.log('OpenAIGenerativeAIWorkerConnector.processJob max continuations reached');
228
+ }
229
+ }
230
+ }
174
231
  if (promises.length > 0) {
175
232
  await Promise.all(promises);
233
+ output += '\n\n';
176
234
  continue;
177
235
  }
178
236
  return {
@@ -181,9 +239,47 @@ export class OpenAIGenerativeAIWorkerConnector {
181
239
  resultFile,
182
240
  inputTokens,
183
241
  outputTokens,
242
+ tokenBuckets: { input: inputBuckets, output: { base: outputTokens } },
184
243
  };
185
244
  }
186
245
  }
246
+ /**
247
+ * OpenAI reports cached prompt tokens inside the input token total; split
248
+ * them out so billing can rate the cached share via the model's tokenRates.
249
+ */
250
+ usageBuckets(usage) {
251
+ const total = usage?.input_tokens ?? 0;
252
+ const cached = usage?.input_tokens_details?.cached_tokens ?? 0;
253
+ return { input: { base: total - cached, cached }, output: { base: usage?.output_tokens ?? 0 } };
254
+ }
255
+ /**
256
+ * whisper-1 reports no token usage; verbose_json carries the audio duration
257
+ * the platform bills on, so it replaces whisper's default/json format
258
+ * (verbose_json still carries `text`, so json callers see the same output).
259
+ */
260
+ transcriptionFormat(modelId, requested) {
261
+ if (modelId.startsWith('whisper') && (!requested || requested === 'json')) {
262
+ return 'verbose_json';
263
+ }
264
+ return requested || 'json';
265
+ }
266
+ /**
267
+ * Token-billed transcription models (gpt-4o-transcribe*) report
268
+ * usage.type === 'tokens'. whisper-1 reports no token usage — bill the
269
+ * audio duration (usage.type === 'duration', or verbose_json's `duration`)
270
+ * at 1,000 output tokens per minute, so whisper's catalog outputPrice is
271
+ * USD per 1,000 minutes of audio (mirrors the cloudlet proxy's metering).
272
+ */
273
+ transcriptionUsage(response) {
274
+ const usage = response?.usage;
275
+ if (usage?.type === 'tokens') {
276
+ return { inputTokens: usage.input_tokens ?? 0, outputTokens: usage.output_tokens ?? 0 };
277
+ }
278
+ const seconds = usage?.type === 'duration'
279
+ ? (usage.seconds ?? 0)
280
+ : (typeof response?.duration === 'number' ? response.duration : 0);
281
+ return { inputTokens: 0, outputTokens: Math.round((seconds / 60) * 1000) };
282
+ }
187
283
  async *processJobStream(parameters, options) {
188
284
  if (!this.client) {
189
285
  throw new Error('Client not initialized');
@@ -210,7 +306,10 @@ export class OpenAIGenerativeAIWorkerConnector {
210
306
  throw new Error('No file found');
211
307
  }
212
308
  console.log('OpenAIGenerativeAIWorkerConnector.processJobStream audio transcription');
213
- if (!parameters.responseFormat || parameters.responseFormat === 'json') {
309
+ // whisper-1 does not support streaming transcription — it always takes
310
+ // the non-streaming path below (with verbose_json so duration is billed).
311
+ const isWhisper = options.model.id.startsWith('whisper');
312
+ if ((!parameters.responseFormat || parameters.responseFormat === 'json') && !isWhisper) {
214
313
  const stream = await this.client.audio.transcriptions.create({
215
314
  model: options.model.id,
216
315
  file,
@@ -232,15 +331,16 @@ export class OpenAIGenerativeAIWorkerConnector {
232
331
  const response = await this.client.audio.transcriptions.create({
233
332
  model: options.model.id,
234
333
  file,
235
- response_format: parameters.responseFormat ? parameters.responseFormat : 'json',
334
+ response_format: this.transcriptionFormat(options.model.id, parameters.responseFormat),
236
335
  language: parameters.language,
237
336
  timestamp_granularities: parameters.timestampGranularities ? [parameters.timestampGranularities] : undefined,
238
337
  });
338
+ const transcriptionUsage = this.transcriptionUsage(response);
239
339
  yield {
240
340
  type: "prompt" /* GenerativeAIJobType.Prompt */,
241
- output: JSON.stringify(response),
242
- inputTokens: 0,
243
- outputTokens: 0,
341
+ output: (!parameters.responseFormat || parameters.responseFormat === 'json') ? response.text : JSON.stringify(response),
342
+ inputTokens: transcriptionUsage.inputTokens,
343
+ outputTokens: transcriptionUsage.outputTokens,
244
344
  };
245
345
  }
246
346
  return;
@@ -302,20 +402,43 @@ export class OpenAIGenerativeAIWorkerConnector {
302
402
  const messages = this.getMessages(parameters);
303
403
  const reasoning = this.getReasoning(parameters, options.model.id);
304
404
  let firstChunk = true;
405
+ let responseId;
406
+ let partial = '';
407
+ let continuationCount = 0;
408
+ const MAX_CONTINUATIONS = 5;
305
409
  while (true) {
306
410
  console.log('OpenAIGenerativeAIWorkerConnector.processJobStream', options.model.id);
307
411
  const stream = await this.client.responses.create({
308
412
  model: options.model.id,
309
- input: messages,
310
- max_output_tokens: parameters.maxTokens,
311
- temperature: parameters.temperature,
413
+ input: responseId
414
+ ? [
415
+ {
416
+ role: 'developer',
417
+ content: [
418
+ {
419
+ type: "input_text",
420
+ text: `Continue exactly after the following already-emitted text (do NOT repeat any of it).
421
+ ANCHOR START:
422
+ ${partial.slice(-400)}
423
+ ANCHOR END.
424
+ Only output the missing remainder. Do not restart or re-open tags already emitted.`,
425
+ },
426
+ ],
427
+ },
428
+ ]
429
+ : messages,
430
+ max_output_tokens: !options.model.id.startsWith('gpt-5') ? parameters.maxTokens : Math.max(parameters.maxTokens ?? 0, 30000),
431
+ temperature: !options.model.id.startsWith('gpt-5') ? parameters.temperature : undefined,
312
432
  text: responseFormat,
313
433
  tools,
434
+ previous_response_id: responseId,
314
435
  reasoning,
315
436
  stream: true,
316
437
  store: parameters.privacy === true ? false : true,
317
438
  });
318
439
  console.log('OpenAIGenerativeAIWorkerConnector.processJobStream response');
440
+ responseId = undefined;
441
+ partial = '';
319
442
  const promises = [];
320
443
  for await (const chunk of stream) {
321
444
  if (chunk.type === 'response.output_text.delta') {
@@ -323,6 +446,7 @@ export class OpenAIGenerativeAIWorkerConnector {
323
446
  console.log('OpenAIGenerativeAIWorkerConnector.processJobStream first chunk');
324
447
  firstChunk = false;
325
448
  }
449
+ partial += chunk.delta;
326
450
  yield {
327
451
  type: "prompt" /* GenerativeAIJobType.Prompt */,
328
452
  output: chunk.delta,
@@ -330,13 +454,51 @@ export class OpenAIGenerativeAIWorkerConnector {
330
454
  outputTokens: 0,
331
455
  };
332
456
  }
457
+ if (chunk.type === 'response.output_text.annotation.added') {
458
+ const annotation = chunk.annotation;
459
+ if (annotation.type === 'container_file_citation') {
460
+ const mimeType = this.getMimeType(annotation.filename);
461
+ const file = await this.client.containers.files.content.retrieve(annotation.file_id, {
462
+ container_id: annotation.container_id,
463
+ });
464
+ console.log('OpenAIGenerativeAIWorkerConnector.processJobStream file', mimeType);
465
+ const buffer = await file.arrayBuffer();
466
+ yield {
467
+ type: "prompt" /* GenerativeAIJobType.Prompt */,
468
+ output: '',
469
+ resultFile: `data:${mimeType};base64,${Buffer.from(buffer).toString('base64')}`,
470
+ inputTokens: 0,
471
+ outputTokens: 0,
472
+ };
473
+ }
474
+ }
333
475
  if (chunk.type === 'response.completed') {
334
476
  yield {
335
477
  type: "prompt" /* GenerativeAIJobType.Prompt */,
336
478
  output: '',
337
479
  inputTokens: chunk.response.usage?.input_tokens ?? 0,
338
480
  outputTokens: chunk.response.usage?.output_tokens ?? 0,
481
+ tokenBuckets: this.usageBuckets(chunk.response.usage),
482
+ };
483
+ }
484
+ if (chunk.type === 'response.incomplete') {
485
+ console.log('OpenAIGenerativeAIWorkerConnector.processJobStream response.incomplete', chunk.response.incomplete_details?.reason, parameters.maxTokens);
486
+ yield {
487
+ type: "prompt" /* GenerativeAIJobType.Prompt */,
488
+ output: '',
489
+ inputTokens: chunk.response.usage?.input_tokens ?? 0,
490
+ outputTokens: chunk.response.usage?.output_tokens ?? 0,
491
+ tokenBuckets: this.usageBuckets(chunk.response.usage),
339
492
  };
493
+ if (!parameters.privacy && chunk.response.incomplete_details?.reason === 'max_output_tokens') {
494
+ if (continuationCount < MAX_CONTINUATIONS) {
495
+ responseId = chunk.response.id;
496
+ continuationCount++;
497
+ }
498
+ else {
499
+ console.log('OpenAIGenerativeAIWorkerConnector.processJobStream max continuations reached');
500
+ }
501
+ }
340
502
  }
341
503
  if (chunk.type === 'response.output_item.done') {
342
504
  if (chunk.item.type === 'function_call') {
@@ -353,6 +515,10 @@ export class OpenAIGenerativeAIWorkerConnector {
353
515
  }
354
516
  }
355
517
  }
518
+ if (responseId) {
519
+ console.log('OpenAIGenerativeAIWorkerConnector.processJobStream needContinue');
520
+ continue;
521
+ }
356
522
  if (promises.length > 0) {
357
523
  await Promise.all(promises);
358
524
  yield {
@@ -426,7 +592,7 @@ export class OpenAIGenerativeAIWorkerConnector {
426
592
  for (const message of parameters.history) {
427
593
  messages.push({
428
594
  role: message.source === 'human' ? 'user' : 'assistant',
429
- content: message.message,
595
+ content: this.getInnerMessages(message.message),
430
596
  });
431
597
  }
432
598
  }
@@ -450,7 +616,7 @@ export class OpenAIGenerativeAIWorkerConnector {
450
616
  if (c.type === 'file') {
451
617
  return {
452
618
  type: 'input_file',
453
- filename: c.file.filename,
619
+ filename: c.file.filename ?? `${Math.random().toString(36).substring(2, 15)}.pdf`,
454
620
  file_data: c.file.file_data,
455
621
  };
456
622
  }
@@ -555,4 +721,68 @@ export class OpenAIGenerativeAIWorkerConnector {
555
721
  }
556
722
  return undefined;
557
723
  }
724
+ getMimeType(filename) {
725
+ const extension = filename.split('.').pop();
726
+ if (extension === 'pdf') {
727
+ return 'application/pdf';
728
+ }
729
+ if (extension === 'jpg' || extension === 'jpeg') {
730
+ return 'image/jpeg';
731
+ }
732
+ if (extension === 'png') {
733
+ return 'image/png';
734
+ }
735
+ if (extension === 'gif') {
736
+ return 'image/gif';
737
+ }
738
+ if (extension === 'webp') {
739
+ return 'image/webp';
740
+ }
741
+ if (extension === 'svg') {
742
+ return 'image/svg+xml';
743
+ }
744
+ if (extension === 'txt') {
745
+ return 'text/plain';
746
+ }
747
+ if (extension === 'html') {
748
+ return 'text/html';
749
+ }
750
+ if (extension === 'css') {
751
+ return 'text/css';
752
+ }
753
+ if (extension === 'js') {
754
+ return 'application/javascript';
755
+ }
756
+ if (extension === 'json') {
757
+ return 'application/json';
758
+ }
759
+ if (extension === 'xml') {
760
+ return 'application/xml';
761
+ }
762
+ if (extension === 'csv') {
763
+ return 'text/csv';
764
+ }
765
+ if (extension === 'tsv') {
766
+ return 'text/tab-separated-values';
767
+ }
768
+ if (extension === 'docx') {
769
+ return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
770
+ }
771
+ if (extension === 'doc') {
772
+ return 'application/msword';
773
+ }
774
+ if (extension === 'xls') {
775
+ return 'application/vnd.ms-excel';
776
+ }
777
+ if (extension === 'xlsx') {
778
+ return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
779
+ }
780
+ if (extension === 'pptx') {
781
+ return 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
782
+ }
783
+ if (extension === 'ppt') {
784
+ return 'application/vnd.ms-powerpoint';
785
+ }
786
+ return 'application/octet-stream';
787
+ }
558
788
  }
@@ -8,6 +8,25 @@ export declare class OpenAIGenerativeAIWorkerConnector implements IGenerativeAIW
8
8
  close(): Promise<void>;
9
9
  getEngineType(): GenerativeAIEngineType;
10
10
  processJob(parameters: GenerativeAIWorkerConnectorParameters, options?: IGenerativeAIWorkerOptions): Promise<IGenerativeAIWorkerConnectorPromptResult>;
11
+ /**
12
+ * OpenAI reports cached prompt tokens inside the input token total; split
13
+ * them out so billing can rate the cached share via the model's tokenRates.
14
+ */
15
+ private usageBuckets;
16
+ /**
17
+ * whisper-1 reports no token usage; verbose_json carries the audio duration
18
+ * the platform bills on, so it replaces whisper's default/json format
19
+ * (verbose_json still carries `text`, so json callers see the same output).
20
+ */
21
+ private transcriptionFormat;
22
+ /**
23
+ * Token-billed transcription models (gpt-4o-transcribe*) report
24
+ * usage.type === 'tokens'. whisper-1 reports no token usage — bill the
25
+ * audio duration (usage.type === 'duration', or verbose_json's `duration`)
26
+ * at 1,000 output tokens per minute, so whisper's catalog outputPrice is
27
+ * USD per 1,000 minutes of audio (mirrors the cloudlet proxy's metering).
28
+ */
29
+ private transcriptionUsage;
11
30
  processJobStream(parameters: GenerativeAIWorkerConnectorParameters, options?: IGenerativeAIWorkerOptions): AsyncGenerator<IGenerativeAIWorkerConnectorPromptResult>;
12
31
  private processToolCall;
13
32
  private getMessages;
@@ -15,4 +34,5 @@ export declare class OpenAIGenerativeAIWorkerConnector implements IGenerativeAIW
15
34
  private getTools;
16
35
  private getReasoning;
17
36
  private getResponseFormat;
37
+ getMimeType(filename: string): string;
18
38
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@crewdle/mist-connector-openai",
3
- "version": "1.0.21",
3
+ "version": "1.0.23",
4
4
  "description": "",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/types/index.d.ts",
@@ -15,7 +15,7 @@
15
15
  "dist/"
16
16
  ],
17
17
  "devDependencies": {
18
- "@crewdle/web-sdk-types": "^1.0.54",
18
+ "@crewdle/web-sdk-types": "^1.0.58",
19
19
  "@types/node": "^22.13.9",
20
20
  "typescript": "^5.8.2"
21
21
  },