@purista/harness-azure-foundry 1.2.1 → 1.2.2

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +61 -10
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -50,6 +50,7 @@ class AzureFoundryModelProvider extends BaseModelProvider {
50
50
  req.signal.throwIfAborted();
51
51
  let usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
52
52
  let finishReason = 'stop';
53
+ const toolState = new Map();
53
54
  for await (const event of streamChat(this.client, req, false)) {
54
55
  req.signal.throwIfAborted();
55
56
  const data = parseStreamData(event, req, 'textStream');
@@ -59,16 +60,20 @@ class AzureFoundryModelProvider extends BaseModelProvider {
59
60
  if (choice.delta?.content) {
60
61
  yield { kind: 'delta', text: choice.delta.content };
61
62
  }
62
- const toolCalls = extractToolCalls(choice.delta?.tool_calls, req, 'textStream');
63
- for (const call of toolCalls ?? []) {
64
- yield { kind: 'tool_call', call };
63
+ if (choice.delta?.tool_calls) {
64
+ accumulateToolCallDeltas(toolState, choice.delta.tool_calls);
65
+ }
66
+ if (choice.finish_reason) {
67
+ finishReason = toFinishReason(choice.finish_reason);
65
68
  }
66
- finishReason = toFinishReason(choice.finish_reason ?? finishReason);
67
69
  }
68
70
  if (data.usage) {
69
71
  usage = toUsage(data.usage.prompt_tokens, data.usage.completion_tokens, data.usage.total_tokens);
70
72
  }
71
73
  }
74
+ for (const call of finalizeStreamToolCalls(toolState, req, 'textStream')) {
75
+ yield { kind: 'tool_call', call };
76
+ }
72
77
  yield { kind: 'finish', usage, finishReason };
73
78
  }
74
79
  async doObject(req) {
@@ -91,6 +96,7 @@ class AzureFoundryModelProvider extends BaseModelProvider {
91
96
  let partial = '';
92
97
  let usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
93
98
  let finishReason = 'stop';
99
+ const toolState = new Map();
94
100
  for await (const event of streamChat(this.client, req, true)) {
95
101
  req.signal.throwIfAborted();
96
102
  const data = parseStreamData(event, req, 'objectStream');
@@ -101,16 +107,20 @@ class AzureFoundryModelProvider extends BaseModelProvider {
101
107
  partial += choice.delta.content;
102
108
  yield { kind: 'partial', partial: safePartialJson(partial) };
103
109
  }
104
- const toolCalls = extractToolCalls(choice.delta?.tool_calls, req, 'objectStream');
105
- for (const call of toolCalls ?? []) {
106
- yield { kind: 'tool_call', call };
110
+ if (choice.delta?.tool_calls) {
111
+ accumulateToolCallDeltas(toolState, choice.delta.tool_calls);
112
+ }
113
+ if (choice.finish_reason) {
114
+ finishReason = toFinishReason(choice.finish_reason);
107
115
  }
108
- finishReason = toFinishReason(choice.finish_reason ?? finishReason);
109
116
  }
110
117
  if (data.usage) {
111
118
  usage = toUsage(data.usage.prompt_tokens, data.usage.completion_tokens, data.usage.total_tokens);
112
119
  }
113
120
  }
121
+ for (const call of finalizeStreamToolCalls(toolState, req, 'objectStream')) {
122
+ yield { kind: 'tool_call', call };
123
+ }
114
124
  const object = parseJson(partial || '{}', req, 'objectStream');
115
125
  yield { kind: 'finish', object, usage, finishReason };
116
126
  }
@@ -163,11 +173,14 @@ async function postChat(client, req, stream) {
163
173
  model: req.model,
164
174
  messages: toAzureMessages(req.messages),
165
175
  stream,
176
+ // Only emits a usage event during streaming when this is set.
177
+ ...(stream ? { stream_options: { include_usage: true } } : {}),
166
178
  tools: toTools(req.tools),
167
179
  temperature: req.call?.temperature ?? req.defaults?.temperature,
168
180
  max_tokens: req.call?.maxTokens ?? req.defaults?.maxTokens,
169
181
  top_p: req.call?.topP ?? req.defaults?.topP,
170
182
  stop: req.call?.stopSequences ?? req.defaults?.stopSequences,
183
+ ...(req.tools && (req.call?.parallelToolCalls ?? req.defaults?.parallelToolCalls) !== undefined ? { parallel_tool_calls: req.call?.parallelToolCalls ?? req.defaults?.parallelToolCalls } : {}),
171
184
  response_format: toResponseFormat(req),
172
185
  ...bodyOptions
173
186
  },
@@ -179,7 +192,7 @@ async function* streamChat(client, req, objectMode) {
179
192
  const response = await postChat(client, req, true);
180
193
  const nodeResponse = typeof response.asNodeStream === 'function' ? await response.asNodeStream() : response;
181
194
  if (nodeResponse.status && nodeResponse.status !== '200' && nodeResponse.status !== 200) {
182
- throw nodeResponse.body?.error ?? new Error('Azure AI Foundry streaming request failed.');
195
+ throw azureFailure(nodeResponse, 'Azure AI Foundry streaming request failed.');
183
196
  }
184
197
  if (nodeResponse.body?.[Symbol.asyncIterator]) {
185
198
  const sses = createSseStream(nodeResponse.body);
@@ -196,10 +209,25 @@ async function* streamChat(client, req, objectMode) {
196
209
  }
197
210
  function ensureOk(response) {
198
211
  if (response.status && response.status !== '200' && response.status !== 200) {
199
- throw response.body?.error ?? new Error('Azure AI Foundry request failed.');
212
+ throw azureFailure(response, 'Azure AI Foundry request failed.');
200
213
  }
201
214
  return response.body ?? response;
202
215
  }
216
+ /**
217
+ * Build an error that preserves the HTTP status (and body/headers) so the base
218
+ * provider's `normalizeError` can classify retriability (429/5xx) instead of
219
+ * misclassifying every failure as a non-retriable network error.
220
+ */
221
+ function azureFailure(response, fallbackMessage) {
222
+ const status = Number(response?.status);
223
+ const body = response?.body;
224
+ const message = (body?.error?.message ?? body?.message);
225
+ return Object.assign(new Error(message ?? fallbackMessage), {
226
+ ...(Number.isFinite(status) ? { status } : {}),
227
+ ...(body?.error ? { error: body.error } : body !== undefined ? { body } : {}),
228
+ ...(response?.headers ? { headers: response.headers } : {})
229
+ });
230
+ }
203
231
  function toAzureMessages(messages) {
204
232
  return messages.map((message) => {
205
233
  if (message.role === 'assistant' && message.toolCalls && message.toolCalls.length > 0) {
@@ -271,6 +299,29 @@ function extractToolCalls(toolCalls, req, method) {
271
299
  arguments: parseJson(call.function.arguments ?? '{}', req, method)
272
300
  }));
273
301
  }
302
+ function accumulateToolCallDeltas(state, deltas) {
303
+ for (const delta of deltas) {
304
+ const index = typeof delta?.index === 'number' ? delta.index : 0;
305
+ const existing = state.get(index) ?? { args: '' };
306
+ if (delta?.id)
307
+ existing.id = String(delta.id);
308
+ if (delta?.function?.name)
309
+ existing.name = String(delta.function.name);
310
+ if (typeof delta?.function?.arguments === 'string')
311
+ existing.args += delta.function.arguments;
312
+ state.set(index, existing);
313
+ }
314
+ }
315
+ function finalizeStreamToolCalls(state, req, method) {
316
+ return [...state.entries()]
317
+ .sort((a, b) => a[0] - b[0])
318
+ .filter(([, call]) => call.id && call.name)
319
+ .map(([, call]) => ({
320
+ id: call.id,
321
+ name: call.name,
322
+ arguments: parseJson(call.args || '{}', req, method)
323
+ }));
324
+ }
274
325
  function parseStreamData(event, req, method) {
275
326
  if (event === '[DONE]')
276
327
  return undefined;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@purista/harness-azure-foundry",
3
- "version": "1.2.1",
3
+ "version": "1.2.2",
4
4
  "description": "Azure AI Foundry model provider adapter for @purista/harness.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",