pi-sap-aicore 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,361 @@
1
+ import { randomUUID } from "node:crypto";
2
+
3
+ import {
4
+ type Api,
5
+ type AssistantMessage,
6
+ type AssistantMessageEventStream,
7
+ calculateCost,
8
+ type Context,
9
+ createAssistantMessageEventStream,
10
+ type Model,
11
+ type SimpleStreamOptions,
12
+ } from "@earendil-works/pi-ai";
13
+ import type { AzureOpenAiChatCompletionParameters } from "@sap-ai-sdk/foundation-models";
14
+
15
+ import { buildAzureOpenAiParams } from "./foundation-params.ts";
16
+ import {
17
+ debugLog,
18
+ ensureServiceKey,
19
+ type ExtendedDelta,
20
+ formatError,
21
+ latchFinishReason,
22
+ mapUsage,
23
+ pickReasoning,
24
+ resolveResourceGroup,
25
+ type ToolCallSlot,
26
+ } from "./stream.ts";
27
+ import { mapFinishReason } from "./translate.ts";
28
+ import { piContextToAzureOpenAi } from "./translate-foundation.ts";
29
+
30
// The SDK is imported lazily — on first use, not when this module loads — so
// that a missing dependency is reported as an actionable in-stream error
// rather than an ERR_MODULE_NOT_FOUND crash at pi startup. Mirrors
// `importOrchestration` in stream.ts.
//
// Resolves to the @sap-ai-sdk/foundation-models module namespace. Rejects with
// a friendly "run npm install" Error when the package itself cannot be found;
// any other import failure is rethrown untouched.
async function importFoundation(): Promise<
  typeof import("@sap-ai-sdk/foundation-models")
> {
  let sdk: typeof import("@sap-ai-sdk/foundation-models");

  try {
    sdk = await import("@sap-ai-sdk/foundation-models");
  } catch (cause) {
    const errorCode = (cause as NodeJS.ErrnoException)?.code;
    const errorText = (cause as Error)?.message ?? "";

    // Only translate the specific "this package is not installed" failure;
    // the message check is skipped entirely when the code does not match.
    const sdkNotInstalled =
      errorCode === "ERR_MODULE_NOT_FOUND" &&
      errorText.includes("@sap-ai-sdk/foundation-models");

    if (sdkNotInstalled) {
      const hint = [
        "The SAP AI Core foundation-models SDK (@sap-ai-sdk/foundation-models) ",
        "isn't installed, so the foundation provider can't make requests. Fix: ",
        "run `npm install` in the pi-sap-aicore directory (where pi installed ",
        "it, e.g. under ~/.pi/agent/), then restart pi.",
      ].join("");
      throw new Error(hint);
    }

    throw cause;
  }

  return sdk;
}
54
+
55
// Direct (foundation) provider: routes OpenAI models through their own
// SAP AI Core deployment via @sap-ai-sdk/foundation-models'
// AzureOpenAiChatClient — bypassing the orchestration service entirely.
// Unlike streamSapAiCore there is NO streaming-unsupported fallback: the
// direct Azure OpenAI endpoint streams natively (that's the whole reason this
// path exists for new models orchestration won't stream). The SDK injects
// `stream_options: { include_usage: true }` itself, so usage arrives on the
// final chunk and `response.getTokenUsage()` is populated.
//
// Parameters:
//   model   - model descriptor; `model.id` is used as the deployment model
//             name, and `api` / `provider` / `id` are copied onto the message.
//   context - pi conversation context, translated by piContextToAzureOpenAi.
//   options - optional; `apiKey` is handed to ensureServiceKey, `signal` is
//             passed to the SDK and also polled between chunks.
//
// Returns the event stream synchronously; a background async task pushes the
// events. Failures inside that task are never thrown to the caller — they are
// delivered as a terminal `error` event (reason "error" or "aborted").
export function streamSapFoundation(
  model: Model<Api>,
  context: Context,
  options?: SimpleStreamOptions,
): AssistantMessageEventStream {
  const stream = createAssistantMessageEventStream();

  // Single mutable message object: every event carries a reference to it as
  // `partial`, and it is filled in place as chunks arrive.
  const output: AssistantMessage = {
    role: "assistant",
    content: [],
    api: model.api,
    provider: model.provider,
    model: model.id,
    usage: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 0,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    },
    stopReason: "stop",
    timestamp: Date.now(),
  };

  (async () => {
    // Correlates the request and error debug-log entries of one call.
    const requestId = randomUUID();
    try {
      stream.push({ type: "start", partial: output });

      const serviceKey = ensureServiceKey(options?.apiKey);
      // The raw service key is published through the process environment
      // before the client is constructed.
      process.env.AICORE_SERVICE_KEY = serviceKey.raw;
      const resourceGroup = resolveResourceGroup(serviceKey);

      const { messages, tools } = piContextToAzureOpenAi(context);
      const params = buildAzureOpenAiParams(model, options);

      const { AzureOpenAiChatClient } = await importFoundation();

      // `tools` is omitted entirely when empty; `params` is spread last so it
      // takes precedence over the keys above.
      const request: AzureOpenAiChatCompletionParameters = {
        messages,
        ...(tools.length > 0 ? { tools } : {}),
        ...params,
      };

      debugLog({
        requestId,
        kind: "request",
        provider: "foundation",
        model: model.id,
        resourceGroup,
        params,
        messageRoles: messages.map((m) => m.role),
        messages,
      });

      // Name-based deployment resolution: the SDK finds THE foundation
      // deployment serving this model in the resource group. SAP allows
      // only one deployment per (model, version, resource group), so the
      // match is unambiguous — no deployment ID needed.
      const client = new AzureOpenAiChatClient({
        modelName: model.id,
        ...(resourceGroup ? { resourceGroup } : {}),
      });

      const response = await client.stream(request, options?.signal);

      // Index into output.content of the currently open text / thinking
      // block, or -1 when none is open.
      let textIndex = -1;
      let thinkingIndex = -1;
      // Reasoning field reported by pickReasoning on an earlier chunk; fed
      // back into it on later chunks.
      let reasoningField: string | undefined;
      let refusalText = "";
      // Tool-call accumulators keyed by the provider's tool-call index.
      const toolSlots = new Map<number, ToolCallSlot>();
      let finishReason: string | undefined;
      // Set when the loop below is left early because the caller aborted.
      let interrupted = false;

      // Emits `text_end` for the open text block (if any) and marks it closed.
      const closeText = () => {
        if (textIndex < 0) return;
        const block = output.content[textIndex];
        if (block?.type === "text") {
          stream.push({
            type: "text_end",
            contentIndex: textIndex,
            content: block.text,
            partial: output,
          });
        }
        textIndex = -1;
      };

      // Emits `thinking_end` for the open thinking block (if any) and marks
      // it closed.
      const closeThinking = () => {
        if (thinkingIndex < 0) return;
        const block = output.content[thinkingIndex];
        if (block?.type === "thinking") {
          stream.push({
            type: "thinking_end",
            contentIndex: thinkingIndex,
            content: block.thinking,
            partial: output,
          });
        }
        thinkingIndex = -1;
      };

      for await (const chunk of response.stream) {
        if (options?.signal?.aborted) {
          interrupted = true;
          break;
        }

        const choice = chunk.findChoiceByIndex(0);
        const rawDelta = (choice?.delta ?? {}) as ExtendedDelta;

        // Reasoning first — providers emit it before visible text, and
        // pi's UI expects the thinking block to precede the text block.
        // (gpt-5* on the direct route are unlikely to pass structured
        // reasoning through, but we handle it for free if they do.)
        const reasoning = pickReasoning(rawDelta, reasoningField);
        if (reasoning) {
          reasoningField = reasoning.field;
          if (thinkingIndex < 0) {
            closeText();
            output.content.push({ type: "thinking", thinking: "" });
            thinkingIndex = output.content.length - 1;
            stream.push({
              type: "thinking_start",
              contentIndex: thinkingIndex,
              partial: output,
            });
          }
          const block = output.content[thinkingIndex];
          if (block?.type === "thinking") {
            block.thinking += reasoning.text;
            stream.push({
              type: "thinking_delta",
              contentIndex: thinkingIndex,
              delta: reasoning.text,
              partial: output,
            });
          }
        }

        const delta = chunk.getDeltaContent();
        if (delta) {
          if (textIndex < 0) {
            closeThinking();
            output.content.push({ type: "text", text: "" });
            textIndex = output.content.length - 1;
            stream.push({
              type: "text_start",
              contentIndex: textIndex,
              partial: output,
            });
          }
          const block = output.content[textIndex];
          if (block?.type === "text") {
            block.text += delta;
            stream.push({
              type: "text_delta",
              contentIndex: textIndex,
              delta,
              partial: output,
            });
          }
        }

        // Refusal fragments are only accumulated here; they are turned into
        // an error after the stream has been drained.
        if (
          typeof rawDelta.refusal === "string" &&
          rawDelta.refusal.length > 0
        ) {
          refusalText += rawDelta.refusal;
        }

        const toolDeltas = chunk.getDeltaToolCalls();
        if (toolDeltas && toolDeltas.length > 0) {
          closeText();
          closeThinking();

          for (const td of toolDeltas) {
            let slot = toolSlots.get(td.index);
            if (!slot) {
              // First fragment for this index: open a new toolCall block.
              output.content.push({
                type: "toolCall",
                id: td.id ?? "",
                name: td.function?.name ?? "",
                arguments: {},
              });
              slot = {
                contentIndex: output.content.length - 1,
                partialJson: "",
              };
              toolSlots.set(td.index, slot);
              stream.push({
                type: "toolcall_start",
                contentIndex: slot.contentIndex,
                partial: output,
              });
            }

            const block = output.content[slot.contentIndex];
            if (block?.type === "toolCall") {
              // id / name are filled in only while still empty.
              if (td.id && !block.id) block.id = td.id;
              if (td.function?.name && !block.name)
                block.name = td.function.name;

              const fragment = td.function?.arguments ?? "";
              if (fragment) {
                slot.partialJson += fragment;
                try {
                  block.arguments = JSON.parse(slot.partialJson);
                } catch {
                  // Partial JSON — keep accumulating until valid
                }
                stream.push({
                  type: "toolcall_delta",
                  contentIndex: slot.contentIndex,
                  delta: fragment,
                  partial: output,
                });
              }
            }
          }
        }

        finishReason = latchFinishReason(
          finishReason,
          chunk.getFinishReason() ?? undefined,
        );
      }

      closeText();
      closeThinking();

      for (const slot of toolSlots.values()) {
        const block = output.content[slot.contentIndex];
        if (block?.type === "toolCall") {
          if (slot.partialJson) {
            try {
              block.arguments = JSON.parse(slot.partialJson);
            } catch {
              // Leave arguments as last successfully-parsed value
            }
          }
          stream.push({
            type: "toolcall_end",
            contentIndex: slot.contentIndex,
            toolCall: {
              type: "toolCall",
              id: block.id,
              name: block.name,
              arguments: block.arguments,
            },
            partial: output,
          });
        }
      }

      // The loop was abandoned because the caller cancelled. Do not fall
      // through to the `done` path with a truncated message: hand over to the
      // catch block, which reports stopReason "aborted" while the signal is
      // aborted. Open blocks were closed above so start/end events stay
      // balanced.
      if (interrupted) {
        throw new Error("Request was aborted");
      }

      const usage = response.getTokenUsage();
      if (usage) {
        output.usage = mapUsage(usage);
        calculateCost(model, output.usage);
      }

      // A refusal terminates the turn with no real content. Promote it to a
      // visible error so pi doesn't render an empty assistant turn.
      if (refusalText) {
        output.stopReason = "error";
        output.errorMessage = `Model refused: ${refusalText}`;
        stream.push({ type: "error", reason: "error", error: output });
        stream.end();
        return;
      }

      // Prefer the reason latched from the chunks; fall back to the one the
      // response object reports.
      output.stopReason = mapFinishReason(
        finishReason ?? response.getFinishReason() ?? undefined,
      );
      stream.push({
        type: "done",
        reason: output.stopReason as "stop" | "length" | "toolUse",
        message: output,
      });
      stream.end();
    } catch (error) {
      // Any failure above ends the stream with a single error event; an
      // aborted signal is reported as "aborted", everything else as "error".
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage = formatError(error);
      debugLog({
        requestId,
        kind: "error",
        provider: "foundation",
        model: model.id,
        stopReason: output.stopReason,
        error: output.errorMessage,
      });
      stream.push({
        type: "error",
        reason: output.stopReason as "error" | "aborted",
        error: output,
      });
      stream.end();
    }
  })();

  return stream;
}