@ryanfw/prompt-orchestration-pipeline 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ryanfw/prompt-orchestration-pipeline",
3
- "version": "1.2.2",
3
+ "version": "1.2.4",
4
4
  "description": "A Prompt-orchestration pipeline (POP) is a framework for building, running, and experimenting with complex chains of LLM tasks.",
5
5
  "type": "module",
6
6
  "main": "src/ui/server/index.ts",
@@ -94,7 +94,7 @@ export const defaultConfig = {
94
94
  taskRunner: {
95
95
  maxRefinementAttempts: 3,
96
96
  stageTimeout: 300000,
97
- llmRequestTimeout: 120000,
97
+ llmRequestTimeout: 3600000,
98
98
  },
99
99
  llm: {
100
100
  defaultProvider: "openai",
@@ -245,6 +245,16 @@ export function deriveModelKeyAndTokens(metric: Record<string, unknown>): TokenU
245
245
  return [modelKey, inputTokens, outputTokens, cost];
246
246
  }
247
247
 
248
+ // ─── Safe clone ─────────────────────────────────────────────────────────────
249
+
250
+ function safeClone<T>(value: T): T {
251
+ try {
252
+ return structuredClone(value);
253
+ } catch {
254
+ return JSON.parse(JSON.stringify(value));
255
+ }
256
+ }
257
+
248
258
  // ─── Error normalization ──────────────────────────────────────────────────────
249
259
 
250
260
  /**
@@ -630,15 +640,18 @@ export async function runPipeline(
630
640
  // Best-effort: swallow status write errors
631
641
  }
632
642
 
633
- // Clone data, flags, output via structuredClone into StageContext
643
+ // Clone data, flags, output into StageContext.
644
+ // structuredClone throws on non-cloneable values (functions, streams,
645
+ // class instances with internal slots). Fall back to JSON round-trip
646
+ // which silently strips those fields.
634
647
  const stageContext: StageContext = {
635
648
  io,
636
649
  llm,
637
650
  meta: context.meta,
638
- data: structuredClone(context.data),
639
- flags: structuredClone(context.flags),
651
+ data: safeClone(context.data),
652
+ flags: safeClone(context.flags),
640
653
  currentStage: stageName,
641
- output: structuredClone(lastStageOutput),
654
+ output: safeClone(lastStageOutput),
642
655
  previousStage,
643
656
  validators: context.validators,
644
657
  };
package/src/llm/index.ts CHANGED
@@ -92,43 +92,43 @@ function inferJsonFormat(options: ChatOptions): ChatOptions {
92
92
  async function callAdapter(
93
93
  options: ChatOptions,
94
94
  ): Promise<AdapterResponse> {
95
- const { provider, messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries } = options;
95
+ const { provider, messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs } = options;
96
96
 
97
97
  switch (provider) {
98
98
  case "alibaba":
99
99
  return alibabaChat({
100
- messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries,
100
+ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs,
101
101
  frequencyPenalty: options.frequencyPenalty,
102
102
  presencePenalty: options.presencePenalty,
103
103
  });
104
104
  case "anthropic":
105
- return anthropicChat({ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries });
105
+ return anthropicChat({ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs });
106
106
  case "openai":
107
107
  return openaiChat({
108
- messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries,
108
+ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs,
109
109
  seed: undefined,
110
110
  frequencyPenalty: options.frequencyPenalty,
111
111
  presencePenalty: options.presencePenalty,
112
112
  });
113
113
  case "gemini":
114
114
  return geminiChat({
115
- messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries,
115
+ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs,
116
116
  frequencyPenalty: options.frequencyPenalty,
117
117
  presencePenalty: options.presencePenalty,
118
118
  });
119
119
  case "deepseek":
120
120
  return deepseekChat({
121
- messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries,
121
+ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs,
122
122
  frequencyPenalty: options.frequencyPenalty,
123
123
  presencePenalty: options.presencePenalty,
124
124
  });
125
125
  case "moonshot":
126
- return moonshotChat({ messages, model, maxTokens, responseFormat, maxRetries });
126
+ return moonshotChat({ messages, model, maxTokens, responseFormat, maxRetries, requestTimeoutMs });
127
127
  case "zai":
128
128
  case "zhipu":
129
- return zaiChat({ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries });
129
+ return zaiChat({ messages, model, temperature, maxTokens, responseFormat, topP, stop, maxRetries, requestTimeoutMs });
130
130
  case "claudecode":
131
- return claudeCodeChat({ messages, model, maxTokens, responseFormat, maxRetries });
131
+ return claudeCodeChat({ messages, model, maxTokens, responseFormat, maxRetries, requestTimeoutMs });
132
132
  case "mock": {
133
133
  if (!mockProvider) {
134
134
  throw new Error("No mock provider registered. Call registerMockProvider() first.");
@@ -180,7 +180,11 @@ async function writeDebugLog(options: ChatOptions, response: ChatResponse): Prom
180
180
 
181
181
  export async function chat(options: ChatOptions): Promise<ChatResponse> {
182
182
  ensureMessagesPresent(options.messages, options.provider);
183
- const opts = inferJsonFormat(options);
183
+ const configTimeout = getConfig().taskRunner.llmRequestTimeout;
184
+ const opts = inferJsonFormat({
185
+ ...options,
186
+ requestTimeoutMs: options.requestTimeoutMs ?? configTimeout,
187
+ });
184
188
  const id = `llm-${++requestCounter}-${Date.now()}`;
185
189
  const model = opts.model ?? "";
186
190
  const startTime = Date.now();
@@ -4,25 +4,66 @@ import { ProviderJsonParseError } from "../types.ts";
4
4
  import type { AlibabaOptions } from "../types.ts";
5
5
  import type { Mock } from "vitest";
6
6
 
7
- function makeAlibabaResponse(
8
- content: string,
9
- promptTokens = 10,
10
- completionTokens = 20,
11
- ) {
12
- return {
13
- choices: [{ message: { content } }],
14
- usage: {
15
- prompt_tokens: promptTokens,
16
- completion_tokens: completionTokens,
17
- total_tokens: promptTokens + completionTokens,
7
+ /**
8
+ * Creates a mock ReadableStream that yields SSE-formatted data.
9
+ */
10
+ function makeSSEStream(events: string[]): ReadableStream<Uint8Array> {
11
+ const encoder = new TextEncoder();
12
+ const chunks = events.map((e) => encoder.encode(e));
13
+ let index = 0;
14
+
15
+ return new ReadableStream({
16
+ pull(controller) {
17
+ if (index < chunks.length) {
18
+ controller.enqueue(chunks[index]!);
19
+ index++;
20
+ } else {
21
+ controller.close();
22
+ }
18
23
  },
19
- };
24
+ });
25
+ }
26
+
27
+ /**
28
+ * Builds SSE events for an OpenAI-compatible streaming response.
29
+ */
30
+ function makeOpenAiSseEvents(
31
+ textChunks: string[],
32
+ usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number },
33
+ ): string[] {
34
+ const events: string[] = [];
35
+ for (const chunk of textChunks) {
36
+ events.push(
37
+ `data: ${JSON.stringify({ choices: [{ delta: { content: chunk } }] })}\n\n`,
38
+ );
39
+ }
40
+ if (usage) {
41
+ events.push(
42
+ `data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: "stop" }], usage })}\n\n`,
43
+ );
44
+ } else {
45
+ events.push(
46
+ `data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: "stop" }] })}\n\n`,
47
+ );
48
+ }
49
+ events.push("data: [DONE]\n\n");
50
+ return events;
20
51
  }
21
52
 
22
- function mockFetchResponse(body: unknown, status = 200) {
53
+ function mockStreamingResponse(events: string[], status = 200) {
23
54
  return {
24
55
  ok: status >= 200 && status < 300,
25
56
  status,
57
+ body: makeSSEStream(events),
58
+ json: vi.fn(),
59
+ text: vi.fn(),
60
+ } as unknown as Response;
61
+ }
62
+
63
+ function mockErrorResponse(body: unknown, status: number) {
64
+ return {
65
+ ok: false,
66
+ status,
26
67
  json: vi.fn().mockResolvedValue(body),
27
68
  text: vi.fn().mockResolvedValue(JSON.stringify(body)),
28
69
  } as unknown as Response;
@@ -56,11 +97,11 @@ describe("alibabaChat", () => {
56
97
 
57
98
  it("returns parsed JSON content with usage on success", async () => {
58
99
  const jsonPayload = { result: "success", count: 42 };
59
- fetchMock.mockResolvedValue(
60
- mockFetchResponse(
61
- makeAlibabaResponse(JSON.stringify(jsonPayload), 15, 25),
62
- ),
100
+ const events = makeOpenAiSseEvents(
101
+ [JSON.stringify(jsonPayload)],
102
+ { prompt_tokens: 15, completion_tokens: 25, total_tokens: 40 },
63
103
  );
104
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
64
105
 
65
106
  const result = await alibabaChat(baseOptions);
66
107
 
@@ -70,14 +111,25 @@ describe("alibabaChat", () => {
70
111
  completion_tokens: 25,
71
112
  total_tokens: 40,
72
113
  });
73
- expect(result.raw).toBeDefined();
114
+ });
115
+
116
+ it("sends stream: true and stream_options in request body", async () => {
117
+ const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
118
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
119
+
120
+ await alibabaChat(baseOptions);
121
+
122
+ const body = JSON.parse(
123
+ (fetchMock.mock.calls[0] as [string, RequestInit])[1].body as string,
124
+ );
125
+ expect(body.stream).toBe(true);
126
+ expect(body.stream_options).toEqual({ include_usage: true });
74
127
  });
75
128
 
76
129
  it("throws ProviderJsonParseError on invalid JSON when responseFormat is json_object", async () => {
77
130
  const nonJsonText = "This is plain text, not JSON at all.";
78
- fetchMock.mockResolvedValue(
79
- mockFetchResponse(makeAlibabaResponse(nonJsonText)),
80
- );
131
+ const events = makeOpenAiSseEvents([nonJsonText]);
132
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
81
133
 
82
134
  try {
83
135
  await alibabaChat(baseOptions);
@@ -93,18 +145,15 @@ describe("alibabaChat", () => {
93
145
 
94
146
  it("retries on HTTP 500 with exponential backoff", async () => {
95
147
  const jsonPayload = { retried: true };
148
+ const events = makeOpenAiSseEvents([JSON.stringify(jsonPayload)]);
96
149
  fetchMock
97
150
  .mockResolvedValueOnce(
98
- mockFetchResponse({ error: { message: "Server error" } }, 500),
151
+ mockErrorResponse({ error: { message: "Server error" } }, 500),
99
152
  )
100
153
  .mockResolvedValueOnce(
101
- mockFetchResponse({ error: { message: "Server error" } }, 500),
154
+ mockErrorResponse({ error: { message: "Server error" } }, 500),
102
155
  )
103
- .mockResolvedValueOnce(
104
- mockFetchResponse(
105
- makeAlibabaResponse(JSON.stringify(jsonPayload)),
106
- ),
107
- );
156
+ .mockResolvedValueOnce(mockStreamingResponse(events));
108
157
 
109
158
  const result = await alibabaChat({ ...baseOptions, maxRetries: 3 });
110
159
 
@@ -114,7 +163,7 @@ describe("alibabaChat", () => {
114
163
 
115
164
  it("does NOT retry on HTTP 401", async () => {
116
165
  fetchMock.mockResolvedValue(
117
- mockFetchResponse({ error: { message: "Unauthorized" } }, 401),
166
+ mockErrorResponse({ error: { message: "Unauthorized" } }, 401),
118
167
  );
119
168
 
120
169
  await expect(
@@ -126,10 +175,8 @@ describe("alibabaChat", () => {
126
175
 
127
176
  it("uses ALIBABA_BASE_URL env var when set", async () => {
128
177
  process.env["ALIBABA_BASE_URL"] = "https://custom.api.example.com";
129
- const jsonPayload = { ok: true };
130
- fetchMock.mockResolvedValue(
131
- mockFetchResponse(makeAlibabaResponse(JSON.stringify(jsonPayload))),
132
- );
178
+ const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
179
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
133
180
 
134
181
  await alibabaChat(baseOptions);
135
182
 
@@ -140,10 +187,8 @@ describe("alibabaChat", () => {
140
187
  });
141
188
 
142
189
  it("passes frequencyPenalty and presencePenalty in request body", async () => {
143
- const jsonPayload = { ok: true };
144
- fetchMock.mockResolvedValue(
145
- mockFetchResponse(makeAlibabaResponse(JSON.stringify(jsonPayload))),
146
- );
190
+ const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
191
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
147
192
 
148
193
  await alibabaChat({
149
194
  ...baseOptions,
@@ -158,10 +203,9 @@ describe("alibabaChat", () => {
158
203
  expect(body.presence_penalty).toBe(0.2);
159
204
  });
160
205
 
161
- it("passes an AbortSignal to fetch", async () => {
162
- fetchMock.mockResolvedValue(
163
- mockFetchResponse(makeAlibabaResponse(JSON.stringify({ ok: true }))),
164
- );
206
+ it("passes an AbortSignal to fetch (IdleTimeoutController)", async () => {
207
+ const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
208
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
165
209
 
166
210
  await alibabaChat(baseOptions);
167
211
 
@@ -169,22 +213,9 @@ describe("alibabaChat", () => {
169
213
  expect(init.signal).toBeInstanceOf(AbortSignal);
170
214
  });
171
215
 
172
- it("uses custom requestTimeoutMs for the abort signal", async () => {
173
- const timeoutSpy = vi.spyOn(AbortSignal, "timeout");
174
- fetchMock.mockResolvedValue(
175
- mockFetchResponse(makeAlibabaResponse(JSON.stringify({ ok: true }))),
176
- );
177
-
178
- await alibabaChat({ ...baseOptions, requestTimeoutMs: 5000 });
179
-
180
- expect(timeoutSpy).toHaveBeenCalledWith(5000);
181
- timeoutSpy.mockRestore();
182
- });
183
-
184
216
  it("sends enable_thinking true by default", async () => {
185
- fetchMock.mockResolvedValue(
186
- mockFetchResponse(makeAlibabaResponse(JSON.stringify({ ok: true }))),
187
- );
217
+ const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
218
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
188
219
 
189
220
  await alibabaChat(baseOptions);
190
221
 
@@ -195,9 +226,8 @@ describe("alibabaChat", () => {
195
226
  });
196
227
 
197
228
  it("sends enable_thinking false when thinking is disabled", async () => {
198
- fetchMock.mockResolvedValue(
199
- mockFetchResponse(makeAlibabaResponse(JSON.stringify({ ok: true }))),
200
- );
229
+ const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
230
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
201
231
 
202
232
  await alibabaChat({ ...baseOptions, thinking: "disabled" });
203
233
 
@@ -206,4 +236,67 @@ describe("alibabaChat", () => {
206
236
  );
207
237
  expect(body.enable_thinking).toBe(false);
208
238
  });
239
+
240
+ describe("streaming accumulation", () => {
241
+ it("accumulates text across multiple SSE chunks", async () => {
242
+ const events = [
243
+ 'data: {"choices":[{"delta":{"content":"{\\"he"}}]}\n\n',
244
+ 'data: {"choices":[{"delta":{"content":"llo\\":\\"world\\"}"}}]}\n\n',
245
+ 'data: {"choices":[{"delta":{},"finish_reason":"stop"}]}\n\n',
246
+ "data: [DONE]\n\n",
247
+ ];
248
+
249
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
250
+
251
+ const result = await alibabaChat(baseOptions);
252
+ expect(result.content).toEqual({ hello: "world" });
253
+ });
254
+
255
+ it("captures usage from the final streaming chunk", async () => {
256
+ const events = makeOpenAiSseEvents(
257
+ [JSON.stringify({ ok: true })],
258
+ { prompt_tokens: 50, completion_tokens: 30, total_tokens: 80 },
259
+ );
260
+
261
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
262
+
263
+ const result = await alibabaChat(baseOptions);
264
+ expect(result.usage).toEqual({
265
+ prompt_tokens: 50,
266
+ completion_tokens: 30,
267
+ total_tokens: 80,
268
+ });
269
+ });
270
+
271
+ it("defaults usage to zeros when stream provides no usage", async () => {
272
+ const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
273
+
274
+ fetchMock.mockResolvedValue(mockStreamingResponse(events));
275
+
276
+ const result = await alibabaChat(baseOptions);
277
+ expect(result.usage).toEqual({
278
+ prompt_tokens: 0,
279
+ completion_tokens: 0,
280
+ total_tokens: 0,
281
+ });
282
+ });
283
+
284
+ it("retries on timeout then succeeds on second attempt", async () => {
285
+ const events = makeOpenAiSseEvents([JSON.stringify({ ok: true })]);
286
+
287
+ fetchMock
288
+ .mockRejectedValueOnce(
289
+ new DOMException("signal timed out", "TimeoutError"),
290
+ )
291
+ .mockResolvedValueOnce(mockStreamingResponse(events));
292
+
293
+ const result = await alibabaChat({
294
+ ...baseOptions,
295
+ maxRetries: 1,
296
+ });
297
+
298
+ expect(fetchMock).toHaveBeenCalledTimes(2);
299
+ expect(result.content).toEqual({ ok: true });
300
+ });
301
+ });
209
302
  });