@ryanfw/prompt-orchestration-pipeline 0.16.4 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api/validators/json.js +6 -1
- package/src/config/models.js +63 -1
- package/src/core/orchestrator.js +28 -56
- package/src/core/pipeline-runner.js +51 -1
- package/src/core/task-runner.js +17 -7
- package/src/llm/index.js +148 -0
- package/src/pages/Code.jsx +201 -2
- package/src/providers/anthropic.js +3 -2
- package/src/providers/base.js +19 -0
- package/src/providers/deepseek.js +3 -2
- package/src/providers/moonshot.js +218 -0
- package/src/ui/dist/assets/{index-DI_nRqVI.js → index-xx8otyG0.js} +142 -1
- package/src/ui/dist/assets/{index-DI_nRqVI.js.map → index-xx8otyG0.js.map} +1 -1
- package/src/ui/dist/index.html +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ryanfw/prompt-orchestration-pipeline",
|
|
3
|
-
"version": "0.16.4",
|
|
3
|
+
"version": "0.17.1",
|
|
4
4
|
"description": "A Prompt-orchestration pipeline (POP) is a framework for building, running, and experimenting with complex chains of LLM tasks.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/ui/server.js",
|
package/src/api/validators/json.js
CHANGED
|
@@ -27,7 +27,12 @@ export const validateWithSchema = (schema, data) => {
|
|
|
27
27
|
}
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
// Check if schema already exists, otherwise compile and cache it
|
|
31
|
+
let validateFunction = schema.$id ? ajv.getSchema(schema.$id) : null;
|
|
32
|
+
if (!validateFunction) {
|
|
33
|
+
validateFunction = ajv.compile(schema);
|
|
34
|
+
}
|
|
35
|
+
|
|
31
36
|
const isValid = validateFunction(parsedData);
|
|
32
37
|
|
|
33
38
|
if (isValid) {
|
package/src/config/models.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Canonical model configuration for prompt orchestration pipeline.
|
|
3
3
|
* This module serves as single source of truth for all model metadata.
|
|
4
4
|
*
|
|
5
|
-
* Last updated:
|
|
5
|
+
* Last updated: January 2026
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
// Model alias constants grouped by provider
|
|
@@ -46,6 +46,16 @@ export const ModelAlias = Object.freeze({
|
|
|
46
46
|
CLAUDE_CODE_SONNET: "claudecode:sonnet",
|
|
47
47
|
CLAUDE_CODE_OPUS: "claudecode:opus",
|
|
48
48
|
CLAUDE_CODE_HAIKU: "claudecode:haiku",
|
|
49
|
+
|
|
50
|
+
// Moonshot/Kimi (K2/K2.5 series as of Jan 2026)
|
|
51
|
+
MOONSHOT_K2: "moonshot:kimi-k2",
|
|
52
|
+
MOONSHOT_K2_TURBO: "moonshot:kimi-k2-turbo",
|
|
53
|
+
MOONSHOT_K2_THINKING: "moonshot:kimi-k2-thinking",
|
|
54
|
+
MOONSHOT_K2_THINKING_TURBO: "moonshot:kimi-k2-thinking-turbo",
|
|
55
|
+
MOONSHOT_K2_5: "moonshot:kimi-k2.5",
|
|
56
|
+
MOONSHOT_LATEST_8K: "moonshot:kimi-latest-8k",
|
|
57
|
+
MOONSHOT_LATEST_32K: "moonshot:kimi-latest-32k",
|
|
58
|
+
MOONSHOT_LATEST_128K: "moonshot:kimi-latest-128k",
|
|
49
59
|
});
|
|
50
60
|
|
|
51
61
|
// Consolidated model configuration with pricing metadata
|
|
@@ -225,6 +235,57 @@ export const MODEL_CONFIG = Object.freeze({
|
|
|
225
235
|
tokenCostInPerMillion: 0,
|
|
226
236
|
tokenCostOutPerMillion: 0,
|
|
227
237
|
},
|
|
238
|
+
|
|
239
|
+
// ─── Moonshot/Kimi (Jan 2026) ───
|
|
240
|
+
// K2 and K2.5 series with built-in context caching
|
|
241
|
+
[ModelAlias.MOONSHOT_K2]: {
|
|
242
|
+
provider: "moonshot",
|
|
243
|
+
model: "kimi-k2",
|
|
244
|
+
tokenCostInPerMillion: 0.6,
|
|
245
|
+
tokenCostOutPerMillion: 2.5,
|
|
246
|
+
},
|
|
247
|
+
[ModelAlias.MOONSHOT_K2_TURBO]: {
|
|
248
|
+
provider: "moonshot",
|
|
249
|
+
model: "kimi-k2-turbo",
|
|
250
|
+
tokenCostInPerMillion: 1.15,
|
|
251
|
+
tokenCostOutPerMillion: 8.0,
|
|
252
|
+
},
|
|
253
|
+
[ModelAlias.MOONSHOT_K2_THINKING]: {
|
|
254
|
+
provider: "moonshot",
|
|
255
|
+
model: "kimi-k2-thinking",
|
|
256
|
+
tokenCostInPerMillion: 1.15,
|
|
257
|
+
tokenCostOutPerMillion: 8.0,
|
|
258
|
+
},
|
|
259
|
+
[ModelAlias.MOONSHOT_K2_THINKING_TURBO]: {
|
|
260
|
+
provider: "moonshot",
|
|
261
|
+
model: "kimi-k2-thinking-turbo",
|
|
262
|
+
tokenCostInPerMillion: 1.15,
|
|
263
|
+
tokenCostOutPerMillion: 8.0,
|
|
264
|
+
},
|
|
265
|
+
[ModelAlias.MOONSHOT_K2_5]: {
|
|
266
|
+
provider: "moonshot",
|
|
267
|
+
model: "kimi-k2.5", // Latest model (Jan 2026)
|
|
268
|
+
tokenCostInPerMillion: 0.6,
|
|
269
|
+
tokenCostOutPerMillion: 3.0,
|
|
270
|
+
},
|
|
271
|
+
[ModelAlias.MOONSHOT_LATEST_8K]: {
|
|
272
|
+
provider: "moonshot",
|
|
273
|
+
model: "kimi-latest-8k", // 8K context tier
|
|
274
|
+
tokenCostInPerMillion: 0.2,
|
|
275
|
+
tokenCostOutPerMillion: 2.0,
|
|
276
|
+
},
|
|
277
|
+
[ModelAlias.MOONSHOT_LATEST_32K]: {
|
|
278
|
+
provider: "moonshot",
|
|
279
|
+
model: "kimi-latest-32k", // 32K context tier
|
|
280
|
+
tokenCostInPerMillion: 1.0,
|
|
281
|
+
tokenCostOutPerMillion: 3.0,
|
|
282
|
+
},
|
|
283
|
+
[ModelAlias.MOONSHOT_LATEST_128K]: {
|
|
284
|
+
provider: "moonshot",
|
|
285
|
+
model: "kimi-latest-128k", // 128K context tier
|
|
286
|
+
tokenCostInPerMillion: 2.0,
|
|
287
|
+
tokenCostOutPerMillion: 5.0,
|
|
288
|
+
},
|
|
228
289
|
});
|
|
229
290
|
|
|
230
291
|
// Validation set of all valid model aliases
|
|
@@ -238,6 +299,7 @@ export const DEFAULT_MODEL_BY_PROVIDER = Object.freeze({
|
|
|
238
299
|
zhipu: ModelAlias.ZAI_GLM_4_6,
|
|
239
300
|
anthropic: ModelAlias.ANTHROPIC_OPUS_4_5, // Updated: Opus 4.5 available at better price
|
|
240
301
|
claudecode: ModelAlias.CLAUDE_CODE_SONNET,
|
|
302
|
+
moonshot: ModelAlias.MOONSHOT_K2_5, // Latest K2.5 model (Jan 2026)
|
|
241
303
|
});
|
|
242
304
|
|
|
243
305
|
/**
|
package/src/core/orchestrator.js
CHANGED
|
@@ -245,7 +245,14 @@ export async function startOrchestrator(opts) {
|
|
|
245
245
|
|
|
246
246
|
watcher.on("add", (file) => {
|
|
247
247
|
// Return promise so tests awaiting the add handler block until processing completes
|
|
248
|
-
|
|
248
|
+
// Catch rejections to prevent unhandled promise rejection crashes
|
|
249
|
+
return handleSeedAdd(file).catch((error) => {
|
|
250
|
+
logger.error("Failed to handle seed file", {
|
|
251
|
+
file,
|
|
252
|
+
error: error instanceof Error ? error.message : String(error),
|
|
253
|
+
stack: error instanceof Error ? error.stack : undefined,
|
|
254
|
+
});
|
|
255
|
+
});
|
|
249
256
|
});
|
|
250
257
|
|
|
251
258
|
async function stop() {
|
|
@@ -380,65 +387,30 @@ function spawnRunner(
|
|
|
380
387
|
|
|
381
388
|
child.on("exit", (code, signal) => {
|
|
382
389
|
running.delete(jobId);
|
|
383
|
-
|
|
384
|
-
//
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
timestamp: new Date().toISOString(),
|
|
395
|
-
completionType: code === 0 ? "success" : "failure",
|
|
396
|
-
},
|
|
397
|
-
null,
|
|
398
|
-
2
|
|
399
|
-
),
|
|
400
|
-
{ mode: "replace" }
|
|
401
|
-
);
|
|
402
|
-
} catch (error) {
|
|
403
|
-
logger.error("Failed to write job completion log", {
|
|
404
|
-
jobId,
|
|
405
|
-
error: error.message,
|
|
406
|
-
});
|
|
407
|
-
}
|
|
408
|
-
}
|
|
390
|
+
// Note: We intentionally don't write completion logs here because
|
|
391
|
+
// the pipeline-runner moves the job directory from current/ to complete/
|
|
392
|
+
// before exiting. Writing here would create a ghost directory under current/
|
|
393
|
+
// due to the race condition between fs.rename() and this exit handler.
|
|
394
|
+
// The pipeline-runner already writes its own execution logs and runs.jsonl.
|
|
395
|
+
logger.log("Pipeline runner exited", {
|
|
396
|
+
jobId,
|
|
397
|
+
exitCode: code,
|
|
398
|
+
signal: signal,
|
|
399
|
+
completionType: code === 0 ? "success" : "failure",
|
|
400
|
+
});
|
|
409
401
|
});
|
|
410
402
|
|
|
411
403
|
child.on("error", (error) => {
|
|
412
404
|
running.delete(jobId);
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
error: {
|
|
423
|
-
message: error.message,
|
|
424
|
-
name: error.name,
|
|
425
|
-
code: error.code,
|
|
426
|
-
},
|
|
427
|
-
timestamp: new Date().toISOString(),
|
|
428
|
-
completionType: "error",
|
|
429
|
-
},
|
|
430
|
-
null,
|
|
431
|
-
2
|
|
432
|
-
),
|
|
433
|
-
{ mode: "replace" }
|
|
434
|
-
);
|
|
435
|
-
} catch (logError) {
|
|
436
|
-
logger.error("Failed to write job error log", {
|
|
437
|
-
jobId,
|
|
438
|
-
error: logError.message,
|
|
439
|
-
});
|
|
440
|
-
}
|
|
441
|
-
}
|
|
405
|
+
// Log spawn errors but don't write to filesystem to avoid race conditions
|
|
406
|
+
logger.error("Pipeline runner spawn error", {
|
|
407
|
+
jobId,
|
|
408
|
+
error: {
|
|
409
|
+
message: error.message,
|
|
410
|
+
name: error.name,
|
|
411
|
+
code: error.code,
|
|
412
|
+
},
|
|
413
|
+
});
|
|
442
414
|
});
|
|
443
415
|
|
|
444
416
|
// In test mode: return immediately; in real mode you might await readiness
|
package/src/core/pipeline-runner.js
CHANGED
|
@@ -2,6 +2,27 @@ import fs from "node:fs/promises";
|
|
|
2
2
|
import fsSync from "node:fs";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import { runPipeline } from "./task-runner.js";
|
|
5
|
+
|
|
6
|
+
// Global unhandled rejection handler to prevent hanging on unexpected errors
|
|
7
|
+
// This must be registered early before any async operations
|
|
8
|
+
process.on("unhandledRejection", (reason, promise) => {
|
|
9
|
+
console.error("[PipelineRunner] Unhandled promise rejection:", reason);
|
|
10
|
+
console.error("[PipelineRunner] Promise:", promise);
|
|
11
|
+
// Force exit after a brief delay to allow logs to flush
|
|
12
|
+
setTimeout(() => {
|
|
13
|
+
console.error("[PipelineRunner] Forcing exit due to unhandled rejection");
|
|
14
|
+
process.exit(1);
|
|
15
|
+
}, 100);
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
process.on("uncaughtException", (error) => {
|
|
19
|
+
console.error("[PipelineRunner] Uncaught exception:", error);
|
|
20
|
+
// Force exit after a brief delay to allow logs to flush
|
|
21
|
+
setTimeout(() => {
|
|
22
|
+
console.error("[PipelineRunner] Forcing exit due to uncaught exception");
|
|
23
|
+
process.exit(1);
|
|
24
|
+
}, 100);
|
|
25
|
+
});
|
|
5
26
|
import { loadFreshModule } from "./module-loader.js";
|
|
6
27
|
import { validatePipelineOrThrow } from "./validation.js";
|
|
7
28
|
import { getPipelineConfig } from "./config.js";
|
|
@@ -106,6 +127,9 @@ const pipeline = JSON.parse(await fs.readFile(PIPELINE_DEF_PATH, "utf8"));
|
|
|
106
127
|
// Validate pipeline format early with a friendly error message
|
|
107
128
|
validatePipelineOrThrow(pipeline, PIPELINE_DEF_PATH);
|
|
108
129
|
|
|
130
|
+
// Extract optional LLM override from pipeline config
|
|
131
|
+
const llmOverride = pipeline.llm || null;
|
|
132
|
+
|
|
109
133
|
const taskNames = pipeline.tasks.map(getTaskName);
|
|
110
134
|
|
|
111
135
|
const tasks = (await loadFreshModule(TASK_REGISTRY)).default;
|
|
@@ -203,6 +227,7 @@ try {
|
|
|
203
227
|
taskConfig: pipeline.taskConfig?.[taskName] || {},
|
|
204
228
|
statusPath: tasksStatusPath,
|
|
205
229
|
jobId,
|
|
230
|
+
llmOverride,
|
|
206
231
|
meta: {
|
|
207
232
|
pipelineTasks: [...pipeline.tasks],
|
|
208
233
|
},
|
|
@@ -416,7 +441,32 @@ try {
|
|
|
416
441
|
await cleanupTaskSymlinks(dest);
|
|
417
442
|
}
|
|
418
443
|
} catch (error) {
|
|
419
|
-
|
|
444
|
+
// Log the error with full context instead of re-throwing
|
|
445
|
+
// Re-throwing at top-level causes unhandled promise rejection and hanging
|
|
446
|
+
logger.error("Pipeline execution failed with unhandled error", {
|
|
447
|
+
jobId,
|
|
448
|
+
pipelineSlug,
|
|
449
|
+
error: normalizeError(error),
|
|
450
|
+
});
|
|
451
|
+
|
|
452
|
+
console.error("[PipelineRunner] Fatal error:", error);
|
|
453
|
+
|
|
454
|
+
// Ensure we exit with failure code
|
|
455
|
+
process.exitCode = 1;
|
|
456
|
+
|
|
457
|
+
// Set a forced exit timeout to prevent indefinite hanging
|
|
458
|
+
// This catches cases where cleanup or logging doesn't complete
|
|
459
|
+
const forceExitTimeout = setTimeout(() => {
|
|
460
|
+
console.error("[PipelineRunner] Force exit timeout reached, terminating process");
|
|
461
|
+
process.exit(1);
|
|
462
|
+
}, 5000);
|
|
463
|
+
|
|
464
|
+
// Make the timeout non-blocking so it doesn't keep the event loop alive
|
|
465
|
+
forceExitTimeout.unref();
|
|
466
|
+
|
|
467
|
+
// Clean up and exit
|
|
468
|
+
await cleanupRunnerPid();
|
|
469
|
+
process.exit(1);
|
|
420
470
|
} finally {
|
|
421
471
|
// Always ensure PID cleanup at the end of execution
|
|
422
472
|
await cleanupRunnerPid();
|
package/src/core/task-runner.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
import { pathToFileURL } from "node:url";
|
|
3
3
|
import fs from "fs";
|
|
4
|
-
import { createLLM, getLLMEvents } from "../llm/index.js";
|
|
4
|
+
import { createLLM, createLLMWithOverride, getLLMEvents } from "../llm/index.js";
|
|
5
5
|
import { loadFreshModule } from "./module-loader.js";
|
|
6
6
|
import { loadEnvironment } from "./environment.js";
|
|
7
7
|
import { createTaskFileIO, generateLogName } from "./file-io.js";
|
|
@@ -353,7 +353,11 @@ export async function runPipeline(modulePath, initialContext = {}) {
|
|
|
353
353
|
initialContext.envLoaded = true;
|
|
354
354
|
}
|
|
355
355
|
|
|
356
|
-
if (!initialContext.llm)
|
|
356
|
+
if (!initialContext.llm) {
|
|
357
|
+
initialContext.llm = initialContext.llmOverride
|
|
358
|
+
? createLLMWithOverride(initialContext.llmOverride)
|
|
359
|
+
: createLLM();
|
|
360
|
+
}
|
|
357
361
|
|
|
358
362
|
const llmMetrics = [];
|
|
359
363
|
const llmEvents = getLLMEvents();
|
|
@@ -399,10 +403,16 @@ export async function runPipeline(modulePath, initialContext = {}) {
|
|
|
399
403
|
}
|
|
400
404
|
};
|
|
401
405
|
|
|
406
|
+
const onLLMError = (m) => llmMetrics.push({ ...m, failed: true });
|
|
407
|
+
|
|
402
408
|
llmEvents.on("llm:request:complete", onLLMComplete);
|
|
403
|
-
llmEvents.on("llm:request:error",
|
|
404
|
-
|
|
405
|
-
|
|
409
|
+
llmEvents.on("llm:request:error", onLLMError);
|
|
410
|
+
|
|
411
|
+
// Helper to clean up all LLM event listeners
|
|
412
|
+
const cleanupLLMListeners = () => {
|
|
413
|
+
llmEvents.off("llm:request:complete", onLLMComplete);
|
|
414
|
+
llmEvents.off("llm:request:error", onLLMError);
|
|
415
|
+
};
|
|
406
416
|
|
|
407
417
|
const abs = toAbsFileURL(modulePath);
|
|
408
418
|
const mod = await loadFreshModule(abs);
|
|
@@ -782,7 +792,7 @@ export async function runPipeline(modulePath, initialContext = {}) {
|
|
|
782
792
|
}
|
|
783
793
|
|
|
784
794
|
await tokenWriteQueue.catch(() => {});
|
|
785
|
-
|
|
795
|
+
cleanupLLMListeners();
|
|
786
796
|
|
|
787
797
|
// Fail immediately on any stage error
|
|
788
798
|
return {
|
|
@@ -801,7 +811,7 @@ export async function runPipeline(modulePath, initialContext = {}) {
|
|
|
801
811
|
// Flush any trailing token usage appends before cleanup
|
|
802
812
|
await tokenWriteQueue.catch(() => {}); // absorb last error to not mask pipeline result
|
|
803
813
|
|
|
804
|
-
|
|
814
|
+
cleanupLLMListeners();
|
|
805
815
|
|
|
806
816
|
// Write final status with currentStage: null to indicate completion
|
|
807
817
|
if (context.meta.workDir && context.meta.taskName) {
|
package/src/llm/index.js
CHANGED
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
claudeCodeChat,
|
|
8
8
|
isClaudeCodeAvailable,
|
|
9
9
|
} from "../providers/claude-code.js";
|
|
10
|
+
import { moonshotChat } from "../providers/moonshot.js";
|
|
10
11
|
import { EventEmitter } from "node:events";
|
|
11
12
|
import { getConfig } from "../core/config.js";
|
|
12
13
|
import {
|
|
@@ -62,6 +63,7 @@ export function getAvailableProviders() {
|
|
|
62
63
|
gemini: !!process.env.GEMINI_API_KEY,
|
|
63
64
|
zhipu: !!process.env.ZHIPU_API_KEY,
|
|
64
65
|
claudecode: isClaudeCodeAvailable(),
|
|
66
|
+
moonshot: !!process.env.MOONSHOT_API_KEY,
|
|
65
67
|
mock: !!mockProviderInstance,
|
|
66
68
|
};
|
|
67
69
|
}
|
|
@@ -582,6 +584,83 @@ export async function chat(options) {
|
|
|
582
584
|
totalTokens: promptTokens + completionTokens,
|
|
583
585
|
};
|
|
584
586
|
}
|
|
587
|
+
} else if (provider === "moonshot") {
|
|
588
|
+
logger.log("Using Moonshot provider");
|
|
589
|
+
const defaultAlias = DEFAULT_MODEL_BY_PROVIDER["moonshot"];
|
|
590
|
+
const defaultModelConfig = MODEL_CONFIG[defaultAlias];
|
|
591
|
+
const defaultModel = defaultModelConfig?.model;
|
|
592
|
+
|
|
593
|
+
// Infer JSON format if not explicitly provided
|
|
594
|
+
const effectiveResponseFormat =
|
|
595
|
+
responseFormat === undefined ||
|
|
596
|
+
responseFormat === null ||
|
|
597
|
+
responseFormat === ""
|
|
598
|
+
? shouldInferJsonFormat(messages)
|
|
599
|
+
? "json_object"
|
|
600
|
+
: undefined
|
|
601
|
+
: responseFormat;
|
|
602
|
+
|
|
603
|
+
const moonshotArgs = {
|
|
604
|
+
messages,
|
|
605
|
+
model: model || defaultModel,
|
|
606
|
+
temperature,
|
|
607
|
+
maxTokens,
|
|
608
|
+
...rest,
|
|
609
|
+
};
|
|
610
|
+
logger.log("Moonshot call parameters:", {
|
|
611
|
+
model: moonshotArgs.model,
|
|
612
|
+
hasMessages: !!moonshotArgs.messages,
|
|
613
|
+
messageCount: moonshotArgs.messages?.length,
|
|
614
|
+
});
|
|
615
|
+
if (stream !== undefined) moonshotArgs.stream = stream;
|
|
616
|
+
if (topP !== undefined) moonshotArgs.topP = topP;
|
|
617
|
+
if (frequencyPenalty !== undefined)
|
|
618
|
+
moonshotArgs.frequencyPenalty = frequencyPenalty;
|
|
619
|
+
if (presencePenalty !== undefined)
|
|
620
|
+
moonshotArgs.presencePenalty = presencePenalty;
|
|
621
|
+
if (stop !== undefined) moonshotArgs.stop = stop;
|
|
622
|
+
if (effectiveResponseFormat !== undefined) {
|
|
623
|
+
moonshotArgs.responseFormat = effectiveResponseFormat;
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
logger.log("Calling moonshotChat()...");
|
|
627
|
+
const result = await moonshotChat(moonshotArgs);
|
|
628
|
+
logger.log("moonshotChat() returned:", {
|
|
629
|
+
hasResult: !!result,
|
|
630
|
+
isStream: typeof result?.[Symbol.asyncIterator] !== "undefined",
|
|
631
|
+
hasContent: !!result?.content,
|
|
632
|
+
hasUsage: !!result?.usage,
|
|
633
|
+
});
|
|
634
|
+
|
|
635
|
+
// Streaming mode - return async generator directly
|
|
636
|
+
if (stream && typeof result?.[Symbol.asyncIterator] !== "undefined") {
|
|
637
|
+
return result;
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
response = {
|
|
641
|
+
content: result.content,
|
|
642
|
+
raw: result.raw,
|
|
643
|
+
};
|
|
644
|
+
|
|
645
|
+
// Use actual usage from moonshot API if available; otherwise estimate
|
|
646
|
+
if (result?.usage) {
|
|
647
|
+
const { prompt_tokens, completion_tokens, total_tokens } = result.usage;
|
|
648
|
+
usage = {
|
|
649
|
+
promptTokens: prompt_tokens,
|
|
650
|
+
completionTokens: completion_tokens,
|
|
651
|
+
totalTokens: total_tokens,
|
|
652
|
+
};
|
|
653
|
+
} else {
|
|
654
|
+
const promptTokens = estimateTokens(systemMsg + userMsg);
|
|
655
|
+
const completionTokens = estimateTokens(
|
|
656
|
+
typeof result === "string" ? result : JSON.stringify(result)
|
|
657
|
+
);
|
|
658
|
+
usage = {
|
|
659
|
+
promptTokens,
|
|
660
|
+
completionTokens,
|
|
661
|
+
totalTokens: promptTokens + completionTokens,
|
|
662
|
+
};
|
|
663
|
+
}
|
|
585
664
|
} else {
|
|
586
665
|
logger.error("Unknown provider:", provider);
|
|
587
666
|
throw new Error(`Provider ${provider} not yet implemented`);
|
|
@@ -806,6 +885,75 @@ export function createNamedModelsAPI() {
|
|
|
806
885
|
return buildProviderFunctions(MODEL_CONFIG);
|
|
807
886
|
}
|
|
808
887
|
|
|
888
|
+
// Create LLM with pipeline-level override
|
|
889
|
+
// When override is set, all provider method calls are intercepted and routed to the override provider/model
|
|
890
|
+
export function createLLMWithOverride(override) {
|
|
891
|
+
logger.log("createLLMWithOverride called", {
|
|
892
|
+
hasOverride: !!override,
|
|
893
|
+
overrideProvider: override?.provider,
|
|
894
|
+
overrideModel: override?.model,
|
|
895
|
+
});
|
|
896
|
+
|
|
897
|
+
if (!override?.provider) {
|
|
898
|
+
logger.log("No override provider, returning standard LLM");
|
|
899
|
+
return createLLM();
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
logger.log("Creating LLM with override proxy", {
|
|
903
|
+
provider: override.provider,
|
|
904
|
+
model: override.model,
|
|
905
|
+
});
|
|
906
|
+
|
|
907
|
+
const baseLLM = createLLM();
|
|
908
|
+
|
|
909
|
+
return new Proxy(baseLLM, {
|
|
910
|
+
get(target, providerKey) {
|
|
911
|
+
const providerObj = target[providerKey];
|
|
912
|
+
if (typeof providerObj !== "object" || providerObj === null) {
|
|
913
|
+
return providerObj;
|
|
914
|
+
}
|
|
915
|
+
|
|
916
|
+
return new Proxy(providerObj, {
|
|
917
|
+
get(providerTarget, methodKey) {
|
|
918
|
+
// Skip non-string keys (symbols, etc.)
|
|
919
|
+
if (typeof methodKey !== "string") {
|
|
920
|
+
return providerTarget[methodKey];
|
|
921
|
+
}
|
|
922
|
+
|
|
923
|
+
// Skip built-in/serialization methods to prevent spurious API calls
|
|
924
|
+
// when the LLM object is serialized (e.g., JSON.stringify, logging)
|
|
925
|
+
const builtInMethods = ['toJSON', 'toString', 'valueOf', 'then', 'catch', 'finally', 'constructor'];
|
|
926
|
+
if (builtInMethods.includes(methodKey)) {
|
|
927
|
+
return providerTarget[methodKey];
|
|
928
|
+
}
|
|
929
|
+
|
|
930
|
+
// Log interception
|
|
931
|
+
logger.log("LLM call intercepted by override", {
|
|
932
|
+
originalProvider: providerKey,
|
|
933
|
+
originalMethod: methodKey,
|
|
934
|
+
overrideProvider: override.provider,
|
|
935
|
+
overrideModel: override.model,
|
|
936
|
+
});
|
|
937
|
+
|
|
938
|
+
// When override is active, return a function for ANY method key
|
|
939
|
+
// This routes all method calls to the override provider/model
|
|
940
|
+
return (options = {}) =>
|
|
941
|
+
chat({
|
|
942
|
+
...options,
|
|
943
|
+
provider: override.provider,
|
|
944
|
+
model: override.model,
|
|
945
|
+
metadata: {
|
|
946
|
+
...options.metadata,
|
|
947
|
+
originalProvider: providerKey,
|
|
948
|
+
originalModel: options.model,
|
|
949
|
+
},
|
|
950
|
+
});
|
|
951
|
+
},
|
|
952
|
+
});
|
|
953
|
+
},
|
|
954
|
+
});
|
|
955
|
+
}
|
|
956
|
+
|
|
809
957
|
// Separate function for high-level LLM interface (used by llm.test.js)
|
|
810
958
|
export function createHighLevelLLM(options = {}) {
|
|
811
959
|
// Skip config check in tests to avoid PO_ROOT requirement
|