@ryanfw/prompt-orchestration-pipeline 0.16.4 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@ryanfw/prompt-orchestration-pipeline",
-  "version": "0.16.4",
+  "version": "0.17.0",
   "description": "A Prompt-orchestration pipeline (POP) is a framework for building, running, and experimenting with complex chains of LLM tasks.",
   "type": "module",
   "main": "src/ui/server.js",
@@ -2,7 +2,7 @@
  * Canonical model configuration for prompt orchestration pipeline.
  * This module serves as single source of truth for all model metadata.
  *
- * Last updated: December 2025
+ * Last updated: January 2026
  */

 // Model alias constants grouped by provider
@@ -46,6 +46,16 @@ export const ModelAlias = Object.freeze({
   CLAUDE_CODE_SONNET: "claudecode:sonnet",
   CLAUDE_CODE_OPUS: "claudecode:opus",
   CLAUDE_CODE_HAIKU: "claudecode:haiku",
+
+  // Moonshot/Kimi (K2/K2.5 series as of Jan 2026)
+  MOONSHOT_K2: "moonshot:kimi-k2",
+  MOONSHOT_K2_TURBO: "moonshot:kimi-k2-turbo",
+  MOONSHOT_K2_THINKING: "moonshot:kimi-k2-thinking",
+  MOONSHOT_K2_THINKING_TURBO: "moonshot:kimi-k2-thinking-turbo",
+  MOONSHOT_K2_5: "moonshot:kimi-k2.5",
+  MOONSHOT_LATEST_8K: "moonshot:kimi-latest-8k",
+  MOONSHOT_LATEST_32K: "moonshot:kimi-latest-32k",
+  MOONSHOT_LATEST_128K: "moonshot:kimi-latest-128k",
 });

 // Consolidated model configuration with pricing metadata
@@ -225,6 +235,57 @@ export const MODEL_CONFIG = Object.freeze({
     tokenCostInPerMillion: 0,
     tokenCostOutPerMillion: 0,
   },
+
+  // ─── Moonshot/Kimi (Jan 2026) ───
+  // K2 and K2.5 series with built-in context caching
+  [ModelAlias.MOONSHOT_K2]: {
+    provider: "moonshot",
+    model: "kimi-k2",
+    tokenCostInPerMillion: 0.6,
+    tokenCostOutPerMillion: 2.5,
+  },
+  [ModelAlias.MOONSHOT_K2_TURBO]: {
+    provider: "moonshot",
+    model: "kimi-k2-turbo",
+    tokenCostInPerMillion: 1.15,
+    tokenCostOutPerMillion: 8.0,
+  },
+  [ModelAlias.MOONSHOT_K2_THINKING]: {
+    provider: "moonshot",
+    model: "kimi-k2-thinking",
+    tokenCostInPerMillion: 1.15,
+    tokenCostOutPerMillion: 8.0,
+  },
+  [ModelAlias.MOONSHOT_K2_THINKING_TURBO]: {
+    provider: "moonshot",
+    model: "kimi-k2-thinking-turbo",
+    tokenCostInPerMillion: 1.15,
+    tokenCostOutPerMillion: 8.0,
+  },
+  [ModelAlias.MOONSHOT_K2_5]: {
+    provider: "moonshot",
+    model: "kimi-k2.5", // Latest model (Jan 2026)
+    tokenCostInPerMillion: 0.6,
+    tokenCostOutPerMillion: 3.0,
+  },
+  [ModelAlias.MOONSHOT_LATEST_8K]: {
+    provider: "moonshot",
+    model: "kimi-latest-8k", // 8K context tier
+    tokenCostInPerMillion: 0.2,
+    tokenCostOutPerMillion: 2.0,
+  },
+  [ModelAlias.MOONSHOT_LATEST_32K]: {
+    provider: "moonshot",
+    model: "kimi-latest-32k", // 32K context tier
+    tokenCostInPerMillion: 1.0,
+    tokenCostOutPerMillion: 3.0,
+  },
+  [ModelAlias.MOONSHOT_LATEST_128K]: {
+    provider: "moonshot",
+    model: "kimi-latest-128k", // 128K context tier
+    tokenCostInPerMillion: 2.0,
+    tokenCostOutPerMillion: 5.0,
+  },
 });

 // Validation set of all valid model aliases
@@ -238,6 +299,7 @@ export const DEFAULT_MODEL_BY_PROVIDER = Object.freeze({
   zhipu: ModelAlias.ZAI_GLM_4_6,
   anthropic: ModelAlias.ANTHROPIC_OPUS_4_5, // Updated: Opus 4.5 available at better price
   claudecode: ModelAlias.CLAUDE_CODE_SONNET,
+  moonshot: ModelAlias.MOONSHOT_K2_5, // Latest K2.5 model (Jan 2026)
 });

 /**
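Between the new aliases, the per-model pricing, and the provider default, downstream cost accounting needs no extra wiring. A minimal sketch of how the metadata composes (the estimateCostUSD helper and the import path are illustrative, not part of the package):

    import { MODEL_CONFIG, DEFAULT_MODEL_BY_PROVIDER } from "./model-config.js";

    // Hypothetical helper: price a request from the table above.
    function estimateCostUSD(alias, promptTokens, completionTokens) {
      const cfg = MODEL_CONFIG[alias];
      if (!cfg) throw new Error(`Unknown model alias: ${alias}`);
      return (
        (promptTokens / 1e6) * cfg.tokenCostInPerMillion +
        (completionTokens / 1e6) * cfg.tokenCostOutPerMillion
      );
    }

    // kimi-k2.5 at $0.60/M in and $3.00/M out: 100k in + 10k out ≈ $0.09
    const alias = DEFAULT_MODEL_BY_PROVIDER.moonshot; // "moonshot:kimi-k2.5"
    console.log(estimateCostUSD(alias, 100_000, 10_000));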
@@ -245,7 +245,14 @@ export async function startOrchestrator(opts) {

   watcher.on("add", (file) => {
     // Return promise so tests awaiting the add handler block until processing completes
-    return handleSeedAdd(file);
+    // Catch rejections to prevent unhandled promise rejection crashes
+    return handleSeedAdd(file).catch((error) => {
+      logger.error("Failed to handle seed file", {
+        file,
+        error: error instanceof Error ? error.message : String(error),
+        stack: error instanceof Error ? error.stack : undefined,
+      });
+    });
   });

   async function stop() {
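The added .catch matters because the watcher is an EventEmitter, and an EventEmitter never awaits its listeners: a rejected promise returned from one becomes an unhandled rejection. A standalone illustration:

    import { EventEmitter } from "node:events";

    const watcher = new EventEmitter();
    // The emitter discards the returned promise, so this rejection is unhandled:
    watcher.on("add", async () => { throw new Error("bad seed"); });

    process.on("unhandledRejection", (reason) =>
      console.error("unhandled:", reason.message)
    );
    watcher.emit("add", "seed.json");
    // -> "unhandled: bad seed" unless the listener catches its own errors,
    //    which is exactly what the orchestrator's .catch now does.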
@@ -380,65 +387,30 @@ function spawnRunner(

   child.on("exit", (code, signal) => {
     running.delete(jobId);
-
-    // Write job completion log synchronously
-    if (fileIO) {
-      try {
-        fileIO.writeLogSync(
-          generateLogName(jobId, "orchestrator", LogEvent.COMPLETE),
-          JSON.stringify(
-            {
-              jobId,
-              exitCode: code,
-              signal: signal,
-              timestamp: new Date().toISOString(),
-              completionType: code === 0 ? "success" : "failure",
-            },
-            null,
-            2
-          ),
-          { mode: "replace" }
-        );
-      } catch (error) {
-        logger.error("Failed to write job completion log", {
-          jobId,
-          error: error.message,
-        });
-      }
-    }
+    // Note: We intentionally don't write completion logs here because
+    // the pipeline-runner moves the job directory from current/ to complete/
+    // before exiting. Writing here would create a ghost directory under current/
+    // due to the race condition between fs.rename() and this exit handler.
+    // The pipeline-runner already writes its own execution logs and runs.jsonl.
+    logger.log("Pipeline runner exited", {
+      jobId,
+      exitCode: code,
+      signal: signal,
+      completionType: code === 0 ? "success" : "failure",
+    });
   });

   child.on("error", (error) => {
     running.delete(jobId);
-
-    // Write job error log synchronously
-    if (fileIO) {
-      try {
-        fileIO.writeLogSync(
-          generateLogName(jobId, "orchestrator", LogEvent.ERROR),
-          JSON.stringify(
-            {
-              jobId,
-              error: {
-                message: error.message,
-                name: error.name,
-                code: error.code,
-              },
-              timestamp: new Date().toISOString(),
-              completionType: "error",
-            },
-            null,
-            2
-          ),
-          { mode: "replace" }
-        );
-      } catch (logError) {
-        logger.error("Failed to write job error log", {
-          jobId,
-          error: logError.message,
-        });
-      }
-    }
+    // Log spawn errors but don't write to filesystem to avoid race conditions
+    logger.error("Pipeline runner spawn error", {
+      jobId,
+      error: {
+        message: error.message,
+        name: error.name,
+        code: error.code,
+      },
+    });
   });

   // In test mode: return immediately; in real mode you might await readiness
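The ghost-directory race the new comment describes is easy to reproduce in isolation: once the job directory has been renamed away, a late write that recreates the path resurrects it. A minimal sketch (paths are illustrative; the recursive mkdir stands in for what the synchronous log write effectively did):

    import fs from "node:fs";

    fs.mkdirSync("complete", { recursive: true });
    fs.mkdirSync("current/job-1", { recursive: true });
    fs.renameSync("current/job-1", "complete/job-1"); // the runner's final move

    // A log write arriving after the rename recreates the old path:
    fs.mkdirSync("current/job-1", { recursive: true });
    fs.writeFileSync("current/job-1/job-complete.log", "{}");
    console.log(fs.existsSync("current/job-1")); // true -- ghost directory is back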
@@ -2,6 +2,27 @@ import fs from "node:fs/promises";
 import fsSync from "node:fs";
 import path from "node:path";
 import { runPipeline } from "./task-runner.js";
+
+// Global unhandled rejection handler to prevent hanging on unexpected errors
+// This must be registered early before any async operations
+process.on("unhandledRejection", (reason, promise) => {
+  console.error("[PipelineRunner] Unhandled promise rejection:", reason);
+  console.error("[PipelineRunner] Promise:", promise);
+  // Force exit after a brief delay to allow logs to flush
+  setTimeout(() => {
+    console.error("[PipelineRunner] Forcing exit due to unhandled rejection");
+    process.exit(1);
+  }, 100);
+});
+
+process.on("uncaughtException", (error) => {
+  console.error("[PipelineRunner] Uncaught exception:", error);
+  // Force exit after a brief delay to allow logs to flush
+  setTimeout(() => {
+    console.error("[PipelineRunner] Forcing exit due to uncaught exception");
+    process.exit(1);
+  }, 100);
+});
 import { loadFreshModule } from "./module-loader.js";
 import { validatePipelineOrThrow } from "./validation.js";
 import { getPipelineConfig } from "./config.js";
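One subtlety behind these handlers: since Node 15 an unhandled rejection crashes the process by default, but registering an unhandledRejection listener suppresses that default, so the handler itself must arrange the exit, as the runner does with its delayed process.exit(1). In miniature:

    // With no listener registered, Node v15+ would terminate on the rejection below.
    process.on("unhandledRejection", (reason) => {
      console.error("caught:", reason instanceof Error ? reason.message : reason);
      // Returning here without exiting would leave the process running;
      // hence the runner's explicit setTimeout + process.exit(1).
    });
    Promise.reject(new Error("demo"));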
@@ -106,6 +127,9 @@ const pipeline = JSON.parse(await fs.readFile(PIPELINE_DEF_PATH, "utf8"));
 // Validate pipeline format early with a friendly error message
 validatePipelineOrThrow(pipeline, PIPELINE_DEF_PATH);

+// Extract optional LLM override from pipeline config
+const llmOverride = pipeline.llm || null;
+
 const taskNames = pipeline.tasks.map(getTaskName);

 const tasks = (await loadFreshModule(TASK_REGISTRY)).default;
@@ -203,6 +227,7 @@ try {
       taskConfig: pipeline.taskConfig?.[taskName] || {},
       statusPath: tasksStatusPath,
       jobId,
+      llmOverride,
       meta: {
         pipelineTasks: [...pipeline.tasks],
      },
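Since the override is read straight off the parsed pipeline definition (pipeline.llm), pinning an entire pipeline to one provider is a one-field change. A sketch of what such a definition might look like (the task names and taskConfig entry are illustrative; tasks, taskConfig, and llm are the fields visible in this diff):

    {
      "tasks": ["extract", "summarize"],
      "taskConfig": { "summarize": { "style": "brief" } },
      "llm": { "provider": "moonshot", "model": "kimi-k2.5" }
    }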
@@ -416,7 +441,32 @@ try {
     await cleanupTaskSymlinks(dest);
   }
 } catch (error) {
-  throw error;
+  // Log the error with full context instead of re-throwing
+  // Re-throwing at top-level causes unhandled promise rejection and hanging
+  logger.error("Pipeline execution failed with unhandled error", {
+    jobId,
+    pipelineSlug,
+    error: normalizeError(error),
+  });
+
+  console.error("[PipelineRunner] Fatal error:", error);
+
+  // Ensure we exit with failure code
+  process.exitCode = 1;
+
+  // Set a forced exit timeout to prevent indefinite hanging
+  // This catches cases where cleanup or logging doesn't complete
+  const forceExitTimeout = setTimeout(() => {
+    console.error("[PipelineRunner] Force exit timeout reached, terminating process");
+    process.exit(1);
+  }, 5000);
+
+  // Make the timeout non-blocking so it doesn't keep the event loop alive
+  forceExitTimeout.unref();
+
+  // Clean up and exit
+  await cleanupRunnerPid();
+  process.exit(1);
 } finally {
   // Always ensure PID cleanup at the end of execution
   await cleanupRunnerPid();
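The watchdog pattern here leans on a detail of unref(): an unref'd timer never keeps the event loop alive by itself, but it still fires if something else does. A small demonstration:

    const watchdog = setTimeout(() => {
      console.error("force exit");
      process.exit(1);
    }, 1000);
    watchdog.unref();

    // Simulates cleanup that hangs while holding the event loop open:
    setInterval(() => {}, 10_000);
    // -> "force exit" after ~1s. Without the hung interval the process would
    //    have exited on its own and the watchdog would never need to fire.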
@@ -1,7 +1,7 @@
 import path from "node:path";
 import { pathToFileURL } from "node:url";
 import fs from "fs";
-import { createLLM, getLLMEvents } from "../llm/index.js";
+import { createLLM, createLLMWithOverride, getLLMEvents } from "../llm/index.js";
 import { loadFreshModule } from "./module-loader.js";
 import { loadEnvironment } from "./environment.js";
 import { createTaskFileIO, generateLogName } from "./file-io.js";
@@ -353,7 +353,11 @@ export async function runPipeline(modulePath, initialContext = {}) {
     initialContext.envLoaded = true;
   }

-  if (!initialContext.llm) initialContext.llm = createLLM();
+  if (!initialContext.llm) {
+    initialContext.llm = initialContext.llmOverride
+      ? createLLMWithOverride(initialContext.llmOverride)
+      : createLLM();
+  }

   const llmMetrics = [];
   const llmEvents = getLLMEvents();
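Because the guard only constructs an LLM when the caller hasn't supplied one, tests can bypass both branches by injecting a stub up front (the stub's shape and the module path are assumed for illustration; only the llm key is load-bearing):

    import { runPipeline } from "./src/core/task-runner.js"; // path illustrative

    // Hypothetical test double -- runPipeline will use it verbatim:
    const stubLLM = {
      moonshot: { chat: async () => ({ content: "stubbed" }) },
    };
    await runPipeline("./tasks/example/index.js", { llm: stubLLM });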
@@ -399,10 +403,16 @@ export async function runPipeline(modulePath, initialContext = {}) {
     }
   };

+  const onLLMError = (m) => llmMetrics.push({ ...m, failed: true });
+
   llmEvents.on("llm:request:complete", onLLMComplete);
-  llmEvents.on("llm:request:error", (m) =>
-    llmMetrics.push({ ...m, failed: true })
-  );
+  llmEvents.on("llm:request:error", onLLMError);
+
+  // Helper to clean up all LLM event listeners
+  const cleanupLLMListeners = () => {
+    llmEvents.off("llm:request:complete", onLLMComplete);
+    llmEvents.off("llm:request:error", onLLMError);
+  };

   const abs = toAbsFileURL(modulePath);
   const mod = await loadFreshModule(abs);
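Naming the error handler is what makes the new cleanupLLMListeners possible: emitter.off removes listeners by reference, so the old inline arrow could never be detached and leaked across runPipeline calls. The failure mode in isolation:

    import { EventEmitter } from "node:events";
    const ev = new EventEmitter();

    // Two textually identical arrows are different objects, so off() is a no-op:
    ev.on("llm:request:error", (m) => console.log(m));
    ev.off("llm:request:error", (m) => console.log(m));
    console.log(ev.listenerCount("llm:request:error")); // 1 -- leaked

    // A named handler can be removed:
    const onLLMError = (m) => console.log(m);
    ev.on("llm:request:error", onLLMError);
    ev.off("llm:request:error", onLLMError);
    console.log(ev.listenerCount("llm:request:error")); // 1 -- only the leak remains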
@@ -782,7 +792,7 @@ export async function runPipeline(modulePath, initialContext = {}) {
   }

   await tokenWriteQueue.catch(() => {});
-  llmEvents.off("llm:request:complete", onLLMComplete);
+  cleanupLLMListeners();

   // Fail immediately on any stage error
   return {
@@ -801,7 +811,7 @@ export async function runPipeline(modulePath, initialContext = {}) {
   // Flush any trailing token usage appends before cleanup
   await tokenWriteQueue.catch(() => {}); // absorb last error to not mask pipeline result

-  llmEvents.off("llm:request:complete", onLLMComplete);
+  cleanupLLMListeners();

   // Write final status with currentStage: null to indicate completion
   if (context.meta.workDir && context.meta.taskName) {
package/src/llm/index.js CHANGED
@@ -7,6 +7,7 @@ import {
   claudeCodeChat,
   isClaudeCodeAvailable,
 } from "../providers/claude-code.js";
+import { moonshotChat } from "../providers/moonshot.js";
 import { EventEmitter } from "node:events";
 import { getConfig } from "../core/config.js";
 import {
@@ -62,6 +63,7 @@ export function getAvailableProviders() {
     gemini: !!process.env.GEMINI_API_KEY,
     zhipu: !!process.env.ZHIPU_API_KEY,
     claudecode: isClaudeCodeAvailable(),
+    moonshot: !!process.env.MOONSHOT_API_KEY,
     mock: !!mockProviderInstance,
   };
 }
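As with the other key-gated providers, Moonshot availability is a pure environment check. A quick way to confirm the wiring (import path relative to the package root; the key is a placeholder):

    // export MOONSHOT_API_KEY=sk-...
    import { getAvailableProviders } from "./src/llm/index.js";

    console.log(getAvailableProviders().moonshot); // true only when the env var is set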
@@ -582,6 +584,83 @@ export async function chat(options) {
        totalTokens: promptTokens + completionTokens,
      };
    }
+  } else if (provider === "moonshot") {
+    logger.log("Using Moonshot provider");
+    const defaultAlias = DEFAULT_MODEL_BY_PROVIDER["moonshot"];
+    const defaultModelConfig = MODEL_CONFIG[defaultAlias];
+    const defaultModel = defaultModelConfig?.model;
+
+    // Infer JSON format if not explicitly provided
+    const effectiveResponseFormat =
+      responseFormat === undefined ||
+      responseFormat === null ||
+      responseFormat === ""
+        ? shouldInferJsonFormat(messages)
+          ? "json_object"
+          : undefined
+        : responseFormat;
+
+    const moonshotArgs = {
+      messages,
+      model: model || defaultModel,
+      temperature,
+      maxTokens,
+      ...rest,
+    };
+    logger.log("Moonshot call parameters:", {
+      model: moonshotArgs.model,
+      hasMessages: !!moonshotArgs.messages,
+      messageCount: moonshotArgs.messages?.length,
+    });
+    if (stream !== undefined) moonshotArgs.stream = stream;
+    if (topP !== undefined) moonshotArgs.topP = topP;
+    if (frequencyPenalty !== undefined)
+      moonshotArgs.frequencyPenalty = frequencyPenalty;
+    if (presencePenalty !== undefined)
+      moonshotArgs.presencePenalty = presencePenalty;
+    if (stop !== undefined) moonshotArgs.stop = stop;
+    if (effectiveResponseFormat !== undefined) {
+      moonshotArgs.responseFormat = effectiveResponseFormat;
+    }
+
+    logger.log("Calling moonshotChat()...");
+    const result = await moonshotChat(moonshotArgs);
+    logger.log("moonshotChat() returned:", {
+      hasResult: !!result,
+      isStream: typeof result?.[Symbol.asyncIterator] !== "undefined",
+      hasContent: !!result?.content,
+      hasUsage: !!result?.usage,
+    });
+
+    // Streaming mode - return async generator directly
+    if (stream && typeof result?.[Symbol.asyncIterator] !== "undefined") {
+      return result;
+    }
+
+    response = {
+      content: result.content,
+      raw: result.raw,
+    };
+
+    // Use actual usage from moonshot API if available; otherwise estimate
+    if (result?.usage) {
+      const { prompt_tokens, completion_tokens, total_tokens } = result.usage;
+      usage = {
+        promptTokens: prompt_tokens,
+        completionTokens: completion_tokens,
+        totalTokens: total_tokens,
+      };
+    } else {
+      const promptTokens = estimateTokens(systemMsg + userMsg);
+      const completionTokens = estimateTokens(
+        typeof result === "string" ? result : JSON.stringify(result)
+      );
+      usage = {
+        promptTokens,
+        completionTokens,
+        totalTokens: promptTokens + completionTokens,
+      };
+    }
   } else {
     logger.error("Unknown provider:", provider);
     throw new Error(`Provider ${provider} not yet implemented`);
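A call through the new branch, for orientation (option names follow the destructuring visible in this hunk; the full return shape of chat() isn't shown in this diff, so only content is assumed here):

    import { chat } from "./src/llm/index.js";

    const res = await chat({
      provider: "moonshot",
      model: "kimi-k2-thinking",
      messages: [
        { role: "system", content: "You are terse." },
        { role: "user", content: "Summarize: ..." },
      ],
      temperature: 0.3,
      maxTokens: 512,
      // Omit responseFormat and let shouldInferJsonFormat() decide.
    });
    console.log(res.content);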
@@ -806,6 +885,75 @@ export function createNamedModelsAPI() {
   return buildProviderFunctions(MODEL_CONFIG);
 }

+// Create LLM with pipeline-level override
+// When override is set, all provider method calls are intercepted and routed to the override provider/model
+export function createLLMWithOverride(override) {
+  logger.log("createLLMWithOverride called", {
+    hasOverride: !!override,
+    overrideProvider: override?.provider,
+    overrideModel: override?.model,
+  });
+
+  if (!override?.provider) {
+    logger.log("No override provider, returning standard LLM");
+    return createLLM();
+  }
+
+  logger.log("Creating LLM with override proxy", {
+    provider: override.provider,
+    model: override.model,
+  });
+
+  const baseLLM = createLLM();
+
+  return new Proxy(baseLLM, {
+    get(target, providerKey) {
+      const providerObj = target[providerKey];
+      if (typeof providerObj !== "object" || providerObj === null) {
+        return providerObj;
+      }
+
+      return new Proxy(providerObj, {
+        get(providerTarget, methodKey) {
+          // Skip non-string keys (symbols, etc.)
+          if (typeof methodKey !== "string") {
+            return providerTarget[methodKey];
+          }
+
+          // Skip built-in/serialization methods to prevent spurious API calls
+          // when the LLM object is serialized (e.g., JSON.stringify, logging)
+          const builtInMethods = ['toJSON', 'toString', 'valueOf', 'then', 'catch', 'finally', 'constructor'];
+          if (builtInMethods.includes(methodKey)) {
+            return providerTarget[methodKey];
+          }
+
+          // Log interception
+          logger.log("LLM call intercepted by override", {
+            originalProvider: providerKey,
+            originalMethod: methodKey,
+            overrideProvider: override.provider,
+            overrideModel: override.model,
+          });
+
+          // When override is active, return a function for ANY method key
+          // This routes all method calls to the override provider/model
+          return (options = {}) =>
+            chat({
+              ...options,
+              provider: override.provider,
+              model: override.model,
+              metadata: {
+                ...options.metadata,
+                originalProvider: providerKey,
+                originalModel: options.model,
+              },
+            });
+        },
+      });
+    },
+  });
+}
+
 // Separate function for high-level LLM interface (used by llm.test.js)
 export function createHighLevelLLM(options = {}) {
   // Skip config check in tests to avoid PO_ROOT requirement
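Usage-wise, the proxy means any provider-and-method pair the task code happens to call gets rerouted; the method name below is arbitrary, since the inner get trap returns a routing function for every non-built-in string key. A sketch (assumes the base LLM exposes an anthropic namespace, as the provider tables suggest):

    import { createLLMWithOverride } from "./src/llm/index.js";

    const llm = createLLMWithOverride({ provider: "moonshot", model: "kimi-k2.5" });

    // Task code written against another provider still works unchanged:
    const out = await llm.anthropic.someMethod({
      messages: [{ role: "user", content: "hello" }],
    });
    // -> dispatched as chat({ provider: "moonshot", model: "kimi-k2.5", ... })
    //    with the original provider and model recorded under metadata.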