@oisincoveney/pipeline 2.3.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,13 @@ version: 1
2
2
  default_workflow: inspect
3
3
  orchestrator:
4
4
  profile: moka-orchestrator
5
+ token_budget:
6
+ default_context_window: 200000
7
+ max_context_pct: 50
8
+ fan_out_width:
9
+ default: 4
10
+ by_category:
11
+ green: 2
5
12
  entrypoints:
6
13
  quick:
7
14
  schedule: quick-schedule
@@ -51,6 +51,7 @@ function parsePipelineConfigParts(sources, projectRoot, sourcePaths = {
51
51
  schedules: pipeline.schedules,
52
52
  skills: profiles.skills,
53
53
  ...pipeline.task_context ? { task_context: pipeline.task_context } : {},
54
+ token_budget: pipeline.token_budget,
54
55
  version: 1,
55
56
  workflows: pipeline.workflows
56
57
  }, projectRoot, options);
@@ -116,6 +116,7 @@ declare const workflowNodeBaseSchema: z.ZodObject<{
116
116
  }, z.core.$strip>;
117
117
  type WorkflowNodeBase = z.infer<typeof workflowNodeBaseSchema>;
118
118
  type AgentWorkflowNode = WorkflowNodeBase & {
119
+ category?: string;
119
120
  kind: "agent";
120
121
  profile: string;
121
122
  };
@@ -491,6 +492,15 @@ declare const configSchema: z.ZodObject<{
491
492
  task_context: z.ZodOptional<z.ZodObject<{
492
493
  type: z.ZodString;
493
494
  }, z.core.$loose>>;
495
+ token_budget: z.ZodDefault<z.ZodObject<{
496
+ default_context_window: z.ZodDefault<z.ZodNumber>;
497
+ max_context_pct: z.ZodDefault<z.ZodNumber>;
498
+ model_context_windows: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodNumber>>;
499
+ fan_out_width: z.ZodDefault<z.ZodObject<{
500
+ default: z.ZodDefault<z.ZodNumber>;
501
+ by_category: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodNumber>>;
502
+ }, z.core.$strict>>;
503
+ }, z.core.$strict>>;
494
504
  version: z.ZodLiteral<1>;
495
505
  workflows: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
496
506
  description: z.ZodOptional<z.ZodString>;
@@ -380,6 +380,7 @@ const workflowNodeBaseSchema = z.object({
380
380
  });
381
381
  const workflowNodeSchema = z.lazy(() => z.discriminatedUnion("kind", [
382
382
  workflowNodeBaseSchema.extend({
383
+ category: z.string().min(1).optional(),
383
384
  kind: z.literal("agent"),
384
385
  profile: z.string()
385
386
  }).strict(),
@@ -438,6 +439,28 @@ const profilesFileSchema = z.object({
438
439
  skills: strictRecord(pathRefSchema).default({}),
439
440
  version: z.literal(1)
440
441
  }).strict();
442
+ const fanOutWidthSchema = z.object({
443
+ default: z.number().int().positive().default(4),
444
+ by_category: strictRecord(z.number().int().positive()).default({})
445
+ }).strict();
446
+ const tokenBudgetSchema = z.object({
447
+ default_context_window: z.number().int().positive().default(2e5),
448
+ max_context_pct: z.number().positive().max(100).default(50),
449
+ model_context_windows: strictRecord(z.number().int().positive()).default({}),
450
+ fan_out_width: fanOutWidthSchema.default({
451
+ default: 4,
452
+ by_category: {}
453
+ })
454
+ }).strict();
455
+ const DEFAULT_TOKEN_BUDGET = {
456
+ default_context_window: 2e5,
457
+ max_context_pct: 50,
458
+ model_context_windows: {},
459
+ fan_out_width: {
460
+ default: 4,
461
+ by_category: {}
462
+ }
463
+ };
441
464
  const pipelineFileSchema = z.object({
442
465
  default_workflow: z.string(),
443
466
  entrypoints: strictRecord(entrypointSchema).default({}),
@@ -459,6 +482,7 @@ const pipelineFileSchema = z.object({
459
482
  }),
460
483
  schedules: strictRecord(schedulePolicySchema).default({}),
461
484
  task_context: taskContextResolverSchema.optional(),
485
+ token_budget: tokenBudgetSchema.default(DEFAULT_TOKEN_BUDGET),
462
486
  workflows: strictRecord(workflowSchema).default({}),
463
487
  version: z.literal(1)
464
488
  }).strict();
@@ -489,6 +513,7 @@ const configSchema = z.object({
489
513
  schedules: strictRecord(schedulePolicySchema).default({}),
490
514
  skills: strictRecord(pathRefSchema).default({}),
491
515
  task_context: taskContextResolverSchema.optional(),
516
+ token_budget: tokenBudgetSchema.default(DEFAULT_TOKEN_BUDGET),
492
517
  version: z.literal(1),
493
518
  workflows: strictRecord(workflowSchema).default({})
494
519
  }).strict().superRefine(validateConfigReferences);
@@ -35,12 +35,28 @@ function validatePipelineConfig(rawConfig, projectRoot, options = {}) {
35
35
  validateProfile(profileId, profile, runner, config, issues, projectRoot, options);
36
36
  }
37
37
  validateHookConfig(config, issues, projectRoot, options);
38
+ validateTokenBudget(config, issues);
38
39
  for (const [ruleId, rule] of Object.entries(config.rules)) validatePath(`rules.${ruleId}.path`, rule, projectRoot, issues, options);
39
40
  for (const [skillId, skill] of Object.entries(config.skills)) validatePath(`skills.${skillId}.path`, skill, projectRoot, issues, options);
40
41
  for (const [workflowId, workflow] of Object.entries(config.workflows)) validateWorkflow(workflowId, workflow, config, issues, projectRoot, options);
41
42
  if (issues.length > 0) throw validationError(issues);
42
43
  return config;
43
44
  }
45
+ function knownNodeCategories(config) {
46
+ const categories = /* @__PURE__ */ new Set();
47
+ for (const catalog of Object.values(config.scheduler.node_catalogs)) {
48
+ for (const category of catalog.required_categories) categories.add(category);
49
+ for (const node of Object.values(catalog.nodes)) categories.add(node.category);
50
+ }
51
+ return categories;
52
+ }
53
+ function validateTokenBudget(config, issues) {
54
+ const known = knownNodeCategories(config);
55
+ for (const category of Object.keys(config.token_budget.fan_out_width.by_category)) if (!known.has(category)) issues.push({
56
+ path: `token_budget.fan_out_width.by_category.${category}`,
57
+ message: `fan-out width cap references unknown node category '${category}'`
58
+ });
59
+ }
44
60
  function validateRegistryIds(name, registry, issues) {
45
61
  for (const id of Object.keys(registry)) if (!ID_RE.test(id)) issues.push({
46
62
  path: `${name}.${id}`,
@@ -1,21 +1,42 @@
1
1
  //#region src/model-resolver.ts
2
2
  const DISABLED_MODELS_ENV = "PIPELINE_DISABLED_MODELS";
3
- function selectNodeModel(node) {
4
- return fallbackModelSelection(node.models ?? []);
3
+ function selectNodeModel(node, options) {
4
+ return fallbackModelSelection(node.models ?? [], options);
5
5
  }
6
- function fallbackModelSelection(models) {
6
+ function fallbackModelSelection(models, options) {
7
7
  if (models.length === 0) return {
8
8
  reason: "node declares no model fallback array",
9
9
  skipped: []
10
10
  };
11
- return enabledModelSelection(models, disabledModels());
11
+ const disabled = disabledModels();
12
+ const enabled = models.filter((candidate) => !disabled.has(candidate));
13
+ const disabledSkipped = models.filter((candidate) => disabled.has(candidate));
14
+ if (!options) {
15
+ const model = enabled[0];
16
+ return {
17
+ model,
18
+ reason: selectionReason(model),
19
+ skipped: disabledSkipped
20
+ };
21
+ }
22
+ return sizedSelection(enabled, disabledSkipped, options);
12
23
  }
13
- function enabledModelSelection(models, disabled) {
14
- const model = models.find((candidate) => !disabled.has(candidate));
24
+ function sizedSelection(enabled, disabledSkipped, options) {
25
+ const { estimatedTokens, budget } = options;
26
+ const required = estimatedTokens / (budget.max_context_pct / 100);
27
+ const tooSmall = [];
28
+ for (const candidate of enabled) {
29
+ const window = budget.model_context_windows[candidate] ?? budget.default_context_window;
30
+ if (window >= required) return {
31
+ model: candidate,
32
+ reason: `selected '${candidate}' (window ${window}) — holds estimated ${estimatedTokens} tokens within the ${budget.max_context_pct}% context cap`,
33
+ skipped: [...disabledSkipped, ...tooSmall]
34
+ };
35
+ tooSmall.push(candidate);
36
+ }
15
37
  return {
16
- model,
17
- reason: selectionReason(model),
18
- skipped: models.filter((candidate) => disabled.has(candidate))
38
+ reason: `estimated context ${estimatedTokens} tokens exceeds ${budget.max_context_pct}% of every available model window`,
39
+ skipped: [...disabledSkipped, ...tooSmall]
19
40
  };
20
41
  }
21
42
  function selectionReason(model) {
@@ -15,6 +15,7 @@ declare class WorkflowPlannerError extends Error {
15
15
  interface PlannedWorkflowNode {
16
16
  artifacts?: WorkflowNode["artifacts"];
17
17
  builtin?: string;
18
+ category?: string;
18
19
  children?: PlannedWorkflowNode[];
19
20
  command?: string[];
20
21
  dependents: string[];
@@ -185,10 +185,14 @@ function createWorkflowGraph(nodes, nodeIds = new Set(nodes.map((node) => node.i
185
185
  function uniqueExistingNeeds(node, nodeIds) {
186
186
  return uniqueStrings(node.needs.filter((need) => nodeIds.has(need)));
187
187
  }
188
+ function agentNodeCategory(node) {
189
+ return node.kind === "agent" ? node.category : void 0;
190
+ }
188
191
  function toPlannedNode(node, index) {
189
192
  const planned = {
190
193
  artifacts: node.artifacts,
191
194
  builtin: "builtin" in node ? node.builtin : void 0,
195
+ category: agentNodeCategory(node),
192
196
  command: "command" in node ? node.command : void 0,
193
197
  children: node.kind === "parallel" ? node.nodes.map((child, childIndex) => toPlannedNode(child, childIndex)) : void 0,
194
198
  dependents: [],
@@ -94,6 +94,7 @@ declare const scheduleArtifactSchema: z.ZodObject<{
94
94
  } | undefined;
95
95
  timeout_ms?: number | undefined;
96
96
  } & {
97
+ category?: string;
97
98
  kind: "agent";
98
99
  profile: string;
99
100
  }) | ({
@@ -478,6 +479,7 @@ declare const scheduleArtifactSchema: z.ZodObject<{
478
479
  } | undefined;
479
480
  timeout_ms?: number | undefined;
480
481
  } & {
482
+ category?: string;
481
483
  kind: "agent";
482
484
  profile: string;
483
485
  }) | ({
@@ -861,6 +863,7 @@ declare const scheduleArtifactSchema: z.ZodObject<{
861
863
  } | undefined;
862
864
  timeout_ms?: number | undefined;
863
865
  } & {
866
+ category?: string;
864
867
  kind: "agent";
865
868
  profile: string;
866
869
  }) | ({
@@ -1245,6 +1248,7 @@ declare const scheduleArtifactSchema: z.ZodObject<{
1245
1248
  } | undefined;
1246
1249
  timeout_ms?: number | undefined;
1247
1250
  } & {
1251
+ category?: string;
1248
1252
  kind: "agent";
1249
1253
  profile: string;
1250
1254
  }) | ({
@@ -8,6 +8,7 @@ import { emit, emitAgentFinish, emitAgentStart } from "../events/events.js";
8
8
  import "../events/index.js";
9
9
  import { gatewayServerForProfile } from "../../mcp/gateway.js";
10
10
  import { selectNodeModel } from "../../model-resolver.js";
11
+ import { estimateTokens } from "../../token-estimator.js";
11
12
  import { readFileSync } from "node:fs";
12
13
  //#region src/runtime/agent-node/agent-node.ts
13
14
  async function executeAgentNode(node, context, attempt) {
@@ -17,7 +18,17 @@ async function executeAgentNode(node, context, attempt) {
17
18
  output: ""
18
19
  };
19
20
  const prompt = renderAgentPrompt(node, context);
20
- const modelSelection = selectNodeModel(node);
21
+ const decision = decideNodeModel(prompt, node, context.config.token_budget);
22
+ if (decision.overBudget) return {
23
+ evidence: [
24
+ `agent boundary node=${node.id} profile=${node.profile}`,
25
+ `over token budget: ${decision.selection.reason}`,
26
+ ...decision.selection.skipped.length ? [`model fallbacks skipped: ${decision.selection.skipped.join(", ")}`] : []
27
+ ],
28
+ exitCode: 1,
29
+ output: ""
30
+ };
31
+ const modelSelection = decision.selection;
21
32
  const plan = createRunnerLaunchPlan(context.config, {
22
33
  model: modelSelection.model,
23
34
  nodeId: node.id,
@@ -55,6 +66,7 @@ async function executeAgentNode(node, context, attempt) {
55
66
  return {
56
67
  evidence: [
57
68
  `agent boundary node=${node.id} profile=${node.profile} runner=${plan.runnerId}`,
69
+ `estimated context tokens: ${decision.estimatedTokens}`,
58
70
  `model selection: ${modelSelection.model ?? "profile/default"} (${modelSelection.reason})`,
59
71
  ...modelSelection.skipped.length ? [`model fallbacks skipped: ${modelSelection.skipped.join(", ")}`] : [],
60
72
  ...finalized.evidence,
@@ -66,6 +78,30 @@ async function executeAgentNode(node, context, attempt) {
66
78
  timedOut: result.timedOut
67
79
  };
68
80
  }
81
/**
 * Pure model-routing decision for a node: estimate the assembled prompt size and
 * pick the first enabled fallback model (in declared order) whose window holds it
 * within the context cap.
 *
 * - No budget, or no fallback array: legacy (size-unaware) selection.
 * - Fallback array whose entries are all disabled: also the legacy selection.
 *   There is no window to size against, so the node is not reported as over
 *   budget; the caller then launches with no explicit model (it reports
 *   "profile/default" in its evidence), as it did before sizing existed.
 * - At least one enabled fallback but none fits: `overBudget` — the caller fails
 *   it fast rather than truncating.
 *
 * @param prompt Rendered prompt text for the node.
 * @param node Agent node; `node.models` is the optional fallback array.
 * @param budget The config's `token_budget`, possibly undefined.
 * @returns `{ estimatedTokens, overBudget, selection }`.
 */
function decideNodeModel(prompt, node, budget) {
  const estimatedTokens = estimateTokens(prompt);
  const legacy = selectNodeModel(node);
  // `legacy.model` is unset when the fallback array is empty or fully disabled;
  // in both cases size-aware routing has nothing to choose from.
  if (!(budget && node.models?.length) || !legacy.model) return {
    estimatedTokens,
    overBudget: false,
    selection: legacy
  };
  const selection = selectNodeModel(node, {
    budget,
    estimatedTokens
  });
  return {
    estimatedTokens,
    overBudget: !selection.model,
    selection
  };
}
69
105
  async function finalizeAgentOutput(inputs) {
70
106
  const { attempt, context, node, normalized, plan, result } = inputs;
71
107
  const validStructuredOutput = selectValidStructuredOutput(context, node, normalized, plan, result.stdout);
@@ -15,10 +15,12 @@ var LocalScheduler = class {
15
15
  emitWorkflowStarted: () => options.emitWorkflowStarted(context),
16
16
  executeWorkflow: () => runWorkflowScheduler({
17
17
  failFast: plan.execution.failFast,
18
+ fanOutWidth: context.config.token_budget?.fan_out_width,
18
19
  isCancelled: () => options.isCancelled(context),
19
20
  markNodeReady: (nodeId) => options.markNodeReady(nodeId, context),
20
21
  maxParallelNodes: context.maxParallelNodes,
21
22
  nodes: plan.topologicalOrder.map((node) => ({
23
+ category: node.category,
22
24
  dependents: node.dependents,
23
25
  id: node.id,
24
26
  index: node.index,
@@ -38,6 +40,7 @@ async function runWorkflowScheduler(input) {
38
40
  blocked: [],
39
41
  completed: [],
40
42
  failFast: input.failFast,
43
+ fanOutWidth: input.fanOutWidth,
41
44
  maxParallelNodes: input.maxParallelNodes,
42
45
  nodes: orderedNodes(input.nodes),
43
46
  running: [],
@@ -107,7 +110,7 @@ function unstartedBlockingDescendants(nodeId, context) {
107
110
  function launchReadyNodes(input, state, running) {
108
111
  const capacity = workflowNodeCapacity(state);
109
112
  if (capacity <= 0) return;
110
- for (const nodeId of readyNodeIds(state).slice(0, capacity)) {
113
+ for (const nodeId of selectLaunchableNodes(state, capacity)) {
111
114
  input.markNodeReady(nodeId);
112
115
  state.running = [...state.running, nodeId];
113
116
  running.set(nodeId, {
@@ -116,6 +119,50 @@ function launchReadyNodes(input, state, running) {
116
119
  });
117
120
  }
118
121
  }
122
+ /**
123
+ * Choose which ready nodes to launch this tick within the global capacity and
124
+ * the per-category fan-out caps. A category at its cap defers its remaining
125
+ * ready nodes to a later tick (it does not drop them). Nodes without a category
126
+ * are bounded only by the global capacity. Without a fanOutWidth (e.g. in tests
127
+ * or configs with no token_budget), this is the prior `slice(0, capacity)`.
128
+ */
129
+ function selectLaunchableNodes(state, capacity) {
130
+ const ready = readyNodeIds(state);
131
+ return state.fanOutWidth ? cappedSelection(ready, capacity, state, state.fanOutWidth) : ready.slice(0, capacity);
132
+ }
133
+ function cappedSelection(ready, capacity, state, fanOut) {
134
+ const categoryOf = new Map(state.nodes.map((node) => [node.id, node.category]));
135
+ const counts = categoryRunCounts(state.running, categoryOf);
136
+ const selected = [];
137
+ for (const nodeId of ready) {
138
+ if (selected.length >= capacity) break;
139
+ if (claimCategorySlot(categoryOf.get(nodeId), fanOut, counts)) selected.push(nodeId);
140
+ }
141
+ return selected;
142
+ }
143
/**
 * Fan-out cap for a node category: the category's own entry in
 * `fanOut.by_category` when present, otherwise `fanOut.default`.
 *
 * Only own properties are consulted. A plain index lookup would also resolve
 * inherited members (e.g. a category named `constructor` or `toString`) and
 * return a function instead of a number, which silently disables the cap.
 *
 * @param category Node category name.
 * @param fanOut `{ default, by_category }` from the token budget.
 * @returns The maximum number of concurrently running nodes for the category.
 */
function categoryCap(category, fanOut) {
  const caps = fanOut.by_category;
  const declared = Object.prototype.hasOwnProperty.call(caps, category) ? caps[category] : void 0;
  return declared ?? fanOut.default;
}
146
+ /**
147
+ * Whether a node of the given category may launch now, consuming a slot from
148
+ * `counts` when it can. Uncategorized nodes always may; a category at its cap
149
+ * may not.
150
+ */
151
+ function claimCategorySlot(category, fanOut, counts) {
152
+ if (!category) return true;
153
+ const current = counts.get(category) ?? 0;
154
+ if (current >= categoryCap(category, fanOut)) return false;
155
+ counts.set(category, current + 1);
156
+ return true;
157
+ }
158
+ function categoryRunCounts(running, categoryOf) {
159
+ const counts = /* @__PURE__ */ new Map();
160
+ for (const nodeId of running) {
161
+ const category = categoryOf.get(nodeId);
162
+ if (category) counts.set(category, (counts.get(category) ?? 0) + 1);
163
+ }
164
+ return counts;
165
+ }
119
166
  function dependencyPassed(nodeId, context) {
120
167
  const result = (context.completed ?? []).find((item) => item.nodeId === nodeId);
121
168
  return result ? context.shouldContinueAfterNodeResult?.(result) ?? result.status !== "failed" : false;
@@ -25,12 +25,16 @@ function applyNodeCatalogModelsToParallelNode(node, templates) {
25
25
  };
26
26
  }
27
27
  function applyNodeCatalogModelsToAgentNode(node, templates) {
28
- if (node.models?.length) return node;
29
28
  const template = nodeCatalogTemplateFor(node, templates);
30
- return template ? {
29
+ if (!template) return node;
30
+ return {
31
31
  ...node,
32
- models: template.models
33
- } : node;
32
+ category: node.category ?? template.category,
33
+ models: nodeModelsOrCatalog(node, template)
34
+ };
35
+ }
36
+ function nodeModelsOrCatalog(node, template) {
37
+ return node.models?.length ? node.models : template.models;
34
38
  }
35
39
  function nodeCatalogTemplateFor(node, templates) {
36
40
  return templates[node.id] ?? Object.values(templates).find((candidate) => node.id.includes(candidate.category)) ?? Object.values(templates).find((candidate) => candidate.profile === node.profile);
@@ -44,6 +44,9 @@ function plannerPrompt(entrypointId, task, baseline, config, planningContext) {
44
44
  "Scheduler node catalog:",
45
45
  schedulerCatalogPrompt(config, entrypointId),
46
46
  "",
47
+ "Token budget:",
48
+ tokenBudgetPrompt(config),
49
+ "",
47
50
  "Gate recipes:",
48
51
  "- Prefer preserving valid gates from the baseline workflows instead of recreating them.",
49
52
  "- RED/test coverage may use changed_files gates on test-writing nodes. A changed_files gate must include a changed_files object with allow and/or require_any glob arrays.",
@@ -84,6 +87,18 @@ function plannerRepairPrompt(inputs) {
84
87
  stringify(inputs.baseline)
85
88
  ].join("\n");
86
89
  }
90
+ function tokenBudgetPrompt(config) {
91
+ const budget = config.token_budget;
92
+ const windows = Object.entries(budget.model_context_windows);
93
+ const fanOut = Object.entries(budget.fan_out_width.by_category);
94
+ return [
95
+ `- Keep each node's assembled context under ${budget.max_context_pct}% of its model's context window; prefer the smallest-tier model whose window comfortably holds the node within that cap.`,
96
+ `- Assume ${budget.default_context_window} tokens of context window for a model with no declared window.`,
97
+ windows.length > 0 ? `- Known model context windows: ${windows.map(([id, size]) => `${id}=${size}`).join(", ")}.` : void 0,
98
+ `- Do not exceed the per-category fan-out width (max concurrent same-category nodes). Default width: ${budget.fan_out_width.default}.`,
99
+ fanOut.length > 0 ? `- Category fan-out caps: ${fanOut.map(([category, width]) => `${category}=${width}`).join(", ")}.` : void 0
100
+ ].filter((line) => Boolean(line)).join("\n");
101
+ }
87
102
  function allowedProfilePromptLine(config, id) {
88
103
  const profile = config.profiles[id];
89
104
  const runner = config.runners[profile.runner];
@@ -0,0 +1,22 @@
1
+ import { getEncoding } from "js-tiktoken";
2
+ //#region src/token-estimator.ts
3
+ /**
4
+ * Token estimation for node sizing. Uses the `o200k_base` encoding (the GPT-5.5
5
+ * family the MoKa agents run on).
6
+ *
7
+ * This is a cross-model ESTIMATE, not a billing-accurate count: the pipeline
8
+ * routes nodes across OpenAI/Kimi/Qwen models whose tokenizers differ, so the
9
+ * value is a sizing heuristic for budget/routing decisions. For exact counts on
10
+ * Anthropic runners, use the Anthropic `count_tokens` API instead.
11
+ */
12
+ let encoder;
13
+ function encoding() {
14
+ encoder ??= getEncoding("o200k_base");
15
+ return encoder;
16
+ }
17
/**
 * Estimate the token count of `text` using the shared encoder.
 *
 * Prompt text can contain literal special-token markers such as
 * `<|endoftext|>` (for example when it quotes repository content).
 * js-tiktoken's `encode` throws on those by default, so both the allowed and
 * the disallowed special-token lists are passed empty: the markers are then
 * encoded as ordinary text and the estimate never throws on them.
 *
 * @param text Text to size.
 * @returns Estimated token count; 0 for an empty string.
 */
function estimateTokens(text) {
  if (text.length === 0) return 0;
  return encoding().encode(text, [], []).length;
}
21
+ //#endregion
22
+ export { estimateTokens };
package/package.json CHANGED
@@ -9,6 +9,7 @@
9
9
  "execa": "^9.5.2",
10
10
  "git-url-parse": "^16.1.0",
11
11
  "gray-matter": "^4.0.3",
12
+ "js-tiktoken": "^1.0.21",
12
13
  "jsonc-parser": "^3.3.1",
13
14
  "ky": "^2.0.2",
14
15
  "micromatch": "^4.0.8",
@@ -120,7 +121,7 @@
120
121
  "prepack": "bun run build:cli"
121
122
  },
122
123
  "type": "module",
123
- "version": "2.3.1",
124
+ "version": "2.4.0",
124
125
  "description": "Config-driven multi-agent pipeline runner for repository work",
125
126
  "main": "./dist/index.js",
126
127
  "types": "./dist/index.d.ts",