@oni.bot/core 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +146 -126
- package/SECURITY.md +2 -2
- package/dist/agents/context.d.ts +0 -2
- package/dist/agents/context.d.ts.map +1 -1
- package/dist/agents/context.js +1 -3
- package/dist/agents/context.js.map +1 -1
- package/dist/agents/define-agent.d.ts.map +1 -1
- package/dist/agents/define-agent.js +99 -41
- package/dist/agents/define-agent.js.map +1 -1
- package/dist/agents/functional-agent.d.ts.map +1 -1
- package/dist/agents/functional-agent.js +0 -7
- package/dist/agents/functional-agent.js.map +1 -1
- package/dist/agents/types.d.ts +0 -2
- package/dist/agents/types.d.ts.map +1 -1
- package/dist/checkpoint.d.ts.map +1 -1
- package/dist/checkpoint.js +7 -2
- package/dist/checkpoint.js.map +1 -1
- package/dist/checkpointers/postgres.d.ts.map +1 -1
- package/dist/checkpointers/postgres.js +47 -31
- package/dist/checkpointers/postgres.js.map +1 -1
- package/dist/checkpointers/sqlite.js +4 -4
- package/dist/checkpointers/sqlite.js.map +1 -1
- package/dist/circuit-breaker.d.ts +1 -0
- package/dist/circuit-breaker.d.ts.map +1 -1
- package/dist/circuit-breaker.js +13 -0
- package/dist/circuit-breaker.js.map +1 -1
- package/dist/cli/dev.d.ts.map +1 -1
- package/dist/cli/dev.js +0 -1
- package/dist/cli/dev.js.map +1 -1
- package/dist/cli/router.js +1 -1
- package/dist/cli/run.d.ts.map +1 -1
- package/dist/cli/run.js +0 -1
- package/dist/cli/run.js.map +1 -1
- package/dist/config/loader.d.ts +1 -1
- package/dist/config/loader.d.ts.map +1 -1
- package/dist/config/loader.js +35 -5
- package/dist/config/loader.js.map +1 -1
- package/dist/context.d.ts +20 -0
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js.map +1 -1
- package/dist/coordination/pubsub.d.ts +1 -0
- package/dist/coordination/pubsub.d.ts.map +1 -1
- package/dist/coordination/pubsub.js +31 -16
- package/dist/coordination/pubsub.js.map +1 -1
- package/dist/coordination/request-reply.d.ts +7 -0
- package/dist/coordination/request-reply.d.ts.map +1 -1
- package/dist/coordination/request-reply.js +65 -11
- package/dist/coordination/request-reply.js.map +1 -1
- package/dist/events/bus.d.ts +1 -0
- package/dist/events/bus.d.ts.map +1 -1
- package/dist/events/bus.js +16 -10
- package/dist/events/bus.js.map +1 -1
- package/dist/functional.d.ts.map +1 -1
- package/dist/functional.js +3 -0
- package/dist/functional.js.map +1 -1
- package/dist/graph.d.ts.map +1 -1
- package/dist/graph.js +21 -3
- package/dist/graph.js.map +1 -1
- package/dist/guardrails/audit.d.ts +4 -1
- package/dist/guardrails/audit.d.ts.map +1 -1
- package/dist/guardrails/audit.js +18 -1
- package/dist/guardrails/audit.js.map +1 -1
- package/dist/harness/agent-loop.d.ts.map +1 -1
- package/dist/harness/agent-loop.js +471 -352
- package/dist/harness/agent-loop.js.map +1 -1
- package/dist/harness/context-compactor.d.ts +1 -0
- package/dist/harness/context-compactor.d.ts.map +1 -1
- package/dist/harness/context-compactor.js +43 -1
- package/dist/harness/context-compactor.js.map +1 -1
- package/dist/harness/harness.d.ts +6 -0
- package/dist/harness/harness.d.ts.map +1 -1
- package/dist/harness/harness.js +32 -5
- package/dist/harness/harness.js.map +1 -1
- package/dist/harness/hooks-engine.d.ts.map +1 -1
- package/dist/harness/hooks-engine.js +12 -10
- package/dist/harness/hooks-engine.js.map +1 -1
- package/dist/harness/index.d.ts +3 -1
- package/dist/harness/index.d.ts.map +1 -1
- package/dist/harness/index.js +2 -0
- package/dist/harness/index.js.map +1 -1
- package/dist/harness/memory-loader.d.ts +150 -0
- package/dist/harness/memory-loader.d.ts.map +1 -0
- package/dist/harness/memory-loader.js +714 -0
- package/dist/harness/memory-loader.js.map +1 -0
- package/dist/harness/safety-gate.d.ts.map +1 -1
- package/dist/harness/safety-gate.js +47 -26
- package/dist/harness/safety-gate.js.map +1 -1
- package/dist/harness/skill-loader.d.ts +7 -0
- package/dist/harness/skill-loader.d.ts.map +1 -1
- package/dist/harness/skill-loader.js +24 -8
- package/dist/harness/skill-loader.js.map +1 -1
- package/dist/harness/todo-module.d.ts.map +1 -1
- package/dist/harness/todo-module.js +13 -6
- package/dist/harness/todo-module.js.map +1 -1
- package/dist/harness/types.d.ts +7 -0
- package/dist/harness/types.d.ts.map +1 -1
- package/dist/harness/types.js.map +1 -1
- package/dist/harness/validate-args.js +18 -3
- package/dist/harness/validate-args.js.map +1 -1
- package/dist/hitl/interrupt.d.ts +2 -2
- package/dist/hitl/interrupt.d.ts.map +1 -1
- package/dist/hitl/interrupt.js +6 -4
- package/dist/hitl/interrupt.js.map +1 -1
- package/dist/hitl/resume.d.ts +10 -0
- package/dist/hitl/resume.d.ts.map +1 -1
- package/dist/hitl/resume.js +31 -0
- package/dist/hitl/resume.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/injected.d.ts.map +1 -1
- package/dist/injected.js.map +1 -1
- package/dist/inspect.d.ts.map +1 -1
- package/dist/inspect.js +28 -8
- package/dist/inspect.js.map +1 -1
- package/dist/lsp/client.d.ts +2 -0
- package/dist/lsp/client.d.ts.map +1 -1
- package/dist/lsp/client.js +62 -17
- package/dist/lsp/client.js.map +1 -1
- package/dist/lsp/index.d.ts.map +1 -1
- package/dist/lsp/index.js.map +1 -1
- package/dist/mcp/client.d.ts +2 -0
- package/dist/mcp/client.d.ts.map +1 -1
- package/dist/mcp/client.js +45 -14
- package/dist/mcp/client.js.map +1 -1
- package/dist/mcp/convert.js +1 -1
- package/dist/mcp/convert.js.map +1 -1
- package/dist/mcp/transport.d.ts +2 -0
- package/dist/mcp/transport.d.ts.map +1 -1
- package/dist/mcp/transport.js +33 -8
- package/dist/mcp/transport.js.map +1 -1
- package/dist/messages/index.d.ts.map +1 -1
- package/dist/messages/index.js +7 -1
- package/dist/messages/index.js.map +1 -1
- package/dist/models/anthropic.d.ts.map +1 -1
- package/dist/models/anthropic.js +25 -15
- package/dist/models/anthropic.js.map +1 -1
- package/dist/models/google.d.ts.map +1 -1
- package/dist/models/google.js +23 -7
- package/dist/models/google.js.map +1 -1
- package/dist/models/ollama.d.ts.map +1 -1
- package/dist/models/ollama.js +11 -1
- package/dist/models/ollama.js.map +1 -1
- package/dist/models/openai.d.ts.map +1 -1
- package/dist/models/openai.js +15 -3
- package/dist/models/openai.js.map +1 -1
- package/dist/models/openrouter.d.ts.map +1 -1
- package/dist/models/openrouter.js +14 -3
- package/dist/models/openrouter.js.map +1 -1
- package/dist/prebuilt/react-agent.d.ts.map +1 -1
- package/dist/prebuilt/react-agent.js +1 -0
- package/dist/prebuilt/react-agent.js.map +1 -1
- package/dist/pregel.d.ts +11 -6
- package/dist/pregel.d.ts.map +1 -1
- package/dist/pregel.js +473 -349
- package/dist/pregel.js.map +1 -1
- package/dist/retry.d.ts.map +1 -1
- package/dist/retry.js +7 -6
- package/dist/retry.js.map +1 -1
- package/dist/store/index.d.ts +1 -1
- package/dist/store/index.d.ts.map +1 -1
- package/dist/store/index.js +63 -13
- package/dist/store/index.js.map +1 -1
- package/dist/stream-events.d.ts.map +1 -1
- package/dist/stream-events.js +3 -9
- package/dist/stream-events.js.map +1 -1
- package/dist/streaming.d.ts +5 -2
- package/dist/streaming.d.ts.map +1 -1
- package/dist/streaming.js +9 -8
- package/dist/streaming.js.map +1 -1
- package/dist/swarm/graph.d.ts +16 -2
- package/dist/swarm/graph.d.ts.map +1 -1
- package/dist/swarm/graph.js +204 -53
- package/dist/swarm/graph.js.map +1 -1
- package/dist/swarm/index.d.ts +2 -1
- package/dist/swarm/index.d.ts.map +1 -1
- package/dist/swarm/index.js.map +1 -1
- package/dist/swarm/mailbox.d.ts.map +1 -1
- package/dist/swarm/mailbox.js +3 -1
- package/dist/swarm/mailbox.js.map +1 -1
- package/dist/swarm/mermaid.d.ts +2 -1
- package/dist/swarm/mermaid.d.ts.map +1 -1
- package/dist/swarm/mermaid.js +6 -3
- package/dist/swarm/mermaid.js.map +1 -1
- package/dist/swarm/pool.d.ts.map +1 -1
- package/dist/swarm/pool.js +30 -5
- package/dist/swarm/pool.js.map +1 -1
- package/dist/swarm/registry.d.ts.map +1 -1
- package/dist/swarm/registry.js +7 -0
- package/dist/swarm/registry.js.map +1 -1
- package/dist/swarm/scaling.d.ts +10 -1
- package/dist/swarm/scaling.d.ts.map +1 -1
- package/dist/swarm/scaling.js +85 -14
- package/dist/swarm/scaling.js.map +1 -1
- package/dist/swarm/snapshot.d.ts.map +1 -1
- package/dist/swarm/snapshot.js +10 -1
- package/dist/swarm/snapshot.js.map +1 -1
- package/dist/swarm/supervisor.js +20 -12
- package/dist/swarm/supervisor.js.map +1 -1
- package/dist/swarm/tracer.d.ts +3 -1
- package/dist/swarm/tracer.d.ts.map +1 -1
- package/dist/swarm/tracer.js +66 -15
- package/dist/swarm/tracer.js.map +1 -1
- package/dist/swarm/types.d.ts +1 -6
- package/dist/swarm/types.d.ts.map +1 -1
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.d.ts.map +1 -1
- package/dist/testing/index.js.map +1 -1
- package/dist/tools/define.d.ts.map +1 -1
- package/dist/tools/define.js +1 -0
- package/dist/tools/define.js.map +1 -1
- package/dist/tools/types.d.ts +2 -0
- package/dist/tools/types.d.ts.map +1 -1
- package/dist/types.d.ts +3 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +7 -1
package/dist/pregel.js
CHANGED
|
@@ -3,18 +3,21 @@
|
|
|
3
3
|
// New: interrupt() context management, HITL resume, token streaming
|
|
4
4
|
// ============================================================
|
|
5
5
|
import { START, END, Send, Command, } from "./types.js";
|
|
6
|
-
import { RecursionLimitError, NodeNotFoundError, ONIInterrupt, NodeTimeoutError, ONIError, NodeExecutionError } from "./errors.js";
|
|
6
|
+
import { RecursionLimitError, NodeNotFoundError, ONIInterrupt, NodeTimeoutError, ONIError, NodeExecutionError, CircuitBreakerOpenError } from "./errors.js";
|
|
7
7
|
import { CircuitBreaker } from "./circuit-breaker.js";
|
|
8
8
|
import { withRetry } from "./retry.js";
|
|
9
9
|
import { NamespacedCheckpointer } from "./checkpointers/namespaced.js";
|
|
10
10
|
import { _runWithContext } from "./context.js";
|
|
11
|
-
import { StreamWriterImpl,
|
|
11
|
+
import { StreamWriterImpl, _withTokenHandler } from "./streaming.js";
|
|
12
12
|
import { NodeInterruptSignal, HITLInterruptException, HITLSessionStore, _installInterruptContext, _clearInterruptContext, } from "./hitl/index.js";
|
|
13
13
|
import { EventBus } from "./events/bus.js";
|
|
14
14
|
import { AuditLog } from "./guardrails/audit.js";
|
|
15
15
|
import { BudgetTracker } from "./guardrails/budget.js";
|
|
16
|
+
import { runFilters } from "./guardrails/filters.js";
|
|
16
17
|
import { ONITracer } from "./telemetry.js";
|
|
17
18
|
const DEFAULT_RECURSION_LIMIT = 25;
|
|
19
|
+
/** Maximum entries in the node result cache — oldest entries evicted FIFO when full. */
|
|
20
|
+
const NODE_CACHE_MAX_SIZE = 256;
|
|
18
21
|
export class ONIPregelRunner {
|
|
19
22
|
nodes;
|
|
20
23
|
edges;
|
|
@@ -27,13 +30,17 @@ export class ONIPregelRunner {
|
|
|
27
30
|
hitlStore = new HITLSessionStore();
|
|
28
31
|
nodeCache = new Map();
|
|
29
32
|
circuitBreakers = new Map();
|
|
30
|
-
/**
|
|
31
|
-
|
|
32
|
-
/**
|
|
33
|
-
|
|
33
|
+
/** Count of concurrent subgraph invocations active on this runner. >0 means running as subgraph. */
|
|
34
|
+
_subgraphRefCount = 0;
|
|
35
|
+
/** Per-invocation parent updates from Command.PARENT, keyed by parent threadId. */
|
|
36
|
+
_perInvocationParentUpdates = new Map();
|
|
37
|
+
/** Per-invocation checkpointer override for subgraph isolation, keyed by threadId. */
|
|
38
|
+
_perInvocationCheckpointer = new Map();
|
|
34
39
|
eventBus;
|
|
35
40
|
auditLog;
|
|
36
41
|
budgetTracker;
|
|
42
|
+
contentFilters;
|
|
43
|
+
toolPermissions;
|
|
37
44
|
tracer;
|
|
38
45
|
/** Pre-indexed edges by source node — O(1) lookup instead of O(n) filter */
|
|
39
46
|
_edgesBySource;
|
|
@@ -51,6 +58,8 @@ export class ONIPregelRunner {
|
|
|
51
58
|
this.eventBus = new EventBus(listeners);
|
|
52
59
|
this.auditLog = guardrails?.audit ? new AuditLog() : null;
|
|
53
60
|
this.budgetTracker = guardrails?.budget ? new BudgetTracker(guardrails.budget) : null;
|
|
61
|
+
this.contentFilters = guardrails?.filters ?? [];
|
|
62
|
+
this.toolPermissions = guardrails?.toolPermissions;
|
|
54
63
|
this.tracer = new ONITracer(tracer ?? null);
|
|
55
64
|
// Pre-index edges by source for O(1) lookups in getNextNodes
|
|
56
65
|
this._edgesBySource = new Map();
|
|
@@ -143,6 +152,7 @@ export class ONIPregelRunner {
|
|
|
143
152
|
this.nodeCache.delete(cacheKey);
|
|
144
153
|
}
|
|
145
154
|
}
|
|
155
|
+
const _tid = config?.threadId ?? "unknown";
|
|
146
156
|
const ctx = {
|
|
147
157
|
config: config ?? {},
|
|
148
158
|
store: this.store,
|
|
@@ -152,14 +162,41 @@ export class ONIPregelRunner {
|
|
|
152
162
|
parentUpdates: [],
|
|
153
163
|
step: step ?? 0,
|
|
154
164
|
recursionLimit: recursionLimit ?? DEFAULT_RECURSION_LIMIT,
|
|
165
|
+
toolPermissions: this.toolPermissions,
|
|
166
|
+
_recordUsage: (agentName, modelId, usage) => {
|
|
167
|
+
if (!this.budgetTracker)
|
|
168
|
+
return;
|
|
169
|
+
const entries = this.budgetTracker.record(agentName, modelId, usage);
|
|
170
|
+
for (const e of entries)
|
|
171
|
+
this.auditLog?.record(_tid, e);
|
|
172
|
+
},
|
|
173
|
+
_emitEvent: (event) => this.eventBus.emit(event),
|
|
174
|
+
_auditRecord: (entry) => this.auditLog?.record(_tid, entry),
|
|
155
175
|
};
|
|
156
176
|
return _runWithContext(ctx, async () => {
|
|
157
177
|
_installInterruptContext({
|
|
158
178
|
nodeName: nodeDef.name,
|
|
159
|
-
resumeValue:
|
|
160
|
-
hasResume: hasResume ?? false,
|
|
179
|
+
resumeValues: hasResume ? [resumeValue] : [],
|
|
161
180
|
});
|
|
162
181
|
try {
|
|
182
|
+
// Content filter — input direction
|
|
183
|
+
if (this.contentFilters.length > 0) {
|
|
184
|
+
const inputStr = JSON.stringify(state);
|
|
185
|
+
const inputCheck = runFilters(this.contentFilters, inputStr, "input");
|
|
186
|
+
if (!inputCheck.passed) {
|
|
187
|
+
const threadId = config?.threadId ?? "unknown";
|
|
188
|
+
this.eventBus.emit({ type: "filter.blocked", filter: inputCheck.blockedBy, agent: nodeDef.name, direction: "input", reason: inputCheck.reason, timestamp: Date.now() });
|
|
189
|
+
this.auditLog?.record(threadId, { timestamp: Date.now(), agent: nodeDef.name, action: "filter.blocked", data: { filter: inputCheck.blockedBy, direction: "input", reason: inputCheck.reason } });
|
|
190
|
+
throw new Error(`Content blocked by filter "${inputCheck.blockedBy}" on input to node "${nodeDef.name}": ${inputCheck.reason}`);
|
|
191
|
+
}
|
|
192
|
+
// Apply redaction if content was rewritten by a redacting filter
|
|
193
|
+
if (inputCheck.content !== inputStr) {
|
|
194
|
+
try {
|
|
195
|
+
state = JSON.parse(inputCheck.content);
|
|
196
|
+
}
|
|
197
|
+
catch { /* leave state unchanged on parse failure */ }
|
|
198
|
+
}
|
|
199
|
+
}
|
|
163
200
|
const run = () => Promise.resolve(nodeDef.fn(state, config));
|
|
164
201
|
// Core execute call: retry-aware
|
|
165
202
|
const executeCall = async () => {
|
|
@@ -204,15 +241,44 @@ export class ONIPregelRunner {
|
|
|
204
241
|
// Pass through interrupt signals (thrown by interrupt() inside nodes)
|
|
205
242
|
if (err instanceof NodeInterruptSignal)
|
|
206
243
|
throw err;
|
|
207
|
-
//
|
|
208
|
-
if (err instanceof
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
244
|
+
// Circuit breaker open — invoke user fallback with real state + error
|
|
245
|
+
if (err instanceof CircuitBreakerOpenError && nodeDef.circuitBreaker?.fallback) {
|
|
246
|
+
result = nodeDef.circuitBreaker.fallback(state, err);
|
|
247
|
+
}
|
|
248
|
+
else {
|
|
249
|
+
// Pass through structured ONI errors (NodeExecutionError from retry, NodeTimeoutError, etc.)
|
|
250
|
+
if (err instanceof ONIError)
|
|
251
|
+
throw err;
|
|
252
|
+
// Wrap raw errors and non-Error throws in NodeExecutionError
|
|
253
|
+
const cause = err instanceof Error ? err : new Error(String(err));
|
|
254
|
+
throw new NodeExecutionError(nodeDef.name, cause);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
// Content filter — output direction
|
|
258
|
+
if (this.contentFilters.length > 0 && result != null) {
|
|
259
|
+
const outputStr = JSON.stringify(result);
|
|
260
|
+
const outputCheck = runFilters(this.contentFilters, outputStr, "output");
|
|
261
|
+
if (!outputCheck.passed) {
|
|
262
|
+
const threadId = config?.threadId ?? "unknown";
|
|
263
|
+
this.eventBus.emit({ type: "filter.blocked", filter: outputCheck.blockedBy, agent: nodeDef.name, direction: "output", reason: outputCheck.reason, timestamp: Date.now() });
|
|
264
|
+
this.auditLog?.record(threadId, { timestamp: Date.now(), agent: nodeDef.name, action: "filter.blocked", data: { filter: outputCheck.blockedBy, direction: "output", reason: outputCheck.reason } });
|
|
265
|
+
throw new Error(`Content blocked by filter "${outputCheck.blockedBy}" on output of node "${nodeDef.name}": ${outputCheck.reason}`);
|
|
266
|
+
}
|
|
267
|
+
// Apply redaction if content was rewritten by a redacting filter
|
|
268
|
+
if (outputCheck.content !== outputStr) {
|
|
269
|
+
try {
|
|
270
|
+
result = JSON.parse(outputCheck.content);
|
|
271
|
+
}
|
|
272
|
+
catch { /* leave result unchanged on parse failure */ }
|
|
273
|
+
}
|
|
213
274
|
}
|
|
214
|
-
// Store in cache (reuse key computed above)
|
|
275
|
+
// Store in cache (reuse key computed above); evict oldest entry when full
|
|
215
276
|
if (nodeDef.cache && cacheKey) {
|
|
277
|
+
if (this.nodeCache.size >= NODE_CACHE_MAX_SIZE) {
|
|
278
|
+
const oldest = this.nodeCache.keys().next().value;
|
|
279
|
+
if (oldest !== undefined)
|
|
280
|
+
this.nodeCache.delete(oldest);
|
|
281
|
+
}
|
|
216
282
|
this.nodeCache.set(cacheKey, { result, timestamp: Date.now() });
|
|
217
283
|
}
|
|
218
284
|
return result;
|
|
@@ -253,20 +319,29 @@ export class ONIPregelRunner {
|
|
|
253
319
|
const modeMessages = modes.has("messages");
|
|
254
320
|
// Telemetry: graph-level span
|
|
255
321
|
const graphSpan = this.tracer.startGraphSpan("invoke", { threadId, agentId });
|
|
256
|
-
//
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
const
|
|
265
|
-
if (
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
322
|
+
let step = 0; // declared before try so finally can read it for setAttribute
|
|
323
|
+
try {
|
|
324
|
+
// Load resume values from config (set by resume() call)
|
|
325
|
+
const resumeMap = config?.__resumeValues ?? {};
|
|
326
|
+
// Load or init state
|
|
327
|
+
let state;
|
|
328
|
+
let pendingNodes = [];
|
|
329
|
+
let pendingSends = [];
|
|
330
|
+
const effectiveCheckpointer = (this._perInvocationCheckpointer.get(threadId) ?? this.checkpointer);
|
|
331
|
+
if (effectiveCheckpointer && config?.threadId) {
|
|
332
|
+
const cp = await effectiveCheckpointer.get(threadId);
|
|
333
|
+
if (cp) {
|
|
334
|
+
state = this.applyUpdate(cp.state, input);
|
|
335
|
+
step = cp.step;
|
|
336
|
+
pendingNodes = cp.nextNodes;
|
|
337
|
+
pendingSends = cp.pendingSends ?? [];
|
|
338
|
+
}
|
|
339
|
+
else {
|
|
340
|
+
state = this.applyUpdate(this.buildInitialState(), input);
|
|
341
|
+
const init = this.getNextNodes(START, state, config);
|
|
342
|
+
pendingNodes = init.nodes;
|
|
343
|
+
pendingSends = init.sends;
|
|
344
|
+
}
|
|
270
345
|
}
|
|
271
346
|
else {
|
|
272
347
|
state = this.applyUpdate(this.buildInitialState(), input);
|
|
@@ -274,58 +349,318 @@ export class ONIPregelRunner {
|
|
|
274
349
|
pendingNodes = init.nodes;
|
|
275
350
|
pendingSends = init.sends;
|
|
276
351
|
}
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
352
|
+
if (modeValues)
|
|
353
|
+
yield tag(this.evt("state_update", state, step, agentId), "values");
|
|
354
|
+
// ---- Main superstep loop ----
|
|
355
|
+
while (true) {
|
|
356
|
+
const nextNodes = [];
|
|
357
|
+
const nextSends = [];
|
|
358
|
+
state = this.resetEphemeral(state);
|
|
359
|
+
// Drain sends — group by target node for parallel fan-out execution
|
|
360
|
+
const sendGroups = new Map();
|
|
361
|
+
for (const send of pendingSends) {
|
|
362
|
+
if (!sendGroups.has(send.node))
|
|
363
|
+
sendGroups.set(send.node, []);
|
|
364
|
+
sendGroups.get(send.node).push(send);
|
|
365
|
+
if (modeDebug)
|
|
366
|
+
yield tag(this.evt("send", send, step, agentId, send.node), "debug");
|
|
367
|
+
}
|
|
368
|
+
// Recursion limit guard — must fire before any sends execute so that
|
|
369
|
+
// node side-effects are not applied to a step that will be discarded.
|
|
370
|
+
if (step >= recursionLimit)
|
|
371
|
+
throw new RecursionLimitError(recursionLimit);
|
|
372
|
+
// Execute fan-out sends (each Send → separate node execution with its own state)
|
|
373
|
+
if (sendGroups.size > 0) {
|
|
374
|
+
// Build promises directly — avoids spread+flatMap intermediate arrays
|
|
375
|
+
const sendPromises = [];
|
|
376
|
+
for (const [node, sends] of sendGroups) {
|
|
377
|
+
const nodeDef = this.nodes.get(node);
|
|
378
|
+
if (!nodeDef)
|
|
379
|
+
throw new NodeNotFoundError(node);
|
|
380
|
+
for (const send of sends) {
|
|
381
|
+
sendPromises.push((async () => {
|
|
382
|
+
const sendState = this.applyUpdate(state, send.args);
|
|
383
|
+
const result = await this.executeNode(nodeDef, sendState, config, undefined, undefined, undefined, step, recursionLimit);
|
|
384
|
+
return { name: node, result };
|
|
385
|
+
})());
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
const sendResults = await Promise.all(sendPromises);
|
|
389
|
+
// Reduce all send results through channels
|
|
390
|
+
for (const { name, result } of sendResults) {
|
|
391
|
+
if (result instanceof Command) {
|
|
392
|
+
if (result.update)
|
|
393
|
+
state = this.applyUpdate(state, result.update);
|
|
394
|
+
const gotos = result.goto
|
|
395
|
+
? (Array.isArray(result.goto) ? result.goto : [result.goto])
|
|
396
|
+
: this.getNextNodes(name, state, config).nodes;
|
|
397
|
+
nextNodes.push(...gotos);
|
|
398
|
+
}
|
|
399
|
+
else if (result && typeof result === "object") {
|
|
400
|
+
state = this.applyUpdate(state, result);
|
|
401
|
+
const { nodes, sends } = this.getNextNodes(name, state, config);
|
|
402
|
+
nextNodes.push(...nodes);
|
|
403
|
+
nextSends.push(...sends);
|
|
404
|
+
}
|
|
405
|
+
else {
|
|
406
|
+
const { nodes, sends } = this.getNextNodes(name, state, config);
|
|
407
|
+
nextNodes.push(...nodes);
|
|
408
|
+
nextSends.push(...sends);
|
|
409
|
+
}
|
|
410
|
+
if (modeUpdates || modeDebug) {
|
|
411
|
+
const delta = result instanceof Command ? (result.update ?? {}) : (result ?? {});
|
|
412
|
+
if (modeUpdates)
|
|
413
|
+
yield tag(this.evt("node_end", delta, step, agentId, name), "updates");
|
|
414
|
+
if (modeDebug)
|
|
415
|
+
yield tag(this.evt("node_end", delta, step, agentId, name), "debug");
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
pendingSends = [];
|
|
420
|
+
// Filter executable nodes (non-END), excluding nodes already handled by sends
|
|
421
|
+
const executableNodes = pendingNodes.filter((n) => n !== END && !sendGroups.has(n));
|
|
422
|
+
if (executableNodes.length === 0 && sendGroups.size === 0)
|
|
423
|
+
break;
|
|
424
|
+
// Emit debug node_start events before parallel execution
|
|
425
|
+
if (modeDebug) {
|
|
426
|
+
for (const nodeName of executableNodes) {
|
|
427
|
+
const name = nodeName;
|
|
428
|
+
if (!this.nodes.has(name))
|
|
429
|
+
throw new NodeNotFoundError(name);
|
|
430
|
+
// Static interrupt BEFORE (check before emitting start)
|
|
431
|
+
if (this.interruptConfig.interruptBefore?.includes(name))
|
|
432
|
+
throw new ONIInterrupt(name, "before", state);
|
|
433
|
+
this.checkDynamicInterrupt(name, "before", state, config);
|
|
434
|
+
yield tag(this.evt("node_start", {}, step, agentId, name), "debug");
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
// Execute all active nodes in parallel
|
|
438
|
+
const allCustomEvents = [];
|
|
439
|
+
const allMessageEvents = [];
|
|
440
|
+
const allSubgraphEvents = [];
|
|
441
|
+
const nodeWriters = new Map();
|
|
442
|
+
// Track the first HITL interrupt across all parallel nodes. We use
|
|
443
|
+
// allSettled (not Promise.all) so that when one node raises an interrupt,
|
|
444
|
+
// all other in-flight nodes complete before the interrupt is surfaced.
|
|
445
|
+
// This prevents orphaned background executions that would apply side
|
|
446
|
+
// effects without being checkpointed, causing double-application on resume.
|
|
447
|
+
let pendingInterrupt = null;
|
|
448
|
+
const allSettledResults = await Promise.allSettled(executableNodes.map(async (nodeName) => {
|
|
449
|
+
const name = nodeName;
|
|
450
|
+
const nodeDef = this.nodes.get(name);
|
|
306
451
|
if (!nodeDef)
|
|
307
|
-
throw new NodeNotFoundError(
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
452
|
+
throw new NodeNotFoundError(name);
|
|
453
|
+
// Static interrupt BEFORE (non-debug mode)
|
|
454
|
+
if (!modeDebug) {
|
|
455
|
+
if (this.interruptConfig.interruptBefore?.includes(name))
|
|
456
|
+
throw new ONIInterrupt(name, "before", state);
|
|
457
|
+
this.checkDynamicInterrupt(name, "before", state, config);
|
|
458
|
+
}
|
|
459
|
+
// Create a StreamWriter for this node
|
|
460
|
+
const messageId = `msg-${threadId}-${step}-${name}`;
|
|
461
|
+
const customEvents = [];
|
|
462
|
+
const messageEvents = [];
|
|
463
|
+
const writerImpl = new StreamWriterImpl((evt) => customEvents.push(evt), (_token) => { }, (evt) => messageEvents.push(evt), name, step, messageId, agentId);
|
|
464
|
+
nodeWriters.set(name, writerImpl);
|
|
465
|
+
// Check if this node has a pending resume value
|
|
466
|
+
const resumeValue = resumeMap[name];
|
|
467
|
+
const hasResume = name in resumeMap;
|
|
468
|
+
// Emit agent.start lifecycle event
|
|
469
|
+
const nodeStartTime = Date.now();
|
|
470
|
+
this.eventBus.emit({ type: "agent.start", agent: name, timestamp: nodeStartTime, step });
|
|
471
|
+
// Telemetry: node-level span
|
|
472
|
+
const nodeSpan = this.tracer.startNodeSpan(name, { threadId, step, agentId });
|
|
473
|
+
let result;
|
|
474
|
+
let subParentUpdates = [];
|
|
475
|
+
try {
|
|
476
|
+
// Scope emitToken to this node's async context via ALS — parallel nodes each
|
|
477
|
+
// get their own handler so tokens are never dropped or misrouted.
|
|
478
|
+
result = await _withTokenHandler((token) => writerImpl.token(token), async () => {
|
|
479
|
+
if (nodeDef.subgraph) {
|
|
480
|
+
const childRunner = nodeDef.subgraph._runner;
|
|
481
|
+
// Per-invocation key for concurrent-safe state isolation
|
|
482
|
+
const invocationKey = threadId;
|
|
483
|
+
if (childRunner) {
|
|
484
|
+
childRunner._subgraphRefCount++;
|
|
485
|
+
childRunner._perInvocationParentUpdates.set(invocationKey, []);
|
|
486
|
+
}
|
|
487
|
+
// Install a namespaced checkpointer per invocation instead of swapping a shared field
|
|
488
|
+
if (this.checkpointer && childRunner) {
|
|
489
|
+
childRunner._perInvocationCheckpointer.set(invocationKey, new NamespacedCheckpointer(this.checkpointer, name));
|
|
490
|
+
}
|
|
491
|
+
// Stream the subgraph — always clean up per-invocation state, even on throw/interrupt
|
|
492
|
+
let subFinalState;
|
|
493
|
+
try {
|
|
494
|
+
const childStreamMode = ["debug", "values"];
|
|
495
|
+
for await (const evt of nodeDef.subgraph.stream(state, {
|
|
496
|
+
...config,
|
|
497
|
+
// Pass the parent's effective threadId explicitly so the child's
|
|
498
|
+
// _perInvocationParentUpdates lookup at Command.PARENT time uses
|
|
499
|
+
// the same key that was registered in invocationKey above.
|
|
500
|
+
threadId: invocationKey,
|
|
501
|
+
parentRunId: config?.threadId,
|
|
502
|
+
streamMode: childStreamMode,
|
|
503
|
+
})) {
|
|
504
|
+
// Namespace-prefix the node name
|
|
505
|
+
allSubgraphEvents.push({
|
|
506
|
+
...evt,
|
|
507
|
+
node: evt.node ? `${name}:${evt.node}` : name,
|
|
508
|
+
});
|
|
509
|
+
// Track the last state_update as the final subgraph state
|
|
510
|
+
if (evt.event === "state_update") {
|
|
511
|
+
subFinalState = evt.data;
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
if (childRunner) {
|
|
515
|
+
subParentUpdates = childRunner._perInvocationParentUpdates.get(invocationKey) ?? [];
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
finally {
|
|
519
|
+
// Clean up per-invocation state — decrement ref count, remove Maps entries
|
|
520
|
+
if (childRunner) {
|
|
521
|
+
childRunner._subgraphRefCount--;
|
|
522
|
+
childRunner._perInvocationParentUpdates.delete(invocationKey);
|
|
523
|
+
childRunner._perInvocationCheckpointer.delete(invocationKey);
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
return subFinalState ?? {};
|
|
527
|
+
}
|
|
528
|
+
else {
|
|
529
|
+
return this.executeNode(nodeDef, state, config, resumeValue, hasResume, writerImpl, step, recursionLimit);
|
|
530
|
+
}
|
|
531
|
+
});
|
|
532
|
+
}
|
|
533
|
+
catch (err) {
|
|
534
|
+
// Catch interrupt() signals thrown from inside nodes
|
|
535
|
+
if (err instanceof NodeInterruptSignal) {
|
|
536
|
+
const iv = {
|
|
537
|
+
value: err.value,
|
|
538
|
+
node: name,
|
|
539
|
+
resumeId: err.resumeId,
|
|
540
|
+
timestamp: Date.now(),
|
|
541
|
+
};
|
|
542
|
+
const exc = new HITLInterruptException(threadId, iv, state);
|
|
543
|
+
// Claim the first-interrupt slot SYNCHRONOUSLY before any await.
|
|
544
|
+
// Both concurrent interrupt handlers check this flag before yielding,
|
|
545
|
+
// so whichever catch block runs first exclusively owns the checkpoint
|
|
546
|
+
// save. Without this guard, the last saveCheckpoint wins and its
|
|
547
|
+
// nextNodes diverge from pendingInterrupt's node, corrupting resume().
|
|
548
|
+
const isFirstInterrupt = !pendingInterrupt;
|
|
549
|
+
if (isFirstInterrupt)
|
|
550
|
+
pendingInterrupt = exc;
|
|
551
|
+
// Only save checkpoint for the first interrupt — the stored nextNodes
|
|
552
|
+
// must match pendingInterrupt's node so resume() restores correctly.
|
|
553
|
+
if (isFirstInterrupt) {
|
|
554
|
+
await this.saveCheckpoint(threadId, step, state, [name], pendingSends, agentId, config?.metadata);
|
|
555
|
+
// Record HITL session if checkpointer exists
|
|
556
|
+
if (effectiveCheckpointer) {
|
|
557
|
+
const cp = await effectiveCheckpointer.get(threadId);
|
|
558
|
+
if (cp)
|
|
559
|
+
this.hitlStore.record(threadId, iv, cp);
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
throw exc; // marks this node's settled result as rejected
|
|
563
|
+
}
|
|
564
|
+
// Record to DLQ before re-throwing — use original cause if wrapped
|
|
565
|
+
if (this.dlq && err instanceof Error) {
|
|
566
|
+
const dlqErr = (err instanceof NodeExecutionError && err.cause instanceof Error) ? err.cause : err;
|
|
567
|
+
this.dlq.record(threadId, name, state, dlqErr, nodeDef.retry?.maxAttempts ?? 1);
|
|
568
|
+
}
|
|
569
|
+
// Telemetry: record error on node span — use original cause if wrapped
|
|
570
|
+
if (err instanceof Error) {
|
|
571
|
+
const telErr = (err instanceof NodeExecutionError && err.cause instanceof Error) ? err.cause : err;
|
|
572
|
+
this.tracer.recordError(nodeSpan, telErr);
|
|
573
|
+
}
|
|
574
|
+
this.tracer.endSpan(nodeSpan);
|
|
575
|
+
// Lifecycle event: emit error for non-interrupt failures
|
|
576
|
+
if (err instanceof Error) {
|
|
577
|
+
this.eventBus.emit({ type: "error", agent: name, error: err, timestamp: Date.now() });
|
|
578
|
+
}
|
|
579
|
+
throw err;
|
|
580
|
+
}
|
|
581
|
+
// Telemetry: end node span
|
|
582
|
+
this.tracer.endSpan(nodeSpan);
|
|
583
|
+
// Emit agent.end lifecycle event
|
|
584
|
+
this.eventBus.emit({ type: "agent.end", agent: name, timestamp: Date.now(), step, duration: Date.now() - nodeStartTime });
|
|
585
|
+
// Collect events for yielding after parallel execution
|
|
586
|
+
allCustomEvents.push(...customEvents);
|
|
587
|
+
allMessageEvents.push(...messageEvents);
|
|
588
|
+
return { name, result, subParentUpdates };
|
|
589
|
+
}));
|
|
590
|
+
// Extract results now that all nodes have settled.
|
|
591
|
+
// Re-throw the first non-interrupt error (DLQ/telemetry already handled
|
|
592
|
+
// inside each node's catch block), then surface any HITL interrupt.
|
|
593
|
+
const nodeResults = [];
|
|
594
|
+
for (const settled of allSettledResults) {
|
|
595
|
+
if (settled.status === "fulfilled") {
|
|
596
|
+
nodeResults.push(settled.value);
|
|
597
|
+
}
|
|
598
|
+
else if (!(settled.reason instanceof HITLInterruptException)) {
|
|
599
|
+
throw settled.reason; // first non-interrupt error
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
if (pendingInterrupt)
|
|
603
|
+
throw pendingInterrupt;
|
|
604
|
+
// Yield buffered subgraph events — filtered by parent's active modes
|
|
605
|
+
for (const evt of allSubgraphEvents) {
|
|
606
|
+
const e = evt;
|
|
607
|
+
if (modeDebug) {
|
|
608
|
+
yield tag(e, "debug");
|
|
609
|
+
}
|
|
610
|
+
else if (modeUpdates && (e.event === "node_end")) {
|
|
611
|
+
yield tag(e, "updates");
|
|
612
|
+
}
|
|
613
|
+
else if (modeValues && e.event === "state_update") {
|
|
614
|
+
yield tag(e, "values");
|
|
615
|
+
}
|
|
616
|
+
// Custom and message events from subgraphs are forwarded if those modes are active
|
|
617
|
+
if (modeCustom && e.event === "custom") {
|
|
618
|
+
yield tag(e, "custom");
|
|
619
|
+
}
|
|
620
|
+
if (modeMessages && (e.event === "messages" || e.event === "messages/complete")) {
|
|
621
|
+
yield tag(e, "messages");
|
|
314
622
|
}
|
|
315
623
|
}
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
for (const { name, result } of
|
|
624
|
+
// Apply results
|
|
625
|
+
const stepWrites = [];
|
|
626
|
+
for (const { name, result, subParentUpdates: parentUpdates } of nodeResults) {
|
|
319
627
|
if (result instanceof Command) {
|
|
320
|
-
if (result.
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
628
|
+
if (result.graph === Command.PARENT) {
|
|
629
|
+
// Push update to parent — do NOT apply locally
|
|
630
|
+
if (!this._subgraphRefCount) {
|
|
631
|
+
throw new Error("Command.PARENT used but graph is not running as a subgraph");
|
|
632
|
+
}
|
|
633
|
+
if (result.update) {
|
|
634
|
+
const myParentUpdates = this._perInvocationParentUpdates.get(threadId);
|
|
635
|
+
if (myParentUpdates)
|
|
636
|
+
myParentUpdates.push(result.update);
|
|
637
|
+
}
|
|
638
|
+
// Still resolve next nodes normally
|
|
639
|
+
const { nodes, sends } = this.getNextNodes(name, state, config);
|
|
640
|
+
nextNodes.push(...nodes);
|
|
641
|
+
nextSends.push(...sends);
|
|
642
|
+
}
|
|
643
|
+
else {
|
|
644
|
+
if (result.update) {
|
|
645
|
+
state = this.applyUpdate(state, result.update);
|
|
646
|
+
if (Object.keys(result.update).length > 0) {
|
|
647
|
+
stepWrites.push({ nodeId: name, writes: result.update });
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
const gotos = result.goto
|
|
651
|
+
? (Array.isArray(result.goto) ? result.goto : [result.goto])
|
|
652
|
+
: this.getNextNodes(name, state, config).nodes;
|
|
653
|
+
nextNodes.push(...gotos);
|
|
654
|
+
if (result.send)
|
|
655
|
+
nextSends.push(...result.send.map((s) => ({ node: s.node, args: s.args })));
|
|
656
|
+
}
|
|
326
657
|
}
|
|
327
658
|
else if (result && typeof result === "object") {
|
|
328
659
|
state = this.applyUpdate(state, result);
|
|
660
|
+
const writes = result;
|
|
661
|
+
if (Object.keys(writes).length > 0) {
|
|
662
|
+
stepWrites.push({ nodeId: name, writes });
|
|
663
|
+
}
|
|
329
664
|
const { nodes, sends } = this.getNextNodes(name, state, config);
|
|
330
665
|
nextNodes.push(...nodes);
|
|
331
666
|
nextSends.push(...sends);
|
|
@@ -335,6 +670,10 @@ export class ONIPregelRunner {
|
|
|
335
670
|
nextNodes.push(...nodes);
|
|
336
671
|
nextSends.push(...sends);
|
|
337
672
|
}
|
|
673
|
+
// Apply parent updates from subgraph Command.PARENT (after normal result)
|
|
674
|
+
for (const pu of parentUpdates) {
|
|
675
|
+
state = this.applyUpdate(state, pu);
|
|
676
|
+
}
|
|
338
677
|
if (modeUpdates || modeDebug) {
|
|
339
678
|
const delta = result instanceof Command ? (result.update ?? {}) : (result ?? {});
|
|
340
679
|
if (modeUpdates)
|
|
@@ -342,286 +681,69 @@ export class ONIPregelRunner {
|
|
|
342
681
|
if (modeDebug)
|
|
343
682
|
yield tag(this.evt("node_end", delta, step, agentId, name), "debug");
|
|
344
683
|
}
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
const executableNodes = pendingNodes.filter((n) => n !== END && !sendGroups.has(n));
|
|
350
|
-
if (executableNodes.length === 0 && sendGroups.size === 0)
|
|
351
|
-
break;
|
|
352
|
-
if (step >= recursionLimit)
|
|
353
|
-
throw new RecursionLimitError(recursionLimit);
|
|
354
|
-
// Emit debug node_start events before parallel execution
|
|
355
|
-
if (modeDebug) {
|
|
356
|
-
for (const nodeName of executableNodes) {
|
|
357
|
-
const name = nodeName;
|
|
358
|
-
if (!this.nodes.has(name))
|
|
359
|
-
throw new NodeNotFoundError(name);
|
|
360
|
-
// Static interrupt BEFORE (check before emitting start)
|
|
361
|
-
if (this.interruptConfig.interruptBefore?.includes(name))
|
|
362
|
-
throw new ONIInterrupt(name, "before", state);
|
|
363
|
-
this.checkDynamicInterrupt(name, "before", state, config);
|
|
364
|
-
yield tag(this.evt("node_start", {}, step, agentId, name), "debug");
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
// Execute all active nodes in parallel
|
|
368
|
-
const allCustomEvents = [];
|
|
369
|
-
const allMessageEvents = [];
|
|
370
|
-
const allSubgraphEvents = [];
|
|
371
|
-
const nodeWriters = new Map();
|
|
372
|
-
const nodeResults = await Promise.all(executableNodes.map(async (nodeName) => {
|
|
373
|
-
const name = nodeName;
|
|
374
|
-
const nodeDef = this.nodes.get(name);
|
|
375
|
-
if (!nodeDef)
|
|
376
|
-
throw new NodeNotFoundError(name);
|
|
377
|
-
// Static interrupt BEFORE (non-debug mode)
|
|
378
|
-
if (!modeDebug) {
|
|
379
|
-
if (this.interruptConfig.interruptBefore?.includes(name))
|
|
380
|
-
throw new ONIInterrupt(name, "before", state);
|
|
381
|
-
this.checkDynamicInterrupt(name, "before", state, config);
|
|
382
|
-
}
|
|
383
|
-
// Create a StreamWriter for this node
|
|
384
|
-
const messageId = `msg-${threadId}-${step}-${name}`;
|
|
385
|
-
const customEvents = [];
|
|
386
|
-
const messageEvents = [];
|
|
387
|
-
const writerImpl = new StreamWriterImpl((evt) => customEvents.push(evt), (token) => { }, (evt) => messageEvents.push(evt), name, step, messageId, agentId);
|
|
388
|
-
nodeWriters.set(name, writerImpl);
|
|
389
|
-
// Install global emitToken handler so emitToken() routes to this node's writer
|
|
390
|
-
_installTokenHandler((token) => writerImpl.token(token));
|
|
391
|
-
// Check if this node has a pending resume value
|
|
392
|
-
const resumeValue = resumeMap[name];
|
|
393
|
-
const hasResume = name in resumeMap;
|
|
394
|
-
// Emit agent.start lifecycle event
|
|
395
|
-
const nodeStartTime = Date.now();
|
|
396
|
-
this.eventBus.emit({ type: "agent.start", agent: name, timestamp: nodeStartTime, step });
|
|
397
|
-
// Telemetry: node-level span
|
|
398
|
-
const nodeSpan = this.tracer.startNodeSpan(name, { threadId, step, agentId });
|
|
399
|
-
let result;
|
|
400
|
-
let subParentUpdates = [];
|
|
401
|
-
try {
|
|
402
|
-
if (nodeDef.subgraph) {
|
|
403
|
-
// Mark child runner as a subgraph so Command.PARENT works
|
|
404
|
-
const childRunner = nodeDef.subgraph._runner;
|
|
405
|
-
if (childRunner) {
|
|
406
|
-
childRunner._isSubgraph = true;
|
|
407
|
-
childRunner._parentUpdates = [];
|
|
408
|
-
}
|
|
409
|
-
// Namespace the subgraph's checkpointer for isolation
|
|
410
|
-
if (this.checkpointer && childRunner) {
|
|
411
|
-
childRunner.checkpointer = new NamespacedCheckpointer(this.checkpointer, name);
|
|
412
|
-
}
|
|
413
|
-
// Stream the subgraph instead of invoke — buffer events for yielding after Promise.all
|
|
414
|
-
// Use debug+values so we capture ALL event types AND state_update for final state
|
|
415
|
-
let subFinalState;
|
|
416
|
-
const childStreamMode = ["debug", "values"];
|
|
417
|
-
for await (const evt of nodeDef.subgraph.stream(state, {
|
|
418
|
-
...config,
|
|
419
|
-
parentRunId: config?.threadId,
|
|
420
|
-
streamMode: childStreamMode,
|
|
421
|
-
})) {
|
|
422
|
-
// Namespace-prefix the node name
|
|
423
|
-
allSubgraphEvents.push({
|
|
424
|
-
...evt,
|
|
425
|
-
node: evt.node ? `${name}:${evt.node}` : name,
|
|
426
|
-
});
|
|
427
|
-
// Track the last state_update as the final subgraph state
|
|
428
|
-
if (evt.event === "state_update") {
|
|
429
|
-
subFinalState = evt.data;
|
|
430
|
-
}
|
|
431
|
-
}
|
|
432
|
-
// Collect parent updates from child
|
|
433
|
-
if (childRunner) {
|
|
434
|
-
subParentUpdates = childRunner._parentUpdates;
|
|
435
|
-
childRunner._isSubgraph = false;
|
|
436
|
-
childRunner._parentUpdates = [];
|
|
437
|
-
}
|
|
438
|
-
result = subFinalState ?? {};
|
|
439
|
-
}
|
|
440
|
-
else {
|
|
441
|
-
result = await this.executeNode(nodeDef, state, config, resumeValue, hasResume, writerImpl, step, recursionLimit);
|
|
684
|
+
// Static interrupt AFTER
|
|
685
|
+
if (this.interruptConfig.interruptAfter?.includes(name)) {
|
|
686
|
+
await this.saveCheckpoint(threadId, step, state, nextNodes, nextSends, agentId, config?.metadata);
|
|
687
|
+
throw new ONIInterrupt(name, "after", state);
|
|
442
688
|
}
|
|
689
|
+
this.checkDynamicInterrupt(name, "after", state, config);
|
|
443
690
|
}
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
timestamp: Date.now(),
|
|
452
|
-
};
|
|
453
|
-
// Save checkpoint before surfacing interrupt
|
|
454
|
-
await this.saveCheckpoint(threadId, step, state, [name], pendingSends, agentId, config?.metadata);
|
|
455
|
-
// Record HITL session if checkpointer exists
|
|
456
|
-
if (this.checkpointer) {
|
|
457
|
-
const cp = await this.checkpointer.get(threadId);
|
|
458
|
-
if (cp)
|
|
459
|
-
this.hitlStore.record(threadId, iv, cp);
|
|
460
|
-
}
|
|
461
|
-
throw new HITLInterruptException(threadId, iv, state);
|
|
462
|
-
}
|
|
463
|
-
// Record to DLQ before re-throwing — use original cause if wrapped
|
|
464
|
-
if (this.dlq && err instanceof Error) {
|
|
465
|
-
const dlqErr = (err instanceof NodeExecutionError && err.cause instanceof Error) ? err.cause : err;
|
|
466
|
-
this.dlq.record(threadId, name, state, dlqErr, nodeDef.retry?.maxAttempts ?? 1);
|
|
467
|
-
}
|
|
468
|
-
// Telemetry: record error on node span — use original cause if wrapped
|
|
469
|
-
if (err instanceof Error) {
|
|
470
|
-
const telErr = (err instanceof NodeExecutionError && err.cause instanceof Error) ? err.cause : err;
|
|
471
|
-
this.tracer.recordError(nodeSpan, telErr);
|
|
691
|
+
// Yield buffered custom/message events based on stream mode
|
|
692
|
+
if (modeCustom || modeDebug) {
|
|
693
|
+
for (const evt of allCustomEvents) {
|
|
694
|
+
if (modeCustom)
|
|
695
|
+
yield tag(evt, "custom");
|
|
696
|
+
if (modeDebug)
|
|
697
|
+
yield tag(evt, "debug");
|
|
472
698
|
}
|
|
473
|
-
this.tracer.endSpan(nodeSpan);
|
|
474
|
-
_clearTokenHandler();
|
|
475
|
-
throw err;
|
|
476
|
-
}
|
|
477
|
-
// Clear emitToken handler after node completes
|
|
478
|
-
_clearTokenHandler();
|
|
479
|
-
// Telemetry: end node span
|
|
480
|
-
this.tracer.endSpan(nodeSpan);
|
|
481
|
-
// Emit agent.end lifecycle event
|
|
482
|
-
this.eventBus.emit({ type: "agent.end", agent: name, timestamp: Date.now(), step, duration: Date.now() - nodeStartTime });
|
|
483
|
-
// Collect events for yielding after parallel execution
|
|
484
|
-
allCustomEvents.push(...customEvents);
|
|
485
|
-
allMessageEvents.push(...messageEvents);
|
|
486
|
-
return { name, result, subParentUpdates };
|
|
487
|
-
}));
|
|
488
|
-
// Yield buffered subgraph events — filtered by parent's active modes
|
|
489
|
-
for (const evt of allSubgraphEvents) {
|
|
490
|
-
const e = evt;
|
|
491
|
-
if (modeDebug) {
|
|
492
|
-
yield tag(e, "debug");
|
|
493
|
-
}
|
|
494
|
-
else if (modeUpdates && (e.event === "node_end")) {
|
|
495
|
-
yield tag(e, "updates");
|
|
496
|
-
}
|
|
497
|
-
else if (modeValues && e.event === "state_update") {
|
|
498
|
-
yield tag(e, "values");
|
|
499
|
-
}
|
|
500
|
-
// Custom and message events from subgraphs are forwarded if those modes are active
|
|
501
|
-
if (modeCustom && e.event === "custom") {
|
|
502
|
-
yield tag(e, "custom");
|
|
503
699
|
}
|
|
504
|
-
if (modeMessages
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
for (const { name, result, subParentUpdates: parentUpdates } of nodeResults) {
|
|
511
|
-
if (result instanceof Command) {
|
|
512
|
-
if (result.graph === Command.PARENT) {
|
|
513
|
-
// Push update to parent — do NOT apply locally
|
|
514
|
-
if (!this._isSubgraph) {
|
|
515
|
-
throw new Error("Command.PARENT used but graph is not running as a subgraph");
|
|
516
|
-
}
|
|
517
|
-
if (result.update)
|
|
518
|
-
this._parentUpdates.push(result.update);
|
|
519
|
-
// Still resolve next nodes normally
|
|
520
|
-
const { nodes, sends } = this.getNextNodes(name, state, config);
|
|
521
|
-
nextNodes.push(...nodes);
|
|
522
|
-
nextSends.push(...sends);
|
|
700
|
+
if (modeMessages || modeDebug) {
|
|
701
|
+
for (const evt of allMessageEvents) {
|
|
702
|
+
if (modeMessages)
|
|
703
|
+
yield tag(evt, "messages");
|
|
704
|
+
if (modeDebug)
|
|
705
|
+
yield tag(evt, "debug");
|
|
523
706
|
}
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
707
|
+
// Emit messages/complete for each node that produced tokens
|
|
708
|
+
for (const [, writer] of nodeWriters) {
|
|
709
|
+
const complete = writer._complete();
|
|
710
|
+
if (complete) {
|
|
711
|
+
if (modeMessages)
|
|
712
|
+
yield tag(complete, "messages");
|
|
713
|
+
if (modeDebug)
|
|
714
|
+
yield tag(complete, "debug");
|
|
530
715
|
}
|
|
531
|
-
const gotos = result.goto
|
|
532
|
-
? (Array.isArray(result.goto) ? result.goto : [result.goto])
|
|
533
|
-
: this.getNextNodes(name, state, config).nodes;
|
|
534
|
-
nextNodes.push(...gotos);
|
|
535
|
-
if (result.send)
|
|
536
|
-
nextSends.push(...result.send.map((s) => ({ node: s.node, args: s.args })));
|
|
537
716
|
}
|
|
538
717
|
}
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
}
|
|
545
|
-
const { nodes, sends } = this.getNextNodes(name, state, config);
|
|
546
|
-
nextNodes.push(...nodes);
|
|
547
|
-
nextSends.push(...sends);
|
|
718
|
+
if (modeValues)
|
|
719
|
+
yield tag(this.evt("state_update", state, step, agentId), "values");
|
|
720
|
+
// Deduplicate nextNodes — avoid Set+spread when no dupes (common case)
|
|
721
|
+
if (nextNodes.length <= 1) {
|
|
722
|
+
pendingNodes = nextNodes;
|
|
548
723
|
}
|
|
549
724
|
else {
|
|
550
|
-
const
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
if (modeUpdates || modeDebug) {
|
|
559
|
-
const delta = result instanceof Command ? (result.update ?? {}) : (result ?? {});
|
|
560
|
-
if (modeUpdates)
|
|
561
|
-
yield tag(this.evt("node_end", delta, step, agentId, name), "updates");
|
|
562
|
-
if (modeDebug)
|
|
563
|
-
yield tag(this.evt("node_end", delta, step, agentId, name), "debug");
|
|
564
|
-
}
|
|
565
|
-
// Static interrupt AFTER
|
|
566
|
-
if (this.interruptConfig.interruptAfter?.includes(name)) {
|
|
567
|
-
await this.saveCheckpoint(threadId, step, state, nextNodes, nextSends, agentId, config?.metadata);
|
|
568
|
-
throw new ONIInterrupt(name, "after", state);
|
|
569
|
-
}
|
|
570
|
-
this.checkDynamicInterrupt(name, "after", state, config);
|
|
571
|
-
}
|
|
572
|
-
// Yield buffered custom/message events based on stream mode
|
|
573
|
-
if (modeCustom || modeDebug) {
|
|
574
|
-
for (const evt of allCustomEvents) {
|
|
575
|
-
if (modeCustom)
|
|
576
|
-
yield tag(evt, "custom");
|
|
577
|
-
if (modeDebug)
|
|
578
|
-
yield tag(evt, "debug");
|
|
579
|
-
}
|
|
580
|
-
}
|
|
581
|
-
if (modeMessages || modeDebug) {
|
|
582
|
-
for (const evt of allMessageEvents) {
|
|
583
|
-
if (modeMessages)
|
|
584
|
-
yield tag(evt, "messages");
|
|
585
|
-
if (modeDebug)
|
|
586
|
-
yield tag(evt, "debug");
|
|
587
|
-
}
|
|
588
|
-
// Emit messages/complete for each node that produced tokens
|
|
589
|
-
for (const [, writer] of nodeWriters) {
|
|
590
|
-
const complete = writer._complete();
|
|
591
|
-
if (complete) {
|
|
592
|
-
if (modeMessages)
|
|
593
|
-
yield tag(complete, "messages");
|
|
594
|
-
if (modeDebug)
|
|
595
|
-
yield tag(complete, "debug");
|
|
725
|
+
const seen = new Set();
|
|
726
|
+
pendingNodes = [];
|
|
727
|
+
for (const n of nextNodes) {
|
|
728
|
+
const key = n;
|
|
729
|
+
if (!seen.has(key)) {
|
|
730
|
+
seen.add(key);
|
|
731
|
+
pendingNodes.push(n);
|
|
732
|
+
}
|
|
596
733
|
}
|
|
597
734
|
}
|
|
735
|
+
pendingSends = nextSends;
|
|
736
|
+
step++;
|
|
737
|
+
await this.saveCheckpoint(threadId, step, state, pendingNodes, pendingSends, agentId, config?.metadata, stepWrites);
|
|
598
738
|
}
|
|
599
739
|
if (modeValues)
|
|
600
740
|
yield tag(this.evt("state_update", state, step, agentId), "values");
|
|
601
|
-
// Deduplicate nextNodes — avoid Set+spread when no dupes (common case)
|
|
602
|
-
if (nextNodes.length <= 1) {
|
|
603
|
-
pendingNodes = nextNodes;
|
|
604
|
-
}
|
|
605
|
-
else {
|
|
606
|
-
const seen = new Set();
|
|
607
|
-
pendingNodes = [];
|
|
608
|
-
for (const n of nextNodes) {
|
|
609
|
-
const key = n;
|
|
610
|
-
if (!seen.has(key)) {
|
|
611
|
-
seen.add(key);
|
|
612
|
-
pendingNodes.push(n);
|
|
613
|
-
}
|
|
614
|
-
}
|
|
615
|
-
}
|
|
616
|
-
pendingSends = nextSends;
|
|
617
|
-
step++;
|
|
618
|
-
await this.saveCheckpoint(threadId, step, state, pendingNodes, pendingSends, agentId, config?.metadata, stepWrites);
|
|
619
741
|
}
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
742
|
+
finally {
|
|
743
|
+
// Telemetry: end graph span — always runs, even on error or interrupt
|
|
744
|
+
graphSpan.setAttribute("oni.steps", step);
|
|
745
|
+
this.tracer.endSpan(graphSpan);
|
|
746
|
+
}
|
|
625
747
|
}
|
|
626
748
|
// ----------------------------------------------------------------
|
|
627
749
|
// Public API
|
|
@@ -699,9 +821,6 @@ export class ONIPregelRunner {
|
|
|
699
821
|
cb = new CircuitBreaker({
|
|
700
822
|
threshold: nodeDef.circuitBreaker.threshold,
|
|
701
823
|
resetAfter: nodeDef.circuitBreaker.resetAfter,
|
|
702
|
-
fallback: nodeDef.circuitBreaker.fallback
|
|
703
|
-
? () => nodeDef.circuitBreaker.fallback(undefined, undefined)
|
|
704
|
-
: undefined,
|
|
705
824
|
}, nodeDef.name);
|
|
706
825
|
this.circuitBreakers.set(nodeDef.name, cb);
|
|
707
826
|
}
|
|
@@ -713,16 +832,21 @@ export class ONIPregelRunner {
|
|
|
713
832
|
}
|
|
714
833
|
// ---- Helpers ----
|
|
715
834
|
async saveCheckpoint(threadId, step, state, nextNodes, pendingSends, agentId, metadata, pendingWrites) {
|
|
716
|
-
|
|
835
|
+
const cp = (this._perInvocationCheckpointer.get(threadId) ?? this.checkpointer);
|
|
836
|
+
if (!cp)
|
|
717
837
|
return;
|
|
718
838
|
const cpSpan = this.tracer.startCheckpointSpan("put", { threadId });
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
839
|
+
try {
|
|
840
|
+
await cp.put({
|
|
841
|
+
threadId, step, state, agentId, metadata, pendingWrites,
|
|
842
|
+
nextNodes: nextNodes.map(String),
|
|
843
|
+
pendingSends: pendingSends,
|
|
844
|
+
timestamp: Date.now(),
|
|
845
|
+
});
|
|
846
|
+
}
|
|
847
|
+
finally {
|
|
848
|
+
this.tracer.endSpan(cpSpan);
|
|
849
|
+
}
|
|
726
850
|
}
|
|
727
851
|
evt(event, data, step, agentId, node) {
|
|
728
852
|
return { event, data, step, timestamp: Date.now(), agentId, node };
|