@tangle-network/agent-runtime 0.37.0 → 0.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +3 -3
- package/dist/analyst-loop.d.ts +2 -2
- package/dist/analyst-loop.js +3 -257
- package/dist/analyst-loop.js.map +1 -1
- package/dist/chunk-VOX6Z3II.js +90 -0
- package/dist/chunk-VOX6Z3II.js.map +1 -0
- package/dist/chunk-XBUG326M.js +261 -0
- package/dist/chunk-XBUG326M.js.map +1 -0
- package/dist/{chunk-T3GJBKHA.js → chunk-Z523NPJK.js} +58 -1
- package/dist/chunk-Z523NPJK.js.map +1 -0
- package/dist/dynamic-DeOPeeAw.d.ts +106 -0
- package/dist/{improvement-adapter-CaZxFxTd.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
- package/dist/improvement.d.ts +6 -130
- package/dist/improvement.js +4 -85
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +67 -5
- package/dist/index.js +61 -2
- package/dist/index.js.map +1 -1
- package/dist/loops.d.ts +5 -106
- package/dist/mcp/index.d.ts +4 -79
- package/dist/mcp/index.js +2 -57
- package/dist/mcp/index.js.map +1 -1
- package/dist/optimize-prompt-cmH9wZdH.d.ts +129 -0
- package/dist/{otel-export-DgFMwsVy.d.ts → otel-export-CNmeg_7B.d.ts} +77 -2
- package/dist/profiles.d.ts +1 -1
- package/dist/{types-CmTjKLyB.d.ts → types-CmkQl8qE.d.ts} +1 -1
- package/dist/{types-D_MXrmJP.d.ts → types-p8dWBIXL.d.ts} +1 -1
- package/package.json +1 -1
- package/dist/chunk-T3GJBKHA.js.map +0 -1
package/dist/loops.d.ts
CHANGED
|
@@ -1,115 +1,14 @@
|
|
|
1
1
|
import { AgentProfile, SandboxEvent } from '@tangle-network/sandbox';
|
|
2
2
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
3
|
-
import {
|
|
4
|
-
export {
|
|
3
|
+
import { P as PlannerContext, T as TopologyPlanner } from './dynamic-DeOPeeAw.js';
|
|
4
|
+
export { C as CreateDynamicDriverOptions, D as DynamicDecision, a as TopologyMove, c as createDynamicDriver, s as summarizeHistory } from './dynamic-DeOPeeAw.js';
|
|
5
|
+
import { D as Driver, I as Iteration, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, a as LoopWinner, b as LoopResult } from './types-CmkQl8qE.js';
|
|
6
|
+
export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as LoopPlanDescription, i as LoopPlanPayload, j as LoopSandboxPlacement, k as LoopStartedPayload, l as LoopTokenUsage, m as LoopTraceEmitter, n as LoopTraceEvent, o as ValidationCtx } from './types-CmkQl8qE.js';
|
|
5
7
|
import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
|
|
6
8
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
7
9
|
import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
8
10
|
import './types-CsCCryln.js';
|
|
9
11
|
|
|
10
|
-
/**
|
|
11
|
-
* @experimental
|
|
12
|
-
*
|
|
13
|
-
* Dynamic driver — the agent authors the loop topology at runtime.
|
|
14
|
-
*
|
|
15
|
-
* Where `refine` and `fanout-vote` encode a fixed shape as a pure function of
|
|
16
|
-
* history, this driver delegates the per-round shape to an injected
|
|
17
|
-
* `TopologyPlanner`. Each round the planner inspects the task + iteration
|
|
18
|
-
* history and emits one `TopologyMove`:
|
|
19
|
-
* - `refine` → one task next round (optionally rewritten from the prior attempt)
|
|
20
|
-
* - `fanout` → N tasks next round (the kernel round-robins `agentRuns`, so a
|
|
21
|
-
* 2-harness fanout dispatches branch 0 to harness A and branch 1 to harness B)
|
|
22
|
-
* - `stop` → terminate; the kernel selects the winner across all iterations
|
|
23
|
-
*
|
|
24
|
-
* The planner is the brain; this driver is the structure. It maps moves onto
|
|
25
|
-
* the kernel's `plan`/`decide` contract, enforces the iteration + fanout caps,
|
|
26
|
-
* and fails loud on a malformed move. The planner is injected exactly like
|
|
27
|
-
* `refine`'s `refineTask` and `fanout-vote`'s `selector` — so a test can drive
|
|
28
|
-
* a deterministic policy through the real kernel, and production can wire it to
|
|
29
|
-
* an LLM via `createSandboxPlanner`.
|
|
30
|
-
*
|
|
31
|
-
* Topology is orthogonal to harness: the planner never names a backend. Which
|
|
32
|
-
* harness runs a branch is decided by the `AgentRunSpec` the kernel round-robins
|
|
33
|
-
* to, so one dynamic driver works across claude-code, codex, opencode, pi —
|
|
34
|
-
* including fanning a single round across several at once.
|
|
35
|
-
*/
|
|
36
|
-
|
|
37
|
-
/** Terminal once `decide` returns `'done'` (a kernel terminal decision). */
|
|
38
|
-
type DynamicDecision = 'continue' | 'done';
|
|
39
|
-
/**
|
|
40
|
-
* One topology decision for the next round. `fanout` carries explicit tasks
|
|
41
|
-
* rather than a count so the planner can issue heterogeneous branches (a
|
|
42
|
-
* different sub-task per harness); pass N copies of one task for a homogeneous
|
|
43
|
-
* fanout that relies on `agentRuns` diversity instead.
|
|
44
|
-
*
|
|
45
|
-
* @experimental
|
|
46
|
-
*/
|
|
47
|
-
type TopologyMove<Task> = {
|
|
48
|
-
kind: 'refine';
|
|
49
|
-
task: Task;
|
|
50
|
-
rationale?: string;
|
|
51
|
-
} | {
|
|
52
|
-
kind: 'fanout';
|
|
53
|
-
tasks: Task[];
|
|
54
|
-
rationale?: string;
|
|
55
|
-
} | {
|
|
56
|
-
kind: 'stop';
|
|
57
|
-
rationale?: string;
|
|
58
|
-
};
|
|
59
|
-
/** @experimental */
|
|
60
|
-
interface PlannerContext<Task, Output> {
|
|
61
|
-
/** The root task the loop was invoked with — stable across rounds. */
|
|
62
|
-
task: Task;
|
|
63
|
-
/** Every iteration so far, in dispatch order, with outputs + verdicts. */
|
|
64
|
-
history: ReadonlyArray<Iteration<Task, Output>>;
|
|
65
|
-
/** `history.length` — iterations already spent. */
|
|
66
|
-
iterationsSpent: number;
|
|
67
|
-
/** Iterations left before the driver's `maxIterations` cap forces a stop. */
|
|
68
|
-
iterationsRemaining: number;
|
|
69
|
-
}
|
|
70
|
-
/**
|
|
71
|
-
* Chooses the next topology move from the task + history. Sync or async; an
|
|
72
|
-
* async planner is where an LLM call goes (see `createSandboxPlanner`).
|
|
73
|
-
*
|
|
74
|
-
* @experimental
|
|
75
|
-
*/
|
|
76
|
-
type TopologyPlanner<Task, Output> = (ctx: PlannerContext<Task, Output>) => TopologyMove<Task> | Promise<TopologyMove<Task>>;
|
|
77
|
-
/** @experimental */
|
|
78
|
-
interface CreateDynamicDriverOptions<Task, Output> {
|
|
79
|
-
/** The agent-authored topology policy. Invoked once per round in `plan`. */
|
|
80
|
-
planner: TopologyPlanner<Task, Output>;
|
|
81
|
-
/**
|
|
82
|
-
* Hard safety cap on total iterations. When reached, the driver stops before
|
|
83
|
-
* consulting the planner. Default 8. Set the kernel's `runLoop`
|
|
84
|
-
* `maxIterations >= ` this so the driver's cap governs and the loop closes on
|
|
85
|
-
* a clean `'done'` rather than a truncated `'continue'`.
|
|
86
|
-
*/
|
|
87
|
-
maxIterations?: number;
|
|
88
|
-
/** Max branches a single `fanout` move may dispatch. Default 4. */
|
|
89
|
-
maxFanout?: number;
|
|
90
|
-
/** Stable identifier surfaced in trace events. Default `'dynamic'`. */
|
|
91
|
-
name?: string;
|
|
92
|
-
}
|
|
93
|
-
/** @experimental */
|
|
94
|
-
declare function createDynamicDriver<Task, Output>(options: CreateDynamicDriverOptions<Task, Output>): Driver<Task, Output, DynamicDecision>;
|
|
95
|
-
/**
|
|
96
|
-
* Compact, planner-friendly view of iteration history — what an LLM planner
|
|
97
|
-
* needs to choose the next move without the raw event streams. Output is
|
|
98
|
-
* truncated so a long run's prompt stays bounded.
|
|
99
|
-
*
|
|
100
|
-
* @experimental
|
|
101
|
-
*/
|
|
102
|
-
declare function summarizeHistory<Task, Output>(history: ReadonlyArray<Iteration<Task, Output>>, opts?: {
|
|
103
|
-
maxOutputChars?: number;
|
|
104
|
-
}): Array<{
|
|
105
|
-
index: number;
|
|
106
|
-
agentRunName: string;
|
|
107
|
-
valid?: boolean;
|
|
108
|
-
score?: number;
|
|
109
|
-
error?: string;
|
|
110
|
-
output?: string;
|
|
111
|
-
}>;
|
|
112
|
-
|
|
113
12
|
/**
|
|
114
13
|
* @experimental
|
|
115
14
|
*
|
|
@@ -413,4 +312,4 @@ interface UsageSink {
|
|
|
413
312
|
*/
|
|
414
313
|
declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
|
|
415
314
|
|
|
416
|
-
export { AgentRunSpec, type
|
|
315
|
+
export { AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, Validator, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, loopCampaignDispatch, loopDispatch, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations };
|
package/dist/mcp/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { L as LoopSandboxClient, j as LoopSandboxPlacement, m as LoopTraceEmitter } from '../types-
|
|
2
|
-
import { F as FleetHandle, D as DelegationExecutor, a as DelegateFeedbackArgs, b as DelegationFeedbackSnapshot, c as DelegationProfile, d as DelegateCodeArgs, e as DelegateResearchArgs, f as DelegationStatus, g as DelegationProgress, h as DelegationResultPayload, i as DelegationError, j as DelegationStatusResult, k as DelegationHistoryArgs, l as DelegationHistoryEntry, C as CoderDelegate, R as ResearcherDelegate, m as DelegateCodeResult, n as DelegateFeedbackResult, o as ResearchSource, p as DelegateResearchResult, q as DelegationHistoryResult, r as DelegationStatusArgs, O as OtelExporter } from '../otel-export-
|
|
3
|
-
export { s as CoderReview, t as CoderReviewer, u as CoderWinnerSelection, v as CreateDefaultCoderDelegateOptions, w as
|
|
1
|
+
import { L as LoopSandboxClient, j as LoopSandboxPlacement, m as LoopTraceEmitter } from '../types-CmkQl8qE.js';
|
|
2
|
+
import { F as FleetHandle, D as DelegationExecutor, a as DelegateFeedbackArgs, b as DelegationFeedbackSnapshot, c as DelegationProfile, d as DelegateCodeArgs, e as DelegateResearchArgs, f as DelegationStatus, g as DelegationProgress, h as DelegationResultPayload, i as DelegationError, j as DelegationStatusResult, k as DelegationHistoryArgs, l as DelegationHistoryEntry, C as CoderDelegate, R as ResearcherDelegate, m as DelegateCodeResult, n as DelegateFeedbackResult, o as ResearchSource, p as DelegateResearchResult, q as DelegationHistoryResult, r as DelegationStatusArgs, O as OtelExporter } from '../otel-export-CNmeg_7B.js';
|
|
3
|
+
export { s as CoderReview, t as CoderReviewer, u as CoderWinnerSelection, v as CreateDefaultCoderDelegateOptions, w as CreateKbGateOptions, x as DelegateCodeConfig, y as DelegateResearchConfig, z as DelegateRunCtx, A as FactCandidate, B as FactJudge, E as FactJudgeVerdict, G as FeedbackRating, H as FeedbackRefersTo, I as FleetWorkspaceExecutorOptions, K as KbGateResult, J as ResearchOutputShape, S as SiblingSandboxExecutorOptions, L as createDefaultCoderDelegate, M as createFleetWorkspaceExecutor, N as createKbGate, P as createSiblingSandboxExecutor, Q as mcpToolsForRuntimeMcp, T as mcpToolsForRuntimeMcpSubset } from '../otel-export-CNmeg_7B.js';
|
|
4
4
|
import { L as LocalHarness, r as runLocalHarness } from '../local-harness-KrdFTY5R.js';
|
|
5
5
|
export { a as LocalHarnessResult, R as RunLocalHarnessOptions } from '../local-harness-KrdFTY5R.js';
|
|
6
6
|
import '@tangle-network/agent-eval';
|
|
@@ -285,81 +285,6 @@ interface InProcessExecutorDescribePlacement extends LoopSandboxPlacement {
|
|
|
285
285
|
*/
|
|
286
286
|
declare function createInProcessExecutor(options: InProcessExecutorOptions): DelegationExecutor;
|
|
287
287
|
|
|
288
|
-
/**
|
|
289
|
-
* @experimental
|
|
290
|
-
*
|
|
291
|
-
* `createKbGate` — the valid-only knowledge-base growth gate, distilled from
|
|
292
|
-
* physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
|
|
293
|
-
* writer) runs candidate facts through this before persisting, so the KB grows
|
|
294
|
-
* with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
|
|
295
|
-
* vetoed at the gate.
|
|
296
|
-
*
|
|
297
|
-
* Fail-closed by construction: every judge must `accept`; the FIRST veto wins
|
|
298
|
-
* and the fact is rejected. The non-negotiable floor (always on, can't be
|
|
299
|
-
* disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
|
|
300
|
-
* literally appear in its `sourceText`. That single check kills the dominant
|
|
301
|
-
* failure mode (a confident claim decoupled from any real source).
|
|
302
|
-
*
|
|
303
|
-
* Pure + dependency-free: it operates on fact candidates, not on a store, so it
|
|
304
|
-
* composes with `@tangle-network/agent-knowledge` or any persistence layer
|
|
305
|
-
* without importing it. The remediation policy (correct-on-veto vs
|
|
306
|
-
* escalate-as-unverified) is the caller's — this returns the verdict; it never
|
|
307
|
-
* drops a fact silently.
|
|
308
|
-
*/
|
|
309
|
-
/** @experimental A fact proposed for the KB, with its grounding. */
|
|
310
|
-
interface FactCandidate {
|
|
311
|
-
/** The atomic claim text. */
|
|
312
|
-
claim: string;
|
|
313
|
-
/** Optional extracted value (number or string) the claim asserts. */
|
|
314
|
-
value?: string | number;
|
|
315
|
-
/** Verbatim span lifted from the source that backs the claim. */
|
|
316
|
-
verbatimPassage: string;
|
|
317
|
-
/** The raw source text the passage must be grounded in. */
|
|
318
|
-
sourceText: string;
|
|
319
|
-
/** Where the fact claims to come from — checked for circular/self citations. */
|
|
320
|
-
citation?: string;
|
|
321
|
-
}
|
|
322
|
-
/** @experimental */
|
|
323
|
-
interface FactJudgeVerdict {
|
|
324
|
-
accept: boolean;
|
|
325
|
-
reason?: string;
|
|
326
|
-
}
|
|
327
|
-
/** @experimental A pluggable fact validator. Throw is NOT allowed — return a
|
|
328
|
-
* verdict; a thrown judge is a programmer error, not a veto. */
|
|
329
|
-
interface FactJudge {
|
|
330
|
-
name: string;
|
|
331
|
-
judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>;
|
|
332
|
-
}
|
|
333
|
-
/** @experimental */
|
|
334
|
-
interface KbGateResult {
|
|
335
|
-
accepted: boolean;
|
|
336
|
-
/** Name of the judge that vetoed; undefined when accepted. */
|
|
337
|
-
vetoedBy?: string;
|
|
338
|
-
reason?: string;
|
|
339
|
-
}
|
|
340
|
-
/** @experimental */
|
|
341
|
-
interface CreateKbGateOptions {
|
|
342
|
-
/** Extra judges appended after the built-in floor (e.g. an LLM judge). */
|
|
343
|
-
judges?: FactJudge[];
|
|
344
|
-
/** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
|
|
345
|
-
minPassageChars?: number;
|
|
346
|
-
/**
|
|
347
|
-
* Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
|
|
348
|
-
* `'cad_params'`, `'requirements'`). A citation naming one is circular
|
|
349
|
-
* (laundering) — the fact cites a derived artifact, not a real source.
|
|
350
|
-
* Default `[]` (no circular check unless the consumer declares its kinds).
|
|
351
|
-
*/
|
|
352
|
-
selfArtifactKinds?: string[];
|
|
353
|
-
}
|
|
354
|
-
/**
|
|
355
|
-
* @experimental
|
|
356
|
-
*
|
|
357
|
-
* Build a fail-closed KB gate. The returned function runs the built-in floor
|
|
358
|
-
* (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
|
|
359
|
-
* then any consumer judges, returning on the first veto.
|
|
360
|
-
*/
|
|
361
|
-
declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
|
|
362
|
-
|
|
363
288
|
/**
|
|
364
289
|
* @experimental
|
|
365
290
|
*
|
|
@@ -947,4 +872,4 @@ declare function createPropagatingTraceEmitter(ctx: TraceContext): {
|
|
|
947
872
|
*/
|
|
948
873
|
declare function traceContextToEnv(ctx: TraceContext): Record<string, string>;
|
|
949
874
|
|
|
950
|
-
export { CoderDelegate, type
|
|
875
|
+
export { CoderDelegate, type CreateWorktreeOptions, DELEGATE_CODE_DESCRIPTION, DELEGATE_CODE_INPUT_SCHEMA, DELEGATE_CODE_TOOL_NAME, DELEGATE_FEEDBACK_DESCRIPTION, DELEGATE_FEEDBACK_INPUT_SCHEMA, DELEGATE_FEEDBACK_TOOL_NAME, DELEGATE_RESEARCH_DESCRIPTION, DELEGATE_RESEARCH_INPUT_SCHEMA, DELEGATE_RESEARCH_TOOL_NAME, DELEGATION_HISTORY_DESCRIPTION, DELEGATION_HISTORY_INPUT_SCHEMA, DELEGATION_HISTORY_TOOL_NAME, DELEGATION_STATUS_DESCRIPTION, DELEGATION_STATUS_INPUT_SCHEMA, DELEGATION_STATUS_TOOL_NAME, DelegateCodeArgs, DelegateCodeResult, DelegateFeedbackArgs, DelegateFeedbackResult, DelegateResearchArgs, DelegateResearchResult, DelegationError, DelegationExecutor, DelegationFeedbackSnapshot, DelegationHistoryArgs, DelegationHistoryEntry, DelegationHistoryResult, DelegationProfile, DelegationProgress, type DelegationRecord, DelegationResultPayload, DelegationStatus, DelegationStatusArgs, DelegationStatusResult, DelegationTaskQueue, type DelegationTaskQueueOptions, type DetectExecutorArgs, type DiffOptions, type DiffResult, type FeedbackEvent, type FeedbackStore, FleetHandle, type GitRunner, InMemoryFeedbackStore, type InProcessExecutorDescribePlacement, type InProcessExecutorOptions, type JsonRpcMessage, type JsonRpcResponse, LocalHarness, type McpServer, type McpServerOptions, type McpToolDescriptor, type McpTransport, type RemoveWorktreeOptions, ResearchSource, ResearcherDelegate, type SubmitInput, type SubmitOutput, type TraceContext, type WorktreeHandle, captureWorktreeDiff, createDelegateCodeHandler, createDelegateFeedbackHandler, createDelegateResearchHandler, createDelegationHistoryHandler, createDelegationStatusHandler, createInProcessExecutor, createInProcessTransport, createMcpServer, createPropagatingTraceEmitter, createWorktree, detectExecutor, eventToSnapshot, hashIdempotencyInput, readTraceContextFromEnv, removeWorktree, runLocalHarness, traceContextToEnv, validateDelegateCodeArgs, validateDelegateFeedbackArgs, validateDelegateResearchArgs, 
validateDelegationHistoryArgs, validateDelegationStatusArgs };
|
package/dist/mcp/index.js
CHANGED
|
@@ -9,10 +9,11 @@ import {
|
|
|
9
9
|
} from "../chunk-M65QJD35.js";
|
|
10
10
|
import {
|
|
11
11
|
buildLoopOtelSpans,
|
|
12
|
+
createKbGate,
|
|
12
13
|
createOtelExporter,
|
|
13
14
|
mcpToolsForRuntimeMcp,
|
|
14
15
|
mcpToolsForRuntimeMcpSubset
|
|
15
|
-
} from "../chunk-
|
|
16
|
+
} from "../chunk-Z523NPJK.js";
|
|
16
17
|
import {
|
|
17
18
|
DELEGATE_CODE_DESCRIPTION,
|
|
18
19
|
DELEGATE_CODE_INPUT_SCHEMA,
|
|
@@ -56,62 +57,6 @@ import "../chunk-PY6NMZYX.js";
|
|
|
56
57
|
import "../chunk-SQSCRJ7U.js";
|
|
57
58
|
import "../chunk-DGUM43GV.js";
|
|
58
59
|
|
|
59
|
-
// src/mcp/kb-gate.ts
|
|
60
|
-
var norm = (s) => s.toLowerCase().replace(/\s+/g, " ").trim();
|
|
61
|
-
function valueAppears(value, passageNorm) {
|
|
62
|
-
if (passageNorm.includes(norm(String(value)))) return true;
|
|
63
|
-
if (typeof value !== "number" || !Number.isFinite(value)) return false;
|
|
64
|
-
const forms = [value.toLocaleString("en-US")];
|
|
65
|
-
if (Math.abs(value) >= 1e9) forms.push(`${trimZero(value / 1e9)} billion`);
|
|
66
|
-
if (Math.abs(value) >= 1e6) forms.push(`${trimZero(value / 1e6)} million`);
|
|
67
|
-
return forms.some((f) => passageNorm.includes(norm(f)));
|
|
68
|
-
}
|
|
69
|
-
function trimZero(n) {
|
|
70
|
-
return Number.isInteger(n) ? String(n) : String(Number(n.toFixed(2)));
|
|
71
|
-
}
|
|
72
|
-
function builtinJudges(minPassageChars, selfArtifactKinds) {
|
|
73
|
-
const kinds = selfArtifactKinds.map((k) => k.toLowerCase());
|
|
74
|
-
return [
|
|
75
|
-
{
|
|
76
|
-
name: "passage-non-empty",
|
|
77
|
-
judge: (c) => c.verbatimPassage.trim().length >= minPassageChars ? { accept: true } : { accept: false, reason: `passage shorter than ${minPassageChars} chars` }
|
|
78
|
-
},
|
|
79
|
-
{
|
|
80
|
-
// THE anti-hallucination floor — the passage must literally be in the source.
|
|
81
|
-
name: "passage-present",
|
|
82
|
-
judge: (c) => norm(c.sourceText).includes(norm(c.verbatimPassage)) ? { accept: true } : { accept: false, reason: "verbatim passage not found in source (unbacked fact)" }
|
|
83
|
-
},
|
|
84
|
-
{
|
|
85
|
-
name: "value-in-passage",
|
|
86
|
-
judge: (c) => c.value === void 0 || valueAppears(c.value, norm(c.verbatimPassage)) ? { accept: true } : { accept: false, reason: `value ${JSON.stringify(c.value)} not present in passage` }
|
|
87
|
-
},
|
|
88
|
-
{
|
|
89
|
-
name: "no-circular-citation",
|
|
90
|
-
judge: (c) => {
|
|
91
|
-
if (!c.citation || kinds.length === 0) return { accept: true };
|
|
92
|
-
const cite = c.citation.toLowerCase();
|
|
93
|
-
const hit = kinds.find((k) => cite.includes(k));
|
|
94
|
-
return hit ? { accept: false, reason: `circular citation to self-generated artifact "${hit}"` } : { accept: true };
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
];
|
|
98
|
-
}
|
|
99
|
-
function createKbGate(options = {}) {
|
|
100
|
-
const judges = [
|
|
101
|
-
...builtinJudges(options.minPassageChars ?? 12, options.selfArtifactKinds ?? []),
|
|
102
|
-
...options.judges ?? []
|
|
103
|
-
];
|
|
104
|
-
return async (candidate) => {
|
|
105
|
-
for (const j of judges) {
|
|
106
|
-
const verdict = await j.judge(candidate);
|
|
107
|
-
if (!verdict.accept) {
|
|
108
|
-
return { accepted: false, vetoedBy: j.name, reason: verdict.reason };
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
return { accepted: true };
|
|
112
|
-
};
|
|
113
|
-
}
|
|
114
|
-
|
|
115
60
|
// src/mcp/trace-propagation.ts
|
|
116
61
|
function readTraceContextFromEnv() {
|
|
117
62
|
const traceId = process.env.TRACE_ID || generateTraceId();
|
package/dist/mcp/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/mcp/kb-gate.ts","../../src/mcp/trace-propagation.ts"],"sourcesContent":["/**\n * @experimental\n *\n * `createKbGate` — the valid-only knowledge-base growth gate, distilled from\n * physim's KB-research subsystem. A research-in-a-loop delegate (or any KB\n * writer) runs candidate facts through this before persisting, so the KB grows\n * with ONLY grounded facts — hallucinated, unsourced, or laundered claims are\n * vetoed at the gate.\n *\n * Fail-closed by construction: every judge must `accept`; the FIRST veto wins\n * and the fact is rejected. The non-negotiable floor (always on, can't be\n * disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST\n * literally appear in its `sourceText`. That single check kills the dominant\n * failure mode (a confident claim decoupled from any real source).\n *\n * Pure + dependency-free: it operates on fact candidates, not on a store, so it\n * composes with `@tangle-network/agent-knowledge` or any persistence layer\n * without importing it. The remediation policy (correct-on-veto vs\n * escalate-as-unverified) is the caller's — this returns the verdict; it never\n * drops a fact silently.\n */\n\n/** @experimental A fact proposed for the KB, with its grounding. */\nexport interface FactCandidate {\n /** The atomic claim text. */\n claim: string\n /** Optional extracted value (number or string) the claim asserts. */\n value?: string | number\n /** Verbatim span lifted from the source that backs the claim. */\n verbatimPassage: string\n /** The raw source text the passage must be grounded in. */\n sourceText: string\n /** Where the fact claims to come from — checked for circular/self citations. */\n citation?: string\n}\n\n/** @experimental */\nexport interface FactJudgeVerdict {\n accept: boolean\n reason?: string\n}\n\n/** @experimental A pluggable fact validator. Throw is NOT allowed — return a\n * verdict; a thrown judge is a programmer error, not a veto. 
*/\nexport interface FactJudge {\n name: string\n judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>\n}\n\n/** @experimental */\nexport interface KbGateResult {\n accepted: boolean\n /** Name of the judge that vetoed; undefined when accepted. */\n vetoedBy?: string\n reason?: string\n}\n\n/** @experimental */\nexport interface CreateKbGateOptions {\n /** Extra judges appended after the built-in floor (e.g. an LLM judge). */\n judges?: FactJudge[]\n /** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */\n minPassageChars?: number\n /**\n * Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,\n * `'cad_params'`, `'requirements'`). A citation naming one is circular\n * (laundering) — the fact cites a derived artifact, not a real source.\n * Default `[]` (no circular check unless the consumer declares its kinds).\n */\n selfArtifactKinds?: string[]\n}\n\nconst norm = (s: string): string => s.toLowerCase().replace(/\\s+/g, ' ').trim()\n\n/** Does `value` appear in the (normalized) passage — literally, comma-grouped,\n * or in billion/million shorthand (the forms a source actually writes). */\nfunction valueAppears(value: string | number, passageNorm: string): boolean {\n if (passageNorm.includes(norm(String(value)))) return true\n if (typeof value !== 'number' || !Number.isFinite(value)) return false\n const forms = [value.toLocaleString('en-US')]\n if (Math.abs(value) >= 1e9) forms.push(`${trimZero(value / 1e9)} billion`)\n if (Math.abs(value) >= 1e6) forms.push(`${trimZero(value / 1e6)} million`)\n return forms.some((f) => passageNorm.includes(norm(f)))\n}\n\nfunction trimZero(n: number): string {\n return Number.isInteger(n) ? String(n) : String(Number(n.toFixed(2)))\n}\n\n/** The always-on floor judges. Order matters: cheapest / most-fundamental first. 
*/\nfunction builtinJudges(minPassageChars: number, selfArtifactKinds: string[]): FactJudge[] {\n const kinds = selfArtifactKinds.map((k) => k.toLowerCase())\n return [\n {\n name: 'passage-non-empty',\n judge: (c) =>\n c.verbatimPassage.trim().length >= minPassageChars\n ? { accept: true }\n : { accept: false, reason: `passage shorter than ${minPassageChars} chars` },\n },\n {\n // THE anti-hallucination floor — the passage must literally be in the source.\n name: 'passage-present',\n judge: (c) =>\n norm(c.sourceText).includes(norm(c.verbatimPassage))\n ? { accept: true }\n : { accept: false, reason: 'verbatim passage not found in source (unbacked fact)' },\n },\n {\n name: 'value-in-passage',\n judge: (c) =>\n c.value === undefined || valueAppears(c.value, norm(c.verbatimPassage))\n ? { accept: true }\n : { accept: false, reason: `value ${JSON.stringify(c.value)} not present in passage` },\n },\n {\n name: 'no-circular-citation',\n judge: (c) => {\n if (!c.citation || kinds.length === 0) return { accept: true }\n const cite = c.citation.toLowerCase()\n const hit = kinds.find((k) => cite.includes(k))\n return hit\n ? { accept: false, reason: `circular citation to self-generated artifact \"${hit}\"` }\n : { accept: true }\n },\n },\n ]\n}\n\n/**\n * @experimental\n *\n * Build a fail-closed KB gate. The returned function runs the built-in floor\n * (passage-non-empty → passage-present → value-in-passage → no-circular-citation)\n * then any consumer judges, returning on the first veto.\n */\nexport function createKbGate(\n options: CreateKbGateOptions = {},\n): (candidate: FactCandidate) => Promise<KbGateResult> {\n const judges = [\n ...builtinJudges(options.minPassageChars ?? 12, options.selfArtifactKinds ?? []),\n ...(options.judges ?? 
[]),\n ]\n return async (candidate) => {\n for (const j of judges) {\n const verdict = await j.judge(candidate)\n if (!verdict.accept) {\n return { accepted: false, vetoedBy: j.name, reason: verdict.reason }\n }\n }\n return { accepted: true }\n }\n}\n","/**\n * @experimental\n *\n * Trace context propagation for MCP subprocess.\n *\n * When the MCP server is launched as a child process by a sandbox harness,\n * the parent passes trace context via environment variables:\n *\n * TRACE_ID=<current-run-trace-id>\n * PARENT_SPAN_ID=<span-that-dispatched-the-delegation>\n *\n * The MCP server reads these at startup and uses them as the root of its\n * internal trace tree. All spans emitted by `runLoop` invocations inside\n * the MCP are children of the parent's delegation span.\n *\n * When these env vars are absent, the MCP generates a fresh trace root —\n * the server operates standalone without trace joining.\n */\n\nimport type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types'\nimport type { OtelExporter } from '../otel-export'\nimport { buildLoopOtelSpans, createOtelExporter } from '../otel-export'\n\nexport interface TraceContext {\n /** Trace id inherited from the parent process, or a fresh one. */\n traceId: string\n /** Parent span id from the delegation that launched this MCP server. */\n parentSpanId?: string\n}\n\n/**\n * Read trace context from the process environment.\n * Returns a context with inherited ids or a freshly generated root.\n */\nexport function readTraceContextFromEnv(): TraceContext {\n const traceId = process.env.TRACE_ID || generateTraceId()\n const parentSpanId = process.env.PARENT_SPAN_ID || undefined\n return { traceId, parentSpanId }\n}\n\n/**\n * Create a LoopTraceEmitter that:\n * 1. Parents all spans under the inherited PARENT_SPAN_ID.\n * 2. 
Exports spans to OTEL when OTEL_EXPORTER_OTLP_ENDPOINT is set.\n *\n * Returns both the emitter and the optional exporter handle for shutdown.\n */\nexport function createPropagatingTraceEmitter(ctx: TraceContext): {\n emitter: LoopTraceEmitter\n exporter: OtelExporter | undefined\n context: TraceContext\n} {\n const exporter = createOtelExporter()\n\n // Buffer events per loop run, then emit the full nested span tree on\n // `loop.ended` so the topology hierarchy (loop → round → branch) reaches the\n // OTLP collector — not a flat list of zero-duration point spans. A run that\n // never reaches `loop.ended` (hard abort) drops its buffer; acceptable for\n // the short-lived MCP subprocess.\n const buffers = new Map<string, LoopTraceEvent[]>()\n\n const emitter: LoopTraceEmitter = {\n emit(event: LoopTraceEvent) {\n if (!exporter) return\n const buf = buffers.get(event.runId)\n if (buf) buf.push(event)\n else buffers.set(event.runId, [event])\n if (event.kind === 'loop.ended') {\n const events = buffers.get(event.runId) ?? 
[event]\n buffers.delete(event.runId)\n for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) {\n exporter.exportSpan(span)\n }\n }\n },\n }\n\n return { emitter, exporter, context: ctx }\n}\n\n/**\n * Build env vars to pass to a child MCP subprocess so it inherits the\n * current trace context.\n */\nexport function traceContextToEnv(ctx: TraceContext): Record<string, string> {\n const env: Record<string, string> = { TRACE_ID: ctx.traceId }\n if (ctx.parentSpanId) env.PARENT_SPAN_ID = ctx.parentSpanId\n return env\n}\n\nfunction generateTraceId(): string {\n const bytes = new Uint8Array(16)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 16; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAwEA,IAAM,OAAO,CAAC,MAAsB,EAAE,YAAY,EAAE,QAAQ,QAAQ,GAAG,EAAE,KAAK;AAI9E,SAAS,aAAa,OAAwB,aAA8B;AAC1E,MAAI,YAAY,SAAS,KAAK,OAAO,KAAK,CAAC,CAAC,EAAG,QAAO;AACtD,MAAI,OAAO,UAAU,YAAY,CAAC,OAAO,SAAS,KAAK,EAAG,QAAO;AACjE,QAAM,QAAQ,CAAC,MAAM,eAAe,OAAO,CAAC;AAC5C,MAAI,KAAK,IAAI,KAAK,KAAK,IAAK,OAAM,KAAK,GAAG,SAAS,QAAQ,GAAG,CAAC,UAAU;AACzE,MAAI,KAAK,IAAI,KAAK,KAAK,IAAK,OAAM,KAAK,GAAG,SAAS,QAAQ,GAAG,CAAC,UAAU;AACzE,SAAO,MAAM,KAAK,CAAC,MAAM,YAAY,SAAS,KAAK,CAAC,CAAC,CAAC;AACxD;AAEA,SAAS,SAAS,GAAmB;AACnC,SAAO,OAAO,UAAU,CAAC,IAAI,OAAO,CAAC,IAAI,OAAO,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC;AACtE;AAGA,SAAS,cAAc,iBAAyB,mBAA0C;AACxF,QAAM,QAAQ,kBAAkB,IAAI,CAAC,MAAM,EAAE,YAAY,CAAC;AAC1D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO,CAAC,MACN,EAAE,gBAAgB,KAAK,EAAE,UAAU,kBAC/B,EAAE,QAAQ,KAAK,IACf,EAAE,QAAQ,OAAO,QAAQ,wBAAwB,eAAe,SAAS;AAAA,IACjF;AAAA,IACA;AAAA;AAAA,MAEE,MAAM;AAAA,MACN,OAAO,CAAC,MACN,KAAK,EAAE,UAAU,EAAE,SAAS,KAAK,EAAE,eAAe,CAAC,IAC/C,EAAE,QAAQ,KAAK,IACf,EAAE,QAAQ,OAAO,QAAQ,uDAAuD;AAAA,IACxF;AAAA,IACA;AAAA,MACE,MAAM;AAAA,M
ACN,OAAO,CAAC,MACN,EAAE,UAAU,UAAa,aAAa,EAAE,OAAO,KAAK,EAAE,eAAe,CAAC,IAClE,EAAE,QAAQ,KAAK,IACf,EAAE,QAAQ,OAAO,QAAQ,SAAS,KAAK,UAAU,EAAE,KAAK,CAAC,0BAA0B;AAAA,IAC3F;AAAA,IACA;AAAA,MACE,MAAM;AAAA,MACN,OAAO,CAAC,MAAM;AACZ,YAAI,CAAC,EAAE,YAAY,MAAM,WAAW,EAAG,QAAO,EAAE,QAAQ,KAAK;AAC7D,cAAM,OAAO,EAAE,SAAS,YAAY;AACpC,cAAM,MAAM,MAAM,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;AAC9C,eAAO,MACH,EAAE,QAAQ,OAAO,QAAQ,iDAAiD,GAAG,IAAI,IACjF,EAAE,QAAQ,KAAK;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AACF;AASO,SAAS,aACd,UAA+B,CAAC,GACqB;AACrD,QAAM,SAAS;AAAA,IACb,GAAG,cAAc,QAAQ,mBAAmB,IAAI,QAAQ,qBAAqB,CAAC,CAAC;AAAA,IAC/E,GAAI,QAAQ,UAAU,CAAC;AAAA,EACzB;AACA,SAAO,OAAO,cAAc;AAC1B,eAAW,KAAK,QAAQ;AACtB,YAAM,UAAU,MAAM,EAAE,MAAM,SAAS;AACvC,UAAI,CAAC,QAAQ,QAAQ;AACnB,eAAO,EAAE,UAAU,OAAO,UAAU,EAAE,MAAM,QAAQ,QAAQ,OAAO;AAAA,MACrE;AAAA,IACF;AACA,WAAO,EAAE,UAAU,KAAK;AAAA,EAC1B;AACF;;;ACtHO,SAAS,0BAAwC;AACtD,QAAM,UAAU,QAAQ,IAAI,YAAY,gBAAgB;AACxD,QAAM,eAAe,QAAQ,IAAI,kBAAkB;AACnD,SAAO,EAAE,SAAS,aAAa;AACjC;AASO,SAAS,8BAA8B,KAI5C;AACA,QAAM,WAAW,mBAAmB;AAOpC,QAAM,UAAU,oBAAI,IAA8B;AAElD,QAAM,UAA4B;AAAA,IAChC,KAAK,OAAuB;AAC1B,UAAI,CAAC,SAAU;AACf,YAAM,MAAM,QAAQ,IAAI,MAAM,KAAK;AACnC,UAAI,IAAK,KAAI,KAAK,KAAK;AAAA,UAClB,SAAQ,IAAI,MAAM,OAAO,CAAC,KAAK,CAAC;AACrC,UAAI,MAAM,SAAS,cAAc;AAC/B,cAAM,SAAS,QAAQ,IAAI,MAAM,KAAK,KAAK,CAAC,KAAK;AACjD,gBAAQ,OAAO,MAAM,KAAK;AAC1B,mBAAW,QAAQ,mBAAmB,QAAQ,IAAI,SAAS,IAAI,YAAY,GAAG;AAC5E,mBAAS,WAAW,IAAI;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,UAAU,SAAS,IAAI;AAC3C;AAMO,SAAS,kBAAkB,KAA2C;AAC3E,QAAM,MAA8B,EAAE,UAAU,IAAI,QAAQ;AAC5D,MAAI,IAAI,aAAc,KAAI,iBAAiB,IAAI;AAC/C,SAAO;AACT;AAEA,SAAS,kBAA0B;AACjC,QAAM,QAAQ,IAAI,WAAW,EAAE;AAC/B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,IAAI,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACxE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/mcp/trace-propagation.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Trace context propagation for MCP subprocess.\n *\n * When the MCP server is launched as a child process by a sandbox harness,\n * the parent passes trace context via environment variables:\n *\n * TRACE_ID=<current-run-trace-id>\n * PARENT_SPAN_ID=<span-that-dispatched-the-delegation>\n *\n * The MCP server reads these at startup and uses them as the root of its\n * internal trace tree. All spans emitted by `runLoop` invocations inside\n * the MCP are children of the parent's delegation span.\n *\n * When these env vars are absent, the MCP generates a fresh trace root —\n * the server operates standalone without trace joining.\n */\n\nimport type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types'\nimport type { OtelExporter } from '../otel-export'\nimport { buildLoopOtelSpans, createOtelExporter } from '../otel-export'\n\nexport interface TraceContext {\n /** Trace id inherited from the parent process, or a fresh one. */\n traceId: string\n /** Parent span id from the delegation that launched this MCP server. */\n parentSpanId?: string\n}\n\n/**\n * Read trace context from the process environment.\n * Returns a context with inherited ids or a freshly generated root.\n */\nexport function readTraceContextFromEnv(): TraceContext {\n const traceId = process.env.TRACE_ID || generateTraceId()\n const parentSpanId = process.env.PARENT_SPAN_ID || undefined\n return { traceId, parentSpanId }\n}\n\n/**\n * Create a LoopTraceEmitter that:\n * 1. Parents all spans under the inherited PARENT_SPAN_ID.\n * 2. 
Exports spans to OTEL when OTEL_EXPORTER_OTLP_ENDPOINT is set.\n *\n * Returns both the emitter and the optional exporter handle for shutdown.\n */\nexport function createPropagatingTraceEmitter(ctx: TraceContext): {\n emitter: LoopTraceEmitter\n exporter: OtelExporter | undefined\n context: TraceContext\n} {\n const exporter = createOtelExporter()\n\n // Buffer events per loop run, then emit the full nested span tree on\n // `loop.ended` so the topology hierarchy (loop → round → branch) reaches the\n // OTLP collector — not a flat list of zero-duration point spans. A run that\n // never reaches `loop.ended` (hard abort) drops its buffer; acceptable for\n // the short-lived MCP subprocess.\n const buffers = new Map<string, LoopTraceEvent[]>()\n\n const emitter: LoopTraceEmitter = {\n emit(event: LoopTraceEvent) {\n if (!exporter) return\n const buf = buffers.get(event.runId)\n if (buf) buf.push(event)\n else buffers.set(event.runId, [event])\n if (event.kind === 'loop.ended') {\n const events = buffers.get(event.runId) ?? 
[event]\n buffers.delete(event.runId)\n for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) {\n exporter.exportSpan(span)\n }\n }\n },\n }\n\n return { emitter, exporter, context: ctx }\n}\n\n/**\n * Build env vars to pass to a child MCP subprocess so it inherits the\n * current trace context.\n */\nexport function traceContextToEnv(ctx: TraceContext): Record<string, string> {\n const env: Record<string, string> = { TRACE_ID: ctx.traceId }\n if (ctx.parentSpanId) env.PARENT_SPAN_ID = ctx.parentSpanId\n return env\n}\n\nfunction generateTraceId(): string {\n const bytes = new Uint8Array(16)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 16; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkCO,SAAS,0BAAwC;AACtD,QAAM,UAAU,QAAQ,IAAI,YAAY,gBAAgB;AACxD,QAAM,eAAe,QAAQ,IAAI,kBAAkB;AACnD,SAAO,EAAE,SAAS,aAAa;AACjC;AASO,SAAS,8BAA8B,KAI5C;AACA,QAAM,WAAW,mBAAmB;AAOpC,QAAM,UAAU,oBAAI,IAA8B;AAElD,QAAM,UAA4B;AAAA,IAChC,KAAK,OAAuB;AAC1B,UAAI,CAAC,SAAU;AACf,YAAM,MAAM,QAAQ,IAAI,MAAM,KAAK;AACnC,UAAI,IAAK,KAAI,KAAK,KAAK;AAAA,UAClB,SAAQ,IAAI,MAAM,OAAO,CAAC,KAAK,CAAC;AACrC,UAAI,MAAM,SAAS,cAAc;AAC/B,cAAM,SAAS,QAAQ,IAAI,MAAM,KAAK,KAAK,CAAC,KAAK;AACjD,gBAAQ,OAAO,MAAM,KAAK;AAC1B,mBAAW,QAAQ,mBAAmB,QAAQ,IAAI,SAAS,IAAI,YAAY,GAAG;AAC5E,mBAAS,WAAW,IAAI;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,UAAU,SAAS,IAAI;AAC3C;AAMO,SAAS,kBAAkB,KAA2C;AAC3E,QAAM,MAA8B,EAAE,UAAU,IAAI,QAAQ;AAC5D,MAAI,IAAI,aAAc,KAAI,iBAAiB,IAAI;AAC/C,SAAO;AACT;AAEA,SAAS,kBAA0B;AACjC,QAAM,QAAQ,IAAI,WAAW,EAAE;AAC/B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,IAAI,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACxE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,EAAE,
SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;","names":[]}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { LlmClientOptions } from '@tangle-network/agent-eval';
|
|
2
|
+
import { Scenario, DispatchContext, JudgeConfig, ImprovementDriver, Gate, CampaignStorage, GateResult, RunImprovementLoopResult } from '@tangle-network/agent-eval/campaign';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* @experimental
|
|
6
|
+
*
|
|
7
|
+
* `optimizePrompt` — identity-gated optimization for any TEXT prompt surface
|
|
8
|
+
* (system prompt, planner prompt, judge rubric, skill doc).
|
|
9
|
+
*
|
|
10
|
+
* The text-surface sibling to this module's `improvementDriver` (the
|
|
11
|
+
* CODE-surface / worktree path). Both feed agent-eval's `runImprovementLoop`;
|
|
12
|
+
* this one defaults the driver to agent-eval's `gepaDriver` (reflective text
|
|
13
|
+
* mutator) and the gate to `heldOutGate`.
|
|
14
|
+
*
|
|
15
|
+
* IDENTITY-GATED BY CONSTRUCTION — the whole point. The loop runs evals,
|
|
16
|
+
* collects per-scenario signal, proposes candidates, and the gate compares
|
|
17
|
+
* candidate-vs-baseline ON THE HELD-OUT SET. `result.prompt` is the baseline
|
|
18
|
+
* (identity) UNLESS the gate decided `'ship'`. So wiring a surface up is safe:
|
|
19
|
+
* a surface with no beneficial mutation simply keeps its baseline. You never
|
|
20
|
+
* regress by registering a prompt — you only ever improve when the held-out
|
|
21
|
+
* data earns it.
|
|
22
|
+
*
|
|
23
|
+
* Generic over the runtime: `runWithPrompt` is the only domain seam — given a
|
|
24
|
+
* candidate prompt + scenario, run it however the surface runs (sandbox
|
|
25
|
+
* `streamPrompt`, a `runLoop`, a direct model call) and return the artifact the
|
|
26
|
+
* judges score. The optimizer never assumes how a prompt is executed.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
/** Reflection config for the default `gepaDriver`. Omit when passing a custom
|
|
30
|
+
* `driver`. */
|
|
31
|
+
interface OptimizePromptReflection {
|
|
32
|
+
/** Router transport for the reflection model. */
|
|
33
|
+
llm: LlmClientOptions;
|
|
34
|
+
/** Model that performs the reflective rewrite. */
|
|
35
|
+
model: string;
|
|
36
|
+
/** What is being optimized — orients the reflection prompt. Default
|
|
37
|
+
* `'system prompt'`. */
|
|
38
|
+
target?: string;
|
|
39
|
+
/** Surface-specific mutation levers offered to the reflector. */
|
|
40
|
+
mutationPrimitives?: string[];
|
|
41
|
+
/** H2 (`## Foo`) headings that MUST survive every candidate. gepaDriver's
|
|
42
|
+
* only structural guard — load-bearing sections of the prompt should be
|
|
43
|
+
* `##` headings so a rewrite cannot drop them. */
|
|
44
|
+
preserveSections?: string[];
|
|
45
|
+
/** Max sentence-level edits per candidate vs the parent (a textual learning
|
|
46
|
+
* rate). Caps a rewrite from wiping prior rules in one generation. */
|
|
47
|
+
maxSentenceEdits?: number;
|
|
48
|
+
}
|
|
49
|
+
/** @experimental */
|
|
50
|
+
interface OptimizePromptOptions<TScenario extends Scenario, TArtifact> {
|
|
51
|
+
/** The prompt being optimized — the identity baseline the gate protects. */
|
|
52
|
+
baselinePrompt: string;
|
|
53
|
+
/** Domain seam: run a candidate prompt against a scenario → artifact the
|
|
54
|
+
* judges score. The optimizer is agnostic to HOW the prompt runs. */
|
|
55
|
+
runWithPrompt: (prompt: string, scenario: TScenario, ctx: DispatchContext) => Promise<TArtifact>;
|
|
56
|
+
/** Training pool — scored each generation to rank candidates. */
|
|
57
|
+
scenarios: TScenario[];
|
|
58
|
+
/** Held out of training — scored ONLY for the gate's baseline-vs-winner
|
|
59
|
+
* delta. Disjoint from `scenarios`; this is what makes promotion measure
|
|
60
|
+
* generalization, not memorization. */
|
|
61
|
+
holdoutScenarios: TScenario[];
|
|
62
|
+
/** Scorers — deterministic checks or LLM judges. */
|
|
63
|
+
judges: JudgeConfig<TArtifact, TScenario>[];
|
|
64
|
+
/** Where artifacts + traces land (opaque key under in-memory storage). */
|
|
65
|
+
runDir: string;
|
|
66
|
+
/** Default driver = `gepaDriver` built from this. Required UNLESS `driver`
|
|
67
|
+
* is supplied. */
|
|
68
|
+
reflection?: OptimizePromptReflection;
|
|
69
|
+
/** Override the improvement strategy (custom driver / deterministic tests). */
|
|
70
|
+
driver?: ImprovementDriver;
|
|
71
|
+
/** Override the promotion gate. Default `heldOutGate` over `holdoutScenarios`
|
|
72
|
+
* — zero extra LLM. Wrap `defaultProductionGate` for red-team/reward-hacking
|
|
73
|
+
* hardening on production wiring. */
|
|
74
|
+
gate?: Gate<TArtifact, TScenario>;
|
|
75
|
+
/** Minimum held-out composite lift to ship, forwarded to the default
|
|
76
|
+
* `heldOutGate`. When omitted the gate uses its own default. */
|
|
77
|
+
deltaThreshold?: number;
|
|
78
|
+
/** Candidates proposed per generation. Default 4. */
|
|
79
|
+
populationSize?: number;
|
|
80
|
+
/** Generations to run. Default 3. */
|
|
81
|
+
maxGenerations?: number;
|
|
82
|
+
/** Candidates carried to the next generation. Default 2. */
|
|
83
|
+
promoteTopK?: number;
|
|
84
|
+
/** Storage backend. Pass `inMemoryCampaignStorage()` for filesystem-less /
|
|
85
|
+
* test runs. Default: Node filesystem. */
|
|
86
|
+
storage?: CampaignStorage;
|
|
87
|
+
/** Reproducibility seed. Default 42. */
|
|
88
|
+
seed?: number;
|
|
89
|
+
/** Per-scenario replicates for CI bands. Default 1. */
|
|
90
|
+
reps?: number;
|
|
91
|
+
/** Max concurrent cells. Default 2. */
|
|
92
|
+
maxConcurrency?: number;
|
|
93
|
+
/** Test seam — override the wall clock. */
|
|
94
|
+
now?: () => Date;
|
|
95
|
+
/** On a shipped gate: `'pr'` opens a PR, `'none'` just reports. Default
|
|
96
|
+
* `'none'`. */
|
|
97
|
+
autoOnPromote?: 'pr' | 'none';
|
|
98
|
+
ghOwner?: string;
|
|
99
|
+
ghRepo?: string;
|
|
100
|
+
}
|
|
101
|
+
/** @experimental */
|
|
102
|
+
interface OptimizePromptResult<TArtifact, TScenario extends Scenario> {
|
|
103
|
+
/** The prompt to USE. Identity (the baseline) unless the gate shipped a
|
|
104
|
+
* winner — so a caller can always assign `result.prompt` unconditionally. */
|
|
105
|
+
prompt: string;
|
|
106
|
+
/** True only when the gate promoted a candidate over baseline on holdout. */
|
|
107
|
+
improved: boolean;
|
|
108
|
+
/** The gate's verdict (`'ship' | 'hold' | 'need_more_work' | ...`). */
|
|
109
|
+
decision: GateResult['decision'];
|
|
110
|
+
/** Human-readable reasons the gate gave. */
|
|
111
|
+
reasons: string[];
|
|
112
|
+
/** Mean held-out composite of the baseline. */
|
|
113
|
+
baselineComposite: number;
|
|
114
|
+
/** Mean held-out composite of the winner candidate. */
|
|
115
|
+
winnerComposite: number;
|
|
116
|
+
/** Held-out lift (winner − baseline); the gate's `delta` when it reported one. */
|
|
117
|
+
delta: number;
|
|
118
|
+
/** Why the winner was proposed — present when a shipped winner carried a
|
|
119
|
+
* driver rationale. */
|
|
120
|
+
rationale?: string;
|
|
121
|
+
/** Unified baseline→winner diff (empty when the winner is the baseline). */
|
|
122
|
+
diff: string;
|
|
123
|
+
/** The full loop result for callers that need generations / campaigns. */
|
|
124
|
+
raw: RunImprovementLoopResult<TArtifact, TScenario>;
|
|
125
|
+
}
|
|
126
|
+
/** @experimental */
|
|
127
|
+
declare function optimizePrompt<TScenario extends Scenario, TArtifact>(opts: OptimizePromptOptions<TScenario, TArtifact>): Promise<OptimizePromptResult<TArtifact, TScenario>>;
|
|
128
|
+
|
|
129
|
+
export { type OptimizePromptOptions as O, type OptimizePromptReflection as a, type OptimizePromptResult as b, optimizePrompt as o };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { CoderOutput, CoderTask } from './profiles.js';
|
|
2
|
-
import { L as LoopSandboxClient } from './types-
|
|
2
|
+
import { L as LoopSandboxClient } from './types-CmkQl8qE.js';
|
|
3
3
|
import { SandboxInstance } from '@tangle-network/sandbox';
|
|
4
4
|
import { O as OpenAIChatTool } from './types-CsCCryln.js';
|
|
5
5
|
|
|
@@ -361,6 +361,81 @@ interface CreateDefaultCoderDelegateOptions {
|
|
|
361
361
|
*/
|
|
362
362
|
declare function createDefaultCoderDelegate(options: CreateDefaultCoderDelegateOptions): CoderDelegate;
|
|
363
363
|
|
|
364
|
+
/**
|
|
365
|
+
* @experimental
|
|
366
|
+
*
|
|
367
|
+
* `createKbGate` — the valid-only knowledge-base growth gate, distilled from
|
|
368
|
+
* physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
|
|
369
|
+
* writer) runs candidate facts through this before persisting, so the KB grows
|
|
370
|
+
* with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
|
|
371
|
+
* vetoed at the gate.
|
|
372
|
+
*
|
|
373
|
+
* Fail-closed by construction: every judge must `accept`; the FIRST veto wins
|
|
374
|
+
* and the fact is rejected. The non-negotiable floor (always on, can't be
|
|
375
|
+
* disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
|
|
376
|
+
* literally appear in its `sourceText`. That single check kills the dominant
|
|
377
|
+
* failure mode (a confident claim decoupled from any real source).
|
|
378
|
+
*
|
|
379
|
+
* Pure + dependency-free: it operates on fact candidates, not on a store, so it
|
|
380
|
+
* composes with `@tangle-network/agent-knowledge` or any persistence layer
|
|
381
|
+
* without importing it. The remediation policy (correct-on-veto vs
|
|
382
|
+
* escalate-as-unverified) is the caller's — this returns the verdict; it never
|
|
383
|
+
* drops a fact silently.
|
|
384
|
+
*/
|
|
385
|
+
/** @experimental A fact proposed for the KB, with its grounding. */
|
|
386
|
+
interface FactCandidate {
|
|
387
|
+
/** The atomic claim text. */
|
|
388
|
+
claim: string;
|
|
389
|
+
/** Optional extracted value (number or string) the claim asserts. */
|
|
390
|
+
value?: string | number;
|
|
391
|
+
/** Verbatim span lifted from the source that backs the claim. */
|
|
392
|
+
verbatimPassage: string;
|
|
393
|
+
/** The raw source text the passage must be grounded in. */
|
|
394
|
+
sourceText: string;
|
|
395
|
+
/** Where the fact claims to come from — checked for circular/self citations. */
|
|
396
|
+
citation?: string;
|
|
397
|
+
}
|
|
398
|
+
/** @experimental */
|
|
399
|
+
interface FactJudgeVerdict {
|
|
400
|
+
accept: boolean;
|
|
401
|
+
reason?: string;
|
|
402
|
+
}
|
|
403
|
+
/** @experimental A pluggable fact validator. Throwing is NOT allowed — return a
|
|
404
|
+
* verdict; a judge that throws is a programmer error, not a veto. */
|
|
405
|
+
interface FactJudge {
|
|
406
|
+
name: string;
|
|
407
|
+
judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>;
|
|
408
|
+
}
|
|
409
|
+
/** @experimental */
|
|
410
|
+
interface KbGateResult {
|
|
411
|
+
accepted: boolean;
|
|
412
|
+
/** Name of the judge that vetoed; undefined when accepted. */
|
|
413
|
+
vetoedBy?: string;
|
|
414
|
+
reason?: string;
|
|
415
|
+
}
|
|
416
|
+
/** @experimental */
|
|
417
|
+
interface CreateKbGateOptions {
|
|
418
|
+
/** Extra judges appended after the built-in floor (e.g. an LLM judge). */
|
|
419
|
+
judges?: FactJudge[];
|
|
420
|
+
/** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
|
|
421
|
+
minPassageChars?: number;
|
|
422
|
+
/**
|
|
423
|
+
* Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
|
|
424
|
+
* `'cad_params'`, `'requirements'`). A citation naming one is circular
|
|
425
|
+
* (laundering) — the fact cites a derived artifact, not a real source.
|
|
426
|
+
* Default `[]` (no circular check unless the consumer declares its kinds).
|
|
427
|
+
*/
|
|
428
|
+
selfArtifactKinds?: string[];
|
|
429
|
+
}
|
|
430
|
+
/**
|
|
431
|
+
* @experimental
|
|
432
|
+
*
|
|
433
|
+
* Build a fail-closed KB gate. The returned function runs the built-in floor
|
|
434
|
+
* (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
|
|
435
|
+
* then any consumer judges, returning on the first veto.
|
|
436
|
+
*/
|
|
437
|
+
declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
|
|
438
|
+
|
|
364
439
|
/**
|
|
365
440
|
* @experimental
|
|
366
441
|
*
|
|
@@ -549,4 +624,4 @@ interface EvalRunsExportResult {
|
|
|
549
624
|
*/
|
|
550
625
|
declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
|
|
551
626
|
|
|
552
|
-
export { type
|
|
627
|
+
export { type OtelSpan as $, type FactCandidate as A, type FactJudge as B, type CoderDelegate as C, type DelegationExecutor as D, type FactJudgeVerdict as E, type FleetHandle as F, type FeedbackRating as G, type FeedbackRefersTo as H, type FleetWorkspaceExecutorOptions as I, type ResearchOutputShape as J, type KbGateResult as K, createDefaultCoderDelegate as L, createFleetWorkspaceExecutor as M, createKbGate as N, type OtelExporter as O, createSiblingSandboxExecutor as P, mcpToolsForRuntimeMcp as Q, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, mcpToolsForRuntimeMcpSubset as T, type EvalRunEvent as U, type EvalRunGeneration as V, type EvalRunsExportConfig as W, type EvalRunsExportResult as X, INTELLIGENCE_WIRE_VERSION as Y, type OtelAttribute as Z, type OtelExportConfig as _, type DelegateFeedbackArgs as a, buildLoopOtelSpans as a0, createOtelExporter as a1, exportEvalRuns as a2, loopEventToOtelSpan as a3, type DelegationFeedbackSnapshot as b, type DelegationProfile as c, type DelegateCodeArgs as d, type DelegateResearchArgs as e, type DelegationStatus as f, type DelegationProgress as g, type DelegationResultPayload as h, type DelegationError as i, type DelegationStatusResult as j, type DelegationHistoryArgs as k, type DelegationHistoryEntry as l, type DelegateCodeResult as m, type DelegateFeedbackResult as n, type ResearchSource as o, type DelegateResearchResult as p, type DelegationHistoryResult as q, type DelegationStatusArgs as r, type CoderReview as s, type CoderReviewer as t, type CoderWinnerSelection as u, type CreateDefaultCoderDelegateOptions as v, type CreateKbGateOptions as w, type DelegateCodeConfig as x, type DelegateResearchConfig as y, type DelegateRunCtx as z };
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { AgentProfile } from '@tangle-network/sandbox';
|
|
2
|
-
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-
|
|
2
|
+
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-CmkQl8qE.js';
|
|
3
3
|
import '@tangle-network/agent-eval';
|
|
4
4
|
import './types-CsCCryln.js';
|
|
5
5
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
2
|
-
import { CreateSandboxOptions, SandboxInstance,
|
|
2
|
+
import { CreateSandboxOptions, SandboxInstance, SandboxEvent, AgentProfile } from '@tangle-network/sandbox';
|
|
3
3
|
import { A as AgentTaskSpec, R as RuntimeStreamEvent } from './types-CsCCryln.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { AnalystRunInputs, AnalystFinding, AnalystRunResult, AnalystRunEvent, FindingsDiff } from '@tangle-network/agent-eval';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Public types for the closed-loop analyst orchestrator.
|
package/package.json
CHANGED