@vextlabs/theron-agent-sdk 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -0
- package/LICENSE +21 -0
- package/README.md +270 -0
- package/dist/adapters/theron.cjs +92 -0
- package/dist/adapters/theron.d.cts +42 -0
- package/dist/adapters/theron.d.ts +42 -0
- package/dist/adapters/theron.js +89 -0
- package/dist/agent/index.cjs +33 -0
- package/dist/agent/index.d.cts +84 -0
- package/dist/agent/index.d.ts +84 -0
- package/dist/agent/index.js +31 -0
- package/dist/council/index.cjs +68 -0
- package/dist/council/index.d.cts +96 -0
- package/dist/council/index.d.ts +96 -0
- package/dist/council/index.js +66 -0
- package/dist/index.cjs +1288 -0
- package/dist/index.d.cts +60 -0
- package/dist/index.d.ts +60 -0
- package/dist/index.js +1244 -0
- package/dist/loop/index.cjs +106 -0
- package/dist/loop/index.d.cts +285 -0
- package/dist/loop/index.d.ts +285 -0
- package/dist/loop/index.js +95 -0
- package/dist/mcp/index.cjs +153 -0
- package/dist/mcp/index.d.cts +69 -0
- package/dist/mcp/index.d.ts +69 -0
- package/dist/mcp/index.js +150 -0
- package/dist/memory/index.cjs +53 -0
- package/dist/memory/index.d.cts +73 -0
- package/dist/memory/index.d.ts +73 -0
- package/dist/memory/index.js +50 -0
- package/dist/patterns/index.cjs +159 -0
- package/dist/patterns/index.d.cts +200 -0
- package/dist/patterns/index.d.ts +200 -0
- package/dist/patterns/index.js +150 -0
- package/dist/receipts/index.cjs +151 -0
- package/dist/receipts/index.d.cts +132 -0
- package/dist/receipts/index.d.ts +132 -0
- package/dist/receipts/index.js +146 -0
- package/dist/runtime/index.cjs +205 -0
- package/dist/runtime/index.d.cts +148 -0
- package/dist/runtime/index.d.ts +148 -0
- package/dist/runtime/index.js +203 -0
- package/dist/session/index.cjs +49 -0
- package/dist/session/index.d.cts +79 -0
- package/dist/session/index.d.ts +79 -0
- package/dist/session/index.js +47 -0
- package/dist/tools/index.cjs +51 -0
- package/dist/tools/index.d.cts +52 -0
- package/dist/tools/index.d.ts +52 -0
- package/dist/tools/index.js +46 -0
- package/dist/verifiers/index.cjs +96 -0
- package/dist/verifiers/index.d.cts +63 -0
- package/dist/verifiers/index.d.ts +63 -0
- package/dist/verifiers/index.js +93 -0
- package/examples/01_code_reviewer.ts +90 -0
- package/examples/02_research_assistant.ts +85 -0
- package/examples/03_council_of_three.ts +91 -0
- package/examples/_adapters/openrouter.ts +90 -0
- package/examples/adapters/openrouter.ts +144 -0
- package/examples/adapters/theron.ts +105 -0
- package/examples/basic-agent.ts +56 -0
- package/examples/council-deliberation.ts +90 -0
- package/examples/cyber-recon-bot.ts +163 -0
- package/examples/loop-primitives.ts +50 -0
- package/examples/meeting-prep-bot.ts +172 -0
- package/examples/reasoning-patterns.ts +125 -0
- package/examples/support-triage-bot.ts +181 -0
- package/examples/verifier-kernel.ts +108 -0
- package/package.json +154 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sample agent: meeting-prep-bot
|
|
3
|
+
*
|
|
4
|
+
* Generates a one-page prep brief for an upcoming meeting from a calendar
|
|
5
|
+
* event + linked docs. Demonstrates how to wire Memory (prior notes about
|
|
6
|
+
* the attendees) into an agent loop.
|
|
7
|
+
*
|
|
8
|
+
* Ships with mock calendar + docs tools so it runs offline. Swap in a real
|
|
9
|
+
* Google Calendar / Outlook adapter and a real docs source for production.
|
|
10
|
+
*
|
|
11
|
+
* Run:
|
|
12
|
+
* OPENROUTER_API_KEY=sk-or-... npx tsx examples/meeting-prep-bot.ts
|
|
13
|
+
*
|
|
14
|
+
* What this demonstrates:
|
|
15
|
+
* - Tool composition (calendar_list_events + docs_fetch + memory_recall_person)
|
|
16
|
+
* - Memory primitive: prior knowledge of attendees informs the brief
|
|
17
|
+
* - Receipts: every external read carries a cap that names the surface
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import {
|
|
21
|
+
Agent,
|
|
22
|
+
Runner,
|
|
23
|
+
defineTool,
|
|
24
|
+
zod as z,
|
|
25
|
+
VerifierKernels,
|
|
26
|
+
InMemoryStore,
|
|
27
|
+
ReceiptEmitter,
|
|
28
|
+
InMemoryReceiptSink,
|
|
29
|
+
} from "../src/index.js";
|
|
30
|
+
import { openrouterAdapter } from "./adapters/openrouter.js";
|
|
31
|
+
|
|
32
|
+
// --- Memory (seeded with prior context about an attendee) ------------------
|
|
33
|
+
|
|
34
|
+
// In-memory store that the memory_recall_person tool reads from.
const memory = new InMemoryStore();

// Seed one note about an attendee so the brief has prior context to draw on.
const danaNote = {
  key: "person:dana",
  value: [
    "Dana runs platform infra. Cares about p99 latency, hates flaky tests,",
    "previously asked for a SLO dashboard in our last 1:1.",
  ].join(" "),
  tags: ["person", "platform"],
  created_at: Date.now(),
  last_accessed_at: Date.now(),
};
await memory.set(danaNote);
|
|
44
|
+
|
|
45
|
+
// --- Tools ------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
/**
 * Mock calendar tool: lists the canned events that start within the next
 * `hours_ahead` hours.
 *
 * Input:  { hours_ahead: number } - size of the look-ahead window in hours.
 * Output: { events: [...] } - each event echoes the requested window as
 *         `horizon_hours`. The list is empty when no canned event starts
 *         inside the window.
 */
const listEvents = defineTool({
  name: "calendar_list_events",
  description: "List upcoming calendar events for the next N hours.",
  input: z.object({ hours_ahead: z.number() }),
  async execute({ hours_ahead }) {
    const MS_PER_HOUR = 3_600_000;
    // Read the clock once so start, end, and the window cutoff all agree.
    const now = Date.now();
    const windowEnd = now + hours_ahead * MS_PER_HOUR;

    const canned = [
      {
        id: "evt_001",
        title: "Q3 planning sync",
        startMs: now + 3_600_000,
        endMs: now + 5_400_000,
        attendees: ["dana@example.com", "anna@example.com"],
        linked_docs: ["doc_42"],
      },
    ];

    return {
      events: canned
        // Honor the requested window instead of always returning everything.
        .filter((evt) => evt.startMs <= windowEnd)
        .map((evt) => ({
          id: evt.id,
          title: evt.title,
          start: new Date(evt.startMs).toISOString(),
          end: new Date(evt.endMs).toISOString(),
          attendees: evt.attendees,
          linked_docs: evt.linked_docs,
          horizon_hours: hours_ahead,
        })),
    };
  },
});
|
|
67
|
+
|
|
68
|
+
/**
 * Mock docs tool: returns the same canned roadmap draft for any doc id,
 * echoing the requested `doc_id` back in the result.
 */
const fetchDoc = defineTool({
  name: "docs_fetch",
  description: "Fetch the body text of a linked doc.",
  input: z.object({ doc_id: z.string() }),
  async execute(args) {
    const { doc_id } = args;
    const body = [
      "Three bets: (a) SLO dashboards, (b) flake-detection in CI,",
      "(c) cost-per-tenant reporting. Open question: do we own (c) or",
      "punt to finance.",
    ].join(" ");
    return { doc_id, title: "Q3 platform roadmap (draft)", body };
  },
});
|
|
83
|
+
|
|
84
|
+
/**
 * Memory tool: looks up stored notes about a person.
 *
 * Input:  { handle: string } - an email address or a bare handle.
 * Output: { handle, notes } - the handle exactly as supplied, plus the
 *         `value` of every record returned by the memory query.
 */
const recallPerson = defineTool({
  name: "memory_recall_person",
  description: "Look up prior notes about a person by email or handle.",
  input: z.object({ handle: z.string() }),
  async execute({ handle }) {
    // Keys are stored as lower-case local parts (e.g. "person:dana"), so
    // trim and lower-case the handle before building the key. Otherwise
    // "Dana@example.com" or " dana " would miss the stored note.
    const localPart = handle.trim().split("@")[0].toLowerCase();
    const hits = await memory.query({ key: `person:${localPart}` });
    return { handle, notes: hits.map((hit) => hit.value) };
  },
});
|
|
94
|
+
|
|
95
|
+
// --- Agent ------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
// Agent that writes the prep brief. It gets the three tools defined in this
// file and two style verifiers (em-dash and AI-ism kernels).
const prepBot = new Agent({
  name: "meeting-prep-bot",
  tools: [listEvents, fetchDoc, recallPerson],
  verifiers: [VerifierKernels.emDash, VerifierKernels.aiIsm],
  // One array entry per prompt line; empty strings are the blank lines.
  instruction: [
    "You write a one-page prep brief for an upcoming meeting.",
    "",
    "Workflow:",
    "1. calendar_list_events for the next 4 hours",
    "2. For each event, docs_fetch every linked doc",
    "3. For each attendee, memory_recall_person",
    "4. Produce a brief:",
    "## Meeting",
    "## Attendees (one line each: role + recent context from memory)",
    "## Docs (3-bullet TLDR per doc)",
    "## Questions to raise (3 max, prioritized)",
    "## Decisions to push for (1-3)",
    "",
    "Rules:",
    "- One page max.",
    "- No filler. No em-dashes or AI-isms.",
  ].join("\n"),
});
|
|
118
|
+
|
|
119
|
+
// --- Main -------------------------------------------------------------------
|
|
120
|
+
|
|
121
|
+
/**
 * Entry point: runs the prep bot once, emits a receipt for every finished
 * tool call and for the agent output, then prints the brief and the number
 * of receipts collected. Exits with status 1 when OPENROUTER_API_KEY is unset.
 */
async function main() {
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) {
    console.error("Set OPENROUTER_API_KEY (https://openrouter.ai/keys) and rerun.");
    process.exit(1);
  }

  const model = openrouterAdapter({ apiKey });
  const runner = new Runner({ model, default_model: "openai/gpt-4o-mini" });

  const sink = new InMemoryReceiptSink();
  const receipts = new ReceiptEmitter({
    sinks: [sink],
    issuer: "did:web:local",
    actor: prepBot.name,
  });

  // Cap names are reverse-DNS-ish so receipts cluster cleanly per surface.
  const capsByTool = new Map<string, string>([
    ["calendar_list_events", "vext.calendar.list_events"],
    ["docs_fetch", "vext.docs.fetch"],
  ]);

  runner.on(async (event) => {
    switch (event.type) {
      case "tool_call_done":
        await receipts.emit({
          // Any tool not listed in the table gets the memory-recall cap.
          cap: capsByTool.get(event.tool) ?? "vext.memory.recall",
          input: { tool: event.tool },
          output: event.output,
        });
        break;
      case "agent_output":
        await receipts.emit({
          cap: "agent.run",
          input: { agent: event.agent },
          output: event.output,
        });
        break;
      default:
        break;
    }
  });

  const result = await runner.run(prepBot, "Prep me for everything in the next 4 hours.");
  console.log("\n=== Brief ===\n" + result.output);
  console.log(`\n=== Receipts: ${sink.list().length} ===`);
}

// Report any failure from main and exit with a non-zero status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sample: Reasoning patterns
|
|
3
|
+
*
|
|
4
|
+
* Demonstrates the framework-agnostic reasoning-pattern primitives — the
|
|
5
|
+
* SDK-side counterparts of Theron's server Hive loops. No API key needed: this
|
|
6
|
+
* example uses a deterministic mock "model" so it runs offline and its output is
|
|
7
|
+
* stable. Swap the mock for any provider call (OpenRouter, Anthropic, OpenAI,
|
|
8
|
+
* a local model) and the patterns are unchanged.
|
|
9
|
+
*
|
|
10
|
+
* Run:
|
|
11
|
+
* npx tsx examples/reasoning-patterns.ts
|
|
12
|
+
*
|
|
13
|
+
* What this demonstrates:
|
|
14
|
+
* - selfConsistency — sample N paths, take the majority answer
|
|
15
|
+
* - bestOfN — verifier-guided best-of-N
|
|
16
|
+
* - selfRefine — draft → critique → revise (early-exit when clean)
|
|
17
|
+
* - treeOfThoughts — best-first branch/score/expand search
|
|
18
|
+
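* - chainOfVerification — draft → verify claims independently → revise
* - mixtureOfAgents — layered multi-agent propose → refine → aggregate
* - reflexion — retry with accumulated reflections until success
* - measureLift — prove a pattern beats single-shot on a task set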
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import {
|
|
22
|
+
selfConsistency,
|
|
23
|
+
bestOfN,
|
|
24
|
+
selfRefine,
|
|
25
|
+
treeOfThoughts,
|
|
26
|
+
chainOfVerification,
|
|
27
|
+
mixtureOfAgents,
|
|
28
|
+
reflexion,
|
|
29
|
+
measureLift,
|
|
30
|
+
} from "../src/index.js";
|
|
31
|
+
|
|
32
|
+
/**
 * A stand-in "model". Replace with a real provider call in production.
 *
 * Replies are derived only from the prompt text:
 *   - contains "critique": "no issues" if it also contains "v2",
 *     otherwise "flaw: be more specific"
 *   - contains "verify:": "supported"
 *   - anything else: one of a fixed list of answers, selected by prompt length
 */
async function mockModel(prompt: string): Promise<string> {
  if (prompt.includes("critique")) {
    if (prompt.includes("v2")) {
      return "no issues";
    }
    return "flaw: be more specific";
  }

  if (prompt.includes("verify:")) {
    return "supported";
  }

  // A slightly noisy answerer so self-consistency has a majority to find.
  const noisyAnswers = ["42", "42", "41", "42"];
  const slot = prompt.length % noisyAnswers.length;
  return noisyAnswers[slot];
}
|
|
40
|
+
|
|
41
|
+
/**
 * Runs each reasoning-pattern primitive once against deterministic stand-in
 * callbacks and prints a one-line summary per pattern.
 */
async function main() {
  // 1) Self-consistency: majority vote across samples (3 of 4 agree on "42").
  const ballots = ["42", "42", "41", "42"];
  const sc = await selfConsistency({
    samples: 4,
    generate: (i) => ballots[i],
  });
  console.log(`selfConsistency → "${sc.answer}" (agreement ${sc.consistency})`);

  // 2) Best-of-N: pick the highest-scored candidate (score = your verifier).
  const bo = await bestOfN({
    n: 3,
    generate: (i) => `candidate ${i}`,
    // Stand-in verifier confidence: later candidates score higher.
    score: (_v, i) => i / 10,
  });
  console.log(`bestOfN → "${bo.best}" (score ${bo.score})`);

  // 3) Self-refine: iterate until the critique is clean.
  const sr = await selfRefine<string>({
    maxIters: 3,
    draft: () => "v1 draft",
    critique: (version) => mockModel(`critique ${version}`),
    revise: () => "v2 draft",
  });
  console.log(`selfRefine → "${sr.answer}" (${sr.revised} revision(s))`);

  // 4) Tree-of-thoughts: best-first search over reasoning branches.
  const tot = await treeOfThoughts<number>({
    breadth: 3,
    depth: 2,
    expand: (_path, branch) => branch,
    score: (candidate) => candidate,
    synthesize: (path) => path.reduce((sum, step) => sum + step, 0),
  });
  const trail = tot.path.map((node) => node.thought).join("→");
  console.log(`treeOfThoughts → ${tot.answer} (path ${trail})`);

  // 5) Chain-of-verification: verify the draft's claims, then revise.
  const cov = await chainOfVerification<string>({
    draft: () => "the draft answer",
    planChecks: () => ["verify: claim A", "verify: claim B"],
    answerCheck: (question) => mockModel(question),
    revise: (_draft, checks) => `revised after ${checks.length} checks`,
  });
  console.log(`chainOfVerification → "${cov.answer}"`);

  // 6) Mixture-of-agents: layered multi-agent propose → refine → aggregate.
  const moa = await mixtureOfAgents({
    agents: 3,
    layers: 2,
    propose: (agent) => `agent${agent}'s take`,
    refine: (agent, peers) => `agent${agent} refined (saw ${peers.length} peers)`,
    aggregate: (finalLayer) => `consensus of ${finalLayer.length}`,
  });
  console.log(`mixtureOfAgents → "${moa.answer}" (${moa.layerOutputs.length} layers)`);

  // 7) Reflexion: retry with accumulated reflections until success.
  const refl = await reflexion<string>({
    maxAttempts: 4,
    attempt: (reflections) => `attempt with ${reflections.length} reflection(s)`,
    evaluate: (attempted) => ({
      success: attempted.includes("2 reflection"),
      feedback: "not yet",
    }),
    reflect: (_attempted, feedback, i) => `lesson ${i}: ${feedback}`,
  });
  console.log(`reflexion → "${refl.answer}" (succeeded ${refl.succeeded} in ${refl.attempts} attempts)`);

  // 8) measureLift: prove a pattern beats single-shot on a task set.
  //    Baseline: one noisy sample. Treatment: self-consistency over 5 samples.
  //    Scorer: 1.0 if the answer is the correct "42", else 0.
  const tasks = [0, 1, 2, 3, 4, 5]; // 6 tasks (seeds)
  const noisyPool = ["42", "42", "41", "42", "40", "42"];
  const noisy = (seed: number) => noisyPool[seed % 6];
  const lift = await measureLift<number>({
    tasks,
    baseline: (task) => noisy(task), // single shot
    treatment: async (task) => {
      const voted = await selfConsistency({
        samples: 5,
        generate: (i) => noisy(task + i),
      });
      return voted.answer;
    },
    score: (_task, out) => (out === "42" ? 1 : 0),
  });
  const sign = lift.lift >= 0 ? "+" : "";
  console.log(
    `measureLift → baseline ${lift.baselineMean} vs treatment ${lift.treatmentMean} (lift ${sign}${lift.lift}, win-rate ${lift.winRate})`,
  );
}

// Report any failure from main and exit with a non-zero status.
main().catch((e) => {
  console.error(e);
  process.exit(1);
});
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sample agent: support-triage-bot
|
|
3
|
+
*
|
|
4
|
+
* Triages an inbound support ticket: classifies it (bug / feature-request /
|
|
5
|
+
* billing / how-to), pulls related past tickets, decides routing (engineering
|
|
6
|
+
* queue / billing queue / docs queue), and writes a one-paragraph reply
|
|
7
|
+
* draft. Demonstrates a Council pattern — three specialists (classifier,
|
|
8
|
+
* retriever, writer) deliberate to produce the final routing decision.
|
|
9
|
+
*
|
|
10
|
+
* Ships with mock ticket + KB stores so it runs offline.
|
|
11
|
+
*
|
|
12
|
+
* Run:
|
|
13
|
+
* OPENROUTER_API_KEY=sk-or-... npx tsx examples/support-triage-bot.ts
|
|
14
|
+
*
|
|
15
|
+
* What this demonstrates:
|
|
16
|
+
* - Council primitive used for routing (not just deliberation)
|
|
17
|
+
* - Verifier kernels enforcing the response style
|
|
18
|
+
* - Receipts: the routing decision is signed so a downstream router can
|
|
19
|
+
* prove an agent (not a human) made the call
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import {
|
|
23
|
+
Agent,
|
|
24
|
+
Council,
|
|
25
|
+
Runner,
|
|
26
|
+
defineTool,
|
|
27
|
+
zod as z,
|
|
28
|
+
VerifierKernels,
|
|
29
|
+
ReceiptEmitter,
|
|
30
|
+
InMemoryReceiptSink,
|
|
31
|
+
} from "../src/index.js";
|
|
32
|
+
import { openrouterAdapter } from "./adapters/openrouter.js";
|
|
33
|
+
|
|
34
|
+
// --- Tools (ticket + KB) ----------------------------------------------------
|
|
35
|
+
|
|
36
|
+
/**
 * Mock ticket tool: returns one canned ticket for any id, echoing the
 * requested `ticket_id` back in the result.
 */
const fetchTicket = defineTool({
  name: "fetch_ticket",
  description: "Fetch a support ticket by id.",
  input: z.object({ ticket_id: z.string() }),
  async execute(args) {
    const { ticket_id } = args;
    const body = [
      "Upgraded to v0.4.2 last night. Login works, but the post-login",
      "redirect lands on /404. Worked fine on v0.4.1. Three of my",
      "teammates hit it too.",
    ].join(" ");
    return {
      ticket_id,
      subject: "Login redirects to /404 after upgrade",
      body,
      reporter: "u_8821",
      plan: "team",
    };
  },
});
|
|
53
|
+
|
|
54
|
+
/**
 * Mock search tool: returns one canned resolved ticket for any query,
 * echoing the `query` back in the result.
 */
const similarTickets = defineTool({
  name: "similar_tickets",
  description: "Find past tickets with similar symptoms.",
  input: z.object({ query: z.string() }),
  async execute(args) {
    const { query } = args;
    const resolvedHit = {
      id: "t_771",
      subject: "Redirect breaks after v0.4 upgrade",
      status: "resolved",
      resolution: "Fix shipped in v0.4.3; ask user to upgrade.",
    };
    return { query, hits: [resolvedHit] };
  },
});
|
|
72
|
+
|
|
73
|
+
/**
 * Mock knowledge-base tool: returns one canned article for any topic,
 * echoing the `topic` back in the result.
 */
const kbLookup = defineTool({
  name: "kb_lookup",
  description: "Search the support knowledge base.",
  input: z.object({ topic: z.string() }),
  async execute(args) {
    const { topic } = args;
    const article = {
      title: "Login redirect troubleshooting",
      url: "https://docs.example.com/kb/login-redirect",
    };
    return { topic, articles: [article] };
  },
});
|
|
89
|
+
|
|
90
|
+
// --- Specialists ------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
// Specialist that labels the ticket; its only tool is fetch_ticket.
const classifier = new Agent({
  name: "classifier",
  tools: [fetchTicket],
  // One array entry per prompt line.
  instruction: [
    "You classify a support ticket into one of: bug, feature-request, billing, how-to.",
    'Output a single JSON object: {"class": "...", "confidence": 0-1, "rationale": "..."}.',
    "Do not use em-dashes or AI-isms.",
  ].join("\n"),
});
|
|
99
|
+
|
|
100
|
+
// Specialist that gathers prior context; it can fetch the ticket, search past
// tickets, and search the knowledge base.
const retriever = new Agent({
  name: "retriever",
  tools: [fetchTicket, similarTickets, kbLookup],
  // One array entry per prompt line.
  instruction: [
    "You find prior context for a support ticket. Call similar_tickets and kb_lookup.",
    'Output a single JSON object: {"similar_ticket_ids": [...], "kb_urls": [...], "notes": "..."}.',
    "Do not use em-dashes or AI-isms.",
  ].join("\n"),
});
|
|
107
|
+
|
|
108
|
+
// Specialist that drafts the reply to the reporter. It is the only specialist
// here that carries its own style verifiers.
const writer = new Agent({
  name: "writer",
  tools: [fetchTicket],
  verifiers: [VerifierKernels.emDash, VerifierKernels.aiIsm],
  // One array entry per prompt line.
  instruction: [
    "You write a one-paragraph reply draft for the ticket reporter.",
    "Be direct. Lead with what they should do. End with what we will do.",
    "Do not use em-dashes or AI-isms. Length: 60-100 words.",
  ].join("\n"),
});
|
|
116
|
+
|
|
117
|
+
// Council made up of the three specialists above, with the same two style
// verifiers attached and a 25 000 ms specialist timeout.
const triageCouncil = new Council({
  name: "support-triage",
  specialist_timeout_ms: 25_000,
  verifiers: [VerifierKernels.emDash, VerifierKernels.aiIsm],
  specialists: [classifier, retriever, writer],
});
|
|
123
|
+
|
|
124
|
+
// --- Main -------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
/**
 * Entry point: runs the triage council on one ticket, emits a receipt when the
 * council finishes, then prints the consensus, the answer, and every receipt's
 * cap with a 12-character prefix of its content hash. Exits with status 1 when
 * OPENROUTER_API_KEY is unset.
 */
async function main() {
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) {
    console.error("Set OPENROUTER_API_KEY (https://openrouter.ai/keys) and rerun.");
    process.exit(1);
  }

  const model = openrouterAdapter({ apiKey });
  const runner = new Runner({ model, default_model: "openai/gpt-4o-mini" });

  const sink = new InMemoryReceiptSink();
  const receipts = new ReceiptEmitter({
    sinks: [sink],
    issuer: "did:web:local",
    actor: triageCouncil.name,
  });

  runner.on(async (event) => {
    if (event.type !== "council_done") {
      return;
    }
    // The routing decision is the headline event, so sign it: the queue
    // router downstream can then prove an agent made the call.
    const decision = event.output;
    await receipts.emit({
      cap: "support.triage.route",
      input: { council: event.council },
      output: {
        answer: decision.answer,
        consensus: decision.consensus,
        specialists: decision.specialists.map((entry) => entry.specialist),
      },
      metadata: {
        total_cost_usd: decision.total_cost_usd,
        total_latency_ms: decision.total_latency_ms,
      },
    });
  });

  const prompt = "Triage ticket t_902 and produce a routing decision + reply draft.";
  const out = await runner.runCouncil(triageCouncil, prompt);

  console.log("\n=== Consensus ===", out.consensus);
  console.log("\n=== Answer ===\n" + out.answer);
  console.log(`\n=== Receipts: ${sink.list().length} ===`);
  for (const receipt of sink.list()) {
    const hashPrefix = receipt.content_hash.slice(0, 12);
    console.log(` ${receipt.cap} → ${hashPrefix}…`);
  }
}

// Report any failure from main and exit with a non-zero status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* verifier-kernel — define a custom verifier, attach it to an agent,
|
|
3
|
+
* watch the refutation flow.
|
|
4
|
+
*
|
|
5
|
+
* The custom kernel here flags any standalone four-digit number YEAR
|
|
6
|
+
* where YEAR < 1900 or YEAR > current year. Cheap, deterministic, and runs
|
|
7
|
+
* in milliseconds. Verifiers do NOT call another LLM — they're the answer
|
|
8
|
+
* to "LLM-judge is slow, expensive, and hallucinates."
|
|
9
|
+
*
|
|
10
|
+
* Run:
|
|
11
|
+
* OPENROUTER_API_KEY=sk-or-... npx tsx examples/verifier-kernel.ts
|
|
12
|
+
*
|
|
13
|
+
* What this shows:
|
|
14
|
+
* - defineVerifier — the factory for custom kernels
|
|
15
|
+
* - Attaching a verifier to an Agent (vs a Council)
|
|
16
|
+
* - The "issues" payload that surfaces on refutation
|
|
17
|
+
* - That a failed verifier does NOT throw — callers decide policy
|
|
18
|
+
*/
|
|
19
|
+
import {
|
|
20
|
+
Agent,
|
|
21
|
+
Runner,
|
|
22
|
+
defineVerifier,
|
|
23
|
+
VerifierKernels,
|
|
24
|
+
} from "../src/index.js";
|
|
25
|
+
import { openrouterAdapter } from "./adapters/openrouter.js";
|
|
26
|
+
|
|
27
|
+
// Upper bound for a plausible year, read once when the module loads.
const currentYear = new Date().getFullYear();

/**
 * Custom verifier kernel: reports every standalone four-digit number in the
 * output that is below 1900 or above `currentYear`.
 *
 * The check returns `{ pass, issues }`. `pass` is true only when nothing was
 * reported. Each issue carries the kernel name, an "error" severity, a
 * message, and the character span of the offending digits.
 */
const plausibleYear = defineVerifier({
  name: "plausible_year",
  description:
    "Flag claims with year numbers outside [1900, current year]. " +
    "Catches hallucinated historical dates and future-dated claims.",
  check: async (output) => {
    const issues = Array.from(output.matchAll(/\b(\d{4})\b/g))
      .map((match) => ({ match, year: parseInt(match[1], 10) }))
      .filter(({ year }) => !(year >= 1900 && year <= currentYear))
      .map(({ match, year }) => ({
        kernel: "plausible_year",
        severity: "error" as const,
        message: `Year ${year} is implausible (expected 1900..${currentYear}).`,
        span: { start: match.index!, end: match.index! + match[1].length },
      }));
    return { pass: issues.length === 0, issues };
  },
});
|
|
51
|
+
|
|
52
|
+
// Agent under test: answers history questions and is checked by the custom
// year kernel plus the two built-in style kernels.
const historian = new Agent({
  name: "historian",
  verifiers: [plausibleYear, VerifierKernels.emDash, VerifierKernels.aiIsm],
  // Sentences are joined with single spaces into one instruction string.
  instruction: [
    "You answer history questions in 2-3 sentences.",
    "Always include the year a key event happened.",
    "Be concise.",
  ].join(" "),
});
|
|
63
|
+
|
|
64
|
+
/**
 * Entry point: asks the historian one question, streams its thinking deltas to
 * stdout, logs each verifier run as PASS or FAIL with its issues, then prints
 * the final output. Sets the process exit code to 1 when any verifier result
 * did not pass. Exits with status 1 when OPENROUTER_API_KEY is unset.
 */
async function main() {
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) {
    console.error("Set OPENROUTER_API_KEY (https://openrouter.ai/keys) and rerun.");
    process.exit(1);
  }

  const model = openrouterAdapter({ apiKey });
  const runner = new Runner({ model, default_model: "openai/gpt-4o-mini" });

  runner.on((event) => {
    switch (event.type) {
      case "agent_thinking":
        process.stdout.write(event.delta);
        break;
      case "verifier_run": {
        const verdict = event.result;
        const tag = verdict.pass ? "PASS" : "FAIL";
        console.log(`\n[${tag}] ${event.kernel} (${verdict.ms}ms)`);
        for (const issue of verdict.issues) {
          console.log(` - ${issue.severity}: ${issue.message}`);
        }
        break;
      }
      default:
        break;
    }
  });

  const question =
    "When did the first programmable digital computer come online? Answer with a year.";
  const result = await runner.run(historian, question);

  console.log("\n--- Final output ---");
  console.log(result.output);

  // A failed verifier does not throw; the policy applied here is a non-zero
  // exit code.
  const refuted = result.verifier_results.filter((entry) => !entry.pass);
  if (refuted.length === 0) {
    console.log("\nAll verifier kernels passed.");
    return;
  }
  console.log(`\nRefutations: ${refuted.map((entry) => entry.kernel).join(", ")}`);
  process.exitCode = 1;
}

// Report any failure from main and exit with a non-zero status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|