@vextlabs/theron-agent-sdk 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,6 +1,12 @@
1
1
  'use strict';
2
2
 
3
3
  var zod = require('zod');
4
+ var promises = require('fs/promises');
5
+ var os = require('os');
6
+ var path = require('path');
7
+ var child_process = require('child_process');
8
+ var util = require('util');
9
+ var crypto = require('crypto');
4
10
 
5
11
  // src/agent/index.ts
6
12
  var Agent = class {
@@ -22,23 +28,213 @@ var Agent = class {
22
28
  this.verifiers = config.verifiers ?? [];
23
29
  this.max_turns = config.max_turns ?? 10;
24
30
  }
25
- /** Render the tools as JSON schemas for the model. */
31
+ /**
32
+ * Render the tools as JSON schemas for the model — the agent's own tools
33
+ * PLUS one `delegate_to_<sub-agent>` tool per declared sub-agent, so a
34
+ * supervisor can actually hand work to its specialists. The Runner routes
35
+ * those delegate calls back into `runner.run(subAgent, task)`.
36
+ */
26
37
  toolSchemas() {
27
- return this.tools.map((t) => t.schema);
38
+ const own = this.tools.map((t) => t.schema);
39
+ const delegates = this.sub_agents.map((sa) => ({
40
+ name: subAgentToolName(sa.name),
41
+ description: `Delegate a self-contained subtask to the "${sa.name}" sub-agent and get back its result. ${sa.instruction.system.slice(0, 200)}`,
42
+ input_schema: {
43
+ type: "object",
44
+ properties: {
45
+ task: {
46
+ type: "string",
47
+ description: `The subtask for the "${sa.name}" sub-agent to perform, stated as a complete, standalone instruction.`
48
+ }
49
+ },
50
+ required: ["task"]
51
+ }
52
+ }));
53
+ return [...own, ...delegates];
54
+ }
55
+ /** Resolve a delegate tool-call name back to the sub-agent it targets. */
56
+ findSubAgent(toolName) {
57
+ return this.sub_agents.find((sa) => subAgentToolName(sa.name) === toolName);
28
58
  }
29
59
  /** True if the agent has any sub-agents (i.e., this is a supervisor). */
30
60
  isSupervisor() {
31
61
  return this.sub_agents.length > 0;
32
62
  }
33
63
  };
64
/**
 * Build the tool name under which a sub-agent is exposed for delegation.
 *
 * The name is "delegate_to_" followed by the sub-agent name; every character
 * outside [a-zA-Z0-9_-] is replaced with "_" and the result is capped at 64
 * characters.
 *
 * @param {string} name - The sub-agent's name.
 * @returns {string} The sanitized, length-limited tool name.
 */
function subAgentToolName(name) {
  const maxLength = 64;
  const sanitized = `delegate_to_${name}`.replace(/[^a-zA-Z0-9_-]/g, "_");
  return sanitized.substring(0, maxLength);
}
67
/**
 * Parse one markdown agent definition.
 *
 * The file must start with a `---` delimited frontmatter block made of
 * `key: value` lines and `- item` list entries, followed by a non-empty body
 * that becomes the agent's system prompt.
 *
 * @param {string} filename - File name the content came from (kept as `source`,
 *   and used as the id fallback when the name yields no usable slug).
 * @param {string} content - Raw file content (LF or CRLF, optional BOM).
 * @returns {{id: string, name: string, description: string, model: (string|undefined),
 *   tools: (string[]|undefined), max_turns: (number|undefined), system_prompt: string,
 *   source: string} | null} The parsed definition, or null when the frontmatter,
 *   the body, or the `name` field is missing.
 */
function parseMarkdownAgent(filename, content) {
  // Strip one pair of matching surrounding quotes; leave unbalanced quotes alone
  // so a value such as `Say "hi"` is not truncated.
  const unquote = (s) => {
    const first = s[0];
    const quoted = s.length >= 2 && (first === '"' || first === "'") && s[s.length - 1] === first;
    return quoted ? s.slice(1, -1) : s;
  };
  // Split `a, b` or `[a, b]` into trimmed, unquoted, non-empty items.
  const splitList = (s) => {
    const inner = s.startsWith("[") && s.endsWith("]") ? s.slice(1, -1) : s;
    return inner.split(",").map((item) => unquote(item.trim())).filter(Boolean);
  };

  // Normalize line endings and drop a BOM so files saved on Windows still match.
  const text = String(content ?? "").replace(/^\uFEFF/, "").replace(/\r\n?/g, "\n");
  const fmMatch = text.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
  if (!fmMatch) return null;
  const body = fmMatch[2].trim();
  if (!body) return null;

  // Null-prototype map: a frontmatter key such as `__proto__` stays plain data.
  const fields = Object.create(null);
  let currentKey = "";
  for (const line of fmMatch[1].split("\n")) {
    // List entries belong to the most recent key; indentation is optional.
    const listMatch = line.match(/^\s*-\s+(.+)$/);
    if (listMatch && currentKey) {
      const item = unquote(listMatch[1].trim());
      if (Array.isArray(fields[currentKey])) {
        fields[currentKey].push(item);
      } else {
        fields[currentKey] = [item];
      }
      continue;
    }
    const kvMatch = line.match(/^(\w[\w-]*)\s*:\s*(.*)$/);
    if (kvMatch) {
      currentKey = kvMatch[1];
      fields[currentKey] = unquote(kvMatch[2].trim());
    }
  }

  const name = String(fields.name || "").trim();
  if (!name) return null;

  const baseFilename = filename.replace(/\.md$/i, "").replace(/[^a-z0-9_-]/gi, "-").toLowerCase();
  const slug = name.toLowerCase().replace(/[^a-z0-9_-]/g, "-").replace(/-+/g, "-");
  // A slug made only of dashes carries no information; fall back to the file name.
  const id = /[a-z0-9_]/.test(slug) ? slug : baseFilename;

  // Accept a YAML list, an inline `[a, b]` list, or a comma-separated string.
  let tools;
  if (Array.isArray(fields.tools)) {
    tools = fields.tools.map(String);
  } else if (typeof fields.tools === "string" && fields.tools.trim()) {
    const raw = fields.tools.trim();
    const items = splitList(raw);
    tools = items.length > 0 || raw.startsWith("[") ? items : void 0;
  }

  // Only a positive integer is a meaningful turn limit.
  const maxTurns = Number.parseInt(String(fields.max_turns ?? ""), 10);

  return {
    id,
    name,
    description: String(fields.description || ""),
    model: fields.model ? String(fields.model) : void 0,
    tools,
    max_turns: Number.isInteger(maxTurns) && maxTurns > 0 ? maxTurns : void 0,
    system_prompt: body,
    source: filename
  };
}
109
/**
 * Load every markdown agent definition found directly inside `dir`.
 *
 * Loading is best-effort: a missing or unreadable directory yields an empty
 * list, and a file that cannot be read or parsed is skipped.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @returns {Promise<object[]>} Parsed agent definitions, in file-name order.
 */
async function loadMarkdownAgents(dir) {
  const fs = await import('fs/promises');
  const path = await import('path');
  let entries;
  try {
    entries = await fs.readdir(dir);
  } catch {
    // No such directory (or no permission): there is simply nothing to load.
    return [];
  }
  // Match the extension case-insensitively, as the parser does when it strips it,
  // and sort so that duplicate ids resolve the same way on every platform.
  const mdFiles = entries.filter((f) => /\.md$/i.test(f)).sort();
  const results = [];
  for (const file of mdFiles) {
    try {
      const content = await fs.readFile(path.join(dir, file), "utf8");
      const parsed = parseMarkdownAgent(file, content);
      if (parsed) results.push(parsed);
    } catch {
      // Deliberately best-effort: one bad file must not hide the others.
    }
  }
  return results;
}
130
/**
 * Load agent definitions from the user-level and project-level
 * `.theron/agents` directories and merge them by id.
 *
 * The user-level directory lives under the home directory; the project-level
 * one lives under `projectDir`, or under the current working directory when
 * `projectDir` is not given. Project-level definitions replace user-level
 * ones that share the same id.
 *
 * @param {string} [projectDir] - Project root to look in.
 * @returns {Promise<object[]>} The merged agent definitions.
 */
async function loadAllMarkdownAgents(projectDir) {
  const { homedir } = await import('os');
  const { join } = await import('path');
  const globalDir = join(homedir(), ".theron", "agents");
  const projectRoot = projectDir ? projectDir : process.cwd();
  const localDir = join(projectRoot, ".theron", "agents");
  const loaded = await Promise.all([globalDir, localDir].map((d) => loadMarkdownAgents(d)));
  const merged = /* @__PURE__ */ new Map();
  // Global first, local second: later entries overwrite earlier ones.
  for (const agents of loaded) {
    for (const agent of agents) merged.set(agent.id, agent);
  }
  return Array.from(merged.values());
}
145
+
146
+ // src/skills/index.ts
147
/**
 * Parse one markdown skill definition.
 *
 * The file must start with a `---` delimited frontmatter block made of
 * `key: value` lines and `- item` list entries, followed by a non-empty body.
 * The skill name comes from the `name` field, or from the file name when the
 * field is absent, and is lower-cased and slugified.
 *
 * @param {string} filename - File name the content came from (kept as `source`).
 * @param {string} content - Raw file content (LF or CRLF, optional BOM).
 * @returns {{name: string, description: string, body: string,
 *   allowedTools: (string[]|undefined), model: (string|undefined), source: string} | null}
 *   The parsed skill, or null when the frontmatter or the body is missing.
 */
function parseMarkdownSkill(filename, content) {
  // Strip one pair of matching surrounding quotes; leave unbalanced quotes alone.
  const unquote = (s) => {
    const first = s[0];
    const quoted = s.length >= 2 && (first === '"' || first === "'") && s[s.length - 1] === first;
    return quoted ? s.slice(1, -1) : s;
  };
  // Split `a, b` or `[a, b]` into trimmed, unquoted, non-empty items.
  const splitList = (s) => {
    const inner = s.startsWith("[") && s.endsWith("]") ? s.slice(1, -1) : s;
    return inner.split(",").map((item) => unquote(item.trim())).filter(Boolean);
  };

  // Normalize line endings and drop a BOM so files saved on Windows still match.
  const text = String(content ?? "").replace(/^\uFEFF/, "").replace(/\r\n?/g, "\n");
  const fmMatch = text.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
  if (!fmMatch) return null;
  const body = fmMatch[2].trim();
  if (!body) return null;

  // Null-prototype map: a frontmatter key such as `__proto__` stays plain data.
  const fields = Object.create(null);
  let currentKey = "";
  for (const line of fmMatch[1].split("\n")) {
    // List entries belong to the most recent key; indentation is optional.
    const listMatch = line.match(/^\s*-\s+(.+)$/);
    if (listMatch && currentKey) {
      const item = unquote(listMatch[1].trim());
      if (Array.isArray(fields[currentKey])) fields[currentKey].push(item);
      else fields[currentKey] = [item];
      continue;
    }
    const kvMatch = line.match(/^([\w-]+)\s*:\s*(.*)$/);
    if (kvMatch) {
      currentKey = kvMatch[1];
      fields[currentKey] = unquote(kvMatch[2].trim());
    }
  }

  const rawName = String(fields.name || filename.replace(/\.md$/i, "")).trim();
  const name = rawName.toLowerCase().replace(/[^a-z0-9_-]/g, "-").replace(/-+/g, "-");
  if (!name) return null;

  // Tools may be declared under any of three keys, as a list or as a string.
  let allowedTools;
  const at = fields["allowed-tools"] ?? fields.allowedTools ?? fields.tools;
  if (Array.isArray(at)) {
    allowedTools = at.map(String);
  } else if (typeof at === "string" && at.trim()) {
    allowedTools = splitList(at.trim());
  }

  return {
    name,
    description: String(fields.description || ""),
    body,
    allowedTools,
    model: fields.model ? String(fields.model) : void 0,
    source: filename
  };
}
187
/**
 * Load every markdown skill found directly inside `dir`.
 *
 * Two layouts are recognized: a `<name>.md` file, and a sub-directory that
 * contains a `SKILL.md` file. Loading is best-effort: a missing or unreadable
 * directory yields an empty list, and an entry that cannot be read or parsed
 * is skipped.
 *
 * @param {string} dir - Directory to scan (one level deep).
 * @returns {Promise<object[]>} Parsed skills, in entry-name order.
 */
async function loadMarkdownSkills(dir) {
  const fs = await import('fs/promises');
  const path = await import('path');
  const out = [];
  let entries;
  try {
    entries = await fs.readdir(dir, { withFileTypes: true });
  } catch {
    // No such directory (or no permission): there is simply nothing to load.
    return out;
  }
  // Sort so that duplicate skill names resolve the same way on every platform.
  entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
  for (const ent of entries) {
    try {
      // Match the extension case-insensitively, as the parser does when it strips it.
      if (ent.isFile() && /\.md$/i.test(ent.name)) {
        const full = path.join(dir, ent.name);
        const parsed = parseMarkdownSkill(ent.name, await fs.readFile(full, "utf8"));
        if (parsed) out.push(parsed);
      } else if (ent.isDirectory()) {
        const full = path.join(dir, ent.name, "SKILL.md");
        // A directory without SKILL.md is not a skill; treat the read error as "absent".
        const buf = await fs.readFile(full, "utf8").catch(() => null);
        if (buf) {
          const parsed = parseMarkdownSkill(`${ent.name}/SKILL.md`, buf);
          if (parsed) out.push({ ...parsed, name: parsed.name || ent.name.toLowerCase() });
        }
      }
    } catch {
      // Deliberately best-effort: one bad entry must not hide the others.
    }
  }
  return out;
}
214
/**
 * Load skills from the user-level and project-level `.theron/skills`
 * directories and merge them by name.
 *
 * The user-level directory lives under the home directory; the project-level
 * one lives under `projectDir`, or under the current working directory when
 * `projectDir` is null or undefined. Project-level skills replace user-level
 * ones that share the same name.
 *
 * @param {string} [projectDir] - Project root to look in.
 * @returns {Promise<object[]>} The merged skills.
 */
async function loadAllMarkdownSkills(projectDir) {
  const { homedir } = await import('os');
  const { join } = await import('path');
  const searchDirs = [
    join(homedir(), ".theron", "skills"),
    join(projectDir ?? process.cwd(), ".theron", "skills")
  ];
  const loaded = await Promise.all(searchDirs.map((d) => loadMarkdownSkills(d)));
  const merged = /* @__PURE__ */ new Map();
  // Global first, local second: later entries overwrite earlier ones.
  for (const skills of loaded) {
    for (const skill of skills) merged.set(skill.name, skill);
  }
  return Array.from(merged.values());
}
34
228
 
35
229
  // src/council/index.ts
230
/**
 * Split a specialist's output into one claim per sentence.
 *
 * Sentences are split on whitespace that follows ".", "!" or "?". Every
 * non-empty sentence becomes `{ text, confidence: 1, type: "assertion" }`.
 * A null, undefined or non-string output is coerced to a string first, so a
 * specialist that produced no text yields an empty claim list instead of a
 * TypeError.
 *
 * @param {string} output - The specialist's raw output.
 * @returns {{text: string, confidence: number, type: string}[]} Extracted claims.
 */
var sentenceClaimExtractor = (output) => String(output ?? "")
  .split(/(?<=[.!?])\s+/)
  .map((s) => s.trim())
  .filter((s) => s.length > 0)
  .map((text) => ({ text, confidence: 1, type: "assertion" }));
36
231
  var Council = class {
37
232
  name;
38
233
  specialists;
39
234
  verifiers;
40
235
  reconciler;
41
236
  specialist_timeout_ms;
237
+ claimExtractor;
42
238
  constructor(config) {
43
239
  if (!config.name) throw new Error("Council requires a `name`.");
44
240
  if (!config.specialists || config.specialists.length === 0) {
@@ -49,6 +245,7 @@ var Council = class {
49
245
  this.verifiers = config.verifiers ?? [];
50
246
  this.reconciler = config.reconciler ?? deterministicClaimMerge;
51
247
  this.specialist_timeout_ms = config.specialist_timeout_ms ?? 3e4;
248
+ this.claimExtractor = config.claimExtractor;
52
249
  }
53
250
  /**
54
251
  * Convenience entry point — pointed at the Runner you've already constructed.
@@ -214,22 +411,57 @@ function defineTool(opts) {
214
411
  };
215
412
  }
216
413
  function zodToJsonSchema(schema) {
414
+ const description = schema._def?.description;
415
+ const withDesc = (s) => description ? { ...s, description } : s;
416
+ if (schema instanceof zod.z.ZodOptional) return zodToJsonSchema(schema.unwrap());
417
+ if (schema instanceof zod.z.ZodDefault) {
418
+ const innerType = schema._def.innerType;
419
+ const inner = zodToJsonSchema(innerType);
420
+ let def;
421
+ try {
422
+ def = schema._def.defaultValue?.();
423
+ } catch {
424
+ def = void 0;
425
+ }
426
+ return withDesc(def === void 0 ? inner : { ...inner, default: def });
427
+ }
428
+ if (schema instanceof zod.z.ZodNullable) {
429
+ return withDesc({ ...zodToJsonSchema(schema.unwrap()), nullable: true });
430
+ }
217
431
  if (schema instanceof zod.z.ZodObject) {
218
432
  const properties = {};
219
433
  const required = [];
220
434
  for (const [key, value] of Object.entries(schema.shape)) {
221
- properties[key] = zodToJsonSchema(value);
222
- if (!(value instanceof zod.z.ZodOptional)) required.push(key);
435
+ const field = value;
436
+ properties[key] = zodToJsonSchema(field);
437
+ if (!(field instanceof zod.z.ZodOptional) && !(field instanceof zod.z.ZodDefault)) {
438
+ required.push(key);
439
+ }
223
440
  }
224
- return { type: "object", properties, ...required.length > 0 ? { required } : {} };
441
+ return withDesc({ type: "object", properties, ...required.length > 0 ? { required } : {} });
225
442
  }
226
- if (schema instanceof zod.z.ZodString) return { type: "string" };
227
- if (schema instanceof zod.z.ZodNumber) return { type: "number" };
228
- if (schema instanceof zod.z.ZodBoolean) return { type: "boolean" };
229
- if (schema instanceof zod.z.ZodArray) return { type: "array", items: zodToJsonSchema(schema.element) };
230
- if (schema instanceof zod.z.ZodOptional) return zodToJsonSchema(schema.unwrap());
231
- if (schema instanceof zod.z.ZodEnum) return { type: "string", enum: schema.options };
232
- return { type: "string" };
443
+ if (schema instanceof zod.z.ZodString) return withDesc({ type: "string" });
444
+ if (schema instanceof zod.z.ZodNumber) return withDesc({ type: "number" });
445
+ if (schema instanceof zod.z.ZodBoolean) return withDesc({ type: "boolean" });
446
+ if (schema instanceof zod.z.ZodArray) return withDesc({ type: "array", items: zodToJsonSchema(schema.element) });
447
+ if (schema instanceof zod.z.ZodEnum) return withDesc({ type: "string", enum: schema.options });
448
+ if (schema instanceof zod.z.ZodLiteral) {
449
+ const val = schema.value;
450
+ const t = typeof val === "number" ? "number" : typeof val === "boolean" ? "boolean" : "string";
451
+ return withDesc({ type: t, enum: [val] });
452
+ }
453
+ if (schema instanceof zod.z.ZodUnion) {
454
+ const options = schema._def.options;
455
+ return withDesc({ anyOf: options.map((o) => zodToJsonSchema(o)) });
456
+ }
457
+ if (schema instanceof zod.z.ZodRecord) {
458
+ const valueType = schema._def.valueType;
459
+ return withDesc({
460
+ type: "object",
461
+ additionalProperties: valueType ? zodToJsonSchema(valueType) : true
462
+ });
463
+ }
464
+ return withDesc({ type: "string" });
233
465
  }
234
466
 
235
467
  // src/tools/local-contract.ts
@@ -281,17 +513,30 @@ var LOCAL_TOOL_PARAMETERS = {
281
513
  Grep: {
282
514
  type: "object",
283
515
  properties: {
284
- pattern: { type: "string", description: "Regex pattern." },
516
+ pattern: { type: "string", description: "Regex pattern (ripgrep syntax). Use fixed_strings for a literal search." },
285
517
  path: { type: "string", description: "Optional file or directory to limit the search." },
286
- glob: { type: "string", description: "Optional glob filter, e.g. '*.ts'." },
287
- case_insensitive: { type: "boolean", default: false }
518
+ glob: { type: "string", description: "Optional glob filter, e.g. '*.ts' or 'src/**/*.tsx'." },
519
+ type: { type: "string", description: "Optional file-type filter (ripgrep --type), e.g. 'ts', 'py', 'rust'. More efficient than glob for language filters." },
520
+ case_insensitive: { type: "boolean", default: false, description: "Case-insensitive match." },
521
+ output_mode: {
522
+ type: "string",
523
+ enum: ["content", "files_with_matches", "count"],
524
+ description: "What to return: 'content' = matching lines with file:line (default), 'files_with_matches' = just the file paths, 'count' = per-file match counts.",
525
+ default: "content"
526
+ },
527
+ context_lines: { type: "number", description: "Lines of context to show before AND after each match (ripgrep -C). Only applies to output_mode 'content'." },
528
+ multiline: { type: "boolean", default: false, description: "Allow the pattern to span line boundaries (ripgrep --multiline; '.' matches newlines)." },
529
+ fixed_strings: { type: "boolean", default: false, description: "Treat the pattern as a literal string, not a regex (ripgrep -F)." }
288
530
  },
289
531
  required: ["pattern"]
290
532
  },
291
533
  LS: {
292
534
  type: "object",
293
535
  properties: {
294
- path: { type: "string", description: "Path to list. Defaults to the working directory." }
536
+ path: { type: "string", description: "Path to list. Defaults to the working directory." },
537
+ show_hidden: { type: "boolean", default: false, description: "Include dotfiles (.env, .gitignore, .github, etc.) in the listing." },
538
+ recursive: { type: "boolean", default: false, description: "List subdirectories recursively (up to `depth` levels)." },
539
+ depth: { type: "number", description: "Max recursion depth when recursive=true (default 3)." }
295
540
  }
296
541
  }
297
542
  };
@@ -412,6 +657,105 @@ var VerifierKernels = {
412
657
  })
413
658
  };
414
659
 
660
+ // src/reasoning-cert/index.ts
661
+ var ARITH = /(-?\d+(?:\.\d+)?)\s*([+\-*/])\s*(-?\d+(?:\.\d+)?)\s*=\s*(-?\d+(?:\.\d+)?)/g;
662
+ async function sha256Hex(s) {
663
+ const buf = await globalThis.crypto.subtle.digest("SHA-256", new TextEncoder().encode(s));
664
+ return "sha256:" + [...new Uint8Array(buf)].map((b) => b.toString(16).padStart(2, "0")).join("");
665
+ }
666
/**
 * Produce an arithmetic-tier certificate for a piece of text.
 *
 * The text is scanned for "A op B = C" claims with ARITH and handed to
 * `VerifierKernels.arithmetic.check`. The verdict is ABSTAIN when no claim was
 * matched, PASS when the check passes, and FAIL otherwise.
 *
 * @param {string} text - The claim text; null/undefined is treated as "".
 * @returns {Promise<object>} The certificate, including the SHA-256 hash of the
 *   claim text and a Unix-seconds timestamp.
 */
async function certifyArithmetic(text) {
  const claim = String(text ?? "");
  const matched = Array.from(claim.matchAll(ARITH)).length;
  const res = await VerifierKernels.arithmetic.check(claim);

  let verdict;
  if (matched === 0) {
    verdict = "ABSTAIN";
  } else if (res.pass) {
    verdict = "PASS";
  } else {
    verdict = "FAIL";
  }

  const hasProcessVersion = typeof process !== "undefined" && process.version;
  const oracle_version = hasProcessVersion ? process.version : "webcrypto";
  const claim_input_hash = await sha256Hex(claim);

  let verdict_detail;
  if (matched === 0) {
    verdict_detail = "no 'A op B = C' arithmetic claim found \u2014 nothing certified";
  } else if (res.pass) {
    verdict_detail = `${matched} arithmetic claim(s) re-computed and match`;
  } else {
    verdict_detail = res.issues.map((i) => i.message).join("; ");
  }

  const certifiesByVerdict = {
    PASS: "arithmetic_correct",
    FAIL: "arithmetic_incorrect",
    ABSTAIN: "nothing"
  };

  return {
    tier: "arithmetic",
    oracle_id: "js_runtime",
    oracle_version,
    claim_input_hash,
    verdict,
    verdict_detail,
    certifies: certifiesByVerdict[verdict],
    does_not_certify: "any reasoning, fact, or step beyond the literal 'A op B = C' arithmetic re-check",
    oracle_ts: Math.floor(Date.now() / 1e3)
  };
}
683
/**
 * Re-check a reasoning certificate offline against the claim text it covers.
 *
 * Only the "arithmetic" tier is supported; any other tier is rejected. For an
 * arithmetic certificate the claim text is re-hashed and re-certified, and
 * both the hash and the verdict must match the certificate.
 *
 * @param {object} cert - Certificate with `tier`, `claim_input_hash`, `verdict`.
 * @param {string} claimText - The claim text; null/undefined is treated as "".
 * @returns {Promise<{ok: boolean, reasons: string[]}>} `ok` is true only when
 *   both checks pass; `reasons` lists every mismatch found.
 */
async function verifyReasoningCertificate(cert, claimText) {
  if (cert.tier !== "arithmetic") {
    return {
      ok: false,
      reasons: [`offline re-check for tier '${cert.tier}' is not implemented in Slice 0 (arithmetic only)`]
    };
  }
  const reasons = [];
  const claim = String(claimText ?? "");

  const actualHash = await sha256Hex(claim);
  const hashOk = actualHash === cert.claim_input_hash;
  if (!hashOk) {
    reasons.push("claim_input_hash does not match the provided claim text");
  }

  const { verdict } = await certifyArithmetic(claim);
  const verdictOk = verdict === cert.verdict;
  if (!verdictOk) {
    reasons.push(`re-computed verdict ${verdict} != certificate ${cert.verdict}`);
  }

  return { ok: hashOk && verdictOk, reasons };
}
697
// Promise-returning wrapper around child_process.execFile.
var pExecFile = util.promisify(child_process.execFile);

/**
 * Resolve `p` against `root` and reject it if it lands outside `root`.
 *
 * The check is lexical: it compares path strings and does not consult the
 * file system.
 *
 * @param {string} root - The session root directory.
 * @param {string} p - Absolute path, or a path relative to `root`.
 * @returns {string} `p` itself when it is absolute, otherwise `p` resolved
 *   against `root`.
 * @throws {Error} When the path points outside `root`.
 */
function resolveInside(root, p) {
  const target = path.isAbsolute(p) ? p : path.resolve(root, p);
  const relative = path.relative(root, target);
  const climbsOut = relative === ".." || relative.startsWith(".." + path.sep);
  // An absolute "relative" path means the target is on another root entirely.
  if (climbsOut || path.isAbsolute(relative)) {
    throw new Error(`path escapes session root: ${p}`);
  }
  return target;
}
706
/**
 * A session backed by a directory on the local machine.
 *
 * Commands run through `/bin/sh -c`; file reads and writes are confined to
 * the session root by `resolveInside`. Once disposed, every operation except
 * `dispose` throws "session disposed".
 */
var LocalCloudSession = class {
  id;
  root;
  disposed = false;

  /**
   * @param {string} id - Session identifier.
   * @param {string} root - Directory that acts as the session root.
   */
  constructor(id, root) {
    this.id = id;
    this.root = root;
  }

  /**
   * Run a shell command inside the session.
   *
   * @param {string} command - Command line passed to `/bin/sh -c`.
   * @param {{cwd?: string, timeoutMs?: number, env?: object}} [options] -
   *   Working directory (must stay inside the root), timeout in milliseconds
   *   (default 120000) and extra environment variables layered over process.env.
   * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
   *   Never rejects for a failing command: the failure is reported through
   *   `exitCode` (the numeric error code, 124 when the child was killed, else 1).
   */
  async exec(command, options = {}) {
    if (this.disposed) throw new Error("session disposed");
    let cwd = this.root;
    if (options.cwd) {
      cwd = resolveInside(this.root, options.cwd);
    }
    try {
      const result = await pExecFile("/bin/sh", ["-c", command], {
        cwd,
        timeout: options.timeoutMs ?? 12e4,
        env: { ...process.env, ...options.env ?? {} },
        maxBuffer: 64 * 1024 * 1024
      });
      return {
        stdout: result.stdout.toString(),
        stderr: result.stderr.toString(),
        exitCode: 0
      };
    } catch (e) {
      const err = e;
      let exitCode = 1;
      if (typeof err.code === "number") {
        exitCode = err.code;
      } else if (err.killed) {
        exitCode = 124;
      }
      const stdout = (err.stdout ?? "").toString();
      const stderr = (err.stderr ?? err.message ?? String(e)).toString();
      return { stdout, stderr, exitCode };
    }
  }

  /**
   * Read a UTF-8 text file from inside the session root.
   *
   * @param {string} path - Absolute path, or a path relative to the root.
   * @returns {Promise<string>} The file content.
   */
  async readFile(path) {
    if (this.disposed) throw new Error("session disposed");
    const abs = resolveInside(this.root, path);
    return promises.readFile(abs, "utf8");
  }

  /**
   * Write a UTF-8 text file inside the session root, creating parent
   * directories as needed.
   *
   * @param {string} path$1 - Absolute path, or a path relative to the root.
   * @param {string} content - Text to write.
   * @returns {Promise<void>}
   */
  async writeFile(path$1, content) {
    if (this.disposed) throw new Error("session disposed");
    const abs = resolveInside(this.root, path$1);
    const parent = path.dirname(abs);
    await promises.mkdir(parent, { recursive: true });
    await promises.writeFile(abs, content, "utf8");
  }

  /**
   * Mark the session as disposed and remove its root directory recursively.
   * Calling it again is a no-op.
   *
   * @returns {Promise<void>}
   */
  async dispose() {
    if (this.disposed) return;
    this.disposed = true;
    await promises.rm(this.root, { recursive: true, force: true });
  }
};
751
/**
 * Creates local sessions, each rooted in its own fresh temporary directory.
 */
var LocalCloudSessionProvider = class {
  /**
   * Create a new session with a random UUID and a new directory under the
   * OS temp dir whose name starts with "theron-session-<first 8 id chars>-".
   *
   * @returns {Promise<LocalCloudSession>} The provisioned session.
   */
  async provision() {
    const sessionId = crypto.randomUUID();
    const prefix = path.join(os.tmpdir(), `theron-session-${sessionId.slice(0, 8)}-`);
    const sessionRoot = await promises.mkdtemp(prefix);
    return new LocalCloudSession(sessionId, sessionRoot);
  }
};
758
+
415
759
  // src/runtime/index.ts
416
760
  var Runner = class {
417
761
  model;
@@ -449,8 +793,9 @@ var Runner = class {
449
793
  * 4. Run any registered verifier kernels on the final output
450
794
  * 5. Return the AgentResult
451
795
  */
452
- async run(agent, query) {
796
+ async run(agent, query, opts) {
453
797
  const startedAt = Date.now();
798
+ const signal = opts?.signal;
454
799
  this.emit({ type: "agent_start", agent: agent.name, query });
455
800
  const messages = [
456
801
  { role: "system", content: agent.instruction.system }
@@ -463,49 +808,84 @@ var Runner = class {
463
808
  const toolCalls = [];
464
809
  let tokensIn = 0;
465
810
  let tokensOut = 0;
811
+ let costUsd = 0;
466
812
  let finalOutput = "";
813
+ let completed = false;
814
+ let aborted = false;
467
815
  for (let turn = 0; turn < agent.max_turns; turn++) {
816
+ if (signal?.aborted) {
817
+ aborted = true;
818
+ this.emit({ type: "aborted", agent: agent.name });
819
+ break;
820
+ }
468
821
  const response = await this.model.chat({
469
822
  model: agent.model ?? this.default_model,
470
823
  messages,
471
824
  tools: agent.toolSchemas(),
472
- onDelta: (delta) => this.emit({ type: "agent_thinking", agent: agent.name, delta })
825
+ onDelta: (delta) => this.emit({ type: "agent_thinking", agent: agent.name, delta }),
826
+ signal
473
827
  });
474
828
  tokensIn += response.tokens.input;
475
829
  tokensOut += response.tokens.output;
830
+ costUsd += response.cost_usd ?? 0;
476
831
  if (response.tool_calls && response.tool_calls.length > 0) {
477
832
  messages.push({ role: "assistant", content: response.content });
478
- for (const call of response.tool_calls) {
479
- const tool = agent.tools.find((t) => t.schema.name === call.name);
480
- if (!tool) {
481
- this.emit({
482
- type: "error",
483
- agent: agent.name,
484
- message: `Model called unknown tool: ${call.name}`
485
- });
486
- messages.push({ role: "tool", content: `error: unknown tool ${call.name}` });
487
- continue;
488
- }
489
- this.emit({ type: "tool_call_start", agent: agent.name, tool: call.name, input: call.input });
490
- const t0 = Date.now();
491
- try {
492
- const output = await tool.execute(call.input, this.tool_context);
493
- const ms = Date.now() - t0;
494
- this.emit({ type: "tool_call_done", agent: agent.name, tool: call.name, output, ms });
495
- toolCalls.push({ name: call.name, input: call.input, output });
496
- messages.push({ role: "tool", content: JSON.stringify(output) });
497
- } catch (err) {
498
- const msg = err instanceof Error ? err.message : String(err);
499
- this.emit({ type: "error", agent: agent.name, message: `Tool ${call.name} threw: ${msg}` });
500
- messages.push({ role: "tool", content: `error: ${msg}` });
501
- }
833
+ const calls = response.tool_calls;
834
+ const results = await Promise.all(
835
+ calls.map(async (call) => {
836
+ const subAgent = agent.findSubAgent(call.name);
837
+ if (subAgent) {
838
+ this.emit({ type: "tool_call_start", agent: agent.name, tool: call.name, input: call.input });
839
+ const t02 = Date.now();
840
+ try {
841
+ const task = typeof call.input?.task === "string" ? call.input.task : JSON.stringify(call.input);
842
+ const sub = await this.run(subAgent, task, { signal });
843
+ const ms = Date.now() - t02;
844
+ this.emit({ type: "tool_call_done", agent: agent.name, tool: call.name, output: sub.output, ms });
845
+ return { call, output: sub.output, content: sub.output, ok: true };
846
+ } catch (err) {
847
+ const msg = err instanceof Error ? err.message : String(err);
848
+ this.emit({ type: "error", agent: agent.name, message: `Sub-agent ${subAgent.name} threw: ${msg}` });
849
+ return { call, content: `error: ${msg}`, ok: false };
850
+ }
851
+ }
852
+ const tool = agent.tools.find((t) => t.schema.name === call.name);
853
+ if (!tool) {
854
+ this.emit({
855
+ type: "error",
856
+ agent: agent.name,
857
+ message: `Model called unknown tool: ${call.name}`
858
+ });
859
+ return { call, content: `error: unknown tool ${call.name}`, ok: false };
860
+ }
861
+ this.emit({ type: "tool_call_start", agent: agent.name, tool: call.name, input: call.input });
862
+ const t0 = Date.now();
863
+ try {
864
+ const output = await tool.execute(call.input, this.tool_context);
865
+ const ms = Date.now() - t0;
866
+ this.emit({ type: "tool_call_done", agent: agent.name, tool: call.name, output, ms });
867
+ return { call, output, content: JSON.stringify(output), ok: true };
868
+ } catch (err) {
869
+ const msg = err instanceof Error ? err.message : String(err);
870
+ this.emit({ type: "error", agent: agent.name, message: `Tool ${call.name} threw: ${msg}` });
871
+ return { call, content: `error: ${msg}`, ok: false };
872
+ }
873
+ })
874
+ );
875
+ for (const r of results) {
876
+ if (r.ok) toolCalls.push({ name: r.call.name, input: r.call.input, output: r.output });
877
+ messages.push({ role: "tool", content: r.content });
502
878
  }
503
879
  continue;
504
880
  }
505
881
  finalOutput = response.content;
506
882
  messages.push({ role: "assistant", content: finalOutput });
883
+ completed = true;
507
884
  break;
508
885
  }
886
+ if (!completed && !aborted) {
887
+ this.emit({ type: "max_turns_exhausted", agent: agent.name, turns: agent.max_turns });
888
+ }
509
889
  this.emit({ type: "agent_output", agent: agent.name, output: finalOutput });
510
890
  const verifier_results = [];
511
891
  for (const v of agent.verifiers) {
@@ -528,8 +908,8 @@ var Runner = class {
528
908
  tool_calls: toolCalls,
529
909
  verifier_results,
530
910
  tokens_used: { input: tokensIn, output: tokensOut },
531
- cost_usd: 0,
532
- // adapter-specific; populated by adapter
911
+ cost_usd: costUsd,
912
+ // summed from adapter-reported per-call cost (0 if the adapter doesn't report it)
533
913
  latency_ms
534
914
  };
535
915
  }
@@ -539,17 +919,18 @@ var Runner = class {
539
919
  * Fan out to all specialists in parallel (with timeout), gather outputs,
540
920
  * run council-level verifier kernels on each, and reconcile.
541
921
  */
542
- async runCouncil(council, query) {
922
+ async runCouncil(council, query, opts) {
543
923
  const startedAt = Date.now();
924
+ const signal = opts?.signal;
544
925
  this.emit({ type: "council_start", council: council.name, query });
545
926
  const withTimeout = (p, ms) => Promise.race([
546
927
  p,
547
- new Promise((resolve) => setTimeout(() => resolve(null), ms))
928
+ new Promise((resolve2) => setTimeout(() => resolve2(null), ms))
548
929
  ]);
549
930
  const specialistResults = await Promise.all(
550
931
  council.specialists.map(async (spec) => {
551
932
  try {
552
- const result = await withTimeout(this.run(spec, query), council.specialist_timeout_ms);
933
+ const result = await withTimeout(this.run(spec, query, { signal }), council.specialist_timeout_ms);
553
934
  if (result === null) {
554
935
  this.emit({
555
936
  type: "error",
@@ -575,8 +956,11 @@ var Runner = class {
575
956
  const out = {
576
957
  specialist: spec.name,
577
958
  output: result.output,
578
- claims: [],
579
- // claim extraction is the reconciler's job
959
+ // Extract claims if the council supplies an extractor; otherwise the
960
+ // reconciler is responsible (the default deterministic reconciler
961
+ // votes over these claims, so a council that wants automatic
962
+ // ratification should set `claimExtractor`).
963
+ claims: council.claimExtractor ? council.claimExtractor(result.output) : [],
580
964
  // AgentResult.verifier_results widens issues to unknown[]; at the
581
965
  // runtime layer we know every entry came from a Verifier.check()
582
966
  // call (which produces VerifierIssue[]), so the cast is sound.
@@ -619,7 +1003,7 @@ var MCP_PROTOCOL_VERSION = "2024-11-05";
619
1003
  var DEFAULT_TIMEOUT_MS = 12e3;
620
1004
  var MCPClient = class {
621
1005
  config;
622
- initialized = false;
1006
+ initPromise = null;
623
1007
  toolCache = null;
624
1008
  constructor(config) {
625
1009
  if (!/^[a-z0-9_-]+$/.test(config.slug)) {
@@ -685,7 +1069,17 @@ var MCPClient = class {
685
1069
  };
686
1070
  }
687
1071
  async ensureInitialized(signal) {
688
- if (this.initialized) return;
1072
+ if (!this.initPromise) {
1073
+ this.initPromise = this.doInitialize(signal);
1074
+ }
1075
+ try {
1076
+ await this.initPromise;
1077
+ } catch (err) {
1078
+ this.initPromise = null;
1079
+ throw err;
1080
+ }
1081
+ }
1082
+ async doInitialize(signal) {
689
1083
  await this.rpc(
690
1084
  "initialize",
691
1085
  {
@@ -696,7 +1090,6 @@ var MCPClient = class {
696
1090
  signal
697
1091
  );
698
1092
  this.rpc("notifications/initialized", {}, signal).catch(() => void 0);
699
- this.initialized = true;
700
1093
  }
701
1094
  async rpc(method, params, externalSignal) {
702
1095
  const ac = new AbortController();
@@ -710,9 +1103,9 @@ var MCPClient = class {
710
1103
  }
711
1104
  const body = {
712
1105
  jsonrpc: "2.0",
713
- id: Date.now() + Math.floor(Math.random() * 1e3),
714
1106
  method,
715
- params
1107
+ params,
1108
+ ...method.startsWith("notifications/") ? {} : { id: Date.now() + Math.floor(Math.random() * 1e3) }
716
1109
  };
717
1110
  try {
718
1111
  const r = await fetch(this.config.url, {
@@ -735,11 +1128,19 @@ var MCPClient = class {
735
1128
  const ct = r.headers.get("content-type") || "";
736
1129
  if (ct.includes("text/event-stream")) {
737
1130
  const text = await r.text();
738
- const m = text.match(/data:\s*(\{[\s\S]*?\})\s*\n/);
739
- if (!m) throw new Error("mcp sse stream had no data event");
740
- const env2 = JSON.parse(m[1]);
741
- if (env2.error) throw new Error(`mcp error: ${env2.error.message}`);
742
- return env2.result;
1131
+ for (const ev of text.split(/\n\n/)) {
1132
+ const payload = ev.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).replace(/^ /, "")).join("\n").trim();
1133
+ if (!payload || payload === "[DONE]") continue;
1134
+ let env2;
1135
+ try {
1136
+ env2 = JSON.parse(payload);
1137
+ } catch {
1138
+ continue;
1139
+ }
1140
+ if (env2.error) throw new Error(`mcp error: ${env2.error.message}`);
1141
+ return env2.result;
1142
+ }
1143
+ throw new Error("mcp sse stream had no data event");
743
1144
  }
744
1145
  const env = await r.json();
745
1146
  if (env.error) throw new Error(`mcp error: ${env.error.message}`);
@@ -798,6 +1199,7 @@ function theronAdapter(opts = {}) {
798
1199
  let inputTokens = 0;
799
1200
  let outputTokens = 0;
800
1201
  let buf = "";
1202
+ const toolAcc = {};
801
1203
  for (; ; ) {
802
1204
  const { value, done } = await reader.read();
803
1205
  if (done) break;
@@ -810,11 +1212,20 @@ function theronAdapter(opts = {}) {
810
1212
  if (!data || data === "[DONE]") continue;
811
1213
  try {
812
1214
  const json2 = JSON.parse(data);
813
- const delta = json2.choices?.[0]?.delta?.content;
1215
+ const d = json2.choices?.[0]?.delta;
1216
+ const delta = d?.content;
814
1217
  if (delta) {
815
1218
  onDelta(delta);
816
1219
  content += delta;
817
1220
  }
1221
+ if (Array.isArray(d?.tool_calls)) {
1222
+ for (const tc of d.tool_calls) {
1223
+ const i = typeof tc.index === "number" ? tc.index : 0;
1224
+ toolAcc[i] ??= { name: "", args: "" };
1225
+ if (tc.function?.name) toolAcc[i].name = tc.function.name;
1226
+ if (tc.function?.arguments) toolAcc[i].args += tc.function.arguments;
1227
+ }
1228
+ }
818
1229
  if (json2.usage) {
819
1230
  inputTokens = json2.usage.prompt_tokens ?? inputTokens;
820
1231
  outputTokens = json2.usage.completion_tokens ?? outputTokens;
@@ -823,7 +1234,8 @@ function theronAdapter(opts = {}) {
823
1234
  }
824
1235
  }
825
1236
  }
826
- return { content, tokens: { input: inputTokens, output: outputTokens } };
1237
+ const tool_calls2 = Object.keys(toolAcc).length ? Object.values(toolAcc).map((t) => ({ name: t.name, input: safeJson(t.args) })) : void 0;
1238
+ return { content, tool_calls: tool_calls2, tokens: { input: inputTokens, output: outputTokens } };
827
1239
  }
828
1240
  const json = await res.json();
829
1241
  const msg = json.choices?.[0]?.message ?? { content: "" };
@@ -875,7 +1287,7 @@ var ReceiptEmitter = class {
875
1287
  output: input.output,
876
1288
  ...input.metadata !== void 0 ? { metadata: input.metadata } : {}
877
1289
  };
878
- const content_hash = await sha256Hex(canonicalize(payload));
1290
+ const content_hash = await sha256Hex2(canonicalize(payload));
879
1291
  let receipt = {
880
1292
  v: "stoa.receipt.v1",
881
1293
  id: ulid(),
@@ -965,7 +1377,7 @@ function canonicalize(value) {
965
1377
  (k) => JSON.stringify(k) + ":" + canonicalize(value[k])
966
1378
  ).join(",") + "}";
967
1379
  }
968
- async function sha256Hex(input) {
1380
+ async function sha256Hex2(input) {
969
1381
  const data = new TextEncoder().encode(input);
970
1382
  const buf = await globalThis.crypto.subtle.digest("SHA-256", data);
971
1383
  const bytes = new Uint8Array(buf);
@@ -1059,8 +1471,9 @@ async function compactHistory(opts) {
1059
1471
  const older = msgs.slice(0, msgs.length - keepRecent);
1060
1472
  const recent = msgs.slice(msgs.length - keepRecent);
1061
1473
  const summary = String(await opts.summarize(older));
1474
+ const summaryRole = opts.summaryRole ?? "system";
1062
1475
  return {
1063
- messages: [{ role: "system", content: `${SUMMARY_PREFIX}
1476
+ messages: [{ role: summaryRole, content: `${SUMMARY_PREFIX}
1064
1477
  ${summary}` }, ...recent],
1065
1478
  compacted: true,
1066
1479
  summary,
@@ -1240,7 +1653,7 @@ async function measureLift(opts) {
1240
1653
  }
1241
1654
 
1242
1655
  // src/index.ts
1243
- var VERSION = "0.3.0";
1656
+ var VERSION = "0.3.1";
1244
1657
 
1245
1658
  Object.defineProperty(exports, "zod", {
1246
1659
  enumerable: true,
@@ -1252,6 +1665,8 @@ exports.InMemoryReceiptSink = InMemoryReceiptSink;
1252
1665
  exports.InMemoryStore = InMemoryStore;
1253
1666
  exports.LOCAL_TOOL_NAMES = LOCAL_TOOL_NAMES;
1254
1667
  exports.LOCAL_TOOL_PARAMETERS = LOCAL_TOOL_PARAMETERS;
1668
+ exports.LocalCloudSession = LocalCloudSession;
1669
+ exports.LocalCloudSessionProvider = LocalCloudSessionProvider;
1255
1670
  exports.MCPClient = MCPClient;
1256
1671
  exports.MUTATING_LOCAL_TOOLS = MUTATING_LOCAL_TOOLS;
1257
1672
  exports.Memory = Memory;
@@ -1265,6 +1680,7 @@ exports.anyOf = anyOf;
1265
1680
  exports.bestOfN = bestOfN;
1266
1681
  exports.boundWorkingSet = boundWorkingSet;
1267
1682
  exports.buildLocalToolSchemas = buildLocalToolSchemas;
1683
+ exports.certifyArithmetic = certifyArithmetic;
1268
1684
  exports.chainOfVerification = chainOfVerification;
1269
1685
  exports.collectMcpTools = collectMcpTools;
1270
1686
  exports.compactHistory = compactHistory;
@@ -1273,16 +1689,25 @@ exports.defineTool = defineTool;
1273
1689
  exports.defineVerifier = defineVerifier;
1274
1690
  exports.fileReceiptSink = fileReceiptSink;
1275
1691
  exports.httpReceiptSink = httpReceiptSink;
1692
+ exports.loadAllMarkdownAgents = loadAllMarkdownAgents;
1693
+ exports.loadAllMarkdownSkills = loadAllMarkdownSkills;
1694
+ exports.loadMarkdownAgents = loadMarkdownAgents;
1695
+ exports.loadMarkdownSkills = loadMarkdownSkills;
1276
1696
  exports.measureLift = measureLift;
1277
1697
  exports.mixtureOfAgents = mixtureOfAgents;
1698
+ exports.parseMarkdownAgent = parseMarkdownAgent;
1699
+ exports.parseMarkdownSkill = parseMarkdownSkill;
1278
1700
  exports.reflexion = reflexion;
1279
1701
  exports.runImprovementCycle = runImprovementCycle;
1280
1702
  exports.runUntil = runUntil;
1281
1703
  exports.selfConsistency = selfConsistency;
1282
1704
  exports.selfRefine = selfRefine;
1705
+ exports.sentenceClaimExtractor = sentenceClaimExtractor;
1283
1706
  exports.stepCountIs = stepCountIs;
1707
+ exports.subAgentToolName = subAgentToolName;
1284
1708
  exports.theron = theron;
1285
1709
  exports.theronAdapter = theronAdapter;
1286
1710
  exports.treeOfThoughts = treeOfThoughts;
1287
1711
  exports.verifiedRatchet = verifiedRatchet;
1288
1712
  exports.verifierSatisfied = verifierSatisfied;
1713
+ exports.verifyReasoningCertificate = verifyReasoningCertificate;