@fideliosai/adapter-hermes-local 0.0.41 → 0.0.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -2
- package/src/server/execute.js +74 -1
- package/src/server/execute.test.js +324 -0
- package/src/server/headless.js +185 -0
- package/src/server/headless.test.js +253 -0
- package/src/server/toolset-registry.js +95 -0
- package/src/server/toolset-registry.test.js +58 -0
- package/src/server/triage.js +234 -0
- package/src/server/triage.test.js +264 -0
- package/src/ui/build-config.js +8 -0
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for FID-52: headless I/O contract.
|
|
3
|
+
*/
|
|
4
|
+
import { describe, it, expect, vi } from "vitest";
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
isHeadlessEnv,
|
|
8
|
+
filterHeadlessUnsafe,
|
|
9
|
+
filterHeadlessCsv,
|
|
10
|
+
parseClarifyMarker,
|
|
11
|
+
escalateClarify,
|
|
12
|
+
} from "./headless.js";
|
|
13
|
+
|
|
14
|
+
describe("isHeadlessEnv", () => {
  // Each case feeds a synthetic env object (and optionally a TTY hint)
  // so the real process environment never influences the outcome.
  it("returns true when FIDELIOS_RUN_ID is present", () => {
    const env = { FIDELIOS_RUN_ID: "abc" };
    expect(isHeadlessEnv(env)).toBe(true);
  });

  it("returns true when FIDELIOS_HEADLESS=1", () => {
    for (const flag of ["1", "true"]) {
      expect(isHeadlessEnv({ FIDELIOS_HEADLESS: flag })).toBe(true);
    }
  });

  it("explicit FIDELIOS_HEADLESS=0 wins over RUN_ID (used for tests)", () => {
    const env = { FIDELIOS_HEADLESS: "0", FIDELIOS_RUN_ID: "x" };
    expect(isHeadlessEnv(env)).toBe(false);
  });

  it("returns false on empty env when stdin is a TTY", () => {
    const hints = { stdinIsTTY: true };
    expect(isHeadlessEnv({}, hints)).toBe(false);
  });

  it("returns true when stdin is not a TTY (and no FideliOS markers)", () => {
    const hints = { stdinIsTTY: false };
    expect(isHeadlessEnv({}, hints)).toBe(true);
  });

  it("returns false when neither marker nor TTY hint is present (assume interactive)", () => {
    const emptyEnv = {};
    expect(isHeadlessEnv(emptyEnv)).toBe(false);
  });
});
|
|
40
|
+
|
|
41
|
+
describe("filterHeadlessUnsafe", () => {
  it("strips clarify (registry-flagged headlessSafe:false)", () => {
    const { kept, stripped } = filterHeadlessUnsafe(["file", "clarify", "web"]);
    expect(kept).toEqual(["file", "web"]);
    expect(stripped).toEqual(["clarify"]);
  });

  it("preserves order of kept names", () => {
    const requested = ["clarify", "terminal", "clarify", "file"];
    const { kept, stripped } = filterHeadlessUnsafe(requested);
    expect(kept).toEqual(["terminal", "file"]);
    // Duplicates of a stripped name are each reported.
    expect(stripped).toEqual(["clarify", "clarify"]);
  });

  it("leaves unknown names alone (Hermes / triage handle those)", () => {
    const { kept, stripped } = filterHeadlessUnsafe(["mystery", "file"]);
    expect(kept).toEqual(["mystery", "file"]);
    expect(stripped).toEqual([]);
  });

  it("accepts a custom registry override", () => {
    const customRegistry = [{ name: "alpha", headlessSafe: false }, { name: "beta" }];
    const { kept, stripped } = filterHeadlessUnsafe(
      ["alpha", "beta", "clarify"],
      customRegistry
    );
    // `clarify` is absent from the custom registry, so it counts as unknown and is kept.
    expect(kept).toEqual(["beta", "clarify"]);
    expect(stripped).toEqual(["alpha"]);
  });
});
|
|
71
|
+
|
|
72
|
+
describe("filterHeadlessCsv", () => {
  it("filters comma-separated input and returns new csv", () => {
    const { csv, stripped } = filterHeadlessCsv("file,clarify,web");
    expect(csv).toBe("file,web");
    expect(stripped).toEqual(["clarify"]);
  });

  it("returns empty string when nothing is left after stripping", () => {
    const { csv, stripped } = filterHeadlessCsv("clarify");
    expect(csv).toBe("");
    expect(stripped).toEqual(["clarify"]);
  });

  it("passes through undefined input", () => {
    const outcome = filterHeadlessCsv(undefined);
    expect(outcome.csv).toBeUndefined();
    expect(outcome.stripped).toEqual([]);
  });

  it("passes through empty string input", () => {
    const outcome = filterHeadlessCsv("");
    expect(outcome.csv).toBe("");
    expect(outcome.stripped).toEqual([]);
  });

  it("trims whitespace and skips empty entries", () => {
    const messyInput = " file , clarify ,, web ";
    const { csv, stripped } = filterHeadlessCsv(messyInput);
    expect(csv).toBe("file,web");
    expect(stripped).toEqual(["clarify"]);
  });
});
|
|
103
|
+
|
|
104
|
+
describe("parseClarifyMarker", () => {
  it("extracts question from a verbose tool-call line", () => {
    const toolCallLine = '[tool] clarify {"question":"What now?","choices":["a","b"]}';
    const question = parseClarifyMarker(toolCallLine);
    expect(question).toBe("What now?");
  });

  it("returns null for unrelated lines", () => {
    // Other tool calls, plain log lines, empty and null input must all be ignored.
    const unrelated = ['[tool] file {"path":"x"}', "session_id: abc", "", null];
    for (const line of unrelated) {
      expect(parseClarifyMarker(line)).toBeNull();
    }
  });

  it("returns null when JSON cannot be parsed", () => {
    const brokenLine = "[tool] clarify { not-json";
    expect(parseClarifyMarker(brokenLine)).toBeNull();
  });
});
|
|
121
|
+
|
|
122
|
+
describe("escalateClarify", () => {
  /**
   * Build a minimal stand-in for a fetch Response exposing only the
   * members these tests rely on: `ok`, `status` and `json()`.
   *
   * @param {number} status HTTP status code to report.
   * @param {unknown} body Value resolved by `json()`.
   */
  function jsonRes(status, body) {
    return {
      ok: status >= 200 && status < 300,
      status,
      json: async () => body,
    };
  }

  it("posts comment + PATCHes blocked and returns commentId", async () => {
    const calls = [];
    const fetchImpl = vi.fn(async (url, init) => {
      calls.push({ url, method: init.method, body: init.body });
      if (init.method === "POST") return jsonRes(201, { id: "c-1" });
      return jsonRes(200, {});
    });
    const r = await escalateClarify({
      question: "Pick A or B?",
      taskId: "task-1",
      apiUrl: "http://api/api",
      fetchImpl,
    });
    expect(r.ok).toBe(true);
    expect(r.commentId).toBe("c-1");
    expect(calls).toHaveLength(2);
    // First call: the comment carrying the agent's question.
    expect(calls[0].url).toBe("http://api/api/issues/task-1/comments");
    expect(calls[0].method).toBe("POST");
    expect(JSON.parse(calls[0].body).body).toMatch(/Agent question.*Pick A or B/);
    // Second call: the status transition to "blocked".
    expect(calls[1].url).toBe("http://api/api/issues/task-1");
    expect(calls[1].method).toBe("PATCH");
    expect(JSON.parse(calls[1].body)).toEqual({ status: "blocked" });
  });

  it("attaches Authorization header when apiKey is provided", async () => {
    const fetchImpl = vi.fn(async () => jsonRes(201, { id: "c-2" }));
    await escalateClarify({
      question: "q",
      taskId: "t",
      apiUrl: "http://api/api",
      apiKey: "secret",
      fetchImpl,
    });
    expect(fetchImpl.mock.calls[0][1].headers["Authorization"]).toBe("Bearer secret");
  });

  it("trims trailing slash from apiUrl", async () => {
    const fetchImpl = vi.fn(async () => jsonRes(201, { id: "c-3" }));
    await escalateClarify({
      question: "q",
      taskId: "t",
      apiUrl: "http://api/api/",
      fetchImpl,
    });
    expect(fetchImpl.mock.calls[0][0]).toBe("http://api/api/issues/t/comments");
  });

  it("returns error when comment POST fails", async () => {
    const fetchImpl = vi.fn(async () => jsonRes(500, { error: "boom" }));
    const r = await escalateClarify({
      question: "q",
      taskId: "t",
      apiUrl: "http://api/api",
      fetchImpl,
    });
    expect(r.ok).toBe(false);
    expect(r.error).toMatch(/comment POST 500/);
  });

  it("returns error when status PATCH fails (but reports commentId)", async () => {
    // First request (comment POST) succeeds; every later request is refused.
    let i = 0;
    const fetchImpl = vi.fn(async () => {
      i += 1;
      if (i === 1) return jsonRes(201, { id: "c-4" });
      return jsonRes(403, { error: "no" });
    });
    const r = await escalateClarify({
      question: "q",
      taskId: "t",
      apiUrl: "http://api/api",
      fetchImpl,
    });
    expect(r.ok).toBe(false);
    expect(r.commentId).toBe("c-4");
    expect(r.error).toMatch(/status PATCH 403/);
  });

  it("returns error when fetch throws", async () => {
    const fetchImpl = vi.fn(async () => {
      throw new Error("network down");
    });
    const r = await escalateClarify({
      question: "q",
      taskId: "t",
      apiUrl: "http://api/api",
      fetchImpl,
    });
    expect(r.ok).toBe(false);
    expect(r.error).toMatch(/network down/);
  });

  it("rejects with structured error when taskId / apiUrl missing", async () => {
    const fetchImpl = vi.fn();
    let r = await escalateClarify({ question: "q", apiUrl: "http://api", fetchImpl });
    expect(r.ok).toBe(false);
    expect(r.error).toBe("taskId required");
    r = await escalateClarify({ question: "q", taskId: "t", fetchImpl });
    expect(r.ok).toBe(false);
    expect(r.error).toBe("apiUrl required");
    expect(fetchImpl).not.toHaveBeenCalled();
  });

  it("rejects when fetch is not available", async () => {
    // Hide the global fetch for the duration of the call so the
    // "no fetch" branch is exercised deterministically, without touching
    // the network. The original value is restored even if an assertion fails.
    // NOTE(review): assumes escalateClarify looks up the global fetch at call
    // time rather than capturing it at module load — confirm against headless.js.
    const originalFetch = globalThis.fetch;
    globalThis.fetch = undefined;
    try {
      const r = await escalateClarify({
        question: "q",
        taskId: "t",
        apiUrl: "http://api",
        fetchImpl: undefined,
      });
      expect(r.ok).toBe(false);
      expect(r.error).toBe("fetch not available");
    } finally {
      globalThis.fetch = originalFetch;
    }
  });
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical registry of Hermes Agent built-in toolsets.
|
|
3
|
+
*
|
|
4
|
+
* Snapshot from `hermes tools list` (Hermes Agent v0.12.0, 2026.4.30).
|
|
5
|
+
* Used by the triage engine (FID-48) to:
|
|
6
|
+
* 1. expose names + descriptions to the triage LLM,
|
|
7
|
+
* 2. filter LLM-returned toolsets to known names,
|
|
8
|
+
* 3. supply a safe-default fallback when triage fails.
|
|
9
|
+
*
|
|
10
|
+
* Source-of-truth refresh policy: rerun `hermes tools list` and update this
|
|
11
|
+
* file when a new Hermes release adds/removes toolsets.
|
|
12
|
+
*
|
|
13
|
+
* `headlessSafe` flag:
|
|
14
|
+
* - false → tool is known to block on stdin or otherwise hang in headless mode.
|
|
15
|
+
* - true (or omitted) → safe for unattended runs.
|
|
16
|
+
* Currently only `clarify` is marked unsafe (FID-47). The headless I/O contract
|
|
17
|
+
* is being addressed separately under FID-52.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* @typedef {Object} ToolsetEntry
|
|
22
|
+
* @property {string} name Canonical Hermes toolset name (matches `-t` flag).
|
|
23
|
+
* @property {string} description Human + LLM-facing description for triage prompt.
|
|
24
|
+
* @property {boolean} [headlessSafe] Default true; false for stdin-blocking toolsets.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
/**
 * Ordered table of the Hermes built-in toolsets, one entry per toolset.
 * Only entries that must be excluded from unattended runs carry an explicit
 * `headlessSafe: false`; every other entry omits the flag.
 *
 * @type {ToolsetEntry[]}
 */
export const HERMES_TOOLSET_REGISTRY = [
  { name: "web", description: "Web Search & Scraping — fetch and parse pages, run web queries." },
  { name: "browser", description: "Browser Automation — drive a real browser (clicks, forms, screenshots)." },
  { name: "terminal", description: "Terminal & Processes — run shell commands, manage long-running processes." },
  { name: "file", description: "File Operations — read, write, edit files on disk." },
  { name: "code_execution", description: "Code Execution — run Python / JavaScript snippets in a sandbox." },
  { name: "vision", description: "Vision / Image Analysis — describe and reason about images." },
  { name: "video", description: "Video Analysis — extract frames, transcribe, summarize video." },
  { name: "image_gen", description: "Image Generation — generate images from text prompts." },
  { name: "moa", description: "Mixture of Agents — multi-model ensemble reasoning." },
  { name: "tts", description: "Text-to-Speech — synthesize spoken audio from text." },
  { name: "skills", description: "Skills — invoke installed Hermes skill packages." },
  { name: "todo", description: "Task Planning — track multi-step todo lists during execution." },
  { name: "memory", description: "Memory — persist facts and preferences across sessions." },
  { name: "session_search", description: "Session Search — search prior chat sessions for context." },
  // The single stdin-blocking toolset: flagged so headless runs can drop it.
  { name: "clarify", description: "Clarifying Questions — ask the user follow-up questions interactively.", headlessSafe: false },
  { name: "delegation", description: "Task Delegation — spawn sub-agents to handle parts of a task." },
  { name: "cronjob", description: "Cron Jobs — schedule recurring background tasks." },
  { name: "messaging", description: "Cross-Platform Messaging — send messages on Telegram/Discord/Slack/etc." },
  { name: "rl", description: "RL Training — record trajectories for reinforcement-learning fine-tunes." },
  { name: "homeassistant", description: "Home Assistant — control smart-home devices via HA." },
  { name: "spotify", description: "Spotify — control playback and query the Spotify catalog." },
  { name: "yuanbao", description: "Yuanbao — Tencent Yuanbao integration." },
];
|
|
52
|
+
|
|
53
|
+
/**
 * Toolset names handed out when triage fails or yields nothing usable.
 *
 * The list is meant to cover typical FideliOS agent work (reading code,
 * running commands, fetching the web, persisting memory) while leaving out
 * stdin-blocking and heavyweight toolsets such as `clarify`, `browser` and
 * `image_gen`.
 */
export const SAFE_DEFAULT_TOOLSETS = [
  // Local execution.
  "terminal",
  "file",
  "code_execution",
  // External lookup.
  "web",
  // Planning and recall.
  "skills",
  "todo",
  "memory",
  "session_search",
];
|
|
70
|
+
|
|
71
|
+
/**
 * Name-keyed index over HERMES_TOOLSET_REGISTRY so lookups and membership
 * checks do not need to scan the array. Values are the registry entries
 * themselves (same object identity).
 *
 * @type {Map<string, ToolsetEntry>}
 */
export const TOOLSET_BY_NAME = new Map();
for (const toolset of HERMES_TOOLSET_REGISTRY) {
  TOOLSET_BY_NAME.set(toolset.name, toolset);
}
|
|
78
|
+
|
|
79
|
+
/**
 * Report whether `name` is one of the toolsets indexed in TOOLSET_BY_NAME.
 *
 * @param {string} name Candidate toolset name (matched exactly).
 * @returns {boolean} True when the index has an entry under that name.
 */
export function isKnownToolset(name) {
  const isRegistered = TOOLSET_BY_NAME.has(name);
  return isRegistered;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Report whether a toolset may be used in an unattended (headless) run.
 *
 * Names missing from TOOLSET_BY_NAME are treated as unsafe. Registered
 * toolsets are safe unless their entry sets `headlessSafe` to exactly `false`.
 *
 * @param {string} name Toolset name to look up.
 * @returns {boolean}
 */
export function isHeadlessSafeToolset(name) {
  const toolset = TOOLSET_BY_NAME.get(name);
  return Boolean(toolset) && toolset.headlessSafe !== false;
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
HERMES_TOOLSET_REGISTRY,
|
|
4
|
+
SAFE_DEFAULT_TOOLSETS,
|
|
5
|
+
TOOLSET_BY_NAME,
|
|
6
|
+
isHeadlessSafeToolset,
|
|
7
|
+
isKnownToolset,
|
|
8
|
+
} from "./toolset-registry.js";
|
|
9
|
+
|
|
10
|
+
describe("toolset-registry", () => {
  it("exposes a non-empty canonical list with unique names", () => {
    expect(HERMES_TOOLSET_REGISTRY.length).toBeGreaterThan(0);
    const distinctNames = new Set(HERMES_TOOLSET_REGISTRY.map(({ name }) => name));
    expect(distinctNames.size).toBe(HERMES_TOOLSET_REGISTRY.length);
  });

  it("populates name → entry map for every entry", () => {
    expect(TOOLSET_BY_NAME.size).toBe(HERMES_TOOLSET_REGISTRY.length);
    HERMES_TOOLSET_REGISTRY.forEach((entry) => {
      // Identity check: the map must hold the very same object as the array.
      expect(TOOLSET_BY_NAME.get(entry.name)).toBe(entry);
    });
  });

  it("requires every entry to have a non-empty description", () => {
    HERMES_TOOLSET_REGISTRY.forEach(({ description }) => {
      expect(typeof description).toBe("string");
      expect(description.length).toBeGreaterThan(0);
    });
  });

  it("marks `clarify` as not headless-safe (FID-47)", () => {
    const safe = isHeadlessSafeToolset("clarify");
    expect(safe).toBe(false);
  });

  it("treats unknown toolsets as not headless-safe", () => {
    const safe = isHeadlessSafeToolset("does_not_exist");
    expect(safe).toBe(false);
  });

  it("treats omitted-flag toolsets as headless-safe by default", () => {
    for (const name of ["file", "terminal"]) {
      expect(isHeadlessSafeToolset(name)).toBe(true);
    }
  });

  it("isKnownToolset identifies registry vs unknown names", () => {
    const known = isKnownToolset("file");
    const unknown = isKnownToolset("not_a_real_toolset");
    expect(known).toBe(true);
    expect(unknown).toBe(false);
  });

  it("safe-default subset is fully contained in the registry", () => {
    SAFE_DEFAULT_TOOLSETS.forEach((name) => {
      expect(TOOLSET_BY_NAME.has(name)).toBe(true);
    });
  });

  it("safe-default subset excludes the known stdin-blocker `clarify`", () => {
    expect(SAFE_DEFAULT_TOOLSETS).not.toContain("clarify");
  });
});
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hermes toolset triage engine (FID-48).
|
|
3
|
+
*
|
|
4
|
+
* Before each Hermes spawn, ask the configured local LLM to pick which subset
|
|
5
|
+
* of available toolsets is relevant for the user's prompt. The selected names
|
|
6
|
+
* are then passed to Hermes via `-t a,b,c`, replacing the static whitelist
|
|
7
|
+
* approach rejected in FID-47.
|
|
8
|
+
*
|
|
9
|
+
* Design decisions:
|
|
10
|
+
* - The router LLM defaults to `adapterConfig.model` (the same Ollama model
|
|
11
|
+
* already configured for the agent). Optional `triageModel` override.
|
|
12
|
+
* - We call Ollama with `format: 'json'` for robust parsing.
|
|
13
|
+
* - Hard 30s timeout. On any failure (timeout, parse error, empty result,
|
|
14
|
+
* bad JSON shape) we fall back to SAFE_DEFAULT_TOOLSETS and surface a
|
|
15
|
+
* warning + `triageError` field.
|
|
16
|
+
* - LLM-returned names are intersected with the registry, so unknown / made-up
|
|
17
|
+
* toolsets are silently dropped.
|
|
18
|
+
*/
|
|
19
|
+
import { Ollama } from "ollama";
|
|
20
|
+
import {
|
|
21
|
+
HERMES_TOOLSET_REGISTRY,
|
|
22
|
+
SAFE_DEFAULT_TOOLSETS,
|
|
23
|
+
TOOLSET_BY_NAME,
|
|
24
|
+
} from "./toolset-registry.js";
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* @typedef {import("./toolset-registry.js").ToolsetEntry} ToolsetEntry
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* @typedef {Object} TriageResult
|
|
32
|
+
* @property {string[]} toolsets Filtered, deduped list of canonical toolset names.
|
|
33
|
+
* @property {boolean} usedFallback True when SAFE_DEFAULT_TOOLSETS was returned.
|
|
34
|
+
* @property {string=} error Human-readable error string when fallback was used.
|
|
35
|
+
* @property {number} durationMs Wall-clock time spent on the triage call.
|
|
36
|
+
* @property {string=} rawContent Raw LLM response (for debug / log truncation).
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
const DEFAULT_TRIAGE_TIMEOUT_MS = 30_000;
|
|
40
|
+
|
|
41
|
+
/**
 * Assemble the system prompt for the toolset-routing LLM call.
 *
 * The prompt is a fixed preamble (role, output-shape rules, default hint)
 * followed by one `- name: description` bullet per registry entry, all
 * joined with newlines. It is deliberately short and rigid so that small
 * local models stay in JSON mode, and it nudges the model to include a
 * toolset when in doubt.
 *
 * @param {ToolsetEntry[]} registry Toolsets to advertise, in display order.
 * @returns {string} The complete prompt text.
 */
export function buildTriageSystemPrompt(registry) {
  const preamble = [
    "You are a tool-selection router for an autonomous AI agent.",
    "Given the user's task, choose which toolsets the agent will actually need.",
    "",
    "Rules:",
    '- Reply with valid JSON only, shape: {"toolsets": ["name1", "name2", ...]}.',
    "- Use only canonical names from the list below — do not invent new ones.",
    "- Prefer fewer toolsets, but include any that the agent may plausibly need.",
    "- If the task is ambiguous or general, include the core defaults: terminal, file, code_execution, web, skills, todo, memory.",
    "",
    "Available toolsets:",
  ];
  const catalog = registry.map(({ name, description }) => `- ${name}: ${description}`);
  return preamble.concat(catalog).join("\n");
}
|
|
67
|
+
|
|
68
|
+
/**
 * Parse a raw LLM response into a clean array of toolset names.
 *
 * The whole string is tried as JSON first. If that fails (code fences,
 * leading or trailing prose), the text is scanned for the first
 * brace-balanced `{...}` span that parses as JSON. The list is read from
 * `toolsets`, falling back to `tools`, then `selected`; non-string and
 * empty-string items are dropped.
 *
 * @param {string} raw Raw model output.
 * @returns {string[] | null} Names as returned by the model (not yet checked
 *   against any registry), or null when no usable JSON list was found.
 */
export function parseTriageJson(raw) {
  if (typeof raw !== "string" || !raw.trim()) return null;

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not a bare JSON document — look for an embedded object instead.
    parsed = extractFirstJsonObject(raw);
  }

  if (!parsed || typeof parsed !== "object") return null;
  const list = parsed.toolsets ?? parsed.tools ?? parsed.selected;
  if (!Array.isArray(list)) return null;
  return list.filter((v) => typeof v === "string" && v.length > 0);
}

/**
 * Return the first brace-balanced `{...}` span in `text` that parses as JSON,
 * or undefined when there is none.
 *
 * A greedy "first `{` to last `}`" match would swallow any braces in trailing
 * prose and fail to parse; trying each `{` with its matching `}` avoids that
 * while still accepting every input the greedy match accepted.
 */
function extractFirstJsonObject(text) {
  for (
    let start = text.indexOf("{");
    start !== -1;
    start = text.indexOf("{", start + 1)
  ) {
    const end = findClosingBrace(text, start);
    if (end === -1) continue;
    try {
      return JSON.parse(text.slice(start, end + 1));
    } catch {
      // This span was not JSON (e.g. "{placeholder}"); try the next "{".
    }
  }
  return undefined;
}

/**
 * Find the index of the `}` that closes the `{` at `start`, ignoring braces
 * inside double-quoted strings. Returns -1 when the brace is never closed.
 */
function findClosingBrace(text, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i += 1) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Dedupe + filter a list of LLM-returned names against the canonical registry
 * (or a caller-supplied registry, for tests).
 *
 * Names are trimmed before comparison; non-string, blank and unknown entries
 * are dropped, and the first occurrence of each name fixes its position in
 * the result. A missing or non-iterable `names` yields an empty list instead
 * of throwing, so callers that promise never to reject can rely on it.
 *
 * @param {Iterable<string>} names Candidate names, typically an array.
 * @param {ToolsetEntry[]} [registry] Defaults to the canonical Hermes registry.
 * @returns {string[]} Known names, trimmed, unique, in first-seen order.
 */
export function filterToolsetNames(names, registry) {
  if (names == null || typeof names[Symbol.iterator] !== "function") return [];

  // Both a Set and the canonical Map expose `.has(name)`, so no branching is
  // needed at lookup time.
  const known = registry
    ? new Set(registry.map((t) => t.name))
    : TOOLSET_BY_NAME;

  // A Set keeps insertion order, which gives dedupe + stable ordering at once.
  const accepted = new Set();
  for (const name of names) {
    if (typeof name !== "string") continue;
    const trimmed = name.trim();
    if (trimmed && known.has(trimmed)) accepted.add(trimmed);
  }
  return [...accepted];
}
|
|
123
|
+
|
|
124
|
+
/**
 * Race a promise against a timer. Settles with the original promise's
 * outcome if it finishes first, otherwise rejects with a timeout Error.
 *
 * The timer is cleared as soon as the original promise settles, so a
 * finished call never leaves a pending timer behind. The underlying
 * operation is NOT cancelled on timeout; only the wait is abandoned.
 *
 * @template T
 * @param {Promise<T>} promise Operation to wait for.
 * @param {number} ms Maximum wait in milliseconds.
 * @param {string} label Prefix used in the timeout error message.
 * @returns {Promise<T>}
 */
function withTimeout(promise, ms, label) {
  // Node coerces timer delays above 2^31-1 (and NaN) to 1 ms, which would turn
  // a "very long" or Infinity timeout into an immediate one. Clamp instead.
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  const requested = Number(ms);
  const delay = Number.isNaN(requested)
    ? 0
    : Math.min(Math.max(requested, 0), MAX_TIMER_DELAY_MS);

  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`${label} timed out after ${ms}ms`));
    }, delay);
  });
  return Promise.race([
    promise.finally(() => clearTimeout(timer)),
    timeout,
  ]);
}
|
|
146
|
+
|
|
147
|
+
/**
 * Ask the triage LLM which toolsets a prompt needs.
 *
 * Every failure path inside this function — missing model or prompt, client
 * construction error, chat error or timeout, unparseable reply, reply with
 * no known names — resolves to the fallback list (filtered through the
 * registry) with `usedFallback: true` and an `error` message, rather than
 * surfacing the failure to the caller.
 *
 * @param {Object} opts
 * @param {string} opts.prompt User-facing prompt that the agent will execute.
 * @param {string} opts.model Ollama model name (e.g. "qwen3:4b").
 * @param {ToolsetEntry[]} [opts.registry] Override registry (defaults to the canonical one).
 * @param {string[]} [opts.fallback] Override fallback list.
 * @param {number} [opts.timeoutMs] Hard timeout in ms.
 * @param {string} [opts.host] Optional Ollama host override.
 * @param {{ chat: Function } | null} [opts.client] Optional pre-built Ollama client (tests).
 * @param {{ Ollama: any } | null} [opts.ollamaCtor] Optional Ollama ctor (tests).
 * @returns {Promise<TriageResult>}
 */
export async function triageToolsets(opts) {
  const startedAt = Date.now();
  const registry = opts.registry ?? HERMES_TOOLSET_REGISTRY;
  const fallbackNames = opts.fallback ?? SAFE_DEFAULT_TOOLSETS;
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TRIAGE_TIMEOUT_MS;

  // Shared exit for every failure path: fallback toolsets plus diagnostics.
  function bailOut(error, rawContent) {
    return {
      toolsets: filterToolsetNames(fallbackNames, registry),
      usedFallback: true,
      error,
      durationMs: Date.now() - startedAt,
      rawContent,
    };
  }

  const isNonEmptyString = (value) => typeof value === "string" && value.length > 0;

  if (!isNonEmptyString(opts.model)) {
    return bailOut("triage skipped: no model configured");
  }
  if (!isNonEmptyString(opts.prompt)) {
    return bailOut("triage skipped: empty prompt");
  }

  // Use the injected client when given; otherwise build one, optionally
  // through an injected constructor.
  let chatClient = opts.client;
  if (!chatClient) {
    try {
      const OllamaCtor = opts.ollamaCtor?.Ollama ?? Ollama;
      const ctorOptions = opts.host ? { host: opts.host } : {};
      chatClient = new OllamaCtor(ctorOptions);
    } catch (err) {
      return bailOut(`failed to construct Ollama client: ${err?.message ?? err}`);
    }
  }

  const systemPrompt = buildTriageSystemPrompt(registry);
  // Only the first 4000 characters of the task are sent to the router.
  const taskExcerpt = opts.prompt.slice(0, 4000);
  const chatRequest = {
    model: opts.model,
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: `Task:\n${taskExcerpt}` },
    ],
    format: "json",
    stream: false,
    options: { temperature: 0 },
  };

  let reply;
  try {
    reply = await withTimeout(chatClient.chat(chatRequest), timeoutMs, "hermes-triage");
  } catch (err) {
    return bailOut(`triage call failed: ${err?.message ?? err}`);
  }

  const raw = reply?.message?.content ?? "";
  const proposedNames = parseTriageJson(raw);
  if (!proposedNames) {
    return bailOut("triage response not valid JSON", raw);
  }

  const knownNames = filterToolsetNames(proposedNames, registry);
  if (knownNames.length === 0) {
    return bailOut("triage returned no known toolsets", raw);
  }

  return {
    toolsets: knownNames,
    usedFallback: false,
    durationMs: Date.now() - startedAt,
    rawContent: raw,
  };
}
|