@possumtech/rummy 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +12 -7
- package/BENCH_ENVIRONMENT.md +230 -0
- package/CLIENT_INTERFACE.md +396 -0
- package/PLUGINS.md +93 -1
- package/SPEC.md +305 -28
- package/bin/postinstall.js +2 -2
- package/bin/rummy.js +2 -2
- package/last_run.txt +5617 -0
- package/migrations/001_initial_schema.sql +2 -1
- package/package.json +6 -2
- package/scriptify/cache_probe.js +66 -0
- package/scriptify/cache_probe_grok.js +74 -0
- package/service.js +22 -11
- package/src/agent/AgentLoop.js +33 -139
- package/src/agent/ContextAssembler.js +2 -9
- package/src/agent/Entries.js +36 -101
- package/src/agent/ProjectAgent.js +2 -9
- package/src/agent/TurnExecutor.js +45 -83
- package/src/agent/XmlParser.js +247 -273
- package/src/agent/budget.js +5 -28
- package/src/agent/config.js +38 -0
- package/src/agent/errors.js +7 -13
- package/src/agent/httpStatus.js +1 -19
- package/src/agent/known_store.sql +7 -2
- package/src/agent/materializeContext.js +12 -17
- package/src/agent/pathEncode.js +5 -0
- package/src/agent/rummyHome.js +9 -0
- package/src/agent/runs.sql +18 -0
- package/src/agent/tokens.js +2 -8
- package/src/hooks/HookRegistry.js +1 -16
- package/src/hooks/Hooks.js +8 -33
- package/src/hooks/PluginContext.js +3 -21
- package/src/hooks/RpcRegistry.js +1 -4
- package/src/hooks/RummyContext.js +2 -16
- package/src/hooks/ToolRegistry.js +5 -15
- package/src/llm/LlmProvider.js +28 -23
- package/src/llm/errors.js +41 -4
- package/src/llm/openaiStream.js +125 -0
- package/src/llm/retry.js +61 -15
- package/src/plugins/budget/budget.js +14 -81
- package/src/plugins/cli/README.md +87 -0
- package/src/plugins/cli/bin.js +61 -0
- package/src/plugins/cli/cli.js +120 -0
- package/src/plugins/env/README.md +2 -1
- package/src/plugins/env/env.js +4 -6
- package/src/plugins/env/envDoc.md +2 -2
- package/src/plugins/error/error.js +23 -23
- package/src/plugins/file/file.js +2 -22
- package/src/plugins/get/get.js +12 -34
- package/src/plugins/get/getDoc.md +5 -3
- package/src/plugins/hedberg/edits.js +1 -11
- package/src/plugins/hedberg/hedberg.js +3 -26
- package/src/plugins/hedberg/normalize.js +1 -5
- package/src/plugins/hedberg/patterns.js +4 -15
- package/src/plugins/hedberg/sed.js +1 -7
- package/src/plugins/helpers.js +28 -20
- package/src/plugins/index.js +25 -41
- package/src/plugins/instructions/README.md +18 -0
- package/src/plugins/instructions/instructions.js +13 -76
- package/src/plugins/instructions/instructions.md +19 -18
- package/src/plugins/instructions/instructions_104.md +5 -4
- package/src/plugins/instructions/instructions_105.md +16 -15
- package/src/plugins/instructions/instructions_106.md +15 -14
- package/src/plugins/instructions/instructions_107.md +13 -6
- package/src/plugins/known/README.md +26 -6
- package/src/plugins/known/known.js +36 -34
- package/src/plugins/log/README.md +2 -2
- package/src/plugins/log/log.js +6 -33
- package/src/plugins/ollama/ollama.js +50 -66
- package/src/plugins/openai/openai.js +26 -44
- package/src/plugins/openrouter/openrouter.js +28 -52
- package/src/plugins/policy/README.md +8 -2
- package/src/plugins/policy/policy.js +8 -21
- package/src/plugins/prompt/README.md +22 -0
- package/src/plugins/prompt/prompt.js +8 -16
- package/src/plugins/rm/rm.js +5 -2
- package/src/plugins/rm/rmDoc.md +4 -4
- package/src/plugins/rpc/README.md +2 -1
- package/src/plugins/rpc/rpc.js +51 -47
- package/src/plugins/set/README.md +5 -1
- package/src/plugins/set/set.js +23 -33
- package/src/plugins/set/setDoc.md +1 -1
- package/src/plugins/sh/README.md +2 -1
- package/src/plugins/sh/sh.js +5 -11
- package/src/plugins/sh/shDoc.md +2 -2
- package/src/plugins/stream/README.md +6 -5
- package/src/plugins/stream/stream.js +6 -35
- package/src/plugins/telemetry/telemetry.js +26 -19
- package/src/plugins/think/think.js +4 -7
- package/src/plugins/unknown/unknown.js +8 -13
- package/src/plugins/update/update.js +36 -35
- package/src/plugins/update/updateDoc.md +3 -3
- package/src/plugins/xai/xai.js +30 -20
- package/src/plugins/yolo/yolo.js +8 -41
- package/src/server/ClientConnection.js +17 -47
- package/src/server/SocketServer.js +14 -14
- package/src/server/protocol.js +1 -10
- package/src/sql/functions/slugify.js +5 -7
- package/src/sql/v_model_context.sql +4 -11
- package/turns/cli_1777462658211/turn_001.txt +772 -0
- package/turns/cli_1777462658211/turn_002.txt +606 -0
- package/turns/cli_1777462658211/turn_003.txt +667 -0
- package/turns/cli_1777462658211/turn_004.txt +297 -0
- package/turns/cli_1777462658211/turn_005.txt +301 -0
- package/turns/cli_1777462658211/turn_006.txt +262 -0
- package/turns/cli_1777465095132/turn_001.txt +715 -0
- package/turns/cli_1777465095132/turn_002.txt +236 -0
- package/turns/cli_1777465095132/turn_003.txt +287 -0
- package/turns/cli_1777465095132/turn_004.txt +694 -0
- package/turns/cli_1777465095132/turn_005.txt +422 -0
- package/turns/cli_1777465095132/turn_006.txt +365 -0
- package/turns/cli_1777465095132/turn_007.txt +885 -0
- package/turns/cli_1777465095132/turn_008.txt +1277 -0
- package/turns/cli_1777465095132/turn_009.txt +736 -0
package/src/agent/XmlParser.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { Parser } from "htmlparser2";
|
|
2
1
|
import { parseEditContent } from "../plugins/hedberg/edits.js";
|
|
3
2
|
import { parseJsonEdit } from "../plugins/hedberg/normalize.js";
|
|
4
3
|
import { parseSed } from "../plugins/hedberg/sed.js";
|
|
@@ -13,15 +12,11 @@ export const ALL_TOOLS = new Set([
|
|
|
13
12
|
"think",
|
|
14
13
|
]);
|
|
15
14
|
|
|
16
|
-
|
|
17
|
-
* Resolve the competing attr-vs-body philosophies per tool.
|
|
18
|
-
* If the canonical attribute is missing, the body fills it. Silent.
|
|
19
|
-
*/
|
|
15
|
+
// Per-tool resolution: missing canonical attribute is filled silently from the body.
|
|
20
16
|
function resolveCommand(name, a, rawBody) {
|
|
21
17
|
const trimmed = rawBody.trim();
|
|
22
18
|
|
|
23
19
|
if (name === "set") {
|
|
24
|
-
// Structured edit detection — merge conflict, udiff, Claude XML
|
|
25
20
|
const hasEdit =
|
|
26
21
|
/<{3,12} SEARCH/.test(trimmed) ||
|
|
27
22
|
/>{3,12} REPLACE/.test(trimmed) ||
|
|
@@ -35,17 +30,15 @@ function resolveCommand(name, a, rawBody) {
|
|
|
35
30
|
name,
|
|
36
31
|
path: a.path,
|
|
37
32
|
body: a.body,
|
|
38
|
-
|
|
33
|
+
manifest: a.manifest,
|
|
39
34
|
blocks,
|
|
40
35
|
};
|
|
41
36
|
}
|
|
42
37
|
}
|
|
43
|
-
// JSON-style { search, replace }
|
|
44
38
|
const jsonEdit = parseJsonEdit(trimmed);
|
|
45
39
|
if (jsonEdit) {
|
|
46
40
|
return { name, path: a.path, ...jsonEdit };
|
|
47
41
|
}
|
|
48
|
-
// Sed syntax: s/search/replace/flags — supports chained commands
|
|
49
42
|
if (trimmed.startsWith("s/")) {
|
|
50
43
|
const blocks = parseSed(trimmed);
|
|
51
44
|
if (blocks?.length === 1) {
|
|
@@ -62,29 +55,26 @@ function resolveCommand(name, a, rawBody) {
|
|
|
62
55
|
return { name, path: a.path, blocks };
|
|
63
56
|
}
|
|
64
57
|
}
|
|
65
|
-
// search+replace attrs → attribute edit mode
|
|
66
58
|
if (a.search) {
|
|
67
59
|
const replace = a.replace ?? trimmed;
|
|
68
60
|
return {
|
|
69
61
|
name,
|
|
70
62
|
path: a.path,
|
|
71
63
|
body: a.body,
|
|
72
|
-
|
|
64
|
+
manifest: a.manifest,
|
|
73
65
|
search: a.search,
|
|
74
66
|
replace,
|
|
75
67
|
};
|
|
76
68
|
}
|
|
77
|
-
// Body attr + body content → search/replace (attr is search, body is replace)
|
|
78
69
|
if (trimmed && a.body) {
|
|
79
70
|
return {
|
|
80
71
|
name,
|
|
81
72
|
path: a.path,
|
|
82
73
|
search: a.body,
|
|
83
74
|
replace: trimmed,
|
|
84
|
-
|
|
75
|
+
manifest: a.manifest,
|
|
85
76
|
};
|
|
86
77
|
}
|
|
87
|
-
// Plain write or visibility change
|
|
88
78
|
const body = trimmed || a.body || "";
|
|
89
79
|
return { name, ...a, body };
|
|
90
80
|
}
|
|
@@ -96,10 +86,6 @@ function resolveCommand(name, a, rawBody) {
|
|
|
96
86
|
}
|
|
97
87
|
|
|
98
88
|
if (name === "get" || name === "rm") {
|
|
99
|
-
// Spread `a` so `line`, `limit`, `visibility`, and future attrs
|
|
100
|
-
// reach the handler. Earlier narrow extraction silently dropped
|
|
101
|
-
// `line=/limit=` and stranded the partial-read path advertised
|
|
102
|
-
// in getDoc.
|
|
103
89
|
return { name, ...a, path: a.path || trimmed || null };
|
|
104
90
|
}
|
|
105
91
|
|
|
@@ -110,10 +96,6 @@ function resolveCommand(name, a, rawBody) {
|
|
|
110
96
|
}
|
|
111
97
|
|
|
112
98
|
if (name === "mv" || name === "cp") {
|
|
113
|
-
// Spread `a` so `visibility` reaches the handler. mvDoc
|
|
114
|
-
// advertises `<mv path="known://..." visibility="summarized"/>`
|
|
115
|
-
// for batch visibility changes and was silently stripping that
|
|
116
|
-
// attr before.
|
|
117
99
|
return { name, ...a, path: a.path, to: a.to || trimmed || null };
|
|
118
100
|
}
|
|
119
101
|
|
|
@@ -131,176 +113,102 @@ function resolveCommand(name, a, rawBody) {
|
|
|
131
113
|
return { name, ...a, body: trimmed || a.body };
|
|
132
114
|
}
|
|
133
115
|
|
|
116
|
+
const NAME_CHAR = /[a-zA-Z0-9_]/;
|
|
117
|
+
const ATTR_KEY_CHAR = /[a-zA-Z0-9_:-]/;
|
|
118
|
+
const WS = /\s/;
|
|
119
|
+
|
|
120
|
+
// Recovery-tolerant tokenizer for rummy's closed set of tool tags.
|
|
121
|
+
//
|
|
122
|
+
// Design contract:
|
|
123
|
+
// - Tool tags (<get>, <set>, <sh>, ...) are the only syntactic special tags.
|
|
124
|
+
// Any other "<...>" sequence in OUTER text is treated as literal text.
|
|
125
|
+
// - Inside a tool tag's body, content is OPAQUE: only the matching close
|
|
126
|
+
// tag is recognized. Body may contain regex (`(?<!`), generics (`Vec<u8>`),
|
|
127
|
+
// HTML, XML, heredocs, comparison operators — none of it affects parsing.
|
|
128
|
+
// - Backtick spans (`...`) and triple-backtick fences (```...```) at the
|
|
129
|
+
// OUTER level neutralize tag-like content, mirroring the markdown
|
|
130
|
+
// convention that documentation about a tool isn't a tool call.
|
|
131
|
+
// Inside tool bodies this tracking does NOT apply (body opacity wins).
|
|
132
|
+
// - Same-name nesting (`<set>...<set>...</set>...</set>`) is depth-counted so
|
|
133
|
+
// nested examples don't prematurely close the outer.
|
|
134
|
+
// - Recovery: unclosed openers capture body to EOF + emit a warning.
|
|
135
|
+
// Orphan closes at outer level become text, no warning (body opacity
|
|
136
|
+
// means models legitimately write `</set>` in prose / summaries).
|
|
134
137
|
export default class XmlParser {
|
|
135
|
-
/**
|
|
136
|
-
* Parse tool commands from model content using htmlparser2.
|
|
137
|
-
* Handles malformed XML gracefully — unclosed tags, missing slashes, etc.
|
|
138
|
-
* Every tool can appear as self-closing (attrs only) or with body content.
|
|
139
|
-
* Competing attr-vs-body philosophies are resolved silently.
|
|
140
|
-
* @param {string} content - Raw model response text
|
|
141
|
-
* @returns {{ commands: Array, warnings: string[], unparsed: string }}
|
|
142
|
-
*/
|
|
143
138
|
static MAX_COMMANDS = Number(process.env.RUMMY_MAX_COMMANDS);
|
|
144
139
|
|
|
145
140
|
static parse(content) {
|
|
146
141
|
if (!content) return { commands: [], warnings: [], unparsed: "" };
|
|
147
142
|
|
|
148
|
-
// Normalize native tool call formats to rummy XML
|
|
149
143
|
const normalized = XmlParser.#normalizeToolCalls(content);
|
|
144
|
+
return XmlParser.#tokenize(normalized, []);
|
|
145
|
+
}
|
|
150
146
|
|
|
147
|
+
static #tokenize(s, warnings) {
|
|
151
148
|
const commands = [];
|
|
152
|
-
const
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
// Models quote instructions containing `<get/>` etc. — the parser
|
|
157
|
-
// would treat them as real tool calls. Replace the angle brackets
|
|
158
|
-
// inside backtick spans so htmlparser2 ignores them.
|
|
159
|
-
const codeNeutralized = XmlParser.#neutralizeCodeSpans(normalized);
|
|
160
|
-
|
|
161
|
-
// Pre-flight: fix mismatched close tags that htmlparser2 silently
|
|
162
|
-
// drops (making our onclosetag recovery code unreachable). Must run
|
|
163
|
-
// before balanceAttrQuotes since the mismatch scan needs clean tags.
|
|
164
|
-
const mismatchFixed = XmlParser.#correctMismatchedCloses(
|
|
165
|
-
codeNeutralized,
|
|
166
|
-
warnings,
|
|
167
|
-
);
|
|
168
|
-
|
|
169
|
-
// Pre-flight: balance unclosed attribute quotes that would otherwise
|
|
170
|
-
// cause htmlparser2 to consume the rest of input as a single attribute
|
|
171
|
-
// value, silently dropping every subsequent tool call.
|
|
172
|
-
const balanced = XmlParser.#balanceAttrQuotes(mismatchFixed, warnings);
|
|
173
|
-
let current = null;
|
|
174
|
-
let ended = false;
|
|
149
|
+
const text = [];
|
|
150
|
+
let i = 0;
|
|
151
|
+
let inSingleBacktick = false;
|
|
152
|
+
let inTripleFence = false;
|
|
175
153
|
let capped = false;
|
|
176
154
|
|
|
177
|
-
|
|
178
|
-
{
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
if (current) {
|
|
183
|
-
// Empty-body case: current tool opened but got no text
|
|
184
|
-
// content before a new tag. The model likely meant current
|
|
185
|
-
// to self-close but typed it in paired form, or emitted a
|
|
186
|
-
// mismatched close tag that htmlparser2 silently dropped.
|
|
187
|
-
// Close current, open new.
|
|
188
|
-
const hasBody = current.rawBody.trim() !== "";
|
|
189
|
-
const hasNestedOpens = (current.nested || []).length > 0;
|
|
190
|
-
if (!hasBody && !hasNestedOpens && ALL_TOOLS.has(name)) {
|
|
191
|
-
warnings.push(
|
|
192
|
-
`Unclosed <${current.name}> before <${name}> — recovered`,
|
|
193
|
-
);
|
|
194
|
-
commands.push(
|
|
195
|
-
resolveCommand(current.name, current.attrs, current.rawBody),
|
|
196
|
-
);
|
|
197
|
-
current = null;
|
|
198
|
-
} else {
|
|
199
|
-
// Nested tag inside a body with content — treat as body
|
|
200
|
-
// text. Tool bodies are opaque: the model writing a plan
|
|
201
|
-
// with <get/> in it, SEARCH/REPLACE in <set>, or XML
|
|
202
|
-
// examples in <known> all need to survive intact. Track
|
|
203
|
-
// nested opens on a stack so matching closes pop off and
|
|
204
|
-
// orphan closes (typos) still trigger recovery.
|
|
205
|
-
const attrStr = Object.entries(attrs)
|
|
206
|
-
.map(([k, v]) => (v === "" ? k : `${k}="${v}"`))
|
|
207
|
-
.join(" ");
|
|
208
|
-
current.rawBody += attrStr ? `<${name} ${attrStr}>` : `<${name}>`;
|
|
209
|
-
current.nested ||= [];
|
|
210
|
-
current.nested.push(name);
|
|
211
|
-
return;
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
if (!ALL_TOOLS.has(name)) return;
|
|
155
|
+
while (i < s.length) {
|
|
156
|
+
if (commands.length >= XmlParser.MAX_COMMANDS) {
|
|
157
|
+
capped = true;
|
|
158
|
+
break;
|
|
159
|
+
}
|
|
216
160
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
161
|
+
// Triple-backtick fence toggles take precedence over single backtick
|
|
162
|
+
// because ``` overlaps `.
|
|
163
|
+
if (s[i] === "`" && s[i + 1] === "`" && s[i + 2] === "`") {
|
|
164
|
+
inTripleFence = !inTripleFence;
|
|
165
|
+
text.push("```");
|
|
166
|
+
i += 3;
|
|
167
|
+
continue;
|
|
168
|
+
}
|
|
169
|
+
if (s[i] === "`" && !inTripleFence) {
|
|
170
|
+
inSingleBacktick = !inSingleBacktick;
|
|
171
|
+
text.push("`");
|
|
172
|
+
i++;
|
|
173
|
+
continue;
|
|
174
|
+
}
|
|
221
175
|
|
|
222
|
-
|
|
223
|
-
|
|
176
|
+
if (inSingleBacktick || inTripleFence || s[i] !== "<") {
|
|
177
|
+
text.push(s[i]);
|
|
178
|
+
i++;
|
|
179
|
+
continue;
|
|
180
|
+
}
|
|
224
181
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
}
|
|
232
|
-
},
|
|
233
|
-
|
|
234
|
-
onclosetag(name, isImplied) {
|
|
235
|
-
if (capped) return;
|
|
236
|
-
|
|
237
|
-
if (current) {
|
|
238
|
-
// Matching nested close — pop stack, keep as text.
|
|
239
|
-
const nested = current.nested;
|
|
240
|
-
if (nested.length > 0 && nested[nested.length - 1] === name) {
|
|
241
|
-
nested.pop();
|
|
242
|
-
current.rawBody += `</${name}>`;
|
|
243
|
-
return;
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
// Matching close for outer tool — finalize.
|
|
247
|
-
if (name === current.name && nested.length === 0) {
|
|
248
|
-
if (ended) {
|
|
249
|
-
warnings.push(
|
|
250
|
-
`Unclosed <${name}> tag — content captured anyway`,
|
|
251
|
-
);
|
|
252
|
-
}
|
|
253
|
-
commands.push(
|
|
254
|
-
resolveCommand(current.name, current.attrs, current.rawBody),
|
|
255
|
-
);
|
|
256
|
-
current = null;
|
|
257
|
-
return;
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
// Orphan close for a known tool (likely typo) — recover.
|
|
261
|
-
if (ALL_TOOLS.has(name)) {
|
|
262
|
-
warnings.push(
|
|
263
|
-
`Mismatched </${name}> closing <${current.name}> — recovered`,
|
|
264
|
-
);
|
|
265
|
-
commands.push(
|
|
266
|
-
resolveCommand(current.name, current.attrs, current.rawBody),
|
|
267
|
-
);
|
|
268
|
-
current = null;
|
|
269
|
-
return;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
// Unknown orphan close — text.
|
|
273
|
-
current.rawBody += `</${name}>`;
|
|
274
|
-
return;
|
|
275
|
-
}
|
|
182
|
+
const opener = XmlParser.#matchOpener(s, i);
|
|
183
|
+
if (!opener) {
|
|
184
|
+
text.push(s[i]);
|
|
185
|
+
i++;
|
|
186
|
+
continue;
|
|
187
|
+
}
|
|
276
188
|
|
|
277
|
-
|
|
278
|
-
// Self-closing tag that htmlparser2 auto-closed at top level
|
|
279
|
-
}
|
|
280
|
-
},
|
|
189
|
+
const { name, attrs, selfClose, end: openerEnd } = opener;
|
|
281
190
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
recognizeSelfClosing: true,
|
|
288
|
-
lowerCaseTags: true,
|
|
289
|
-
lowerCaseAttributeNames: true,
|
|
290
|
-
},
|
|
291
|
-
);
|
|
191
|
+
if (selfClose) {
|
|
192
|
+
commands.push(resolveCommand(name, attrs, ""));
|
|
193
|
+
i = openerEnd;
|
|
194
|
+
continue;
|
|
195
|
+
}
|
|
292
196
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
197
|
+
const result = XmlParser.#findBodyEnd(s, name, openerEnd);
|
|
198
|
+
const body = s.slice(openerEnd, result.bodyEnd);
|
|
199
|
+
if (result.unclosed) {
|
|
200
|
+
warnings.push(`Unclosed <${name}> tag — content captured anyway`);
|
|
201
|
+
} else if (result.mismatchedCloseName) {
|
|
202
|
+
warnings.push(
|
|
203
|
+
`Mismatched </${result.mismatchedCloseName}> closing <${name}> — corrected to </${name}>`,
|
|
204
|
+
);
|
|
205
|
+
}
|
|
206
|
+
commands.push(resolveCommand(name, attrs, body));
|
|
207
|
+
i = result.afterClose;
|
|
296
208
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
commands.push(
|
|
301
|
-
resolveCommand(current.name, current.attrs, current.rawBody),
|
|
302
|
-
);
|
|
303
|
-
current = null;
|
|
209
|
+
// Body terminated; reset outer-text fence tracking.
|
|
210
|
+
inSingleBacktick = false;
|
|
211
|
+
inTripleFence = false;
|
|
304
212
|
}
|
|
305
213
|
|
|
306
214
|
if (capped) {
|
|
@@ -309,109 +217,187 @@ export default class XmlParser {
|
|
|
309
217
|
);
|
|
310
218
|
}
|
|
311
219
|
|
|
312
|
-
|
|
313
|
-
|
|
220
|
+
return {
|
|
221
|
+
commands,
|
|
222
|
+
warnings,
|
|
223
|
+
unparsed: text.join("").trim(),
|
|
224
|
+
};
|
|
314
225
|
}
|
|
315
226
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
227
|
+
// Returns { name, attrs, selfClose, end } if `s[pos..]` opens a known tool,
|
|
228
|
+
// else null. `end` is the index after the closing `>` (or `/>`).
|
|
229
|
+
static #matchOpener(s, pos) {
|
|
230
|
+
if (s[pos] !== "<") return null;
|
|
231
|
+
let i = pos + 1;
|
|
232
|
+
|
|
233
|
+
const nameStart = i;
|
|
234
|
+
while (i < s.length && NAME_CHAR.test(s[i])) i++;
|
|
235
|
+
const name = s.slice(nameStart, i).toLowerCase();
|
|
236
|
+
if (!ALL_TOOLS.has(name)) return null;
|
|
237
|
+
|
|
238
|
+
// Char after the name must end the name token cleanly.
|
|
239
|
+
if (i < s.length && !WS.test(s[i]) && s[i] !== "/" && s[i] !== ">") {
|
|
240
|
+
return null;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
const attrsStart = i;
|
|
244
|
+
let inQuote = null;
|
|
245
|
+
|
|
246
|
+
while (i < s.length) {
|
|
247
|
+
const c = s[i];
|
|
248
|
+
if (inQuote) {
|
|
249
|
+
if (c === inQuote) inQuote = null;
|
|
250
|
+
i++;
|
|
251
|
+
continue;
|
|
252
|
+
}
|
|
253
|
+
if (c === '"' || c === "'") {
|
|
254
|
+
inQuote = c;
|
|
255
|
+
i++;
|
|
256
|
+
continue;
|
|
257
|
+
}
|
|
258
|
+
if (c === "/") {
|
|
259
|
+
let k = i + 1;
|
|
260
|
+
while (k < s.length && WS.test(s[k])) k++;
|
|
261
|
+
if (s[k] === ">") {
|
|
262
|
+
return {
|
|
263
|
+
name,
|
|
264
|
+
attrs: XmlParser.#parseAttrs(s.slice(attrsStart, i)),
|
|
265
|
+
selfClose: true,
|
|
266
|
+
end: k + 1,
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
i++;
|
|
270
|
+
continue;
|
|
271
|
+
}
|
|
272
|
+
if (c === ">") {
|
|
273
|
+
return {
|
|
274
|
+
name,
|
|
275
|
+
attrs: XmlParser.#parseAttrs(s.slice(attrsStart, i)),
|
|
276
|
+
selfClose: false,
|
|
277
|
+
end: i + 1,
|
|
278
|
+
};
|
|
279
|
+
}
|
|
280
|
+
i++;
|
|
342
281
|
}
|
|
343
|
-
|
|
282
|
+
|
|
283
|
+
// Hit EOF without closing — not a parseable opener.
|
|
284
|
+
return null;
|
|
344
285
|
}
|
|
345
286
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
287
|
+
static #parseAttrs(raw) {
|
|
288
|
+
const attrs = {};
|
|
289
|
+
let i = 0;
|
|
290
|
+
while (i < raw.length) {
|
|
291
|
+
while (i < raw.length && WS.test(raw[i])) i++;
|
|
292
|
+
if (i >= raw.length) break;
|
|
293
|
+
|
|
294
|
+
const keyStart = i;
|
|
295
|
+
while (i < raw.length && ATTR_KEY_CHAR.test(raw[i])) i++;
|
|
296
|
+
if (i === keyStart) {
|
|
297
|
+
i++;
|
|
298
|
+
continue;
|
|
299
|
+
}
|
|
300
|
+
const key = raw.slice(keyStart, i).toLowerCase();
|
|
301
|
+
|
|
302
|
+
while (i < raw.length && WS.test(raw[i])) i++;
|
|
303
|
+
|
|
304
|
+
if (raw[i] !== "=") {
|
|
305
|
+
attrs[key] = "";
|
|
306
|
+
continue;
|
|
307
|
+
}
|
|
308
|
+
i++;
|
|
309
|
+
|
|
310
|
+
while (i < raw.length && WS.test(raw[i])) i++;
|
|
311
|
+
|
|
312
|
+
if (raw[i] === '"' || raw[i] === "'") {
|
|
313
|
+
const quote = raw[i];
|
|
314
|
+
i++;
|
|
315
|
+
const valStart = i;
|
|
316
|
+
while (i < raw.length && raw[i] !== quote) i++;
|
|
317
|
+
attrs[key] = raw.slice(valStart, i);
|
|
318
|
+
if (raw[i] === quote) i++;
|
|
319
|
+
} else {
|
|
320
|
+
const valStart = i;
|
|
321
|
+
while (i < raw.length && !WS.test(raw[i])) i++;
|
|
322
|
+
attrs[key] = raw.slice(valStart, i);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
return attrs;
|
|
368
326
|
}
|
|
369
327
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
328
|
+
// Scans body content from `fromPos` until the matching `</name>` closer,
|
|
329
|
+
// counting depth so same-name nested examples don't prematurely close.
|
|
330
|
+
// Returns { bodyEnd, afterClose, unclosed, mismatchedCloseName }.
|
|
331
|
+
//
|
|
332
|
+
// Mismatched-close recovery: if we encounter `</X>` where X != name and X
|
|
333
|
+
// is not a depth-counted nested tag, we use a balance heuristic to decide
|
|
334
|
+
// whether the orphan close was a typo (recover here) or legitimate body
|
|
335
|
+
// content (continue scanning). Specifically: count `</name>` minus
|
|
336
|
+
// `<name` in the rest of the string; if non-positive, no real close
|
|
337
|
+
// exists ahead and the orphan must be the intended close.
|
|
338
|
+
static #findBodyEnd(s, name, fromPos) {
|
|
339
|
+
let depth = 1;
|
|
340
|
+
let i = fromPos;
|
|
341
|
+
while (i < s.length) {
|
|
342
|
+
if (s[i] !== "<") {
|
|
343
|
+
i++;
|
|
344
|
+
continue;
|
|
345
|
+
}
|
|
346
|
+
if (s[i + 1] === "/") {
|
|
347
|
+
const nameStart = i + 2;
|
|
348
|
+
let nameEnd = nameStart;
|
|
349
|
+
while (nameEnd < s.length && NAME_CHAR.test(s[nameEnd])) nameEnd++;
|
|
350
|
+
const closeName = s.slice(nameStart, nameEnd).toLowerCase();
|
|
351
|
+
let k = nameEnd;
|
|
352
|
+
while (k < s.length && WS.test(s[k])) k++;
|
|
353
|
+
const isCloseTag = s[k] === ">";
|
|
354
|
+
|
|
355
|
+
if (isCloseTag && closeName === name) {
|
|
356
|
+
depth--;
|
|
357
|
+
if (depth === 0) {
|
|
358
|
+
return { bodyEnd: i, afterClose: k + 1, unclosed: false };
|
|
382
359
|
}
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
360
|
+
i = k + 1;
|
|
361
|
+
continue;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
if (isCloseTag && closeName.length > 0) {
|
|
365
|
+
const rest = s.slice(k + 1);
|
|
366
|
+
const closesAhead = (
|
|
367
|
+
rest.match(new RegExp(`<\\/${name}\\b\\s*>`, "g")) || []
|
|
368
|
+
).length;
|
|
369
|
+
const opensAhead = (rest.match(new RegExp(`<${name}\\b`, "g")) || [])
|
|
370
|
+
.length;
|
|
371
|
+
if (closesAhead - opensAhead < 1) {
|
|
372
|
+
return {
|
|
373
|
+
bodyEnd: i,
|
|
374
|
+
afterClose: k + 1,
|
|
375
|
+
unclosed: false,
|
|
376
|
+
mismatchedCloseName: closeName,
|
|
377
|
+
};
|
|
389
378
|
}
|
|
390
|
-
return match;
|
|
391
379
|
}
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
380
|
+
}
|
|
381
|
+
const opener = XmlParser.#matchOpener(s, i);
|
|
382
|
+
if (opener && opener.name === name && !opener.selfClose) {
|
|
383
|
+
depth++;
|
|
384
|
+
i = opener.end;
|
|
385
|
+
continue;
|
|
386
|
+
}
|
|
387
|
+
i++;
|
|
388
|
+
}
|
|
389
|
+
return { bodyEnd: s.length, afterClose: s.length, unclosed: true };
|
|
396
390
|
}
|
|
397
391
|
|
|
398
|
-
|
|
399
|
-
* Normalize native tool call formats to rummy XML.
|
|
400
|
-
* Models sometimes emit their training-format tool calls instead of
|
|
401
|
-
* our XML tags. The intent is unambiguous — translate silently.
|
|
402
|
-
*/
|
|
392
|
+
// Translate native training-format tool calls into rummy XML silently.
|
|
403
393
|
static #normalizeToolCalls(content) {
|
|
404
|
-
// Gemma
|
|
394
|
+
// Gemma code-fenced XML.
|
|
405
395
|
let result = content.replace(
|
|
406
396
|
/```(?:tool_code|tool_command|xml)\n([\s\S]*?)```/g,
|
|
407
397
|
(_, inner) => inner.trim(),
|
|
408
398
|
);
|
|
409
399
|
|
|
410
|
-
// Qwen/gemma
|
|
411
|
-
// NAME may be namespaced with any of /, :, or . separators
|
|
412
|
-
// (e.g. `rummy.nvim/get`, `rummy:get`) — extract the trailing word
|
|
413
|
-
// sequence as the tool name. Value forms observed in the wild:
|
|
414
|
-
// key="v" / key:"v" / key:v (unquoted) / key:<|"|>v<|"|> (gemma chat-quotes)
|
|
400
|
+
// Qwen/gemma <|tool_call>call:NAME{...}<tool_call|>; NAME may be namespaced.
|
|
415
401
|
result = result.replace(
|
|
416
402
|
/<\|tool_call>call:([\w.:/-]+)\{([^}]*)\}<(?:tool_call\||\|tool_call)>/g,
|
|
417
403
|
(match, qualifiedName, params) => {
|
|
@@ -469,28 +455,16 @@ export default class XmlParser {
|
|
|
469
455
|
},
|
|
470
456
|
);
|
|
471
457
|
|
|
472
|
-
// Catch-all
|
|
473
|
-
// attempts (no {} block, missing close, wrong shape entirely). Replace
|
|
474
|
-
// each with an <error> so the model gets feedback on its next turn and
|
|
475
|
-
// learns to switch to XML. Lazy-match up to the next native close, the
|
|
476
|
-
// next XML close tag, or end of input — preserves any trailing valid XML.
|
|
477
|
-
// Error body must NOT contain literal <get>/<set>/etc. — those would
|
|
478
|
-
// re-enter the parser as phantom tool calls. Describe the format in
|
|
479
|
-
// prose instead and point at the tool docs above.
|
|
458
|
+
// Catch-all malformed <|tool_call> → <error> in prose (no literal tags or they'd re-parse).
|
|
480
459
|
result = result.replace(
|
|
481
460
|
/<\|tool_call>[\s\S]*?(?:<\|?tool_call\|?>|<\/\w+>|$)/g,
|
|
482
461
|
() =>
|
|
483
462
|
"<error>Native tool call format not supported. Use the XML commands listed above (e.g. a get tag with a path attribute, or a set tag with path and body).</error>",
|
|
484
463
|
);
|
|
485
464
|
|
|
486
|
-
// Strip any orphan chat-format quote tokens left after replacement.
|
|
487
465
|
result = result.replace(/<\|"\|>/g, '"');
|
|
488
466
|
|
|
489
|
-
//
|
|
490
|
-
// real XML output: `<|channel>thought\n<channel|>…<set path=…/>`.
|
|
491
|
-
// These aren't tool calls (handled above), they're role/channel
|
|
492
|
-
// tokens. Strip any remaining `<|name>` / `<name|>` pseudo-tags
|
|
493
|
-
// before the XML parser sees them.
|
|
467
|
+
// Strip OpenAI-harmony role/channel pseudo-tags (gemma leaks these).
|
|
494
468
|
result = result.replace(/<\|[\w:/-]+>/g, "");
|
|
495
469
|
result = result.replace(/<[\w:/-]+\|>/g, "");
|
|
496
470
|
|