@possumtech/rummy 2.1.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +40 -15
- package/.xai.key +1 -0
- package/PLUGINS.md +169 -53
- package/README.md +38 -32
- package/SPEC.md +366 -179
- package/bin/digest.js +1097 -0
- package/biome/no-fallbacks.grit +2 -2
- package/gemini.key +1 -0
- package/lang/en.json +10 -1
- package/migrations/001_initial_schema.sql +9 -2
- package/package.json +19 -8
- package/service.js +1 -0
- package/src/agent/AgentLoop.js +76 -26
- package/src/agent/ContextAssembler.js +2 -0
- package/src/agent/Entries.js +238 -60
- package/src/agent/ProjectAgent.js +44 -0
- package/src/agent/TurnExecutor.js +99 -30
- package/src/agent/XmlParser.js +206 -111
- package/src/agent/errors.js +35 -0
- package/src/agent/known_queries.sql +1 -1
- package/src/agent/known_store.sql +3 -42
- package/src/agent/materializeContext.js +30 -1
- package/src/agent/runs.sql +8 -18
- package/src/agent/tokens.js +0 -1
- package/src/agent/turns.sql +1 -0
- package/src/hooks/Hooks.js +26 -0
- package/src/hooks/RummyContext.js +12 -1
- package/src/lib/hedberg/README.md +60 -0
- package/src/lib/hedberg/hedberg.js +60 -0
- package/src/lib/hedberg/marker.js +158 -0
- package/src/{plugins → lib}/hedberg/matcher.js +1 -2
- package/src/llm/LlmProvider.js +41 -3
- package/src/llm/openaiStream.js +17 -0
- package/src/plugins/ask_user/ask_user.js +12 -2
- package/src/plugins/ask_user/ask_userDoc.md +1 -5
- package/src/plugins/budget/README.md +29 -24
- package/src/plugins/budget/budget.js +166 -110
- package/src/plugins/cli/README.md +3 -4
- package/src/plugins/cli/cli.js +31 -5
- package/src/plugins/cloudflare/cloudflare.js +136 -0
- package/src/plugins/cp/cp.js +41 -4
- package/src/plugins/cp/cpDoc.md +5 -6
- package/src/plugins/engine/engine.sql +1 -1
- package/src/plugins/env/README.md +5 -4
- package/src/plugins/env/env.js +7 -4
- package/src/plugins/env/envDoc.md +7 -8
- package/src/plugins/error/error.js +56 -15
- package/src/plugins/file/README.md +12 -3
- package/src/plugins/file/file.js +2 -2
- package/src/plugins/get/get.js +59 -36
- package/src/plugins/get/getDoc.md +10 -34
- package/src/plugins/google/google.js +115 -0
- package/src/plugins/hedberg/hedberg.js +13 -56
- package/src/plugins/helpers.js +66 -12
- package/src/plugins/index.js +1 -2
- package/src/plugins/instructions/README.md +44 -47
- package/src/plugins/instructions/instructions-system.md +44 -0
- package/src/plugins/instructions/instructions-user.md +53 -0
- package/src/plugins/instructions/instructions.js +58 -189
- package/src/plugins/known/README.md +6 -7
- package/src/plugins/known/known.js +24 -30
- package/src/plugins/log/log.js +41 -32
- package/src/plugins/mv/mv.js +40 -1
- package/src/plugins/mv/mvDoc.md +1 -8
- package/src/plugins/ollama/ollama.js +4 -3
- package/src/plugins/openai/openai.js +4 -3
- package/src/plugins/openrouter/openrouter.js +14 -4
- package/src/plugins/persona/README.md +11 -13
- package/src/plugins/persona/default.md +29 -0
- package/src/plugins/persona/persona.js +10 -66
- package/src/plugins/policy/policy.js +23 -22
- package/src/plugins/prompt/README.md +37 -27
- package/src/plugins/prompt/prompt.js +13 -19
- package/src/plugins/rm/rm.js +18 -0
- package/src/plugins/rm/rmDoc.md +5 -6
- package/src/plugins/rpc/rpc.js +3 -3
- package/src/plugins/set/set.js +205 -323
- package/src/plugins/set/setDoc.md +47 -17
- package/src/plugins/sh/README.md +6 -5
- package/src/plugins/sh/sh.js +8 -5
- package/src/plugins/sh/shDoc.md +7 -8
- package/src/plugins/skill/README.md +37 -14
- package/src/plugins/skill/skill.js +200 -101
- package/src/plugins/skill/skillDoc.js +3 -0
- package/src/plugins/skill/skillDoc.md +9 -0
- package/src/plugins/stream/README.md +7 -6
- package/src/plugins/stream/finalize.js +100 -0
- package/src/plugins/stream/stream.js +13 -45
- package/src/plugins/telemetry/telemetry.js +27 -4
- package/src/plugins/think/think.js +2 -3
- package/src/plugins/think/thinkDoc.md +2 -4
- package/src/plugins/unknown/README.md +1 -1
- package/src/plugins/unknown/unknown.js +17 -19
- package/src/plugins/update/update.js +4 -51
- package/src/plugins/update/updateDoc.md +21 -6
- package/src/plugins/xai/xai.js +68 -102
- package/src/plugins/yolo/yolo.js +102 -75
- package/src/sql/functions/hedmatch.js +1 -1
- package/src/sql/functions/hedreplace.js +1 -1
- package/src/sql/functions/hedsearch.js +1 -1
- package/src/sql/functions/slugify.js +16 -2
- package/BENCH_ENVIRONMENT.md +0 -230
- package/CLIENT_INTERFACE.md +0 -396
- package/last_run.txt +0 -5617
- package/scriptify/ask_run.js +0 -77
- package/scriptify/cache_probe.js +0 -66
- package/scriptify/cache_probe_grok.js +0 -74
- package/src/agent/budget.js +0 -33
- package/src/agent/config.js +0 -38
- package/src/plugins/hedberg/README.md +0 -71
- package/src/plugins/hedberg/docs.md +0 -0
- package/src/plugins/hedberg/edits.js +0 -55
- package/src/plugins/hedberg/normalize.js +0 -17
- package/src/plugins/hedberg/sed.js +0 -49
- package/src/plugins/instructions/instructions.md +0 -34
- package/src/plugins/instructions/instructions_104.md +0 -8
- package/src/plugins/instructions/instructions_105.md +0 -39
- package/src/plugins/instructions/instructions_106.md +0 -22
- package/src/plugins/instructions/instructions_107.md +0 -17
- package/src/plugins/instructions/instructions_108.md +0 -0
- package/src/plugins/known/knownDoc.js +0 -3
- package/src/plugins/known/knownDoc.md +0 -8
- package/src/plugins/unknown/unknownDoc.js +0 -3
- package/src/plugins/unknown/unknownDoc.md +0 -11
- package/turns/cli_1777462658211/turn_001.txt +0 -772
- package/turns/cli_1777462658211/turn_002.txt +0 -606
- package/turns/cli_1777462658211/turn_003.txt +0 -667
- package/turns/cli_1777462658211/turn_004.txt +0 -297
- package/turns/cli_1777462658211/turn_005.txt +0 -301
- package/turns/cli_1777462658211/turn_006.txt +0 -262
- package/turns/cli_1777465095132/turn_001.txt +0 -715
- package/turns/cli_1777465095132/turn_002.txt +0 -236
- package/turns/cli_1777465095132/turn_003.txt +0 -287
- package/turns/cli_1777465095132/turn_004.txt +0 -694
- package/turns/cli_1777465095132/turn_005.txt +0 -422
- package/turns/cli_1777465095132/turn_006.txt +0 -365
- package/turns/cli_1777465095132/turn_007.txt +0 -885
- package/turns/cli_1777465095132/turn_008.txt +0 -1277
- package/turns/cli_1777465095132/turn_009.txt +0 -736
- /package/src/{plugins → lib}/hedberg/patterns.js +0 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import Entries from "../../agent/Entries.js";
|
|
2
|
+
import { logPathToDataBase } from "../helpers.js";
|
|
3
|
+
|
|
4
|
+
// Single termination site for streaming entries. Both stream/completed
|
|
5
|
+
// (external producer signaling close) and yolo's local child-spawn
|
|
6
|
+
// close handler funnel through here so finalization shape stays
|
|
7
|
+
// identical: channel terminal states, log-entry body rewrite, and
|
|
8
|
+
// dormant-run wake all live in one place.
|
|
9
|
+
//
|
|
10
|
+
// terminalState: "resolved" (exit_code=0), "failed" (non-zero).
|
|
11
|
+
// Aborts/cancellations write their own state ("cancelled") through the
|
|
12
|
+
// stream/aborted and stream/cancel paths and do NOT call this helper —
|
|
13
|
+
// explicit cancellation should not summon a follow-up turn.
|
|
14
|
+
/**
 * Single termination site for a streaming log entry.
 *
 * Moves every `${dataBase}_*` data channel of the log entry to its terminal
 * state, rewrites the log entry body with a one-line run summary, and —
 * unless disabled or blocked — emits `hooks.run.wake` for the run.
 *
 * @param {object} args
 * @param {object} args.db - Must expose `get_pending_loops.all` and
 *   `get_latest_completed_loop.get`.
 * @param {object} args.entries - Entry store; `getEntriesByPattern`, `set`
 *   and `getAttributes` are used.
 * @param {object} args.hooks - Only `hooks.run.wake.emit` is used.
 * @param {object} args.runRow - Run row; `id` and `alias` are read.
 * @param {string} args.path - Log entry path (`log://turn_N/...`).
 * @param {number} [args.exitCode=0] - `0` marks channels "resolved"; any
 *   other value marks them "failed" with outcome `exit:<code>`.
 * @param {*} [args.duration=null] - Rendered in parentheses after the exit
 *   label; omitted only when null, undefined or an empty string.
 * @param {boolean} [args.wake=true] - When false, skip the wake logic.
 * @returns {Promise<{channels: number, woke?: boolean}>} Channel count; `woke`
 *   is present only when the wake logic ran.
 * @throws {Error} When `logPathToDataBase(path)` yields no data base.
 */
export default async function finalizeStream({
	db,
	entries,
	hooks,
	runRow,
	path,
	exitCode = 0,
	duration = null,
	wake = true,
}) {
	const rawBase = logPathToDataBase(path);
	if (!rawBase) {
		throw new Error(
			`path must be a log entry (log://turn_N/...); got: ${path}`,
		);
	}
	// The log entry path may arrive in its raw URL-encoded form (e.g.
	// `%20` for spaces) but the data-channel rows are stored under the
	// canonical form. Normalize the derived dataBase so `${dataBase}_*`
	// matches the stored channel paths regardless of which form the
	// caller passed in.
	const dataBase = Entries.normalizePath(rawBase);
	// Pin every state-transition write to the action's originating turn.
	// Without this, entries.set's default turn=0 re-stamps the entry's
	// run_view.turn to 0 — and the auto-failure hook then derives
	// log://turn_0/error/... for failures that actually happened on
	// turn N.
	const turnMatch = path.match(/^log:\/\/turn_(\d+)\//);
	const turn = turnMatch ? Number(turnMatch[1]) : 0;

	const runId = runRow.id;
	const succeeded = exitCode === 0;
	const terminalState = succeeded ? "resolved" : "failed";
	const terminalOutcome = succeeded ? null : `exit:${exitCode}`;

	const channels = await entries.getEntriesByPattern(
		runId,
		`${dataBase}_*`,
		null,
	);
	// Writes stay sequential, matching the original ordering of store updates.
	for (const ch of channels) {
		await entries.set({
			runId,
			turn,
			path: ch.path,
			state: terminalState,
			body: ch.body,
			outcome: terminalOutcome,
		});
	}

	// One-line final stats for the log entry body.
	const logEntry = await entries.getAttributes(runId, path);
	let command = "";
	if (logEntry?.command) command = logEntry.command;
	else if (logEntry?.summary) command = logEntry.summary;
	const channelSummary = channels
		.map((c) => {
			const size = c.body ? `${c.tokens} tokens` : "empty";
			return `${c.path} (${size})`;
		})
		.join(", ");
	// `null` is the declared "no duration" sentinel; a real 0 must still be
	// reported, so test for absence explicitly instead of truthiness.
	const hasDuration =
		duration !== null && duration !== undefined && duration !== "";
	const dur = hasDuration ? ` (${duration})` : "";
	const exitLabel = `exit=${exitCode}`;
	const body = `ran '${command}', ${exitLabel}${dur}. Output: ${channelSummary}`;
	// The log entry itself is always written as "resolved"; only the data
	// channels carry the failed state.
	await entries.set({ runId, turn, path, state: "resolved", body });

	if (!wake) return { channels: channels.length };

	// Dormancy: any pending or active loop on the run blocks the wake — the
	// active loop will see the new log entry on its next turn assembly and
	// the producer doesn't owe it a fresh prompt.
	const inflight = await db.get_pending_loops.all({ run_id: runId });
	if (inflight.length > 0) return { channels: channels.length, woke: false };

	// Mode for the wake loop: inherit from the latest completed loop on
	// the run. Runs without a completed loop are not woken.
	const latest = await db.get_latest_completed_loop.get({ run_id: runId });
	if (!latest) return { channels: channels.length, woke: false };

	await hooks.run.wake.emit({
		runAlias: runRow.alias,
		body: "Process complete",
		mode: latest.mode,
	});
	return { channels: channels.length, woke: true };
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { logPathToDataBase } from "../helpers.js";
|
|
2
|
+
import finalizeStream from "./finalize.js";
|
|
2
3
|
|
|
3
4
|
// RPC plumbing that appends/terminates streaming data entries; see plugin README.
|
|
4
5
|
export default class Stream {
|
|
@@ -59,55 +60,22 @@ export default class Stream {
|
|
|
59
60
|
alias: params.run,
|
|
60
61
|
});
|
|
61
62
|
if (!runRow) throw new Error(`run not found: ${params.run}`);
|
|
62
|
-
const runId = runRow.id;
|
|
63
63
|
|
|
64
64
|
const { exit_code: exitCode = 0, duration = null } = params;
|
|
65
|
-
const
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
const channels = await store.getEntriesByPattern(
|
|
77
|
-
runId,
|
|
78
|
-
`${dataBase}_*`,
|
|
79
|
-
null,
|
|
80
|
-
);
|
|
81
|
-
for (const ch of channels) {
|
|
82
|
-
await store.set({
|
|
83
|
-
runId,
|
|
84
|
-
path: ch.path,
|
|
85
|
-
state: terminalState,
|
|
86
|
-
body: ch.body,
|
|
87
|
-
outcome: terminalOutcome,
|
|
88
|
-
});
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
// One-line final stats for the log entry body.
|
|
92
|
-
const logEntry = await store.getAttributes(runId, params.path);
|
|
93
|
-
let command = "";
|
|
94
|
-
if (logEntry?.command) command = logEntry.command;
|
|
95
|
-
else if (logEntry?.summary) command = logEntry.summary;
|
|
96
|
-
const channelSummary = channels
|
|
97
|
-
.map((c) => {
|
|
98
|
-
const size = c.body ? `${c.tokens} tokens` : "empty";
|
|
99
|
-
return `${c.path} (${size})`;
|
|
100
|
-
})
|
|
101
|
-
.join(", ");
|
|
102
|
-
const dur = duration ? ` (${duration})` : "";
|
|
103
|
-
const exitLabel = exitCode === 0 ? "exit=0" : `exit=${exitCode}`;
|
|
104
|
-
const body = `ran '${command}', ${exitLabel}${dur}. Output: ${channelSummary}`;
|
|
105
|
-
await store.set({ runId, path: params.path, state: "resolved", body });
|
|
106
|
-
|
|
107
|
-
return { ok: true, channels: channels.length };
|
|
65
|
+
const result = await finalizeStream({
|
|
66
|
+
db: ctx.db,
|
|
67
|
+
entries: ctx.projectAgent.entries,
|
|
68
|
+
hooks,
|
|
69
|
+
runRow,
|
|
70
|
+
path: params.path,
|
|
71
|
+
exitCode,
|
|
72
|
+
duration,
|
|
73
|
+
wake: true,
|
|
74
|
+
});
|
|
75
|
+
return { ok: true, ...result };
|
|
108
76
|
},
|
|
109
77
|
description:
|
|
110
|
-
"Finalize a streaming producer. Transitions all `{path}_*` data channels to terminal status (200 on exit_code=0, 500 otherwise)
|
|
78
|
+
"Finalize a streaming producer. Transitions all `{path}_*` data channels to terminal status (200 on exit_code=0, 500 otherwise), rewrites the log entry body with exit code/duration/channel sizes, and wakes the run with a 'Process complete' prompt if it has gone dormant.",
|
|
111
79
|
params: {
|
|
112
80
|
run: "string — run alias",
|
|
113
81
|
path: "string — log-entry path (log://turn_N/{action}/{slug}); server derives the data channel path",
|
|
@@ -206,6 +206,18 @@ export default class Telemetry {
|
|
|
206
206
|
if (usage.prompt_tokens) actualContextTokens = usage.prompt_tokens;
|
|
207
207
|
else if (assembledTokens) actualContextTokens = assembledTokens;
|
|
208
208
|
const numberOrZero = (v) => (typeof v === "number" ? v : 0);
|
|
209
|
+
// Forensic metadata blob — everything the provider sent that
|
|
210
|
+
// isn't content/reasoning_content (those live elsewhere) or
|
|
211
|
+
// already-derived columns (token counts, cost). Catches
|
|
212
|
+
// finish_reason, system_fingerprint, response id, service_tier,
|
|
213
|
+
// raw usage, and any provider-specific fields that may light up
|
|
214
|
+
// future investigations. JSON column tolerates shape drift.
|
|
215
|
+
const responseMetadata = {
|
|
216
|
+
finish_reason: result.choices[0].finish_reason,
|
|
217
|
+
model_returned: result.model,
|
|
218
|
+
usage: result.usage,
|
|
219
|
+
...result.chunkMetadata,
|
|
220
|
+
};
|
|
209
221
|
await rummy.entries.updateTurnStats({
|
|
210
222
|
id: rummy.turnId,
|
|
211
223
|
context_tokens: actualContextTokens,
|
|
@@ -217,12 +229,23 @@ export default class Telemetry {
|
|
|
217
229
|
completion_tokens: numberOrZero(usage.completion_tokens),
|
|
218
230
|
reasoning_tokens: reasoningTokens,
|
|
219
231
|
total_tokens: numberOrZero(usage.total_tokens),
|
|
220
|
-
//
|
|
221
|
-
//
|
|
222
|
-
//
|
|
232
|
+
// Cost surfaces under different field names by provider:
|
|
233
|
+
// - OpenRouter direct: `usage.cost` (USD, what the relay billed us)
|
|
234
|
+
// - OpenRouter BYOK: `usage.cost_details.upstream_inference_cost` (USD,
|
|
235
|
+
// relay didn't bill — upstream charged our key directly, so
|
|
236
|
+
// `usage.cost` is 0 and the true compute cost lives here).
|
|
237
|
+
// - xAI direct: `usage.cost_in_usd_ticks` where 1 tick = 10⁻¹⁰
|
|
238
|
+
// USD (verified empirically: 11 uncached + 161 cached + 1
|
|
239
|
+
// output tokens → 107,500 ticks → $0.00001075 at xAI's
|
|
240
|
+
// $0.20/M input, $0.05/M cached, $0.50/M output rates).
|
|
241
|
+
// Divide by 1e10 to land in USD alongside the others.
|
|
242
|
+
// All three normalized to USD; downstream summaries sum them
|
|
243
|
+
// as comparable dollars.
|
|
223
244
|
cost:
|
|
224
245
|
numberOrZero(usage.cost) ||
|
|
225
|
-
numberOrZero(usage.cost_details?.upstream_inference_cost)
|
|
246
|
+
numberOrZero(usage.cost_details?.upstream_inference_cost) ||
|
|
247
|
+
numberOrZero(usage.cost_in_usd_ticks) / 1e10,
|
|
248
|
+
response_metadata: JSON.stringify(responseMetadata),
|
|
226
249
|
});
|
|
227
250
|
}
|
|
228
251
|
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
import config from "../../agent/config.js";
|
|
2
1
|
import docs from "./thinkDoc.js";
|
|
3
2
|
|
|
4
|
-
const
|
|
3
|
+
const THINK = process.env.RUMMY_THINK === "1";
|
|
5
4
|
|
|
6
5
|
export default class Think {
|
|
7
6
|
constructor(core) {
|
|
8
7
|
core.registerScheme({ modelVisible: 0, category: "logging" });
|
|
9
|
-
if (THINK
|
|
8
|
+
if (THINK) {
|
|
10
9
|
core.ensureTool();
|
|
11
10
|
core.filter("instructions.toolDocs", async (docsMap) => {
|
|
12
11
|
docsMap.think = docs;
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
## <think>[reasoning]</think> - Think before acting
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
<!-- Positioning: think first, then act. Prevents degenerate tool-call storms. -->
|
|
3
|
+
Example: <think>Plan: <search> for X; <get> the top-ranked result; distill into known://Y.</think>
|
|
5
4
|
|
|
6
|
-
* Reasoning inside <think></think> is private — it does not appear in your context
|
|
7
|
-
<!-- Frees the model to reason without consuming context budget. -->
|
|
5
|
+
* Reasoning inside <think></think> is private — it does not appear in your context.
|
|
@@ -9,7 +9,7 @@ The Rumsfeld mechanism. The model registers what it doesn't know before acting.
|
|
|
9
9
|
- **Tool**: `unknown`
|
|
10
10
|
- **Category**: `unknown`
|
|
11
11
|
- **Handler**: None — recorded by TurnExecutor, deduplicated against existing unknowns.
|
|
12
|
-
- **Filter**: `assembly.user` at priority
|
|
12
|
+
- **Filter**: `assembly.user` at priority 150 — renders `<unknowns>` after `<log>` (priority 100) and before `<instructions>` (priority 165) in the sandwich. Unknowns are active work, not stable environment state; they belong in the user packet.
|
|
13
13
|
|
|
14
14
|
## Projection
|
|
15
15
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { renderEntry, SUMMARY_MAX_CHARS } from "../helpers.js";
|
|
2
|
+
|
|
1
3
|
export default class Unknown {
|
|
2
4
|
constructor(core) {
|
|
3
5
|
core.ensureTool();
|
|
@@ -7,7 +9,10 @@ export default class Unknown {
|
|
|
7
9
|
core.on("handler", this.handler.bind(this));
|
|
8
10
|
core.on("visible", this.full.bind(this));
|
|
9
11
|
core.on("summarized", this.summary.bind(this));
|
|
10
|
-
core.filter("assembly.user", this.assembleUnknowns.bind(this),
|
|
12
|
+
core.filter("assembly.user", this.assembleUnknowns.bind(this), 175);
|
|
13
|
+
// Hidden from the advertised tool list — the model writes unknowns
|
|
14
|
+
// via <set path="unknown://..."/>. The unknown:// scheme lifecycle
|
|
15
|
+
// is taught in instructions-user.md, not in a separate tooldoc.
|
|
11
16
|
core.markHidden();
|
|
12
17
|
}
|
|
13
18
|
|
|
@@ -28,12 +33,12 @@ export default class Unknown {
|
|
|
28
33
|
return;
|
|
29
34
|
}
|
|
30
35
|
|
|
31
|
-
//
|
|
36
|
+
// tags > body for slug; lets the model round-trip via <get>.
|
|
32
37
|
const unknownPath = await store.slugPath(
|
|
33
38
|
runId,
|
|
34
39
|
"unknown",
|
|
35
40
|
entry.body,
|
|
36
|
-
entry.attributes?.
|
|
41
|
+
entry.attributes?.tags,
|
|
37
42
|
);
|
|
38
43
|
await store.set({
|
|
39
44
|
runId,
|
|
@@ -49,11 +54,10 @@ export default class Unknown {
|
|
|
49
54
|
return entry.body;
|
|
50
55
|
}
|
|
51
56
|
|
|
52
|
-
// First
|
|
57
|
+
// First SUMMARY_MAX_CHARS of the body. Matches <known> / <prompt>.
|
|
53
58
|
summary(entry) {
|
|
54
59
|
if (!entry.body) return "";
|
|
55
|
-
|
|
56
|
-
return `${entry.body.slice(0, 500)}\n[truncated — promote to see the full question]`;
|
|
60
|
+
return entry.body.slice(0, SUMMARY_MAX_CHARS);
|
|
57
61
|
}
|
|
58
62
|
|
|
59
63
|
async assembleUnknowns(content, ctx) {
|
|
@@ -69,18 +73,12 @@ function renderUnknownTag(entry) {
|
|
|
69
73
|
typeof entry.attributes === "string"
|
|
70
74
|
? JSON.parse(entry.attributes)
|
|
71
75
|
: entry.attributes;
|
|
72
|
-
const
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
const tokens = entry.aTokens != null ? ` tokens="${entry.aTokens}"` : "";
|
|
77
|
-
const summary =
|
|
78
|
-
typeof attrs?.summary === "string"
|
|
79
|
-
? ` summary="${attrs.summary.replace(/"/g, "'").slice(0, 80)}"`
|
|
80
|
-
: "";
|
|
81
|
-
const attrStr = `${turn}${summary}${visibility}${tokens}`;
|
|
82
|
-
if (entry.body) {
|
|
83
|
-
return `<unknown path="${entry.path}"${attrStr}>${entry.body}</unknown>`;
|
|
76
|
+
const meta = {};
|
|
77
|
+
if (entry.source_turn) meta.turn = entry.source_turn;
|
|
78
|
+
if (typeof attrs?.tags === "string") {
|
|
79
|
+
meta.tags = attrs.tags.slice(0, 80);
|
|
84
80
|
}
|
|
85
|
-
|
|
81
|
+
if (entry.visibility) meta.visibility = entry.visibility;
|
|
82
|
+
if (entry.aTokens != null) meta.tokens = entry.aTokens;
|
|
83
|
+
return renderEntry(entry.path, meta, entry.body);
|
|
86
84
|
}
|
|
@@ -1,16 +1,8 @@
|
|
|
1
1
|
import docs from "./updateDoc.js";
|
|
2
2
|
|
|
3
|
-
const TERMINAL_STATUSES = new Set([200, 204, 422, 500]);
|
|
4
|
-
|
|
5
3
|
const CONTRACT_REMINDER = "Missing update";
|
|
6
4
|
|
|
7
|
-
const EMPTY_RESPONSE_REMINDER =
|
|
8
|
-
"Response empty - Update with status 500 if unable to fulfill request.";
|
|
9
|
-
|
|
10
|
-
function isValidStatus(status) {
|
|
11
|
-
if (TERMINAL_STATUSES.has(status)) return true;
|
|
12
|
-
return Number.isInteger(status) && status >= 100 && status < 200;
|
|
13
|
-
}
|
|
5
|
+
const EMPTY_RESPONSE_REMINDER = "Response empty";
|
|
14
6
|
|
|
15
7
|
export default class Update {
|
|
16
8
|
#core;
|
|
@@ -32,53 +24,14 @@ export default class Update {
|
|
|
32
24
|
}
|
|
33
25
|
|
|
34
26
|
async handler(entry, rummy) {
|
|
35
|
-
|
|
36
|
-
const status = entry.attributes?.status ?? 102;
|
|
37
|
-
const validation = await rummy.hooks.instructions.validateNavigation(
|
|
38
|
-
status,
|
|
39
|
-
rummy,
|
|
40
|
-
);
|
|
41
|
-
if (!validation.ok) {
|
|
42
|
-
entry.state = "failed";
|
|
43
|
-
entry.outcome = "invalid_navigation";
|
|
44
|
-
entry.body = validation.reason;
|
|
45
|
-
await store.set({
|
|
46
|
-
runId,
|
|
47
|
-
turn,
|
|
48
|
-
loopId,
|
|
49
|
-
path: entry.resultPath,
|
|
50
|
-
body: validation.reason,
|
|
51
|
-
state: "failed",
|
|
52
|
-
outcome: "invalid_navigation",
|
|
53
|
-
attributes: { status },
|
|
54
|
-
});
|
|
55
|
-
return;
|
|
56
|
-
}
|
|
57
|
-
if (!isValidStatus(status)) {
|
|
58
|
-
entry.state = "failed";
|
|
59
|
-
entry.outcome = "invalid_status";
|
|
60
|
-
const message = `Invalid status ${status} on update — use 1xx to continue or 200 to conclude.`;
|
|
61
|
-
entry.body = message;
|
|
62
|
-
await store.set({
|
|
63
|
-
runId,
|
|
64
|
-
turn,
|
|
65
|
-
loopId,
|
|
66
|
-
path: entry.resultPath,
|
|
67
|
-
body: message,
|
|
68
|
-
state: "failed",
|
|
69
|
-
outcome: "invalid_status",
|
|
70
|
-
attributes: { status },
|
|
71
|
-
});
|
|
72
|
-
return;
|
|
73
|
-
}
|
|
74
|
-
await rummy.update(entry.body, { status });
|
|
27
|
+
await rummy.update(entry.body, { status: entry.attributes?.status });
|
|
75
28
|
}
|
|
76
29
|
|
|
77
30
|
async resolve({ recorded, content, runId, turn, loopId, rummy }) {
|
|
78
31
|
const entry = recorded.findLast((e) => e.scheme === "update");
|
|
79
|
-
const status = entry?.attributes?.status
|
|
32
|
+
const status = entry?.attributes?.status;
|
|
80
33
|
const failed = entry?.state === "failed";
|
|
81
|
-
const isTerminal =
|
|
34
|
+
const isTerminal = status === 200 && !failed;
|
|
82
35
|
let summaryText = null;
|
|
83
36
|
let updateText = null;
|
|
84
37
|
if (entry?.body && !failed) {
|
|
@@ -1,8 +1,23 @@
|
|
|
1
|
-
## <update status="N">{
|
|
2
|
-
<!-- Header defines position, frequency, and status code requirement. -->
|
|
1
|
+
## <update status="N">{ direct answer or one-line summary }</update> - Turn termination
|
|
3
2
|
|
|
4
|
-
YOU MUST
|
|
5
|
-
|
|
3
|
+
YOU MUST conclude every turn with one (and only one) <update status="N"></update>.
|
|
4
|
+
YOU MUST keep the update body to <= 80 characters.
|
|
5
|
+
YOU MUST use status 102 for continuation and 200 for final delivery.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
Example:
|
|
8
|
+
{ demote irrelevant source entries and log entries }
|
|
9
|
+
<set path="known://plan"><<SEARCH
|
|
10
|
+
- [ ] Distill geography unknowns
|
|
11
|
+
SEARCH
|
|
12
|
+
<<REPLACE
|
|
13
|
+
- [x] Distill geography unknowns
|
|
14
|
+
REPLACE</set>
|
|
15
|
+
<update status="102">distilled three unknowns into known://trivia/geography/capitals</update>
|
|
16
|
+
Example:
|
|
17
|
+
<set path="known://plan"><<SEARCH
|
|
18
|
+
- [ ] Deliver direct answer
|
|
19
|
+
SEARCH
|
|
20
|
+
<<REPLACE
|
|
21
|
+
- [x] Deliver direct answer
|
|
22
|
+
REPLACE</set>
|
|
23
|
+
<update status="200">Paris</update>
|
package/src/plugins/xai/xai.js
CHANGED
|
@@ -1,12 +1,28 @@
|
|
|
1
|
-
import config from "../../agent/config.js";
|
|
2
1
|
import msg from "../../agent/messages.js";
|
|
3
|
-
import {
|
|
2
|
+
import { chatCompletionStream } from "../../llm/openaiStream.js";
|
|
4
3
|
|
|
5
|
-
const
|
|
4
|
+
const FETCH_TIMEOUT = Number(process.env.RUMMY_FETCH_TIMEOUT);
|
|
5
|
+
|
|
6
|
+
// reasoning_effort takes low|medium|high|none. Models that don't support
|
|
7
|
+
// the parameter reject the request with 400, so the env knob is opt-in:
|
|
8
|
+
// set it only on profiles targeting a model that accepts it.
|
|
9
|
+
const REASONING_EFFORT = process.env.RUMMY_REASONING_EFFORT;
|
|
6
10
|
|
|
7
11
|
const PROVIDER = "xai";
|
|
8
12
|
|
|
9
|
-
// Inert unless XAI_BASE_URL set; xai/{model} aliases
|
|
13
|
+
// Inert unless XAI_BASE_URL set; xai/{model} aliases.
|
|
14
|
+
//
|
|
15
|
+
// XAI_BASE_URL points at xAI's v1 root (e.g. https://api.x.ai/v1).
|
|
16
|
+
// We POST to {base}/chat/completions and stream the response via the
|
|
17
|
+
// shared OpenAI-compatible client — this is the path that surfaces
|
|
18
|
+
// reasoning_content deltas. The /v1/responses endpoint is xAI's newer
|
|
19
|
+
// API but its non-streaming output drops reasoning content (we still
|
|
20
|
+
// pay for it via reasoning_tokens; we just never see it). Streaming on
|
|
21
|
+
// /v1/responses uses a different event shape that our shared stream
|
|
22
|
+
// client doesn't speak. So we use /v1/chat/completions: caching is
|
|
23
|
+
// preserved via the `x-grok-conv-id` header (xAI's chat-completions
|
|
24
|
+
// equivalent of the /v1/responses `prompt_cache_key` body field).
|
|
25
|
+
// See https://docs.x.ai/developers/advanced-api-usage/prompt-caching.
|
|
10
26
|
export default class Xai {
|
|
11
27
|
#baseUrl;
|
|
12
28
|
#apiKey;
|
|
@@ -15,7 +31,22 @@ export default class Xai {
|
|
|
15
31
|
constructor(core) {
|
|
16
32
|
const baseUrl = process.env.XAI_BASE_URL;
|
|
17
33
|
if (!baseUrl) return;
|
|
18
|
-
this.#baseUrl = baseUrl;
|
|
34
|
+
this.#baseUrl = baseUrl.replace(/\/$/, "");
|
|
35
|
+
// Fail-fast on the legacy `/v1/responses` endpoint (used in earlier
|
|
36
|
+
// rummy versions before we switched to streaming /chat/completions).
|
|
37
|
+
// Composing `${baseUrl}/chat/completions` against a stale shell
|
|
38
|
+
// `XAI_BASE_URL=https://api.x.ai/v1/responses` produces a 404 route
|
|
39
|
+
// that escapes to AgentLoop's outer catch and 500-storms a sweep
|
|
40
|
+
// silently. Throwing at construction surfaces the env trap before
|
|
41
|
+
// any task starts (verified pathology: 2026-05-01 sweep, 31/31
|
|
42
|
+
// status=500). xAI's API root ends in `/v1`; anything else is wrong.
|
|
43
|
+
if (!/\/v1$/.test(this.#baseUrl)) {
|
|
44
|
+
throw new Error(
|
|
45
|
+
`XAI_BASE_URL must be the API root ending in /v1 (got "${this.#baseUrl}"). ` +
|
|
46
|
+
"Likely a stale shell env from earlier /v1/responses usage; " +
|
|
47
|
+
"set XAI_BASE_URL=https://api.x.ai/v1 (or the relevant proxy root).",
|
|
48
|
+
);
|
|
49
|
+
}
|
|
19
50
|
this.#apiKey = process.env.XAI_API_KEY;
|
|
20
51
|
|
|
21
52
|
const wireModel = (alias) => alias.split("/").slice(1).join("/");
|
|
@@ -32,119 +63,57 @@ export default class Xai {
|
|
|
32
63
|
async #completion(messages, model, options = {}) {
|
|
33
64
|
if (!this.#apiKey) throw new Error(msg("error.xai_api_key_missing"));
|
|
34
65
|
|
|
35
|
-
const body = { model,
|
|
66
|
+
const body = { model, messages };
|
|
67
|
+
if (options.maxTokens !== undefined) body.max_tokens = options.maxTokens;
|
|
36
68
|
if (options.temperature !== undefined)
|
|
37
69
|
body.temperature = options.temperature;
|
|
38
|
-
|
|
39
|
-
// turn run pinned to the same backend so the cached prefix actually
|
|
40
|
-
// hits. Without this, requests load-balance and cache_tokens stays
|
|
41
|
-
// near-zero. See https://docs.x.ai/developers/advanced-api-usage/prompt-caching.
|
|
42
|
-
if (options.runAlias) body.prompt_cache_key = options.runAlias;
|
|
70
|
+
if (REASONING_EFFORT) body.reasoning_effort = REASONING_EFFORT;
|
|
43
71
|
|
|
44
72
|
const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT);
|
|
45
73
|
const signal = options.signal
|
|
46
74
|
? AbortSignal.any([options.signal, timeoutSignal])
|
|
47
75
|
: timeoutSignal;
|
|
48
76
|
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
77
|
+
const headers = {
|
|
78
|
+
Authorization: `Bearer ${this.#apiKey}`,
|
|
79
|
+
};
|
|
80
|
+
// Pin caching to the run alias. xAI's chat-completions cache is
|
|
81
|
+
// per-server; same conv-id routes to the same backend, which is
|
|
82
|
+
// where the cached prefix lives. Without this, requests load-
|
|
83
|
+
// balance across servers and cached_tokens stays near zero.
|
|
84
|
+
if (options.runAlias) headers["x-grok-conv-id"] = options.runAlias;
|
|
85
|
+
|
|
86
|
+
try {
|
|
87
|
+
return await chatCompletionStream({
|
|
88
|
+
url: `${this.#baseUrl}/chat/completions`,
|
|
89
|
+
headers,
|
|
90
|
+
body,
|
|
91
|
+
signal,
|
|
92
|
+
});
|
|
93
|
+
} catch (err) {
|
|
94
|
+
if (err.status === 401 || err.status === 403) {
|
|
95
|
+
throw new Error(
|
|
64
96
|
msg("error.xai_auth", {
|
|
65
|
-
status: `${
|
|
97
|
+
status: `${err.status} - ${err.body}`,
|
|
66
98
|
}),
|
|
67
99
|
);
|
|
68
|
-
err.status = response.status;
|
|
69
|
-
err.body = errorBody;
|
|
70
|
-
throw err;
|
|
71
100
|
}
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
err.body = errorBody;
|
|
79
|
-
err.retryAfter = retryAfter;
|
|
80
|
-
throw err;
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
return this.#normalize(await response.json());
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
#normalize(data) {
|
|
87
|
-
let content = "";
|
|
88
|
-
let reasoningContent = null;
|
|
89
|
-
|
|
90
|
-
for (const item of data.output) {
|
|
91
|
-
if (item.type === "reasoning") {
|
|
92
|
-
const text = this.#extractText(item.content);
|
|
93
|
-
if (text)
|
|
94
|
-
reasoningContent = reasoningContent
|
|
95
|
-
? `${reasoningContent}\n${text}`
|
|
96
|
-
: text;
|
|
97
|
-
}
|
|
98
|
-
if (item.type === "message") {
|
|
99
|
-
const text = this.#extractText(item.content);
|
|
100
|
-
if (text) content = content ? `${content}\n${text}` : text;
|
|
101
|
+
if (err.status) {
|
|
102
|
+
throw new Error(
|
|
103
|
+
msg("error.xai_api", {
|
|
104
|
+
status: `${err.status} - ${err.body}`,
|
|
105
|
+
}),
|
|
106
|
+
);
|
|
101
107
|
}
|
|
108
|
+
throw err;
|
|
102
109
|
}
|
|
103
|
-
|
|
104
|
-
const { usage } = data;
|
|
105
|
-
const inputTokens = usage.input_tokens;
|
|
106
|
-
const outputTokens = usage.output_tokens;
|
|
107
|
-
// Optional per xAI API; absent on providers that don't surface them.
|
|
108
|
-
const cached = usage.input_tokens_details?.cached_tokens;
|
|
109
|
-
const reasoningTokens = usage.output_tokens_details?.reasoning_tokens;
|
|
110
|
-
const costTicks = usage.cost_in_usd_ticks;
|
|
111
|
-
return {
|
|
112
|
-
choices: [
|
|
113
|
-
{
|
|
114
|
-
message: {
|
|
115
|
-
role: "assistant",
|
|
116
|
-
content,
|
|
117
|
-
reasoning_content: reasoningContent,
|
|
118
|
-
},
|
|
119
|
-
},
|
|
120
|
-
],
|
|
121
|
-
usage: {
|
|
122
|
-
prompt_tokens: inputTokens,
|
|
123
|
-
cached_tokens: cached === undefined ? 0 : cached,
|
|
124
|
-
completion_tokens: outputTokens,
|
|
125
|
-
reasoning_tokens: reasoningTokens === undefined ? 0 : reasoningTokens,
|
|
126
|
-
total_tokens: inputTokens + outputTokens,
|
|
127
|
-
cost: costTicks === undefined ? 0 : costTicks / 10_000_000_000,
|
|
128
|
-
},
|
|
129
|
-
};
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
#extractText(content) {
|
|
133
|
-
if (typeof content === "string") return content;
|
|
134
|
-
if (!Array.isArray(content)) return null;
|
|
135
|
-
const joined = content
|
|
136
|
-
.filter((c) => c.type === "text" || c.type === "output_text")
|
|
137
|
-
.map((c) => c.text)
|
|
138
|
-
.join("\n");
|
|
139
|
-
return joined ? joined : null;
|
|
140
110
|
}
|
|
141
111
|
|
|
142
112
|
async #getContextSize(model) {
|
|
143
113
|
if (this.#contextCache.has(model)) return this.#contextCache.get(model);
|
|
144
114
|
if (!this.#apiKey) throw new Error(msg("error.xai_api_key_missing"));
|
|
145
115
|
|
|
146
|
-
const
|
|
147
|
-
const res = await fetch(modelsUrl, {
|
|
116
|
+
const res = await fetch(`${this.#baseUrl}/models`, {
|
|
148
117
|
headers: { Authorization: `Bearer ${this.#apiKey}` },
|
|
149
118
|
signal: AbortSignal.timeout(FETCH_TIMEOUT),
|
|
150
119
|
});
|
|
@@ -164,10 +133,7 @@ export default class Xai {
|
|
|
164
133
|
}
|
|
165
134
|
}
|
|
166
135
|
|
|
167
|
-
const langUrl = this.#baseUrl
|
|
168
|
-
/\/responses$/,
|
|
169
|
-
`/language-models/${model}`,
|
|
170
|
-
);
|
|
136
|
+
const langUrl = `${this.#baseUrl}/language-models/${model}`;
|
|
171
137
|
// Optional probe; failure falls through to terminal throw below.
|
|
172
138
|
const langRes = await fetch(langUrl, {
|
|
173
139
|
headers: { Authorization: `Bearer ${this.#apiKey}` },
|