@possumtech/rummy 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +21 -0
- package/SPEC.md +84 -0
- package/package.json +8 -8
- package/scriptify/ask_run.js +77 -0
- package/src/agent/AgentLoop.js +30 -19
- package/src/agent/Entries.js +23 -2
- package/src/agent/ProjectAgent.js +2 -2
- package/src/agent/TurnExecutor.js +3 -0
- package/src/agent/known_queries.sql +1 -1
- package/src/agent/known_store.sql +5 -0
- package/src/agent/materializeContext.js +4 -2
- package/src/agent/runs.sql +19 -0
- package/src/agent/tokens.js +6 -0
- package/src/hooks/RummyContext.js +4 -0
- package/src/llm/LlmProvider.js +24 -21
- package/src/llm/errors.js +1 -1
- package/src/llm/retry.js +63 -0
- package/src/plugins/budget/budget.js +64 -18
- package/src/plugins/get/getDoc.md +3 -3
- package/src/plugins/instructions/instructions.js +123 -1
- package/src/plugins/instructions/instructions.md +20 -12
- package/src/plugins/instructions/instructions_104.md +4 -4
- package/src/plugins/instructions/instructions_105.md +28 -36
- package/src/plugins/instructions/instructions_106.md +21 -0
- package/src/plugins/instructions/instructions_107.md +10 -0
- package/src/plugins/instructions/instructions_108.md +0 -8
- package/src/plugins/known/known.js +2 -1
- package/src/plugins/log/log.js +27 -7
- package/src/plugins/prompt/prompt.js +10 -4
- package/src/plugins/rpc/rpc.js +11 -1
- package/src/plugins/update/update.js +18 -2
- package/src/plugins/yolo/yolo.js +192 -0
package/src/llm/retry.js
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
 * Exponential backoff with full jitter, time-bounded.
 *
 * Calls `fn` until it returns a value, the deadline elapses, or a
 * non-retryable error is thrown. Between attempts, sleeps for a
 * random duration in [0, min(maxDelayMs, baseDelayMs * 2^attempt)),
 * clamped so the sleep never runs past the deadline.
 * Full jitter (AWS / Google Cloud pattern) prevents thundering-herd
 * synchronization across concurrent clients hitting the same API.
 *
 * Time-bounded, not count-bounded: a connect-level outage that
 * recovers in 4 minutes is invisible to the caller, but a persistent
 * outage fails after deadlineMs with a clear cause chain.
 *
 * Aborts immediately if the supplied AbortSignal fires — even mid-sleep.
 *
 * @param {() => any} fn - Operation to attempt; its awaited result is returned.
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - Checked before every attempt and during sleeps.
 * @param {number} [options.deadlineMs] - Total retry budget measured from the first
 *   attempt. When omitted (or not a number) there is no budget, so the first
 *   retryable failure is reported immediately instead of being retried.
 * @param {number} [options.baseDelayMs=1000] - Backoff base.
 * @param {number} [options.maxDelayMs=30000] - Upper bound of the jitter window.
 * @param {(err: unknown) => boolean} [options.isRetryable] - Decides whether an
 *   error is transient. Defaults to treating every error as retryable.
 * @param {(err: unknown, attempt: number, delayMs: number, remainingMs: number) => void} [options.onRetry]
 *   - Invoked before each sleep with the 1-based attempt number.
 * @returns {Promise<any>} Whatever `fn` resolves to.
 * @throws The original error when `isRetryable` rejects it; an `Error` whose
 *   `cause` is the last failure once the budget is spent; the signal's
 *   reason when aborted.
 */
export async function retryWithBackoff(
	fn,
	{
		signal,
		deadlineMs,
		baseDelayMs = 1000,
		maxDelayMs = 30_000,
		// Defaulted so a caller that omits the predicate gets retries rather
		// than a TypeError that hides the underlying failure.
		isRetryable = () => true,
		onRetry,
	} = {},
) {
	const startTime = Date.now();
	let attempt = 0;
	while (true) {
		signal?.throwIfAborted();
		try {
			return await fn();
		} catch (err) {
			if (!isRetryable(err)) throw err;
			const elapsedMs = Date.now() - startTime;
			const remainingMs = deadlineMs - elapsedMs;
			// `!(x > 0)` rather than `x <= 0`: a missing deadline makes
			// remainingMs NaN, which must count as "no budget left" instead
			// of slipping through into a NaN-delay hot loop.
			if (!(remainingMs > 0)) {
				throw new Error(
					`transient failures persisted ${Math.round(elapsedMs / 1000)}s past deadline; last error: ${err.message}`,
					{ cause: err },
				);
			}
			const expCap = Math.min(maxDelayMs, baseDelayMs * 2 ** attempt);
			const jittered = Math.floor(Math.random() * expCap);
			// Never sleep beyond the deadline; the final attempt lands on it.
			const delayMs = Math.min(remainingMs, jittered);
			onRetry?.(err, attempt + 1, delayMs, remainingMs);
			await sleep(delayMs, signal);
			attempt++;
		}
	}
}
|
|
52
|
+
|
|
53
|
+
/**
 * Resolve after `ms` milliseconds, or reject as soon as `signal` aborts.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>} Resolves with no value when the timer fires;
 *   rejects with `signal.reason` (or a generic Error) on abort.
 */
function sleep(ms, signal) {
	return new Promise((resolve, reject) => {
		// An already-aborted signal never dispatches another "abort" event,
		// so without this check the full delay would elapse before the
		// caller noticed the cancellation.
		if (signal?.aborted) {
			reject(signal.reason || new Error("aborted"));
			return;
		}
		let onAbort;
		const t = setTimeout(() => {
			// Detach on normal completion; otherwise every sleep leaves a
			// listener behind on a long-lived signal.
			signal?.removeEventListener("abort", onAbort);
			resolve();
		}, ms);
		if (!signal) return;
		onAbort = () => {
			clearTimeout(t);
			reject(signal.reason || new Error("aborted"));
		};
		signal.addEventListener("abort", onAbort, { once: true });
	});
}
|
|
@@ -66,56 +66,102 @@ export default class Budget {
|
|
|
66
66
|
|
|
67
67
|
const cap = ceiling(contextSize);
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
// Per-scheme aggregation: counts and costs at each visibility tier
|
|
70
|
+
// plus the savings (premium) the model would unlock by demoting
|
|
71
|
+
// visible → summarized. All math derives from per-row vTokens
|
|
72
|
+
// (cost as visible) / sTokens (cost as summarized) / aTokens
|
|
73
|
+
// (= vTokens − sTokens, the promotion premium).
|
|
74
|
+
const byScheme = new Map();
|
|
70
75
|
let visibleCount = 0;
|
|
71
76
|
let premiumTokens = 0;
|
|
72
77
|
let summarizedCount = 0;
|
|
73
|
-
let
|
|
78
|
+
let _summarizedTokens = 0;
|
|
74
79
|
let floorTokens = 0;
|
|
80
|
+
let knownVTokens = 0;
|
|
81
|
+
let sourceVTokens = 0;
|
|
82
|
+
|
|
83
|
+
const schemeEntry = (s) => {
|
|
84
|
+
let e = byScheme.get(s);
|
|
85
|
+
if (!e) {
|
|
86
|
+
e = {
|
|
87
|
+
vis: 0,
|
|
88
|
+
sum: 0,
|
|
89
|
+
visTokens: 0, // current cost of visible entries
|
|
90
|
+
visIfSumTokens: 0, // sTokens of visible (what they'd cost demoted)
|
|
91
|
+
sumTokens: 0, // current cost of summarized entries
|
|
92
|
+
premium: 0, // savings from demoting visible → summarized
|
|
93
|
+
};
|
|
94
|
+
byScheme.set(s, e);
|
|
95
|
+
}
|
|
96
|
+
return e;
|
|
97
|
+
};
|
|
75
98
|
|
|
76
99
|
for (const r of rows) {
|
|
77
100
|
if (r.aTokens == null) continue;
|
|
78
101
|
const s = r.scheme || "file";
|
|
102
|
+
const entry = schemeEntry(s);
|
|
79
103
|
if (r.visibility === "visible") {
|
|
80
|
-
|
|
81
|
-
entry.
|
|
82
|
-
entry.
|
|
83
|
-
|
|
104
|
+
entry.vis += 1;
|
|
105
|
+
entry.visTokens += r.vTokens || 0;
|
|
106
|
+
entry.visIfSumTokens += r.sTokens || 0;
|
|
107
|
+
entry.premium += r.aTokens || 0;
|
|
84
108
|
visibleCount += 1;
|
|
85
109
|
premiumTokens += r.aTokens;
|
|
86
110
|
floorTokens += r.sTokens;
|
|
111
|
+
const v = r.vTokens || 0;
|
|
112
|
+
if (s === "known") knownVTokens += v;
|
|
113
|
+
else if (s === "prompt") sourceVTokens += v;
|
|
114
|
+
else if (r.category === "data") sourceVTokens += v;
|
|
87
115
|
} else if (r.visibility === "summarized") {
|
|
116
|
+
entry.sum += 1;
|
|
117
|
+
entry.sumTokens += r.sTokens || 0;
|
|
88
118
|
summarizedCount += 1;
|
|
89
|
-
|
|
119
|
+
_summarizedTokens += r.sTokens;
|
|
90
120
|
floorTokens += r.sTokens;
|
|
91
121
|
}
|
|
92
122
|
}
|
|
93
123
|
|
|
124
|
+
const fcrmDenom = knownVTokens + sourceVTokens;
|
|
125
|
+
const fcrmScore =
|
|
126
|
+
fcrmDenom > 0 ? (knownVTokens / fcrmDenom).toFixed(2) : "1.00";
|
|
127
|
+
|
|
94
128
|
const systemTokens = countTokens(systemPrompt || "");
|
|
95
129
|
const tokenUsage = floorTokens + premiumTokens + systemTokens;
|
|
96
130
|
const tokensFree = Math.max(0, cap - tokenUsage);
|
|
97
131
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
132
|
+
// Sort schemes by current cost descending — biggest-impact rows
|
|
133
|
+
// land at the top, so "what should I demote first?" reads
|
|
134
|
+
// straight off the table.
|
|
135
|
+
const schemeRows = [...byScheme.entries()]
|
|
136
|
+
.toSorted(
|
|
137
|
+
([, a], [, b]) =>
|
|
138
|
+
b.visTokens + b.sumTokens - (a.visTokens + a.sumTokens),
|
|
139
|
+
)
|
|
140
|
+
.map(([scheme, e]) => {
|
|
141
|
+
const cost = e.visTokens + e.sumTokens;
|
|
142
|
+
const ifAllSum = e.visIfSumTokens + e.sumTokens;
|
|
143
|
+
return `| ${scheme} | ${e.vis} | ${e.sum} | ${cost} | ${ifAllSum} | ${e.premium} |`;
|
|
103
144
|
});
|
|
104
145
|
|
|
105
|
-
const
|
|
106
|
-
|
|
146
|
+
const systemPct =
|
|
147
|
+
tokenUsage > 0 ? Math.round((systemTokens / tokenUsage) * 100) : 0;
|
|
107
148
|
|
|
108
149
|
const table = [
|
|
109
|
-
"| scheme |
|
|
110
|
-
"
|
|
150
|
+
"| scheme | vis | sum | cost | if-all-sum | premium |",
|
|
151
|
+
"|---|---|---|---|---|---|",
|
|
111
152
|
...schemeRows,
|
|
112
153
|
].join("\n");
|
|
113
154
|
|
|
114
|
-
const summarizedLine = `Summarized: ${summarizedCount} entries, ${summarizedTokens} tokens (${summarizedPct}% of budget).`;
|
|
115
155
|
const systemLine = `System: ${systemTokens} tokens (${systemPct}% of budget).`;
|
|
116
156
|
const totalLine = `Total: ${visibleCount} visible + ${summarizedCount} summarized entries; tokenUsage ${tokenUsage} / ceiling ${cap}. ${tokensFree} tokens free.`;
|
|
157
|
+
const legend = [
|
|
158
|
+
"Columns:",
|
|
159
|
+
"- cost: current cost of this scheme (vTokens for visible + sTokens for summarized)",
|
|
160
|
+
"- if-all-sum: cost if every entry of this scheme were demoted to summarized",
|
|
161
|
+
"- premium: savings from demoting visible → summarized (cost − if-all-sum)",
|
|
162
|
+
].join("\n");
|
|
117
163
|
|
|
118
|
-
return `${content}<budget tokenUsage="${tokenUsage}" tokensFree="${tokensFree}">\n${table}\n\n${
|
|
164
|
+
return `${content}<budget tokenUsage="${tokenUsage}" tokensFree="${tokensFree}" fcrmScore="${fcrmScore}">\n${table}\n\n${legend}\n${systemLine}\n${totalLine}\n</budget>\n`;
|
|
119
165
|
}
|
|
120
166
|
|
|
121
167
|
#check({ contextSize, messages, rows, lastPromptTokens = 0 }) {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
## <get
|
|
1
|
+
## <get path="[path/to/file]"/> - Promote an entry
|
|
2
2
|
|
|
3
|
-
Example: <get
|
|
4
|
-
<!-- Simplest form. Body
|
|
3
|
+
Example: <get path="src/app.js"/>
|
|
4
|
+
<!-- Simplest form. Path attribute. Body is reserved for content filter. -->
|
|
5
5
|
|
|
6
6
|
Example: <get path="known://*">auth</get>
|
|
7
7
|
<!-- Keyword recall: glob in path, search term in body. -->
|
|
@@ -23,7 +23,7 @@ const TURN_FROM_PATH = /^log:\/\/turn_(\d+)\/update\//;
|
|
|
23
23
|
|
|
24
24
|
function phaseForStatus(status) {
|
|
25
25
|
if (status == null) return 4;
|
|
26
|
-
if (status === 200) return
|
|
26
|
+
if (status === 200) return 7;
|
|
27
27
|
const last = status % 10;
|
|
28
28
|
return PHASES.includes(last) ? last : 4;
|
|
29
29
|
}
|
|
@@ -49,6 +49,10 @@ function latestUpdateStatusFromRows(rows) {
|
|
|
49
49
|
: r.attributes;
|
|
50
50
|
const status = attrs?.status;
|
|
51
51
|
if (status == null) continue;
|
|
52
|
+
// Rejected updates are written for the model's audit trail but are
|
|
53
|
+
// not navigation events — phase router skips them so the model
|
|
54
|
+
// stays in the stage it was already in.
|
|
55
|
+
if (attrs?.rejected) continue;
|
|
52
56
|
if (turn > bestTurn || (turn === bestTurn && status > bestStatus)) {
|
|
53
57
|
bestTurn = turn;
|
|
54
58
|
bestStatus = status;
|
|
@@ -66,6 +70,10 @@ export default class Instructions {
|
|
|
66
70
|
core.on("turn.started", this.onTurnStarted.bind(this));
|
|
67
71
|
core.hooks.instructions.resolveSystemPrompt =
|
|
68
72
|
this.resolveSystemPrompt.bind(this);
|
|
73
|
+
core.hooks.instructions.validateNavigation =
|
|
74
|
+
this.validateNavigation.bind(this);
|
|
75
|
+
core.hooks.instructions.findLatestSummary =
|
|
76
|
+
this.findLatestSummary.bind(this);
|
|
69
77
|
// Dynamic phase instructions live in the user message (above
|
|
70
78
|
// <prompt>) so the system message stays cache-stable across turns.
|
|
71
79
|
// Priority 250 puts us between <log> (100), <unknowns> (200),
|
|
@@ -85,6 +93,7 @@ export default class Instructions {
|
|
|
85
93
|
runId,
|
|
86
94
|
"instructions://system",
|
|
87
95
|
null,
|
|
96
|
+
{ includeAuditSchemes: true },
|
|
88
97
|
);
|
|
89
98
|
// The entry is always written by onTurnStarted before this runs.
|
|
90
99
|
const entry = entries[0];
|
|
@@ -102,6 +111,119 @@ export default class Instructions {
|
|
|
102
111
|
});
|
|
103
112
|
}
|
|
104
113
|
|
|
114
|
+
/**
|
|
115
|
+
* Reject illegal stage navigation. Two checks:
|
|
116
|
+
*
|
|
117
|
+
* 1. Forward skip — `nextPhase > currentPhase + 1`. Models advancing
|
|
118
|
+
* more than one stage at a time are jumping past required work.
|
|
119
|
+
* Returns and continuations (nextPhase ≤ currentPhase) always pass.
|
|
120
|
+
*
|
|
121
|
+
* 2. Deployment with prior prompts — any status landing the model in
|
|
122
|
+
* Deployment (phase 7) requires zero visible PRIOR prompts. State-
|
|
123
|
+
* property rule covering both entry (167) and continuation (177,
|
|
124
|
+
* 200) — once in Deployment, the model still can't claim it with
|
|
125
|
+
* undemoted prior prompts. The current (latest) prompt always
|
|
126
|
+
* stays visible since Deployment must act on it.
|
|
127
|
+
*
|
|
128
|
+
* On rejection the caller marks the update entry rejected (so the
|
|
129
|
+
* phase router skips it) and emits an error log; navigation rejections
|
|
130
|
+
* count as normal strikes.
|
|
131
|
+
*/
|
|
132
|
+
async validateNavigation(status, rummy) {
|
|
133
|
+
const currentPhase = await this.#getCurrentPhase(rummy);
|
|
134
|
+
const nextPhase = phaseForStatus(status);
|
|
135
|
+
if (nextPhase > currentPhase + 1) {
|
|
136
|
+
return { ok: false, reason: "Illegal navigation attempt" };
|
|
137
|
+
}
|
|
138
|
+
if (nextPhase === 7) {
|
|
139
|
+
const visible = await this.#countVisiblePriorPrompts(rummy);
|
|
140
|
+
if (visible > 0) {
|
|
141
|
+
return {
|
|
142
|
+
ok: false,
|
|
143
|
+
reason: `Illegal navigation attempt: ${visible} visible prior prompts`,
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
return { ok: true };
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
async #getCurrentPhase(rummy) {
|
|
151
|
+
// `**` (not `*`) for the slug position — update slugs are derived
|
|
152
|
+
// from the model's update body and can contain URL-encoded `/`
|
|
153
|
+
// characters (e.g. `known%3A//foo/bar` in a "ready for deployment"
|
|
154
|
+
// summary). Single `*` doesn't cross those embedded slashes and
|
|
155
|
+
// silently misses the prior turn's update.
|
|
156
|
+
const updates = await rummy.entries.getEntriesByPattern(
|
|
157
|
+
rummy.runId,
|
|
158
|
+
"log://*/update/**",
|
|
159
|
+
null,
|
|
160
|
+
);
|
|
161
|
+
let bestTurn = -1;
|
|
162
|
+
let bestStatus = null;
|
|
163
|
+
for (const e of updates) {
|
|
164
|
+
const m = TURN_FROM_PATH.exec(e.path);
|
|
165
|
+
if (!m) continue;
|
|
166
|
+
const turn = Number(m[1]);
|
|
167
|
+
if (turn >= rummy.sequence) continue;
|
|
168
|
+
const attrs =
|
|
169
|
+
typeof e.attributes === "string"
|
|
170
|
+
? JSON.parse(e.attributes)
|
|
171
|
+
: e.attributes;
|
|
172
|
+
if (attrs?.rejected) continue;
|
|
173
|
+
if (attrs?.status == null) continue;
|
|
174
|
+
if (turn > bestTurn) {
|
|
175
|
+
bestTurn = turn;
|
|
176
|
+
bestStatus = attrs.status;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
return phaseForStatus(bestStatus);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* Find the latest successful Deployment summary from a log-entry list.
|
|
184
|
+
* Matches `log://turn_N/update/...` entries with status=200 (successful
|
|
185
|
+
* Deployment completion) and returns the most recent. Used by
|
|
186
|
+
* AgentLoop telemetry to surface the model's latest delivery.
|
|
187
|
+
*
|
|
188
|
+
* Lives here, not in AgentLoop, because "what counts as a summary" is
|
|
189
|
+
* state-machine knowledge — phase 7's success status (200) is the
|
|
190
|
+
* definition. AgentLoop just consumes the result.
|
|
191
|
+
*/
|
|
192
|
+
findLatestSummary(logEntries) {
|
|
193
|
+
return logEntries
|
|
194
|
+
.filter((e) => {
|
|
195
|
+
if (!TURN_FROM_PATH.test(e.path)) return false;
|
|
196
|
+
const attrs =
|
|
197
|
+
typeof e.attributes === "string"
|
|
198
|
+
? JSON.parse(e.attributes)
|
|
199
|
+
: e.attributes;
|
|
200
|
+
return attrs?.status === 200;
|
|
201
|
+
})
|
|
202
|
+
.at(-1);
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
async #countVisiblePriorPrompts(rummy) {
|
|
206
|
+
const prompts = await rummy.entries.getEntriesByPattern(
|
|
207
|
+
rummy.runId,
|
|
208
|
+
"prompt://*",
|
|
209
|
+
null,
|
|
210
|
+
);
|
|
211
|
+
const visible = prompts.filter((p) => p.visibility === "visible");
|
|
212
|
+
if (visible.length === 0) return 0;
|
|
213
|
+
// Exclude the current (latest) prompt — that's what Deployment acts on.
|
|
214
|
+
// Demoting it would force the model to deliver on content it hid from
|
|
215
|
+
// itself. Only PRIOR prompts are subject to demote-before-Deployment.
|
|
216
|
+
let maxNum = -1;
|
|
217
|
+
for (const p of visible) {
|
|
218
|
+
const m = /^prompt:\/\/(\d+)$/.exec(p.path);
|
|
219
|
+
if (m && Number(m[1]) > maxNum) maxNum = Number(m[1]);
|
|
220
|
+
}
|
|
221
|
+
return visible.filter((p) => {
|
|
222
|
+
const m = /^prompt:\/\/(\d+)$/.exec(p.path);
|
|
223
|
+
return !m || Number(m[1]) !== maxNum;
|
|
224
|
+
}).length;
|
|
225
|
+
}
|
|
226
|
+
|
|
105
227
|
async onTurnStarted({ rummy }) {
|
|
106
228
|
const { entries: store, sequence: turn, runId } = rummy;
|
|
107
229
|
const runRow = await store.getRun(runId);
|
|
@@ -1,25 +1,33 @@
|
|
|
1
1
|
XML Commands Available: [%TOOLS%]
|
|
2
2
|
|
|
3
|
-
# FCRM
|
|
3
|
+
# FCRM State Machine
|
|
4
4
|
|
|
5
|
-
You are a Folksonomic Context Relevance Maximization (FCRM)
|
|
6
|
-
* Definition Stage: Register everything unknown about the prompt request.
|
|
7
|
-
* Discovery Stage: Discover, Distill, and Demote source entries to resolve unknowns into knowns.
|
|
8
|
-
* Deployment Stage: Act on the prompt.
|
|
5
|
+
You are a Folksonomic Context Relevance Maximization (FCRM) State Machine
|
|
9
6
|
|
|
10
|
-
|
|
7
|
+
YOU MUST perform the actions corresponding with your current stage:
|
|
8
|
+
* Definition Stage: Defining what's unknown into unknown:// entries
|
|
9
|
+
* Discovery Stage: Selecting an unknown, discovering relevant source entries and prompts, then distilling them into known:// entries
|
|
10
|
+
* Demotion Stage: Demoting the unknown entries, source entries, prompts, and log events after distillation is completed
|
|
11
|
+
* Deployment Stage: Acting on the current prompt
|
|
12
|
+
* Resolution Stage: Multi-prompt benchmark final `fcrmScore`
|
|
11
13
|
|
|
14
|
+
## Visibility States: Promote and Demote Visibility State to Control Context Relevance
|
|
15
|
+
* visible: Fully visible, but uses `tokens="N"` context budget
|
|
16
|
+
* summarized: Approximate, summary information, very small context budget penalty
|
|
17
|
+
* archived: Hidden from Context, but can be retrieved later with <get path="..."/>
|
|
18
|
+
|
|
19
|
+
Tip: You can leverage the FCRM's Visibility States with folksonomic taxonomies and tags to store and recall unlimited information.
|
|
12
20
|
Tip: The `tokens="N"` shows how much context memory is consumed if "visible". Entries only consume tokens when at "visible" visibility.
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
Tip: Log items are demotable just like context entries. Demote their visibility to "summarized" or "archived" as needed.
|
|
17
|
-
Tip: Entries and log events that have been archived are fully hidden (no memory used, no summary), but can be retrieved later by path.
|
|
21
|
+
|
|
22
|
+
Warning: YOU MUST NOT allow the `tokens="N"` sum of irrelevant source entries, prompts, or log events to exceed `tokensFree` budget.
|
|
23
|
+
Warning: YOU MUST NOT skip or avoid state machine steps or the Resolution Stage will fail.
|
|
18
24
|
|
|
19
25
|
# Commands
|
|
20
26
|
|
|
21
|
-
Warning: YOU MUST NOT use shell commands for project file operations. Project files are entries that require XML
|
|
27
|
+
Warning: YOU MUST NOT use shell commands for project file operations. Project files are entries that require XML Commands.
|
|
22
28
|
Example: <set path="src/file.txt">new file content</set>
|
|
23
29
|
Example: <get path="src/*.txt" preview/>
|
|
24
30
|
|
|
31
|
+
Tip: Project files, entries, prompts, and log events are all accessible with the XML Commands.
|
|
32
|
+
|
|
25
33
|
[%TOOLDOCS%]
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
Definition Stage: YOU MUST ONLY
|
|
1
|
+
# Definition Stage: YOU MUST ONLY create topical, taxonomized, and tagged unknown:// entries for missing information
|
|
2
2
|
|
|
3
|
-
YOU MUST create topical, taxonomized, and tagged unknown:// entries for missing information you need to discover.
|
|
4
3
|
Example: <set path="unknown://countries/france/capital" summary="countries,france,capital,geography,trivia">What is the capital of France?</set>
|
|
5
4
|
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
|
|
6
|
+
## Turn Termination:
|
|
7
|
+
* Definition Stage Completion: <update status="145">unknowns identified</update>
|
|
@@ -1,46 +1,38 @@
|
|
|
1
|
-
# Discovery Stage
|
|
1
|
+
# Discovery Stage: YOU MUST select an unknown:// entry, then discover its source entries and distill them into known:// entries
|
|
2
2
|
|
|
3
|
-
YOU MUST
|
|
4
|
-
YOU MUST
|
|
5
|
-
YOU MUST
|
|
6
|
-
YOU MUST
|
|
7
|
-
YOU MUST demote source entries to "summarized" after extracting and decomposing their relevant information into known:// entries.
|
|
8
|
-
YOU MUST demote the unknown:// entries to "summarized" after they are referenced or resolved by known:// entries.
|
|
9
|
-
YOU MUST demote all irrelevant source entries and log events to maximize FCRM.
|
|
10
|
-
Tip: Source entry "summarized" information is not reliable. Only place "visible" source entry information in known:// entries.
|
|
11
|
-
Tip: A "relevant" source entry that has been successfully distilled into known:// entries is no longer relevant.
|
|
12
|
-
Tip: Discover, Distill, and Demote per source entry, not globally, to maximize FCRM.
|
|
3
|
+
YOU MUST create topical, taxonomized, and tagged known:// entries to resolve the selected unknown:// entry.
|
|
4
|
+
YOU MUST reference all related source entries and prompts.
|
|
5
|
+
YOU MUST ONLY populate known entries with promoted information, NOT from your own training data or opinion.
|
|
6
|
+
YOU MUST immediately demote unknowns, source entries, prompts, and log events after they are distilled, irrelevant, or resolved.
|
|
13
7
|
|
|
14
|
-
|
|
8
|
+
Tip: Check the `tokens="N"` of the source entries against the `tokensFree="N"` constraint before promoting entries.
|
|
9
|
+
Tip: You can use <get path="..." preview/> to preview the potential `tokens="N"` budget impact of bulk operations.
|
|
10
|
+
Tip: You can use <get path="..." line="X" limit="Y"/> to read subsets of entries that would exceed your `tokensFree` budget.
|
|
15
11
|
|
|
16
|
-
|
|
12
|
+
## Example:
|
|
13
|
+
<get path="**" preview>capital</get>
|
|
14
|
+
<get path="prompt://3" line="1" limit="100"/>
|
|
17
15
|
|
|
18
|
-
<set path="trivia/capitals.csv" visibility="visible"/>
|
|
16
|
+
<set path="trivia/capitals.csv" visibility="visible"/>
|
|
19
17
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
The capital of France is Paris.
|
|
18
|
+
<set path="known://countries/france/capital" summary="countries,france,capital,geography,trivia">
|
|
19
|
+
# Capital of France
|
|
20
|
+
The capital of France is Paris.
|
|
24
21
|
|
|
25
|
-
|
|
22
|
+
{...}
|
|
26
23
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
24
|
+
## Related
|
|
25
|
+
[trivia question](prompt://3)
|
|
26
|
+
[unknown resolving](unknown://countries/france/capital)
|
|
27
|
+
[source entry](trivia/capitals.csv)
|
|
28
|
+
</set>
|
|
31
29
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
<set path="
|
|
35
|
-
<set path="
|
|
36
|
-
<set path="unknown://countries/poland/capital" summary="REJECTED: Irrelevant" visibility="summarized"/>
|
|
37
|
-
<set path="https://en.wikipedia.org/wiki/Paris,_Texas" summary="REJECTED: Wrong Paris" visibility="summarized"/>
|
|
38
|
-
<set path="log://turn_1/set/*" visibility="archived"/>
|
|
39
|
-
<set path="log://turn_1/get/trivia/*" visibility="archived"/>
|
|
40
|
-
<set path="log://turn_2/get/capital%20of%20france" visibility="archived"/>
|
|
30
|
+
<set path="prompt://3" visibility="summarized"/>
|
|
31
|
+
<set path="unknown://countries/france/capital" visibility="summarized"/>
|
|
32
|
+
<set path="unknown://countries/france/seat_of_government" summary="RESOLVED: Not necessary" visibility="summarized"/>
|
|
33
|
+
<set path="trivia/capitals.csv" visibility="summarized"/>
|
|
41
34
|
|
|
42
35
|
## Turn Termination (CHOOSE ONLY ONE):
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
Discovery Stage
|
|
46
|
-
Discovery Stage Completion: <update status="158">all unknowns (if any) referenced or resolved by known entries</update>
|
|
36
|
+
* Definition Stage Return: <update status="154">returning to Definition Stage</update>
|
|
37
|
+
* Discovery Stage Continuation: <update status="155">discovering and distilling more for the selected unknown</update>
|
|
38
|
+
* Discovery Stage Completion: <update status="156">this unknown's known entries written</update>
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Demotion Stage: YOU MUST demote all source entries, prompts, and log events that are now distilled or no longer relevant
|
|
2
|
+
|
|
3
|
+
Examples:
|
|
4
|
+
<set path="prompt://2" summary="All information distilled into knowns" visibility="summarized"/>
|
|
5
|
+
<set path="trivia/capitals.csv" visibility="summarized"/>
|
|
6
|
+
<set path="unknown://countries/france/capital" visibility="summarized"/>
|
|
7
|
+
<set path="unknown://countries/poland/capital" summary="REJECTED: Irrelevant" visibility="summarized"/>
|
|
8
|
+
<set path="https://en.wikipedia.org/wiki/Paris,_Texas" summary="REJECTED: Wrong Paris" visibility="summarized"/>
|
|
9
|
+
<set path="log://turn_1/**" visibility="archived"/>
|
|
10
|
+
<set path="log://turn_2/**" visibility="archived"/>
|
|
11
|
+
<set path="log://turn_3/set/**" visibility="archived"/>
|
|
12
|
+
<set path="log://turn_3/get/**" visibility="archived"/>
|
|
13
|
+
<set path="log://turn_3/search/**" visibility="archived"/>
|
|
14
|
+
|
|
15
|
+
Tip: You need room to think. Demote large prompts and source entries, then iterate them with <get path="..." line="N" limit="N"/> as necessary.
|
|
16
|
+
|
|
17
|
+
## Turn Termination (CHOOSE ONLY ONE):
|
|
18
|
+
* Definition Stage Return: <update status="164">returning to Definition Stage</update>
|
|
19
|
+
* Discovery Stage Return: <update status="165">more unknowns remain; returning to Discovery Stage</update>
|
|
20
|
+
* Demotion Stage Continuation: <update status="166">demoting more distilled or irrelevant entries, prompts, and log events</update>
|
|
21
|
+
* Demotion Stage Completion: <update status="167">all unknowns resolved and demoted; ready for Deployment Stage</update>
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Deployment Stage
|
|
2
|
+
|
|
3
|
+
YOU MUST act on the prompt.
|
|
4
|
+
|
|
5
|
+
## Turn Termination (CHOOSE ONLY ONE):
|
|
6
|
+
* Definition Stage Return: <update status="174">returning to Definition Stage</update>
|
|
7
|
+
* Discovery Stage Return: <update status="175">returning to Discovery Stage</update>
|
|
8
|
+
* Demotion Stage Return: <update status="176">returning to Demotion Stage</update>
|
|
9
|
+
* Deployment Stage Continuation: <update status="177">performing more actions</update>
|
|
10
|
+
* Deployment Stage Completion: <update status="200">{direct answer if prompt asked a question, summary of actions if not}</update>
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
YOU MUST act on the prompt.
|
|
2
|
-
|
|
3
|
-
Turn Termination (CHOOSE ONLY ONE):
|
|
4
|
-
|
|
5
|
-
Definition Stage Return: <update status="184">returning to definition stage</update>
|
|
6
|
-
Discovery Stage Return: <update status="185">returning to discovery stage</update>
|
|
7
|
-
Deployment Stage Continuation: <update status="188">performing more actions</update>
|
|
8
|
-
Deployment Stage Completion: <update status="200">summary of actions performed, or direct answer</update>
|
|
@@ -111,6 +111,7 @@ function renderContextTag(entry, demotedSet) {
|
|
|
111
111
|
const tag = entry.scheme ? entry.scheme : "file";
|
|
112
112
|
const turn = entry.source_turn ? ` turn="${entry.source_turn}"` : "";
|
|
113
113
|
const tokens = entry.aTokens != null ? ` tokens="${entry.aTokens}"` : "";
|
|
114
|
+
const lines = entry.vLines != null ? ` lines="${entry.vLines}"` : "";
|
|
114
115
|
const attrs =
|
|
115
116
|
typeof entry.attributes === "string"
|
|
116
117
|
? JSON.parse(entry.attributes)
|
|
@@ -140,7 +141,7 @@ function renderContextTag(entry, demotedSet) {
|
|
|
140
141
|
: "";
|
|
141
142
|
const summary = ` summary="${summaryText}"`;
|
|
142
143
|
|
|
143
|
-
const attrStr = `${turn}${status}${stateAttr}${outcomeAttr}${summary}${visibility}${tokens}${flag}`;
|
|
144
|
+
const attrStr = `${turn}${status}${stateAttr}${outcomeAttr}${summary}${visibility}${tokens}${lines}${flag}`;
|
|
144
145
|
if (entry.body) {
|
|
145
146
|
return `<${tag} path="${entry.path}"${attrStr}>${entry.body}</${tag}>`;
|
|
146
147
|
}
|
package/src/plugins/log/log.js
CHANGED
|
@@ -63,7 +63,14 @@ function renderLogTag(entry, rowsByPath) {
|
|
|
63
63
|
: entry.state
|
|
64
64
|
? stateToStatus(entry.state, entry.outcome)
|
|
65
65
|
: null;
|
|
66
|
-
|
|
66
|
+
// Prompts are uniformly status=200 — uniform value carries no signal
|
|
67
|
+
// and read as "settled, no action needed." Suppress so cultivation
|
|
68
|
+
// vocabulary (vary, demote, archive) applies to prompts the same
|
|
69
|
+
// way it applies to other log entries.
|
|
70
|
+
const status =
|
|
71
|
+
statusValue != null && action !== "prompt"
|
|
72
|
+
? ` status="${statusValue}"`
|
|
73
|
+
: "";
|
|
67
74
|
const outcomeAttr = entry.outcome ? ` outcome="${entry.outcome}"` : "";
|
|
68
75
|
// `tokens=` is the promotion premium (aTokens) of the thing this tag
|
|
69
76
|
// represents — what the model would free by demoting it. For actions
|
|
@@ -76,10 +83,20 @@ function renderLogTag(entry, rowsByPath) {
|
|
|
76
83
|
const isSlice = attrs?.lineStart != null;
|
|
77
84
|
const targetEntry = attrs?.path ? rowsByPath.get(attrs.path) : null;
|
|
78
85
|
let tokenSource = null;
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
86
|
+
let lineSource = null;
|
|
87
|
+
if (STREAM_NO_TOKENS.has(action)) {
|
|
88
|
+
tokenSource = null;
|
|
89
|
+
lineSource = null;
|
|
90
|
+
} else if (isSlice) {
|
|
91
|
+
tokenSource = entry.aTokens;
|
|
92
|
+
lineSource = entry.vLines;
|
|
93
|
+
} else if (targetEntry) {
|
|
94
|
+
tokenSource = targetEntry.aTokens;
|
|
95
|
+
lineSource = targetEntry.vLines;
|
|
96
|
+
} else {
|
|
97
|
+
tokenSource = entry.aTokens;
|
|
98
|
+
lineSource = entry.vLines;
|
|
99
|
+
}
|
|
83
100
|
const tokens = tokenSource != null ? ` tokens="${tokenSource}"` : "";
|
|
84
101
|
const summary =
|
|
85
102
|
typeof attrs?.summary === "string"
|
|
@@ -95,10 +112,13 @@ function renderLogTag(entry, rowsByPath) {
|
|
|
95
112
|
const target = attrs?.path ? ` target="${attrs.path}"` : "";
|
|
96
113
|
// Slice reads tag the log entry with lineStart/lineEnd/totalLines so
|
|
97
114
|
// the <get> tag surfaces `lines="a-b/total"` — a concrete handle for
|
|
98
|
-
// the model to re-issue or compare against another slice.
|
|
115
|
+
// the model to re-issue or compare against another slice. Non-slice
|
|
116
|
+
// entries surface the simple `lines="N"` from the projected body.
|
|
99
117
|
const lines = isSlice
|
|
100
118
|
? ` lines="${attrs.lineStart}-${attrs.lineEnd}/${attrs.totalLines}"`
|
|
101
|
-
:
|
|
119
|
+
: lineSource != null
|
|
120
|
+
? ` lines="${lineSource}"`
|
|
121
|
+
: "";
|
|
102
122
|
|
|
103
123
|
const attrStr = `${target}${status}${outcomeAttr}${query}${command}${summary}${lines}${tokens}`;
|
|
104
124
|
|
|
@@ -22,6 +22,12 @@ export default class Prompt {
|
|
|
22
22
|
const { entries: store, sequence: turn, runId, loopId } = rummy;
|
|
23
23
|
|
|
24
24
|
if (!isContinuation && prompt) {
|
|
25
|
+
// Each new prompt is the start of an independent state-machine
|
|
26
|
+
// cycle. Archive prior cycles' prompts and per-turn logs so they
|
|
27
|
+
// don't pollute Deployment-landing validation. Knowns, unknowns,
|
|
28
|
+
// and file entries persist (cross-cycle knowledge survives).
|
|
29
|
+
await store.archivePriorPromptArtifacts(runId, turn);
|
|
30
|
+
|
|
25
31
|
// prompt:// writable_by: ["plugin"] — explicit for clarity.
|
|
26
32
|
await store.set({
|
|
27
33
|
runId,
|
|
@@ -88,9 +94,9 @@ export default class Prompt {
|
|
|
88
94
|
? ` visibility="${promptEntry.visibility}"`
|
|
89
95
|
: "";
|
|
90
96
|
const tokens =
|
|
91
|
-
promptEntry?.aTokens != null
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
return `${content}<prompt mode="${mode}"${path} commands="${commands}"${warn}${reverted}${visibility}${tokens}>${body}</prompt>`;
|
|
97
|
+
promptEntry?.aTokens != null ? ` tokens="${promptEntry.aTokens}"` : "";
|
|
98
|
+
const lines =
|
|
99
|
+
promptEntry?.vLines != null ? ` lines="${promptEntry.vLines}"` : "";
|
|
100
|
+
return `${content}<prompt mode="${mode}"${path} commands="${commands}"${warn}${reverted}${visibility}${tokens}${lines}>${body}</prompt>`;
|
|
95
101
|
}
|
|
96
102
|
}
|