@possumtech/rummy 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +21 -0
- package/SPEC.md +84 -0
- package/package.json +8 -8
- package/scriptify/ask_run.js +77 -0
- package/src/agent/AgentLoop.js +30 -19
- package/src/agent/Entries.js +23 -2
- package/src/agent/ProjectAgent.js +2 -2
- package/src/agent/TurnExecutor.js +3 -0
- package/src/agent/known_queries.sql +1 -1
- package/src/agent/known_store.sql +5 -0
- package/src/agent/materializeContext.js +4 -2
- package/src/agent/runs.sql +19 -0
- package/src/agent/tokens.js +6 -0
- package/src/hooks/RummyContext.js +4 -0
- package/src/llm/LlmProvider.js +24 -21
- package/src/llm/errors.js +1 -1
- package/src/llm/retry.js +63 -0
- package/src/plugins/budget/budget.js +64 -18
- package/src/plugins/get/getDoc.md +3 -3
- package/src/plugins/instructions/instructions.js +123 -1
- package/src/plugins/instructions/instructions.md +20 -12
- package/src/plugins/instructions/instructions_104.md +4 -4
- package/src/plugins/instructions/instructions_105.md +28 -36
- package/src/plugins/instructions/instructions_106.md +21 -0
- package/src/plugins/instructions/instructions_107.md +10 -0
- package/src/plugins/instructions/instructions_108.md +0 -8
- package/src/plugins/known/known.js +2 -1
- package/src/plugins/log/log.js +27 -7
- package/src/plugins/prompt/prompt.js +10 -4
- package/src/plugins/rpc/rpc.js +11 -1
- package/src/plugins/update/update.js +18 -2
- package/src/plugins/yolo/yolo.js +192 -0
package/.env.example
CHANGED
|
@@ -36,6 +36,16 @@ RUMMY_RETENTION_DAYS=31
|
|
|
36
36
|
# Timeouts (ms)
|
|
37
37
|
RUMMY_RPC_TIMEOUT=30000
|
|
38
38
|
RUMMY_FETCH_TIMEOUT=300000
|
|
39
|
+
# Test harness — how long AuditClient waits for a single ask/act to reach
|
|
40
|
+
# terminal status. Sized for full-context ingest on large-window models.
|
|
41
|
+
RUMMY_TEST_RUN_TIMEOUT=3600000
|
|
42
|
+
|
|
43
|
+
# LLM retry policy: time-bounded exponential backoff with full jitter.
|
|
44
|
+
# DEADLINE is total wall-clock budget for an LLM call across all retries.
|
|
45
|
+
# MAX_BACKOFF caps each inter-attempt sleep so a long deadline doesn't
|
|
46
|
+
# yield 10-minute waits between attempts.
|
|
47
|
+
RUMMY_LLM_DEADLINE_MS=600000
|
|
48
|
+
RUMMY_LLM_MAX_BACKOFF_MS=30000
|
|
39
49
|
|
|
40
50
|
# Debug
|
|
41
51
|
# RUMMY_DEBUG=true
|
|
@@ -59,6 +69,17 @@ RUMMY_TOKEN_DIVISOR=2
|
|
|
59
69
|
# LLM temperature (0 = deterministic, 0.7 = creative). Client can override per-request.
|
|
60
70
|
RUMMY_TEMPERATURE=0.5
|
|
61
71
|
|
|
72
|
+
# Run Attribute Defaults
|
|
73
|
+
# Per-run attributes (passed in the run-creation set call) trump these.
|
|
74
|
+
# Strict "1" enables; unset / "0" / "" disables. Useful in profile env
|
|
75
|
+
# files (e.g. .env.tbench) layered via --env-file-if-exists.
|
|
76
|
+
#
|
|
77
|
+
# RUMMY_YOLO=1 # auto-accept every proposal (headless / CI / bench)
|
|
78
|
+
# RUMMY_NO_INTERACTION=1 # exclude <ask_user> from the tool list
|
|
79
|
+
# RUMMY_NO_WEB=1 # exclude <search> from the tool list
|
|
80
|
+
# RUMMY_NO_PROPOSALS=1 # exclude <ask_user>/<env>/<sh>
|
|
81
|
+
# RUMMY_NO_REPO=1 # skip rummy.repo scanning (file scan + overview)
|
|
82
|
+
|
|
62
83
|
# Provider Configuration
|
|
63
84
|
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
|
64
85
|
# OPENROUTER_API_KEY=
|
package/SPEC.md
CHANGED
|
@@ -524,6 +524,90 @@ Two mechanisms, operating at different layers:
|
|
|
524
524
|
status 403 and emits `error://`. The tool remains advertised; the
|
|
525
525
|
specific invocation is blocked.
|
|
526
526
|
|
|
527
|
+
### YOLO Mode {#yolo_mode}
|
|
528
|
+
|
|
529
|
+
When a run is started with the `yolo: true` attribute (parallel to
|
|
530
|
+
`noRepo`/`noWeb`/`noInteraction`/`noProposals`), the server fully
|
|
531
|
+
emulates a connected headless client: every proposal auto-accepts and
|
|
532
|
+
every sh/env command spawns server-side, streaming output to the
|
|
533
|
+
existing data-channel entries. No client involvement; no human
|
|
534
|
+
approval required.
|
|
535
|
+
|
|
536
|
+
**Plumbing.** The `yolo` attribute flows through the same path as
|
|
537
|
+
`noProposals`: `set run://` → `attributes.yolo` → AgentLoop loop config
|
|
538
|
+
JSON → RummyContext.yolo getter. The yolo plugin reads `rummy.yolo`
|
|
539
|
+
off the proposal-pending event payload and engages only when set.
|
|
540
|
+
|
|
541
|
+
**Behavior on yolo runs:**
|
|
542
|
+
|
|
543
|
+
1. **Auto-accept every proposal.** The yolo plugin listens to
|
|
544
|
+
`proposal.pending`, replicates AgentLoop.resolve()'s accept path
|
|
545
|
+
inline (`proposal.accepting` filter for veto, `proposal.content`
|
|
546
|
+
filter for body, `entries.set state="resolved"`,
|
|
547
|
+
`proposal.accepted` event for plugin side effects). The
|
|
548
|
+
`entries.waitForResolution` blocking call wakes immediately; the
|
|
549
|
+
loop continues without RPC roundtrip.
|
|
550
|
+
2. **Server-side sh/env execution.** For proposals on
|
|
551
|
+
`log://turn_N/sh/...` or `log://turn_N/env/...`, the yolo plugin
|
|
552
|
+
spawns the command in `projectRoot`, streams stdout/stderr to
|
|
553
|
+
`{dataBase}_1`/`{dataBase}_2` via `entries.set append=true`, and
|
|
554
|
+
transitions channels to terminal state on exit (200 / 500 mirror
|
|
555
|
+
of the existing `stream/completed` RPC contract). Done in-process,
|
|
556
|
+
no RPC roundtrip.
|
|
557
|
+
3. **Non-yolo runs unaffected.** Without `yolo: true`, the plugin's
|
|
558
|
+
`proposal.pending` listener returns early. Existing client-driven
|
|
559
|
+
resolution (rummy.nvim, AuditClient's file-edit auto-accept) works
|
|
560
|
+
exactly as before.
|
|
561
|
+
|
|
562
|
+
**Use cases.** E2E tests, benchmarks, CI, headless usage. The pattern
|
|
563
|
+
is opt-in per run; rummy.nvim does not set `yolo: true` because
|
|
564
|
+
human-in-the-loop control is the user-facing flow.
|
|
565
|
+
|
|
566
|
+
**Architectural placement.** The yolo plugin owns its flag handling
|
|
567
|
+
end-to-end — backbone files (TurnExecutor, AgentLoop) carry only the
|
|
568
|
+
plumbing for the attribute and the rummy-context payload enrichment
|
|
569
|
+
on `proposal.pending`. Feature logic stays in
|
|
570
|
+
`src/plugins/yolo/yolo.js`.
|
|
571
|
+
|
|
572
|
+
### Repo Overview {#repo_overview}
|
|
573
|
+
|
|
574
|
+
The `rummy.repo` plugin maintains a single `repo://overview` entry per
|
|
575
|
+
run, regenerated on every scan, that gives the model a navigable map
|
|
576
|
+
of the project. It is the entry-point for code-aware runs — files
|
|
577
|
+
themselves default to `archived` so a 5000-file repo doesn't dump
|
|
578
|
+
hundreds of thousands of tokens into context before any work happens.
|
|
579
|
+
|
|
580
|
+
**Entry contract.**
|
|
581
|
+
|
|
582
|
+
- Path: `repo://overview` (scheme `repo`, category `data`,
|
|
583
|
+
`model_visible: 1`)
|
|
584
|
+
- Visibility: `visible` (the navigation map is always in context)
|
|
585
|
+
- Body: a markdown structure containing the project root, file count,
|
|
586
|
+
root-level files, top-level directories with file counts,
|
|
587
|
+
active/readonly constraints, and a navigation legend showing the
|
|
588
|
+
promote/demote idioms.
|
|
589
|
+
- Visible projection: full body.
|
|
590
|
+
- Summarized projection: first ~12 lines + a truncation marker, so a
|
|
591
|
+
model can demote it once it has the layout memorized.
|
|
592
|
+
|
|
593
|
+
**File default visibility flip.**
|
|
594
|
+
|
|
595
|
+
`FileScanner` registers each tracked file at `archived` by default
|
|
596
|
+
(was `summarized`). Files with `constraint=active` still register at
|
|
597
|
+
`visible`. The model uses `repo://overview` to discover paths, then
|
|
598
|
+
promotes individual files via `<get path=...>` (visible, full body)
|
|
599
|
+
or whole subtrees via `<set path=".../**" visibility="summarized"/>`
|
|
600
|
+
(skim mode, symbols only).
|
|
601
|
+
|
|
602
|
+
**Bounded cost.** The overview body is constant-ish in size regardless
|
|
603
|
+
of repo size: root files capped, directory counts aggregated, no per-
|
|
604
|
+
file symbol enumeration. The token cost in context stays roughly
|
|
605
|
+
flat from a 30-file project to a 50,000-file monorepo.
|
|
606
|
+
|
|
607
|
+
**Disabled when noRepo.** Setting `noRepo: true` on a run skips the
|
|
608
|
+
scan entirely; no `repo://overview` is created and no file entries
|
|
609
|
+
are registered. Behaviour is identical to pre-plugin runs.
|
|
610
|
+
|
|
527
611
|
### Streaming Entries {#streaming_entries}
|
|
528
612
|
|
|
529
613
|
Producers that generate output over time (shell commands, web fetches,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@possumtech/rummy",
|
|
3
|
-
"version": "2.0.0",
|
|
3
|
+
"version": "2.0.1",
|
|
4
4
|
"description": "Relational Unknowns Memory Management Yoke",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"llm"
|
|
@@ -41,16 +41,16 @@
|
|
|
41
41
|
"test:intg": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --test-concurrency=1 --test-force-exit --test $(find test/integration -name '*.test.js')",
|
|
42
42
|
"test:e2e": "mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --test-concurrency=1 --test-force-exit --test-reporter=spec --test $(find test/e2e -name '*.test.js') 2>&1 | tee /tmp/rummy_test_diag/e2e_$(date +%Y%m%dT%H%M%S).log",
|
|
43
43
|
"test:live": "mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --test-concurrency=1 --test-force-exit --test-reporter=spec --test $(find test/live -name '*.test.js') 2>&1 | tee /tmp/rummy_test_diag/live_$(date +%Y%m%dT%H%M%S).log",
|
|
44
|
-
"test:clean": "rm -rf test/lme/results test/
|
|
45
|
-
"test:mab:get": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/download.js",
|
|
46
|
-
"test:mab": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/mab_$(date +%Y%m%dT%H%M%S).log' --",
|
|
47
|
-
"test:grok": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --env-file-if-exists=.env.grok test/mab/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/mab_grok_$(date +%Y%m%dT%H%M%S).log' --",
|
|
48
|
-
"test:mab:taxonomy": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/runner.js --split Conflict_Resolution --row 0 --no-questions 2>&1 | tee /tmp/rummy_test_diag/taxonomy_$(date +%Y%m%dT%H%M%S).log' --",
|
|
49
|
-
"test:grok:taxonomy": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --env-file-if-exists=.env.grok test/mab/runner.js --split Conflict_Resolution --row 0 --no-questions 2>&1 | tee /tmp/rummy_test_diag/taxonomy_grok_$(date +%Y%m%dT%H%M%S).log' --",
|
|
44
|
+
"test:clean": "rm -rf test/lme/results test/swe/results test/swe/repos test/tmp /tmp/rummy_test_diag /tmp/rummy_test_*.db /tmp/rummy_test_*.db-shm /tmp/rummy_test_*.db-wal && echo 'Test artifacts cleaned.'",
|
|
50
45
|
"test:lme:get": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/lme/download.js",
|
|
51
46
|
"test:lme": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/lme/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/lme_$(date +%Y%m%dT%H%M%S).log' --",
|
|
52
|
-
"test:
|
|
47
|
+
"test:swe:setup": "bash test/swe/setup.sh",
|
|
48
|
+
"test:swe:get": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/swe/download.js",
|
|
49
|
+
"test:swe": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/swe/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/swe_$(date +%Y%m%dT%H%M%S).log' --",
|
|
50
|
+
"test:swe:eval": "bash -c 'cd test/swe && source .venv/bin/activate && python evaluate.py \"$@\"' --",
|
|
51
|
+
"test:swe:baseline": "bash -c 'cd test/swe && source .venv/bin/activate && python baseline.py \"$@\"' --",
|
|
53
52
|
"test:lme:clean": "rm -rf test/lme/results/*/",
|
|
53
|
+
"test:swe:clean": "rm -rf test/swe/results/*/ test/swe/repos/",
|
|
54
54
|
"test:clear": "rm -rf /tmp/rummy_test_diag /tmp/rummy_test_*.db /tmp/rummy_test_*.db-shm /tmp/rummy_test_*.db-wal /tmp/rummy-stories-*",
|
|
55
55
|
"test:demo": "node --env-file-if-exists=.env.example --env-file-if-exists=.env bin/demo.js",
|
|
56
56
|
"test:spec": "node test/spec-coverage.js"
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inject a follow-up question into an existing LME run and print the answer.
|
|
3
|
+
*
|
|
4
|
+
* Usage: node scriptify/ask_run.js <db_path> <run_alias> "your question"
|
|
5
|
+
*
|
|
6
|
+
* Reuses the run's full ingested context so the model answers with all
|
|
7
|
+
* its accumulated knowledge. Used as a debugging tool to interrogate
|
|
8
|
+
* the model's reasoning after a benchmark completes.
|
|
9
|
+
*/
|
|
10
|
+
import TestDb from "../test/helpers/TestDb.js";
|
|
11
|
+
import TestServer from "../test/helpers/TestServer.js";
|
|
12
|
+
import RpcClient from "../test/helpers/RpcClient.js";
|
|
13
|
+
|
|
14
|
+
const [, , dbPath, alias, ...questionParts] = process.argv;
const question = questionParts.join(" ");

if (!dbPath || !alias || !question) {
	console.error(
		'Usage: node scriptify/ask_run.js <db_path> <run_alias> "your question"',
	);
	process.exit(1);
}

// Run statuses treated as terminal: once the run row carries one of these,
// polling stops and the latest entries are printed.
const TERMINAL = [200, 204, 413, 422, 499, 500];
// Delay between status polls, and the total time we are willing to wait.
const POLL_INTERVAL_MS = 500;
const RUN_TIMEOUT_MS = 600_000;

/**
 * Fetch the run row for an alias, failing with a readable error instead of
 * a TypeError on `row.status` when the alias does not exist.
 *
 * @param {object} db - Prepared-statement handle exposed by TestDb (`tdb.db`).
 * @param {string} runAlias - Alias of the run to look up.
 * @returns {Promise<object>} The run row.
 * @throws {Error} When no run with that alias exists.
 */
async function getRunRow(db, runAlias) {
	const row = await db.get_run_by_alias.get({ alias: runAlias });
	if (!row) throw new Error(`Run not found: ${runAlias}`);
	return row;
}

/**
 * Pick the entry with the highest turn number for a given scheme.
 *
 * @param {object[]} entries - Rows returned by `get_known_entries`.
 * @param {string} scheme - Scheme to select (e.g. "reasoning", "assistant").
 * @returns {object|undefined} The latest matching entry, if any.
 */
function latestByScheme(entries, scheme) {
	return entries
		.filter((e) => e.scheme === scheme)
		.toSorted((a, b) => b.turn - a.turn)[0];
}

const tdb = await TestDb.createAt(dbPath);
let tserver;
let client;

try {
	tserver = await TestServer.start(tdb);
	client = new RpcClient(tserver.url);
	await client.connect();
	await client.call("rummy/hello", {
		name: "ask_run",
		projectRoot: "/tmp/rummy-lme",
	});

	console.log(`Asking ${alias}: ${question}\n`);

	// Inject the question as an "ask" on the existing run with every
	// side-effecting capability switched off — this is a read-only probe.
	await client.call("set", {
		path: `run://${alias}`,
		body: question,
		attributes: {
			model: "grok",
			mode: "ask",
			noRepo: true,
			noInteraction: true,
			noWeb: true,
			noProposals: true,
		},
	});

	// Poll the run row until it reaches a terminal status or the deadline
	// passes.
	// NOTE(review): if the run still carries a terminal status from its
	// previous cycle when the `set` call returns, this exits immediately —
	// confirm `set` flips the status before resolving.
	const deadline = Date.now() + RUN_TIMEOUT_MS;
	let runRow = await getRunRow(tdb.db, alias);
	while (!TERMINAL.includes(runRow.status) && Date.now() < deadline) {
		await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
		runRow = await getRunRow(tdb.db, alias);
	}
	if (!TERMINAL.includes(runRow.status)) {
		// Surface the timeout instead of presenting stale entries as the answer.
		console.error(
			`Timed out after ${RUN_TIMEOUT_MS / 1000}s waiting for ${alias} (status ${runRow.status}); printing latest entries anyway.`,
		);
	}

	const entries = await tdb.db.get_known_entries.all({ run_id: runRow.id });
	const reasoning = latestByScheme(entries, "reasoning");
	const assistant = latestByScheme(entries, "assistant");

	if (reasoning) {
		console.log("=== REASONING ===");
		console.log(reasoning.body);
		console.log("");
	}
	if (assistant) {
		console.log("=== ANSWER ===");
		console.log(assistant.body);
	}
} finally {
	// Release resources in reverse order of acquisition, even when the
	// question failed part-way through.
	await client?.close();
	await tserver?.stop();
	await tdb.cleanup();
}
|
package/src/agent/AgentLoop.js
CHANGED
|
@@ -81,19 +81,7 @@ export default class AgentLoop {
|
|
|
81
81
|
const runUsage = await this.#db.get_run_usage.get({ run_id: runId });
|
|
82
82
|
const history = await this.#entries.getLog(runId);
|
|
83
83
|
const unknowns = await this.#entries.getUnknowns(runId);
|
|
84
|
-
const latestSummary = history
|
|
85
|
-
.filter((e) => {
|
|
86
|
-
// Updates are under the unified log namespace at
|
|
87
|
-
// log://turn_N/update/<slug>. Match by path pattern rather
|
|
88
|
-
// than scheme (scheme is now "log" for all log entries).
|
|
89
|
-
if (!/^log:\/\/turn_\d+\/update\//.test(e.path)) return false;
|
|
90
|
-
const attrs =
|
|
91
|
-
typeof e.attributes === "string"
|
|
92
|
-
? JSON.parse(e.attributes)
|
|
93
|
-
: e.attributes;
|
|
94
|
-
return attrs?.status === 200;
|
|
95
|
-
})
|
|
96
|
-
.at(-1);
|
|
84
|
+
const latestSummary = this.#hooks.instructions.findLatestSummary(history);
|
|
97
85
|
|
|
98
86
|
// Always emit complete telemetry. When we don't have a fresh turn
|
|
99
87
|
// result (abort/max-turns/crash), read the last turn's context
|
|
@@ -287,10 +275,13 @@ export default class AgentLoop {
|
|
|
287
275
|
if (!project)
|
|
288
276
|
throw new Error(msg("error.project_not_found", { projectId }));
|
|
289
277
|
|
|
290
|
-
const noRepo = options?.noRepo ===
|
|
291
|
-
const noInteraction =
|
|
292
|
-
|
|
293
|
-
const
|
|
278
|
+
const noRepo = options?.noRepo ?? process.env.RUMMY_NO_REPO === "1";
|
|
279
|
+
const noInteraction =
|
|
280
|
+
options?.noInteraction ?? process.env.RUMMY_NO_INTERACTION === "1";
|
|
281
|
+
const noWeb = options?.noWeb ?? process.env.RUMMY_NO_WEB === "1";
|
|
282
|
+
const noProposals =
|
|
283
|
+
options?.noProposals ?? process.env.RUMMY_NO_PROPOSALS === "1";
|
|
284
|
+
const yolo = options?.yolo ?? process.env.RUMMY_YOLO === "1";
|
|
294
285
|
const requestedModel = model;
|
|
295
286
|
|
|
296
287
|
const runInfo = await this.ensureRun(
|
|
@@ -314,6 +305,7 @@ export default class AgentLoop {
|
|
|
314
305
|
noInteraction,
|
|
315
306
|
noWeb,
|
|
316
307
|
noProposals,
|
|
308
|
+
yolo,
|
|
317
309
|
temperature: options?.temperature,
|
|
318
310
|
}),
|
|
319
311
|
});
|
|
@@ -367,6 +359,7 @@ export default class AgentLoop {
|
|
|
367
359
|
noInteraction = false,
|
|
368
360
|
noWeb = false,
|
|
369
361
|
noProposals = false,
|
|
362
|
+
yolo = false,
|
|
370
363
|
} = loopConfig;
|
|
371
364
|
|
|
372
365
|
let result;
|
|
@@ -384,6 +377,7 @@ export default class AgentLoop {
|
|
|
384
377
|
noInteraction,
|
|
385
378
|
noWeb,
|
|
386
379
|
noProposals,
|
|
380
|
+
yolo,
|
|
387
381
|
options: { ...options, temperature: loopConfig.temperature },
|
|
388
382
|
hook,
|
|
389
383
|
signal: controller.signal,
|
|
@@ -448,6 +442,7 @@ export default class AgentLoop {
|
|
|
448
442
|
noInteraction,
|
|
449
443
|
noWeb,
|
|
450
444
|
noProposals,
|
|
445
|
+
yolo,
|
|
451
446
|
options,
|
|
452
447
|
hook,
|
|
453
448
|
signal,
|
|
@@ -534,6 +529,7 @@ export default class AgentLoop {
|
|
|
534
529
|
noWeb,
|
|
535
530
|
noInteraction,
|
|
536
531
|
noProposals,
|
|
532
|
+
yolo,
|
|
537
533
|
toolSet,
|
|
538
534
|
contextSize,
|
|
539
535
|
options: { ...options, isContinuation: loopIteration > 1 },
|
|
@@ -751,7 +747,7 @@ export default class AgentLoop {
|
|
|
751
747
|
return { run: runAlias, status: runRow.status };
|
|
752
748
|
}
|
|
753
749
|
|
|
754
|
-
async inject(runAlias, message, mode) {
|
|
750
|
+
async inject(runAlias, message, mode, options = {}) {
|
|
755
751
|
if (mode !== "ask" && mode !== "act") {
|
|
756
752
|
throw new Error(
|
|
757
753
|
`inject: mode is required and must be "ask" or "act" (got ${JSON.stringify(mode)})`,
|
|
@@ -761,6 +757,14 @@ export default class AgentLoop {
|
|
|
761
757
|
if (!runRow)
|
|
762
758
|
throw new Error(msg("error.run_not_found", { runId: runAlias }));
|
|
763
759
|
|
|
760
|
+
const noRepo = options?.noRepo ?? process.env.RUMMY_NO_REPO === "1";
|
|
761
|
+
const noInteraction =
|
|
762
|
+
options?.noInteraction ?? process.env.RUMMY_NO_INTERACTION === "1";
|
|
763
|
+
const noWeb = options?.noWeb ?? process.env.RUMMY_NO_WEB === "1";
|
|
764
|
+
const noProposals =
|
|
765
|
+
options?.noProposals ?? process.env.RUMMY_NO_PROPOSALS === "1";
|
|
766
|
+
const yolo = options?.yolo ?? process.env.RUMMY_YOLO === "1";
|
|
767
|
+
|
|
764
768
|
const nextTurn = runRow.next_turn;
|
|
765
769
|
|
|
766
770
|
await this.#entries.set({
|
|
@@ -784,7 +788,14 @@ export default class AgentLoop {
|
|
|
784
788
|
mode,
|
|
785
789
|
model: runRow.model,
|
|
786
790
|
prompt: message,
|
|
787
|
-
config:
|
|
791
|
+
config: JSON.stringify({
|
|
792
|
+
noRepo,
|
|
793
|
+
noInteraction,
|
|
794
|
+
noWeb,
|
|
795
|
+
noProposals,
|
|
796
|
+
yolo,
|
|
797
|
+
temperature: options?.temperature,
|
|
798
|
+
}),
|
|
788
799
|
});
|
|
789
800
|
|
|
790
801
|
const projectId = runRow.project_id;
|
package/src/agent/Entries.js
CHANGED
|
@@ -38,6 +38,7 @@ export default class Entries {
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
static scheme(path) {
|
|
41
|
+
if (!path) return null;
|
|
41
42
|
const idx = path.indexOf("://");
|
|
42
43
|
return idx > 0 ? path.slice(0, idx) : null;
|
|
43
44
|
}
|
|
@@ -474,7 +475,7 @@ export default class Entries {
|
|
|
474
475
|
runId,
|
|
475
476
|
path,
|
|
476
477
|
body = null,
|
|
477
|
-
{ limit = null, offset = null } = {},
|
|
478
|
+
{ limit = null, offset = null, includeAuditSchemes = false } = {},
|
|
478
479
|
) {
|
|
479
480
|
return this.#db.get_entries_by_pattern.all({
|
|
480
481
|
run_id: runId,
|
|
@@ -482,6 +483,7 @@ export default class Entries {
|
|
|
482
483
|
body: body ? body : null,
|
|
483
484
|
limit,
|
|
484
485
|
offset,
|
|
486
|
+
include_audit_schemes: includeAuditSchemes ? 1 : null,
|
|
485
487
|
});
|
|
486
488
|
}
|
|
487
489
|
|
|
@@ -494,7 +496,19 @@ export default class Entries {
|
|
|
494
496
|
}
|
|
495
497
|
}
|
|
496
498
|
|
|
497
|
-
waitForResolution(runId, path) {
|
|
499
|
+
async waitForResolution(runId, path) {
|
|
500
|
+
// Check current state first — if a synchronous in-process resolver
|
|
501
|
+
// (yolo) flipped the entry to terminal during proposal.pending,
|
|
502
|
+
// the state change has already happened and no future drain will
|
|
503
|
+
// fire. Without this guard, in-process resolvers would deadlock.
|
|
504
|
+
const current = await this.getState(runId, path);
|
|
505
|
+
if (
|
|
506
|
+
current &&
|
|
507
|
+
current.state !== "proposed" &&
|
|
508
|
+
current.state !== "streaming"
|
|
509
|
+
) {
|
|
510
|
+
return;
|
|
511
|
+
}
|
|
498
512
|
const normalized = Entries.normalizePath(path);
|
|
499
513
|
const key = `${runId}:${normalized}`;
|
|
500
514
|
return new Promise((resolve) => {
|
|
@@ -559,6 +573,13 @@ export default class Entries {
|
|
|
559
573
|
});
|
|
560
574
|
}
|
|
561
575
|
|
|
576
|
+
async archivePriorPromptArtifacts(runId, currentTurn) {
|
|
577
|
+
await this.#db.archive_prior_prompt_artifacts.run({
|
|
578
|
+
run_id: runId,
|
|
579
|
+
current_turn: currentTurn,
|
|
580
|
+
});
|
|
581
|
+
}
|
|
582
|
+
|
|
562
583
|
/**
|
|
563
584
|
* Demote all promoted entries for a run on a given turn. Returns the
|
|
564
585
|
* affected rows (path, tokens) so callers can summarize.
|
|
@@ -83,8 +83,8 @@ export default class ProjectAgent {
|
|
|
83
83
|
return this.#agentLoop.resolve(run, resolution);
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
-
async inject(run, message, mode) {
|
|
87
|
-
return this.#agentLoop.inject(run, message, mode);
|
|
86
|
+
async inject(run, message, mode, options = {}) {
|
|
87
|
+
return this.#agentLoop.inject(run, message, mode, options);
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
// Synchronously create (or fork) a run row and return the alias.
|
|
@@ -30,6 +30,7 @@ export default class TurnExecutor {
|
|
|
30
30
|
noWeb,
|
|
31
31
|
noInteraction,
|
|
32
32
|
noProposals,
|
|
33
|
+
yolo,
|
|
33
34
|
toolSet,
|
|
34
35
|
contextSize,
|
|
35
36
|
options,
|
|
@@ -70,6 +71,7 @@ export default class TurnExecutor {
|
|
|
70
71
|
noWeb,
|
|
71
72
|
noInteraction,
|
|
72
73
|
noProposals,
|
|
74
|
+
yolo,
|
|
73
75
|
toolSet,
|
|
74
76
|
contextSize,
|
|
75
77
|
systemPrompt: null,
|
|
@@ -332,6 +334,7 @@ export default class TurnExecutor {
|
|
|
332
334
|
projectId,
|
|
333
335
|
run: currentAlias,
|
|
334
336
|
proposed: [p],
|
|
337
|
+
rummy,
|
|
335
338
|
});
|
|
336
339
|
await this.#entries.waitForResolution(currentRunId, p.path);
|
|
337
340
|
const resolved = await this.#entries.getState(currentRunId, p.path);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
-- PREP: get_known_entries
|
|
2
2
|
SELECT
|
|
3
3
|
path, scheme, state, outcome, visibility, body, turn, hash
|
|
4
|
-
, attributes, countTokens(body) AS tokens, scope
|
|
4
|
+
, attributes, countTokens(body) AS tokens, scope, loop_id
|
|
5
5
|
FROM known_entries
|
|
6
6
|
WHERE run_id = :run_id
|
|
7
7
|
ORDER BY path;
|
|
@@ -223,15 +223,20 @@ WHERE run_id = :run_id AND entry_id IN (
|
|
|
223
223
|
);
|
|
224
224
|
|
|
225
225
|
-- PREP: get_entries_by_pattern
|
|
226
|
+
-- Default excludes audit schemes (system://, reasoning://, model://, user://,
|
|
227
|
+
-- assistant://, content://, instructions://) so model-facing tools never leak
|
|
228
|
+
-- internal entries. Internal callers that need them pass include_audit_schemes=1.
|
|
226
229
|
SELECT
|
|
227
230
|
e.path, e.body, e.scheme, rv.state, rv.outcome, rv.visibility
|
|
228
231
|
, countTokens(e.body) AS tokens, e.attributes
|
|
229
232
|
FROM run_views AS rv
|
|
230
233
|
JOIN entries AS e ON e.id = rv.entry_id
|
|
234
|
+
JOIN schemes AS s ON s.name = COALESCE(e.scheme, 'file')
|
|
231
235
|
WHERE
|
|
232
236
|
rv.run_id = :run_id
|
|
233
237
|
AND hedmatch(:path, e.path)
|
|
234
238
|
AND (:body IS NULL OR hedsearch(:body, e.body))
|
|
239
|
+
AND (:include_audit_schemes IS NOT NULL OR s.model_visible = 1)
|
|
235
240
|
ORDER BY e.path
|
|
236
241
|
LIMIT
|
|
237
242
|
COALESCE(:limit, -1)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import ContextAssembler from "./ContextAssembler.js";
|
|
2
|
-
import { countTokens } from "./tokens.js";
|
|
2
|
+
import { countLines, countTokens } from "./tokens.js";
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* Rebuild turn_context from v_model_context, then assemble messages.
|
|
@@ -54,7 +54,8 @@ export default async function materializeContext({
|
|
|
54
54
|
});
|
|
55
55
|
const vTokens = countTokens(visibleProjection);
|
|
56
56
|
const sTokens = countTokens(summarizedProjection);
|
|
57
|
-
|
|
57
|
+
const vLines = countLines(visibleProjection);
|
|
58
|
+
tokenAccounting.set(row.path, { vTokens, sTokens, vLines });
|
|
58
59
|
const projectedBody =
|
|
59
60
|
row.visibility === "visible" ? visibleProjection : summarizedProjection;
|
|
60
61
|
await db.insert_turn_context.run({
|
|
@@ -79,6 +80,7 @@ export default async function materializeContext({
|
|
|
79
80
|
row.vTokens = t.vTokens;
|
|
80
81
|
row.sTokens = t.sTokens;
|
|
81
82
|
row.aTokens = t.vTokens - t.sTokens;
|
|
83
|
+
row.vLines = t.vLines;
|
|
82
84
|
}
|
|
83
85
|
const lastCtx = await db.get_last_context_tokens.get({ run_id: runId });
|
|
84
86
|
// First turn of a new run has no prior context.
|
package/src/agent/runs.sql
CHANGED
|
@@ -92,6 +92,25 @@ SELECT
|
|
|
92
92
|
FROM run_views
|
|
93
93
|
WHERE run_id = :parent_run_id;
|
|
94
94
|
|
|
95
|
+
-- PREP: archive_prior_prompt_artifacts
|
|
96
|
+
-- Multi-prompt sessions accumulate artifacts from prior prompt cycles
|
|
97
|
+
-- (consumed prompts, their per-turn logs). These pollute the validator's
|
|
98
|
+
-- prior-prompts check on subsequent Deployment landings. Archive all
|
|
99
|
+
-- prior prompt:// entries and prior-turn log:// entries when a new
|
|
100
|
+
-- prompt arrives. Knowns/unknowns/file entries are untouched — they
|
|
101
|
+
-- carry persistent knowledge across cycles. The loop_id IS NULL clause
|
|
102
|
+
-- catches forked-in views from a parent run (per fork_known_entries),
|
|
103
|
+
-- which represent prior cycles' artifacts inherited into a clean child.
|
|
104
|
+
UPDATE run_views
|
|
105
|
+
SET visibility = 'archived'
|
|
106
|
+
WHERE run_id = :run_id
|
|
107
|
+
AND visibility != 'archived'
|
|
108
|
+
AND (turn < :current_turn OR loop_id IS NULL)
|
|
109
|
+
AND entry_id IN (
|
|
110
|
+
SELECT id FROM entries
|
|
111
|
+
WHERE scheme IN ('prompt', 'log')
|
|
112
|
+
);
|
|
113
|
+
|
|
95
114
|
-- PREP: get_active_runs
|
|
96
115
|
SELECT r.id
|
|
97
116
|
FROM runs AS r
|
package/src/agent/tokens.js
CHANGED
|
@@ -12,3 +12,9 @@ export function countTokens(text) {
|
|
|
12
12
|
if (!text) return 0;
|
|
13
13
|
return Math.ceil(text.length / DIVISOR);
|
|
14
14
|
}
|
|
15
|
+
|
|
16
|
+
/**
 * Count the lines in a piece of text.
 *
 * A trailing newline terminates the last line rather than opening a new,
 * empty one, so "a\nb" and "a\nb\n" both count as 2 lines.
 *
 * @param {string} text - Text to measure; any falsy value counts as 0 lines.
 * @returns {number} Number of lines in `text`.
 */
export function countLines(text) {
	if (!text) return 0;
	// Scan with indexOf rather than match(/\n/g) so no array with one element
	// per newline is allocated for large bodies.
	let newlines = 0;
	for (let i = text.indexOf("\n"); i !== -1; i = text.indexOf("\n", i + 1)) {
		newlines += 1;
	}
	return text.endsWith("\n") ? newlines : newlines + 1;
}
|
package/src/llm/LlmProvider.js
CHANGED
|
@@ -4,8 +4,12 @@ import {
|
|
|
4
4
|
isContextExceededMessage,
|
|
5
5
|
isTransientMessage,
|
|
6
6
|
} from "./errors.js";
|
|
7
|
+
import { retryWithBackoff } from "./retry.js";
|
|
7
8
|
|
|
8
|
-
const
|
|
9
|
+
const DEADLINE_MS = Number(process.env.RUMMY_LLM_DEADLINE_MS);
|
|
10
|
+
const MAX_BACKOFF_MS = Number(process.env.RUMMY_LLM_MAX_BACKOFF_MS);
|
|
11
|
+
if (!DEADLINE_MS) throw new Error("RUMMY_LLM_DEADLINE_MS must be set");
|
|
12
|
+
if (!MAX_BACKOFF_MS) throw new Error("RUMMY_LLM_MAX_BACKOFF_MS must be set");
|
|
9
13
|
|
|
10
14
|
/**
|
|
11
15
|
* Thin dispatcher over the LLM provider registry (`hooks.llm.providers`).
|
|
@@ -55,27 +59,26 @@ export default class LlmProvider {
|
|
|
55
59
|
);
|
|
56
60
|
}
|
|
57
61
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
}
|
|
77
|
-
throw err;
|
|
62
|
+
try {
|
|
63
|
+
return await retryWithBackoff(
|
|
64
|
+
() => provider.completion(messages, resolvedModel, resolvedOptions),
|
|
65
|
+
{
|
|
66
|
+
signal: options.signal,
|
|
67
|
+
deadlineMs: DEADLINE_MS,
|
|
68
|
+
maxDelayMs: MAX_BACKOFF_MS,
|
|
69
|
+
isRetryable: (err) => isTransientMessage(err.message),
|
|
70
|
+
onRetry: (err, attempt, delayMs, remainingMs) => {
|
|
71
|
+
console.error(
|
|
72
|
+
`[LLM] transient failure on ${provider.name} attempt ${attempt}: ${err.message}; retrying in ${delayMs}ms (${Math.round(remainingMs / 1000)}s deadline remaining)`,
|
|
73
|
+
);
|
|
74
|
+
},
|
|
75
|
+
},
|
|
76
|
+
);
|
|
77
|
+
} catch (err) {
|
|
78
|
+
if (isContextExceededMessage(err.message)) {
|
|
79
|
+
throw new ContextExceededError(err.message, { cause: err });
|
|
78
80
|
}
|
|
81
|
+
throw err;
|
|
79
82
|
}
|
|
80
83
|
}
|
|
81
84
|
|
package/src/llm/errors.js
CHANGED
|
@@ -14,7 +14,7 @@ export function isContextExceededMessage(message) {
|
|
|
14
14
|
}
|
|
15
15
|
|
|
16
16
|
const TRANSIENT_PATTERN =
|
|
17
|
-
/\b(503|429|timeout|ECONNREFUSED|ECONNRESET|
|
|
17
|
+
/\b(500|502|503|504|429|timeout|TimeoutError|aborted|unavailable|ECONNREFUSED|ECONNRESET|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT|EPIPE|ECONNABORTED|fetch failed)\b/i;
|
|
18
18
|
|
|
19
19
|
export function isTransientMessage(message) {
|
|
20
20
|
return TRANSIENT_PATTERN.test(String(message));
|