@possumtech/rummy 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -36,6 +36,16 @@ RUMMY_RETENTION_DAYS=31
36
36
  # Timeouts (ms)
37
37
  RUMMY_RPC_TIMEOUT=30000
38
38
  RUMMY_FETCH_TIMEOUT=300000
39
+ # Test harness — how long AuditClient waits for a single ask/act to reach
40
+ # terminal status. Sized for full-context ingest on large-window models.
41
+ RUMMY_TEST_RUN_TIMEOUT=3600000
42
+
43
+ # LLM retry policy: time-bounded exponential backoff with full jitter.
44
+ # DEADLINE is the total wall-clock budget (ms) for an LLM call across all retries.
45
+ # MAX_BACKOFF caps each inter-attempt sleep (ms) so a long deadline doesn't
46
+ # yield 10-minute waits between attempts. Both are required: LlmProvider throws at load if either is unset.
47
+ RUMMY_LLM_DEADLINE_MS=600000
48
+ RUMMY_LLM_MAX_BACKOFF_MS=30000
39
49
 
40
50
  # Debug
41
51
  # RUMMY_DEBUG=true
@@ -59,6 +69,17 @@ RUMMY_TOKEN_DIVISOR=2
59
69
  # LLM temperature (0 = deterministic, 0.7 = creative). Client can override per-request.
60
70
  RUMMY_TEMPERATURE=0.5
61
71
 
72
+ # Run Attribute Defaults
73
+ # Per-run attributes (passed in the run-creation set call) trump these.
74
+ # Only the exact value "1" enables; unset or any other value ("0", "", "true") disables. Useful in profile env
75
+ # files (e.g. .env.tbench) layered via --env-file-if-exists.
76
+ #
77
+ # RUMMY_YOLO=1 # auto-accept every proposal (headless / CI / bench)
78
+ # RUMMY_NO_INTERACTION=1 # exclude <ask_user> from the tool list
79
+ # RUMMY_NO_WEB=1 # exclude <search> from the tool list
80
+ # RUMMY_NO_PROPOSALS=1 # exclude <ask_user>/<env>/<sh>
81
+ # RUMMY_NO_REPO=1 # skip rummy.repo scanning (file scan + overview)
82
+
62
83
  # Provider Configuration
63
84
  OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
64
85
  # OPENROUTER_API_KEY=
package/SPEC.md CHANGED
@@ -524,6 +524,90 @@ Two mechanisms, operating at different layers:
524
524
  status 403 and emits `error://`. The tool remains advertised; the
525
525
  specific invocation is blocked.
526
526
 
527
+ ### YOLO Mode {#yolo_mode}
528
+
529
+ When a run is started with the `yolo: true` attribute (parallel to
530
+ `noRepo`/`noWeb`/`noInteraction`/`noProposals`), the server fully
531
+ emulates a connected headless client: every proposal auto-accepts and
532
+ every sh/env command spawns server-side, streaming output to the
533
+ existing data-channel entries. No client involvement; no human
534
+ approval required.
535
+
536
+ **Plumbing.** The `yolo` attribute flows through the same path as
537
+ `noProposals`: `set run://` → `attributes.yolo` → AgentLoop loop config
538
+ JSON → RummyContext.yolo getter. The yolo plugin reads `rummy.yolo`
539
+ off the proposal-pending event payload and engages only when set.
540
+
541
+ **Behavior on yolo runs:**
542
+
543
+ 1. **Auto-accept every proposal.** The yolo plugin listens to
544
+ `proposal.pending` and replicates AgentLoop.resolve()'s accept path
545
+ inline (`proposal.accepting` filter for veto, `proposal.content`
546
+ filter for body, `entries.set state="resolved"`,
547
+ `proposal.accepted` event for plugin side effects). The
548
+ `entries.waitForResolution` blocking call wakes immediately; the
549
+ loop continues without RPC roundtrip.
550
+ 2. **Server-side sh/env execution.** For proposals on
551
+ `log://turn_N/sh/...` or `log://turn_N/env/...`, the yolo plugin
552
+ spawns the command in `projectRoot`, streams stdout/stderr to
553
+ `{dataBase}_1`/`{dataBase}_2` via `entries.set append=true`, and
554
+ transitions the channels to a terminal state on exit (200 / 500, mirroring
555
+ the existing `stream/completed` RPC contract). This is done in-process, with
556
+ no RPC roundtrip.
557
+ 3. **Non-yolo runs unaffected.** Without `yolo: true`, the plugin's
558
+ `proposal.pending` listener returns early. Existing client-driven
559
+ resolution (rummy.nvim, AuditClient's file-edit auto-accept) works
560
+ exactly as before.
561
+
562
+ **Use cases.** E2E tests, benchmarks, CI, headless usage. The pattern
563
+ is opt-in per run; rummy.nvim does not set `yolo: true` because
564
+ human-in-the-loop control is the user-facing flow.
565
+
566
+ **Architectural placement.** The yolo plugin owns its flag handling
567
+ end-to-end — backbone files (TurnExecutor, AgentLoop) carry only the
568
+ plumbing for the attribute and the rummy-context payload enrichment
569
+ on `proposal.pending`. Feature logic stays in
570
+ `src/plugins/yolo/yolo.js`.
571
+
572
+ ### Repo Overview {#repo_overview}
573
+
574
+ The `rummy.repo` plugin maintains a single `repo://overview` entry per
575
+ run, regenerated on every scan, that gives the model a navigable map
576
+ of the project. It is the entry-point for code-aware runs — files
577
+ themselves default to `archived` so a 5000-file repo doesn't dump
578
+ hundreds of thousands of tokens into context before any work happens.
579
+
580
+ **Entry contract.**
581
+
582
+ - Path: `repo://overview` (scheme `repo`, category `data`,
583
+ `model_visible: 1`)
584
+ - Visibility: `visible` (the navigation map is always in context)
585
+ - Body: a markdown structure containing the project root, file count,
586
+ root-level files, top-level directories with file counts,
587
+ active/readonly constraints, and a navigation legend showing the
588
+ promote/demote idioms.
589
+ - Visible projection: full body.
590
+ - Summarized projection: first ~12 lines + a truncation marker, so a
591
+ model can demote it once it has the layout memorized.
592
+
593
+ **File default visibility flip.**
594
+
595
+ `FileScanner` registers each tracked file at `archived` by default
596
+ (was `summarized`). Files with `constraint=active` still register at
597
+ `visible`. The model uses `repo://overview` to discover paths, then
598
+ promotes individual files via `<get path=...>` (visible, full body)
599
+ or whole subtrees via `<set path=".../**" visibility="summarized"/>`
600
+ (skim mode, symbols only).
601
+
602
+ **Bounded cost.** The overview body is roughly constant in size regardless
603
+ of repo size: root files capped, directory counts aggregated, no
604
+ per-file symbol enumeration. The token cost in context stays roughly
605
+ flat from a 30-file project to a 50,000-file monorepo.
606
+
607
+ **Disabled when noRepo.** Setting `noRepo: true` on a run skips the
608
+ scan entirely; no `repo://overview` is created and no file entries
609
+ are registered. Behaviour identical to pre-plugin runs.
610
+
527
611
  ### Streaming Entries {#streaming_entries}
528
612
 
529
613
  Producers that generate output over time (shell commands, web fetches,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@possumtech/rummy",
3
- "version": "2.0.0",
3
+ "version": "2.0.1",
4
4
  "description": "Relational Unknowns Memory Management Yoke",
5
5
  "keywords": [
6
6
  "llm"
@@ -41,16 +41,16 @@
41
41
  "test:intg": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --test-concurrency=1 --test-force-exit --test $(find test/integration -name '*.test.js')",
42
42
  "test:e2e": "mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --test-concurrency=1 --test-force-exit --test-reporter=spec --test $(find test/e2e -name '*.test.js') 2>&1 | tee /tmp/rummy_test_diag/e2e_$(date +%Y%m%dT%H%M%S).log",
43
43
  "test:live": "mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --test-concurrency=1 --test-force-exit --test-reporter=spec --test $(find test/live -name '*.test.js') 2>&1 | tee /tmp/rummy_test_diag/live_$(date +%Y%m%dT%H%M%S).log",
44
- "test:clean": "rm -rf test/lme/results test/mab/results test/tmp /tmp/rummy_test_diag /tmp/rummy_test_*.db /tmp/rummy_test_*.db-shm /tmp/rummy_test_*.db-wal && echo 'Test artifacts cleaned.'",
45
- "test:mab:get": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/download.js",
46
- "test:mab": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/mab_$(date +%Y%m%dT%H%M%S).log' --",
47
- "test:grok": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --env-file-if-exists=.env.grok test/mab/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/mab_grok_$(date +%Y%m%dT%H%M%S).log' --",
48
- "test:mab:taxonomy": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/runner.js --split Conflict_Resolution --row 0 --no-questions 2>&1 | tee /tmp/rummy_test_diag/taxonomy_$(date +%Y%m%dT%H%M%S).log' --",
49
- "test:grok:taxonomy": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --env-file-if-exists=.env.grok test/mab/runner.js --split Conflict_Resolution --row 0 --no-questions 2>&1 | tee /tmp/rummy_test_diag/taxonomy_grok_$(date +%Y%m%dT%H%M%S).log' --",
44
+ "test:clean": "rm -rf test/lme/results test/swe/results test/swe/repos test/tmp /tmp/rummy_test_diag /tmp/rummy_test_*.db /tmp/rummy_test_*.db-shm /tmp/rummy_test_*.db-wal && echo 'Test artifacts cleaned.'",
50
45
  "test:lme:get": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/lme/download.js",
51
46
  "test:lme": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/lme/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/lme_$(date +%Y%m%dT%H%M%S).log' --",
52
- "test:mab:clean": "rm -rf test/mab/results/*/",
47
+ "test:swe:setup": "bash test/swe/setup.sh",
48
+ "test:swe:get": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/swe/download.js",
49
+ "test:swe": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/swe/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/swe_$(date +%Y%m%dT%H%M%S).log' --",
50
+ "test:swe:eval": "bash -c 'cd test/swe && source .venv/bin/activate && python evaluate.py \"$@\"' --",
51
+ "test:swe:baseline": "bash -c 'cd test/swe && source .venv/bin/activate && python baseline.py \"$@\"' --",
53
52
  "test:lme:clean": "rm -rf test/lme/results/*/",
53
+ "test:swe:clean": "rm -rf test/swe/results/*/ test/swe/repos/",
54
54
  "test:clear": "rm -rf /tmp/rummy_test_diag /tmp/rummy_test_*.db /tmp/rummy_test_*.db-shm /tmp/rummy_test_*.db-wal /tmp/rummy-stories-*",
55
55
  "test:demo": "node --env-file-if-exists=.env.example --env-file-if-exists=.env bin/demo.js",
56
56
  "test:spec": "node test/spec-coverage.js"
@@ -0,0 +1,77 @@
1
/**
 * Inject a follow-up question into an existing LME run and print the answer.
 *
 * Usage: node scriptify/ask_run.js <db_path> <run_alias> "your question"
 *
 * Reuses the run's full ingested context so the model answers with all
 * its accumulated knowledge. Used as a debugging tool to interrogate
 * the model's reasoning after a benchmark completes.
 *
 * Exit status is non-zero on bad arguments, when the run alias cannot be
 * found, or when the run does not reach a terminal status in time.
 */
import { setTimeout as sleep } from "node:timers/promises";
import TestDb from "../test/helpers/TestDb.js";
import TestServer from "../test/helpers/TestServer.js";
import RpcClient from "../test/helpers/RpcClient.js";

const PROJECT_ROOT = "/tmp/rummy-lme";
const MODEL = "grok";
// How long to wait for the injected prompt to finish, and how often to poll.
const RUN_TIMEOUT_MS = 600_000;
const POLL_INTERVAL_MS = 500;
// Run statuses after which no further turns are expected.
const TERMINAL = new Set([200, 204, 413, 422, 499, 500]);

const [, , dbPath, alias, ...questionParts] = process.argv;
const question = questionParts.join(" ");

if (!dbPath || !alias || !question) {
  console.error(
    'Usage: node scriptify/ask_run.js <db_path> <run_alias> "your question"',
  );
  process.exit(1);
}

/** Return the entry with the highest turn for `scheme`, or undefined. */
const latestOf = (entries, scheme) =>
  entries
    .filter((e) => e.scheme === scheme)
    .toSorted((a, b) => b.turn - a.turn)[0];

/**
 * Poll the run row until it reports a terminal status or the timeout
 * elapses.
 *
 * NOTE(review): the run existed before this prompt was injected, so its row
 * may still carry the previous terminal status on the first poll — confirm
 * that the `set` call flips the status before it returns.
 *
 * @returns {{ row: object, timedOut: boolean }}
 * @throws {Error} when no run row exists for the alias.
 */
async function waitForTerminal(db) {
  const deadline = Date.now() + RUN_TIMEOUT_MS;
  let row;
  do {
    row = await db.get_run_by_alias.get({ alias });
    // Fail with a clear message instead of a TypeError on `row.status`.
    if (!row) throw new Error(`Run "${alias}" not found in ${dbPath}`);
    if (TERMINAL.has(row.status)) return { row, timedOut: false };
    await sleep(POLL_INTERVAL_MS);
  } while (Date.now() < deadline);
  return { row, timedOut: true };
}

// Nested try/finally so every resource that was opened is released even
// when a later step throws.
const tdb = await TestDb.createAt(dbPath);
try {
  const tserver = await TestServer.start(tdb);
  try {
    const client = new RpcClient(tserver.url);
    await client.connect();
    try {
      await client.call("rummy/hello", {
        name: "ask_run",
        projectRoot: PROJECT_ROOT,
      });

      console.log(`Asking ${alias}: ${question}\n`);

      await client.call("set", {
        path: `run://${alias}`,
        body: question,
        attributes: {
          model: MODEL,
          mode: "ask",
          noRepo: true,
          noInteraction: true,
          noWeb: true,
          noProposals: true,
        },
      });

      const { row: runRow, timedOut } = await waitForTerminal(tdb.db);
      if (timedOut) {
        // Still print whatever exists below, but make the failure visible.
        console.error(
          `Timed out after ${RUN_TIMEOUT_MS}ms waiting for ${alias} (last status ${runRow.status}); output may be incomplete.`,
        );
        process.exitCode = 1;
      }

      const entries = await tdb.db.get_known_entries.all({
        run_id: runRow.id,
      });
      const reasoning = latestOf(entries, "reasoning");
      const assistant = latestOf(entries, "assistant");

      if (reasoning) {
        console.log("=== REASONING ===");
        console.log(reasoning.body);
        console.log("");
      }
      if (assistant) {
        console.log("=== ANSWER ===");
        console.log(assistant.body);
      }
    } finally {
      await client.close();
    }
  } finally {
    await tserver.stop();
  }
} finally {
  await tdb.cleanup();
}
@@ -81,19 +81,7 @@ export default class AgentLoop {
81
81
  const runUsage = await this.#db.get_run_usage.get({ run_id: runId });
82
82
  const history = await this.#entries.getLog(runId);
83
83
  const unknowns = await this.#entries.getUnknowns(runId);
84
- const latestSummary = history
85
- .filter((e) => {
86
- // Updates are under the unified log namespace at
87
- // log://turn_N/update/<slug>. Match by path pattern rather
88
- // than scheme (scheme is now "log" for all log entries).
89
- if (!/^log:\/\/turn_\d+\/update\//.test(e.path)) return false;
90
- const attrs =
91
- typeof e.attributes === "string"
92
- ? JSON.parse(e.attributes)
93
- : e.attributes;
94
- return attrs?.status === 200;
95
- })
96
- .at(-1);
84
+ const latestSummary = this.#hooks.instructions.findLatestSummary(history);
97
85
 
98
86
  // Always emit complete telemetry. When we don't have a fresh turn
99
87
  // result (abort/max-turns/crash), read the last turn's context
@@ -287,10 +275,13 @@ export default class AgentLoop {
287
275
  if (!project)
288
276
  throw new Error(msg("error.project_not_found", { projectId }));
289
277
 
290
- const noRepo = options?.noRepo === true;
291
- const noInteraction = options?.noInteraction === true;
292
- const noWeb = options?.noWeb === true;
293
- const noProposals = options?.noProposals === true;
278
+ const noRepo = options?.noRepo ?? process.env.RUMMY_NO_REPO === "1";
279
+ const noInteraction =
280
+ options?.noInteraction ?? process.env.RUMMY_NO_INTERACTION === "1";
281
+ const noWeb = options?.noWeb ?? process.env.RUMMY_NO_WEB === "1";
282
+ const noProposals =
283
+ options?.noProposals ?? process.env.RUMMY_NO_PROPOSALS === "1";
284
+ const yolo = options?.yolo ?? process.env.RUMMY_YOLO === "1";
294
285
  const requestedModel = model;
295
286
 
296
287
  const runInfo = await this.ensureRun(
@@ -314,6 +305,7 @@ export default class AgentLoop {
314
305
  noInteraction,
315
306
  noWeb,
316
307
  noProposals,
308
+ yolo,
317
309
  temperature: options?.temperature,
318
310
  }),
319
311
  });
@@ -367,6 +359,7 @@ export default class AgentLoop {
367
359
  noInteraction = false,
368
360
  noWeb = false,
369
361
  noProposals = false,
362
+ yolo = false,
370
363
  } = loopConfig;
371
364
 
372
365
  let result;
@@ -384,6 +377,7 @@ export default class AgentLoop {
384
377
  noInteraction,
385
378
  noWeb,
386
379
  noProposals,
380
+ yolo,
387
381
  options: { ...options, temperature: loopConfig.temperature },
388
382
  hook,
389
383
  signal: controller.signal,
@@ -448,6 +442,7 @@ export default class AgentLoop {
448
442
  noInteraction,
449
443
  noWeb,
450
444
  noProposals,
445
+ yolo,
451
446
  options,
452
447
  hook,
453
448
  signal,
@@ -534,6 +529,7 @@ export default class AgentLoop {
534
529
  noWeb,
535
530
  noInteraction,
536
531
  noProposals,
532
+ yolo,
537
533
  toolSet,
538
534
  contextSize,
539
535
  options: { ...options, isContinuation: loopIteration > 1 },
@@ -751,7 +747,7 @@ export default class AgentLoop {
751
747
  return { run: runAlias, status: runRow.status };
752
748
  }
753
749
 
754
- async inject(runAlias, message, mode) {
750
+ async inject(runAlias, message, mode, options = {}) {
755
751
  if (mode !== "ask" && mode !== "act") {
756
752
  throw new Error(
757
753
  `inject: mode is required and must be "ask" or "act" (got ${JSON.stringify(mode)})`,
@@ -761,6 +757,14 @@ export default class AgentLoop {
761
757
  if (!runRow)
762
758
  throw new Error(msg("error.run_not_found", { runId: runAlias }));
763
759
 
760
+ const noRepo = options?.noRepo ?? process.env.RUMMY_NO_REPO === "1";
761
+ const noInteraction =
762
+ options?.noInteraction ?? process.env.RUMMY_NO_INTERACTION === "1";
763
+ const noWeb = options?.noWeb ?? process.env.RUMMY_NO_WEB === "1";
764
+ const noProposals =
765
+ options?.noProposals ?? process.env.RUMMY_NO_PROPOSALS === "1";
766
+ const yolo = options?.yolo ?? process.env.RUMMY_YOLO === "1";
767
+
764
768
  const nextTurn = runRow.next_turn;
765
769
 
766
770
  await this.#entries.set({
@@ -784,7 +788,14 @@ export default class AgentLoop {
784
788
  mode,
785
789
  model: runRow.model,
786
790
  prompt: message,
787
- config: "{}",
791
+ config: JSON.stringify({
792
+ noRepo,
793
+ noInteraction,
794
+ noWeb,
795
+ noProposals,
796
+ yolo,
797
+ temperature: options?.temperature,
798
+ }),
788
799
  });
789
800
 
790
801
  const projectId = runRow.project_id;
@@ -38,6 +38,7 @@ export default class Entries {
38
38
  }
39
39
 
40
40
  static scheme(path) {
41
+ if (!path) return null;
41
42
  const idx = path.indexOf("://");
42
43
  return idx > 0 ? path.slice(0, idx) : null;
43
44
  }
@@ -474,7 +475,7 @@ export default class Entries {
474
475
  runId,
475
476
  path,
476
477
  body = null,
477
- { limit = null, offset = null } = {},
478
+ { limit = null, offset = null, includeAuditSchemes = false } = {},
478
479
  ) {
479
480
  return this.#db.get_entries_by_pattern.all({
480
481
  run_id: runId,
@@ -482,6 +483,7 @@ export default class Entries {
482
483
  body: body ? body : null,
483
484
  limit,
484
485
  offset,
486
+ include_audit_schemes: includeAuditSchemes ? 1 : null,
485
487
  });
486
488
  }
487
489
 
@@ -494,7 +496,19 @@ export default class Entries {
494
496
  }
495
497
  }
496
498
 
497
- waitForResolution(runId, path) {
499
+ async waitForResolution(runId, path) {
500
+ // Check current state first — if a synchronous in-process resolver
501
+ // (yolo) flipped the entry to terminal during proposal.pending,
502
+ // the state change has already happened and no future drain will
503
+ // fire. Without this guard, in-process resolvers would deadlock.
504
+ const current = await this.getState(runId, path);
505
+ if (
506
+ current &&
507
+ current.state !== "proposed" &&
508
+ current.state !== "streaming"
509
+ ) {
510
+ return;
511
+ }
498
512
  const normalized = Entries.normalizePath(path);
499
513
  const key = `${runId}:${normalized}`;
500
514
  return new Promise((resolve) => {
@@ -559,6 +573,13 @@ export default class Entries {
559
573
  });
560
574
  }
561
575
 
576
+ async archivePriorPromptArtifacts(runId, currentTurn) {
577
+ await this.#db.archive_prior_prompt_artifacts.run({
578
+ run_id: runId,
579
+ current_turn: currentTurn,
580
+ });
581
+ }
582
+
562
583
  /**
563
584
  * Demote all promoted entries for a run on a given turn. Returns the
564
585
  * affected rows (path, tokens) so callers can summarize.
@@ -83,8 +83,8 @@ export default class ProjectAgent {
83
83
  return this.#agentLoop.resolve(run, resolution);
84
84
  }
85
85
 
86
- async inject(run, message, mode) {
87
- return this.#agentLoop.inject(run, message, mode);
86
+ async inject(run, message, mode, options = {}) {
87
+ return this.#agentLoop.inject(run, message, mode, options);
88
88
  }
89
89
 
90
90
  // Synchronously create (or fork) a run row and return the alias.
@@ -30,6 +30,7 @@ export default class TurnExecutor {
30
30
  noWeb,
31
31
  noInteraction,
32
32
  noProposals,
33
+ yolo,
33
34
  toolSet,
34
35
  contextSize,
35
36
  options,
@@ -70,6 +71,7 @@ export default class TurnExecutor {
70
71
  noWeb,
71
72
  noInteraction,
72
73
  noProposals,
74
+ yolo,
73
75
  toolSet,
74
76
  contextSize,
75
77
  systemPrompt: null,
@@ -332,6 +334,7 @@ export default class TurnExecutor {
332
334
  projectId,
333
335
  run: currentAlias,
334
336
  proposed: [p],
337
+ rummy,
335
338
  });
336
339
  await this.#entries.waitForResolution(currentRunId, p.path);
337
340
  const resolved = await this.#entries.getState(currentRunId, p.path);
@@ -1,7 +1,7 @@
1
1
  -- PREP: get_known_entries
2
2
  SELECT
3
3
  path, scheme, state, outcome, visibility, body, turn, hash
4
- , attributes, countTokens(body) AS tokens, scope
4
+ , attributes, countTokens(body) AS tokens, scope, loop_id
5
5
  FROM known_entries
6
6
  WHERE run_id = :run_id
7
7
  ORDER BY path;
@@ -223,15 +223,20 @@ WHERE run_id = :run_id AND entry_id IN (
223
223
  );
224
224
 
225
225
  -- PREP: get_entries_by_pattern
226
+ -- Default excludes audit schemes (system://, reasoning://, model://, user://,
227
+ -- assistant://, content://, instructions://) so model-facing tools never leak
228
+ -- internal entries. Internal callers that need them pass include_audit_schemes=1.
226
229
  SELECT
227
230
  e.path, e.body, e.scheme, rv.state, rv.outcome, rv.visibility
228
231
  , countTokens(e.body) AS tokens, e.attributes
229
232
  FROM run_views AS rv
230
233
  JOIN entries AS e ON e.id = rv.entry_id
234
+ JOIN schemes AS s ON s.name = COALESCE(e.scheme, 'file')
231
235
  WHERE
232
236
  rv.run_id = :run_id
233
237
  AND hedmatch(:path, e.path)
234
238
  AND (:body IS NULL OR hedsearch(:body, e.body))
239
+ AND (:include_audit_schemes IS NOT NULL OR s.model_visible = 1)
235
240
  ORDER BY e.path
236
241
  LIMIT
237
242
  COALESCE(:limit, -1)
@@ -1,5 +1,5 @@
1
1
  import ContextAssembler from "./ContextAssembler.js";
2
- import { countTokens } from "./tokens.js";
2
+ import { countLines, countTokens } from "./tokens.js";
3
3
 
4
4
  /**
5
5
  * Rebuild turn_context from v_model_context, then assemble messages.
@@ -54,7 +54,8 @@ export default async function materializeContext({
54
54
  });
55
55
  const vTokens = countTokens(visibleProjection);
56
56
  const sTokens = countTokens(summarizedProjection);
57
- tokenAccounting.set(row.path, { vTokens, sTokens });
57
+ const vLines = countLines(visibleProjection);
58
+ tokenAccounting.set(row.path, { vTokens, sTokens, vLines });
58
59
  const projectedBody =
59
60
  row.visibility === "visible" ? visibleProjection : summarizedProjection;
60
61
  await db.insert_turn_context.run({
@@ -79,6 +80,7 @@ export default async function materializeContext({
79
80
  row.vTokens = t.vTokens;
80
81
  row.sTokens = t.sTokens;
81
82
  row.aTokens = t.vTokens - t.sTokens;
83
+ row.vLines = t.vLines;
82
84
  }
83
85
  const lastCtx = await db.get_last_context_tokens.get({ run_id: runId });
84
86
  // First turn of a new run has no prior context.
@@ -92,6 +92,25 @@ SELECT
92
92
  FROM run_views
93
93
  WHERE run_id = :parent_run_id;
94
94
 
95
+ -- PREP: archive_prior_prompt_artifacts
96
+ -- Multi-prompt sessions accumulate artifacts from prior prompt cycles
97
+ -- (consumed prompts, their per-turn logs). These pollute the validator's
98
+ -- prior-prompts check on subsequent Deployment landings. Archive all
99
+ -- prior prompt:// entries and prior-turn log:// entries when a new
100
+ -- prompt arrives. Knowns/unknowns/file entries are untouched — they
101
+ -- carry persistent knowledge across cycles. The loop_id IS NULL clause
102
+ -- catches forked-in views from a parent run (per fork_known_entries),
103
+ -- which represent prior cycles' artifacts inherited into a clean child.
104
+ UPDATE run_views
105
+ SET visibility = 'archived'
106
+ WHERE run_id = :run_id
107
+ AND visibility != 'archived'
108
+ AND (turn < :current_turn OR loop_id IS NULL)
109
+ AND entry_id IN (
110
+ SELECT id FROM entries
111
+ WHERE scheme IN ('prompt', 'log')
112
+ );
113
+
95
114
  -- PREP: get_active_runs
96
115
  SELECT r.id
97
116
  FROM runs AS r
@@ -12,3 +12,9 @@ export function countTokens(text) {
12
12
  if (!text) return 0;
13
13
  return Math.ceil(text.length / DIVISOR);
14
14
  }
15
+
16
/**
 * Count the lines in `text`.
 *
 * A trailing newline terminates the final line rather than opening a new
 * one, so "a\nb" and "a\nb\n" both count as 2. Empty or nullish input is 0.
 *
 * @param {string} text
 * @returns {number} number of lines
 */
export function countLines(text) {
  if (!text) return 0;
  // Scan with indexOf instead of match(/\n/g): same count, but no array of
  // every newline is allocated just to read its length.
  let newlines = 0;
  for (
    let at = text.indexOf("\n");
    at !== -1;
    at = text.indexOf("\n", at + 1)
  ) {
    newlines += 1;
  }
  return text.endsWith("\n") ? newlines : newlines + 1;
}
+ }
@@ -106,6 +106,10 @@ export default class RummyContext {
106
106
  return this.#context.noProposals === true;
107
107
  }
108
108
 
109
+ get yolo() {
110
+ return this.#context.yolo === true;
111
+ }
112
+
109
113
  get toolSet() {
110
114
  return this.#context.toolSet;
111
115
  }
@@ -4,8 +4,12 @@ import {
4
4
  isContextExceededMessage,
5
5
  isTransientMessage,
6
6
  } from "./errors.js";
7
+ import { retryWithBackoff } from "./retry.js";
7
8
 
8
- const MAX_TRANSIENT_RETRIES = 3;
9
+ const DEADLINE_MS = Number(process.env.RUMMY_LLM_DEADLINE_MS);
10
+ const MAX_BACKOFF_MS = Number(process.env.RUMMY_LLM_MAX_BACKOFF_MS);
11
+ if (!DEADLINE_MS) throw new Error("RUMMY_LLM_DEADLINE_MS must be set");
12
+ if (!MAX_BACKOFF_MS) throw new Error("RUMMY_LLM_MAX_BACKOFF_MS must be set");
9
13
 
10
14
  /**
11
15
  * Thin dispatcher over the LLM provider registry (`hooks.llm.providers`).
@@ -55,27 +59,26 @@ export default class LlmProvider {
55
59
  );
56
60
  }
57
61
 
58
- for (let attempt = 0; ; attempt++) {
59
- try {
60
- return await provider.completion(
61
- messages,
62
- resolvedModel,
63
- resolvedOptions,
64
- );
65
- } catch (err) {
66
- if (isContextExceededMessage(err.message)) {
67
- throw new ContextExceededError(err.message, { cause: err });
68
- }
69
- if (
70
- isTransientMessage(err.message) &&
71
- attempt < MAX_TRANSIENT_RETRIES
72
- ) {
73
- const delay = 1000 * 2 ** attempt;
74
- await new Promise((r) => setTimeout(r, delay));
75
- continue;
76
- }
77
- throw err;
62
+ try {
63
+ return await retryWithBackoff(
64
+ () => provider.completion(messages, resolvedModel, resolvedOptions),
65
+ {
66
+ signal: options.signal,
67
+ deadlineMs: DEADLINE_MS,
68
+ maxDelayMs: MAX_BACKOFF_MS,
69
+ isRetryable: (err) => isTransientMessage(err.message),
70
+ onRetry: (err, attempt, delayMs, remainingMs) => {
71
+ console.error(
72
+ `[LLM] transient failure on ${provider.name} attempt ${attempt}: ${err.message}; retrying in ${delayMs}ms (${Math.round(remainingMs / 1000)}s deadline remaining)`,
73
+ );
74
+ },
75
+ },
76
+ );
77
+ } catch (err) {
78
+ if (isContextExceededMessage(err.message)) {
79
+ throw new ContextExceededError(err.message, { cause: err });
78
80
  }
81
+ throw err;
79
82
  }
80
83
  }
81
84
 
package/src/llm/errors.js CHANGED
@@ -14,7 +14,7 @@ export function isContextExceededMessage(message) {
14
14
  }
15
15
 
16
16
  const TRANSIENT_PATTERN =
17
- /\b(503|429|timeout|ECONNREFUSED|ECONNRESET|unavailable)\b/i;
17
+ /\b(500|502|503|504|429|timeout|TimeoutError|aborted|unavailable|ECONNREFUSED|ECONNRESET|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT|EPIPE|ECONNABORTED|fetch failed)\b/i;
18
18
 
19
19
  export function isTransientMessage(message) {
20
20
  return TRANSIENT_PATTERN.test(String(message));