@possumtech/rummy 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/.env.example +2 -1
  2. package/PLUGINS.md +1 -1
  3. package/SPEC.md +181 -38
  4. package/migrations/001_initial_schema.sql +1 -1
  5. package/package.json +7 -3
  6. package/service.js +5 -3
  7. package/src/agent/AgentLoop.js +182 -136
  8. package/src/agent/ContextAssembler.js +2 -0
  9. package/src/agent/KnownStore.js +28 -85
  10. package/src/agent/ResponseHealer.js +65 -31
  11. package/src/agent/TurnExecutor.js +326 -181
  12. package/src/agent/XmlParser.js +5 -2
  13. package/src/agent/known_store.sql +48 -0
  14. package/src/agent/tokens.js +1 -0
  15. package/src/agent/turns.sql +5 -0
  16. package/src/hooks/HookRegistry.js +7 -0
  17. package/src/hooks/Hooks.js +1 -4
  18. package/src/hooks/ToolRegistry.js +2 -8
  19. package/src/plugins/budget/README.md +2 -14
  20. package/src/plugins/budget/budget.js +15 -39
  21. package/src/plugins/cp/cp.js +1 -1
  22. package/src/plugins/cp/cpDoc.js +1 -1
  23. package/src/plugins/get/get.js +71 -1
  24. package/src/plugins/get/getDoc.js +14 -4
  25. package/src/plugins/hedberg/matcher.js +10 -29
  26. package/src/plugins/instructions/preamble.md +16 -6
  27. package/src/plugins/known/known.js +4 -10
  28. package/src/plugins/known/knownDoc.js +15 -14
  29. package/src/plugins/mv/mv.js +18 -1
  30. package/src/plugins/mv/mvDoc.js +15 -1
  31. package/src/plugins/{current → performed}/README.md +4 -3
  32. package/src/plugins/{current/current.js → performed/performed.js} +15 -20
  33. package/src/plugins/previous/README.md +2 -1
  34. package/src/plugins/previous/previous.js +31 -25
  35. package/src/plugins/progress/README.md +1 -2
  36. package/src/plugins/progress/progress.js +15 -29
  37. package/src/plugins/prompt/prompt.js +0 -7
  38. package/src/plugins/rm/rm.js +27 -15
  39. package/src/plugins/rm/rmDoc.js +3 -3
  40. package/src/plugins/set/set.js +55 -19
  41. package/src/plugins/set/setDoc.js +6 -2
  42. package/src/plugins/telemetry/telemetry.js +14 -9
  43. package/src/plugins/unknown/README.md +2 -1
  44. package/src/plugins/unknown/unknown.js +5 -4
  45. package/src/server/ClientConnection.js +59 -45
  46. package/src/sql/v_model_context.sql +3 -13
  47. package/src/plugins/budget/BudgetGuard.js +0 -74
package/.env.example CHANGED
@@ -19,7 +19,8 @@ RUMMY_MMAP_MB=0
19
19
  RUMMY_MAX_TURNS=99
20
20
  RUMMY_MAX_UNKNOWN_WARNINGS=3
21
21
  RUMMY_MAX_STALLS=3
22
- RUMMY_MAX_REPETITIONS=3
22
+ RUMMY_MIN_CYCLES=3
23
+ RUMMY_MAX_CYCLE_PERIOD=4
23
24
  RUMMY_MAX_UPDATE_REPEATS=3
24
25
 
25
26
  # Hygiene
package/PLUGINS.md CHANGED
@@ -467,7 +467,7 @@ prepended above the plugin's summary view output.
467
467
  | `update` | Structural | Signal continued work |
468
468
  | `unknown` | Structural + Assembly | Register unknowns, render `<unknowns>` |
469
469
  | `previous` | Assembly | Render `<previous>` loop history |
470
- | `current` | Assembly | Render `<current>` active loop work |
470
+ | `performed` | Assembly | Render `<performed>` active loop work |
471
471
  | `progress` | Assembly | Render `<progress>` telemetry + warnings |
472
472
  | `prompt` | Assembly | Render `<prompt mode="ask|act">` tag |
473
473
  | `hedberg` | Utility | Pattern matching, interpretation, normalization |
package/SPEC.md CHANGED
@@ -82,7 +82,7 @@ Every entry plays one of four roles:
82
82
  | Role | Category | Section | Description |
83
83
  |------|----------|---------|-------------|
84
84
  | **Data** | `data` | `<knowns>` | Entries the model works with — persistent state |
85
- | **Logging** | `logging` | `<current>`/`<previous>` | Records of what happened — tool results, lifecycle signals |
85
+ | **Logging** | `logging` | `<performed>`/`<previous>` | Records of what happened — tool results, lifecycle signals |
86
86
  | **Unknowns** | `unknown` | `<unknowns>` | Open questions the model is tracking |
87
87
  | **Prompt** | `prompt` | `<prompt>` | The task driving the loop |
88
88
 
@@ -96,7 +96,6 @@ Every entry plays one of four roles:
96
96
  | `http://`, `https://` | data | Web content. |
97
97
  | `unknown://` | unknown | Unresolved questions. |
98
98
  | `prompt://` | prompt | User prompt with `mode` attribute (`ask`/`act`). |
99
- | `progress://` | prompt | Continuation prompt. |
100
99
  | `set://`, `get://`, `sh://`, `env://`, `rm://`, `mv://`, `cp://`, `ask_user://`, `search://` | logging | Tool result entries. |
101
100
  | `summarize://`, `update://` | logging | Lifecycle signals. |
102
101
  | `tool://` | audit | Internal plugin metadata. `model_visible = 0`. |
@@ -211,7 +210,7 @@ object is the same shape at every tier.
211
210
 
212
211
  Model tier restrictions enforced by unified `resolveForLoop(mode, flags)`.
213
212
  Ask mode excludes `sh`. Flags: `noInteraction` excludes `ask_user`,
214
- `noWeb` excludes `search`, `noBench` excludes `ask_user`/`env`/`sh`.
213
+ `noWeb` excludes `search`, `noProposals` excludes `ask_user`/`env`/`sh`.
215
214
  13 model tools: get, set, known, unknown, env, sh, rm, cp, mv, search,
216
215
  summarize, update, ask_user.
217
216
  Client tier requires project init. Plugin tier has no restrictions.
@@ -297,15 +296,17 @@ Two messages per turn. System = stable truth. User = active task.
297
296
  ...entries sorted by fidelity (index, summary, full), then by scheme
298
297
  </knowns>
299
298
  <previous>
300
- (pre-loop user prompt, model responses, agent warnings, and tools used, in order)
299
+ (pre-loop entries, each with turn, status, summary, fidelity, tokens)
301
300
  </previous>
302
- <unknowns></unknowns>
301
+ <unknowns>
302
+ (open questions, each with path, turn, fidelity, tokens)
303
+ </unknowns>
303
304
  [/system]
304
305
  [user]
305
- <current>
306
- (current loop model responses, agent warnings, and tools used, in order)
307
- </current>
308
- <progress>the above actions have been performed on this user prompt:</progress>
306
+ <performed>
307
+ (current loop entries, each with turn, status, summary, fidelity, tokens)
308
+ </performed>
309
+ <progress turn="N">token budget, fidelity stats, causal bridge</progress>
309
310
  <prompt mode="ask|act" tools="...">user prompt</prompt>
310
311
  [/user]
311
312
  ```
@@ -317,9 +318,9 @@ The `<prompt>` tag is present on every turn — first turn and
317
318
  continuations alike. The model always sees its task. The active prompt
318
319
  is extracted from its chronological position and placed last for maximum
319
320
  recency. `<progress>` bridges the gap, narrating the causal relationship
320
- between `<current>` (the work) and the prompt (the cause).
321
+ between `<performed>` (the work) and the prompt (the cause).
321
322
 
322
- ### 4.2 Loops, Previous, and Current
323
+ ### 4.2 Loops, Previous, and Performed
323
324
 
324
325
  A **loop** is one `ask` or `act` invocation and all its continuation
325
326
  turns until summarize, fail, or abort.
@@ -329,14 +330,14 @@ responses, tool results, agent warnings — the full chronicle in order.
329
330
  Lives in the system message as established history. Omitted on the
330
331
  first turn of the first loop.
331
332
 
332
- **Current** = the active loop's work so far. Model responses, tool
333
+ **Performed** = the active loop's work so far. Model responses, tool
333
334
  results, agent warnings — in order. Does NOT include the user prompt
334
335
  (one per loop, extracted to `<prompt>`). Lives in the user
335
336
  message as immediate context. Empty on the first turn of a loop.
336
337
 
337
338
  When a new prompt arrives on an existing run, the prior loop's
338
- `<current>` content plus its prompt move to `<previous>`. When a loop
339
- continues (next turn), new results append to `<current>`.
339
+ `<performed>` content plus its prompt move to `<previous>`. When a loop
340
+ continues (next turn), new results append to `<performed>`.
340
341
 
341
342
  ### 4.3 Key Entries
342
343
 
@@ -357,7 +358,7 @@ text from body + attributes.
357
358
  Each turn:
358
359
 
359
360
  1. Write `instructions://system` (empty body, attributes = { persona })
360
- 2. Emit `turn.started` — plugins write prompt/progress/instructions entries
361
+ 2. Emit `turn.started` — plugins write prompt/instructions entries
361
362
  3. Project `instructions://system` → instructions text
362
363
  4. Query `v_model_context` VIEW → visible entries
363
364
  5. Project each entry through its tool's `full`/`summary` projection
@@ -367,7 +368,7 @@ Each turn:
367
368
  - Previous plugin (priority 200) → `<previous>` section
368
369
  - Unknown plugin (priority 300) → `<unknowns>` section
369
370
  8. Invoke `assembly.user` filter chain (empty string as base):
370
- - Current plugin (priority 100) → `<current>` section
371
+ - Performed plugin (priority 100) → `<performed>` section
371
372
  - Progress plugin (priority 200) → `<progress>` section
372
373
  - Prompt plugin (priority 300) → `<prompt>` section
373
374
  9. Store as `system://N` and `user://N` audit entries
@@ -377,6 +378,12 @@ The VIEW determines visibility from `fidelity` and `status`:
377
378
  - `summary` → summary visible (model-authored `summary` attribute if set)
378
379
  - `index` → path listed, no content
379
380
  - `archive` → invisible (retrievable via `<get>`)
381
+
382
+ **Partial read:** `<get path="..." line="N" limit="M"/>` returns lines N through
383
+ N+M−1 of the entry body as the log item without changing fidelity or promoting
384
+ the entry to context. Use after reading `summary` fidelity (which gives line
385
+ numbers via repomap) to target a specific symbol. Single-path only — glob or
386
+ body filter with `line`/`limit` is a 400 error.
380
387
  - `status = 202` → invisible (proposed, pending client)
381
388
  - `model_visible = 0` → invisible (audit, tool, instructions)
382
389
 
@@ -400,6 +407,16 @@ during dispatch. `upsert()`, `promoteByPattern()`, and
400
407
  Exceeding the budget throws `BudgetExceeded` — the tool 413s, the
401
408
  guard trips, and all subsequent tools in the turn fail.
402
409
 
410
+ BudgetGuard ceiling = `floor(contextSize × 0.9) − 500`. The 500-token
411
+ buffer below the enforce ceiling absorbs two sources of overhead that
412
+ BudgetGuard cannot see: (a) `#record()`-phase writes that bypass the
413
+ guard (~15 tokens per command), and (b) loop transition overhead —
414
+ when a loop completes and a new one starts, entries shift from
415
+ `<performed>` to `<previous>` format, adding ~200–300 tokens to the
416
+ next assembly. Without this buffer, the base context can accumulate
417
+ to exactly the enforce ceiling, making it impossible for the panic
418
+ loop to start (panic prompt + loop overhead > ceiling).
419
+
403
420
  **Exemptions:** `status >= 400` entries (error results), `model_visible
404
421
  = 0` entries (audit), `fidelity = "archive"` entries (not in context).
405
422
 
@@ -415,30 +432,107 @@ formula, one file (`src/agent/tokens.js`), env-configurable. No
415
432
  external dependencies. `contextSize` is the ceiling. Over = 413.
416
433
  Under = 200. No margins.
417
434
 
418
- ### 4.6 Panic Mode
435
+ **Three token measures — never conflate them:**
436
+
437
+ | Measure | Source | Scope | Use |
438
+ |---|---|---|---|
439
+ | SQL entry tokens | `known_entries.tokens` = `ceil(chars / DIVISOR)` | Per entry | Model decision-making: "this entry costs N tokens" |
440
+ | Assembled estimate | `measureMessages(messages)` = sum of entry projections | Full packet | First-turn budget fallback only |
441
+ | Actual API tokens | `turns.context_tokens` = `usage.input_tokens` back-filled from LLM | Per turn | Budget enforcement on turns 2+; ground truth |
419
442
 
420
- When a new prompt arrives and the assembled context exceeds
421
- `contextSize`, the system enters panic mode instead of failing to
422
- the client.
443
+ `budget.enforce` uses the **actual API tokens** (`get_last_context_tokens`) when
444
+ available (turn 2+) and falls back to the assembled estimate on turn 1. The
445
+ estimate can be 3–7× off for XML/JSON-heavy content — do not rely on it for
446
+ anything that matters.
423
447
 
424
- 1. The failed loop is completed with 413 (audit trail)
425
- 2. A panic loop is enqueued (`mode = "panic"`, `noRepo = true`)
426
- 3. The original loop is re-enqueued to retry after panic
427
- 4. The model receives a prompt with the exact shortfall in tokens
428
- 5. Tools: get, set, known, unknown, rm, mv, cp, summarize, update
429
- 6. Excluded: sh, env, search, ask_user
448
+ **`context_tokens` vs `prompt_tokens` in step telemetry:**
449
+ - `context_tokens` in the step JSON = `turns.context_tokens` for that turn =
450
+ per-turn actual input tokens from the LLM API (e.g. 7900 tokens sent this turn)
451
+ - `prompt_tokens` in the step JSON = `SUM(turns.prompt_tokens)` for the run =
452
+ **cumulative** total across all turns (cost tracking, not a context size)
430
453
 
431
- **Strike system:** Each turn without context reduction = 1 strike.
432
- Any reduction resets the counter. 3 consecutive strikes = hard 413
433
- to client. Unlimited turns as long as the model makes progress.
454
+ These two will diverge rapidly on any multi-turn run. A run at turn 50 might show
455
+ `context_tokens: 8000` (context under control) and `prompt_tokens: 400000`
456
+ (total input tokens billed across the whole run). They are measuring orthogonal things.
434
457
 
435
- One panic attempt per drain cycle. If the retried original loop also
436
- 413s, hard-fail to the client.
458
+ ### 4.6 Panic Mode
459
+
460
+ **The invariant.** A panic is only ever triggered because the
461
+ assembled context was under the ceiling — and the new prompt pushed
462
+ it over. The existing context fit; the incoming prompt did not.
463
+ Panic mode replaces that too-large incoming prompt with a small
464
+ panic prompt on the same context. Therefore: the first turn of a
465
+ panic loop cannot 413. If it does, it is a bug.
466
+
467
+ **Trigger.** `TurnExecutor.execute()` assembles the full packet
468
+ (context + incoming prompt) before calling the LLM. If
469
+ `assembledTokens > contextSize`, it returns 413 without calling
470
+ the LLM. `#drainQueue` intercepts this and enters panic mode.
471
+
472
+ **Flow.**
473
+ 1. Complete the failed loop with status 413 (audit trail).
474
+ 2. Enqueue a panic loop (`mode = "panic"`, `noRepo = true`,
475
+ `prompt = panicPrompt`, `panicTarget` in config).
476
+ 3. Re-enqueue the original loop with `panicAttempted: true` in
477
+ its config JSON. This flag persists across drain cycles.
478
+ 4. `continue` — the drain loop claims the panic loop next.
479
+
480
+ After panic completes (model freed enough space), the retry loop
481
+ runs. If the retry also 413s, hard-fail to client. One panic
482
+ attempt per drain cycle — `panicAttempted` is checked both as a
483
+ local variable and on the re-enqueued loop's config.
484
+
485
+ **Panic target.** The model must compress context to below:
486
+
487
+ ```
488
+ panicTarget = MIN(contextSize × 0.75, contextSize − incomingTokens) − cushion
489
+ ```
437
490
 
438
- **`ToolRegistry.view()`** prepends `attributes.summary` above the
439
- plugin's summary view output at summary fidelity. The model authors
440
- summaries (<= 80 chars) via `<set summary="...">`. Summaries persist
441
- across fidelity changes.
491
+ `incomingTokens` is the raw token count of the original prompt.
492
+ `cushion` is a small safety margin (500 tokens) to absorb
493
+ materialization overhead. The target is expressed in materialized
494
+ token units — the same unit the system uses to measure completion
495
+ (see "Two token contexts" below).
496
+
497
+ **Two token contexts.**
498
+
499
+ The model reasons in *per-entry SQL tokens* — the token counts
500
+ visible in `<knowns>` entries. These are the granular unit the model
501
+ uses to decide which entries to target: "this entry is 200 tokens;
502
+ if I archive it, I save 200 tokens."
503
+
504
+ The system makes decisions using *actual API tokens* —
505
+ `turns.context_tokens` back-filled from `usage.input_tokens` after
506
+ each LLM call. SQL token sums do not equal actual API counts because
507
+ projections, assembly overhead, and fidelity transforms alter the
508
+ output; and the SQL estimate (`ceil(chars / DIVISOR)`) can be 3–7×
509
+ off for structured content. **Never use SQL token sums for ceiling or
510
+ budget decisions.** See §4.5 Token Measures for the full breakdown.
511
+
512
+ **Strike system.** After each panic turn, compare
513
+ `result.assembledTokens` (materialized) with `_lastPanicTokens`
514
+ (previous turn's materialized total):
515
+ - Decreased → reset strike counter to 0.
516
+ - Same or increased → increment strikes.
517
+ - 3 consecutive strikes → return 413 to `#drainQueue` → hard-fail.
518
+
519
+ Progress (any reduction) resets the counter. The model has
520
+ unlimited turns as long as it makes progress.
521
+
522
+ **Panic success.** After each turn, if `result.assembledTokens
523
+ <= panicTarget`, the panic loop exits with 200. The retry loop
524
+ then runs with the original prompt on the now-compressed context.
525
+
526
+ **Tool set.** `resolveForLoop("panic")` includes: get, set, known,
527
+ unknown, rm, mv, cp, summarize, update. Excludes: sh, env, search,
528
+ ask_user. `noRepo: true` — no file scanning during panic.
529
+
530
+ **What the model sees.** Turn 1 receives the panic prompt from
531
+ `budget.panicPrompt()`: the assembled token count, the target, and
532
+ the exact number of tokens to free. Turn 2+ receives a continuation
533
+ prompt. The model uses `<set fidelity="archive">`, `<mv
534
+ fidelity="index">`, and similar fidelity operations to free space,
535
+ concluding with `<summarize>` when done or `<update>` while working.
442
536
 
443
537
  ---
444
538
 
@@ -609,7 +703,7 @@ Tools are presented gather → reason → act → communicate. Position in
609
703
  the list implies priority. `get` is first. `ask_user` is last. The
610
704
  order is defined in `ToolRegistry.TOOL_ORDER` and applied by
611
705
  `resolveForLoop()`. The same method handles all tool exclusions —
612
- mode restrictions, `noInteraction`, `noWeb`, `noBench` — through
706
+ mode restrictions, `noInteraction`, `noWeb`, `noProposals` — through
613
707
  one unified mechanism.
614
708
 
615
709
  ### Pattern Distribution
@@ -656,7 +750,7 @@ Termination protocol:
656
750
  - Neither + investigation tools → stall counter (RUMMY_MAX_STALLS)
657
751
  - Neither + action-only tools → healed to summarize
658
752
  - Neither + plain text → healed to summarize
659
- - Repeated commands → loop detection (RUMMY_MAX_REPETITIONS)
753
+ - Repeated commands → cycle detection (RUMMY_MIN_CYCLES, RUMMY_MAX_CYCLE_PERIOD)
660
754
  - Repeated update text → stall (RUMMY_MAX_UPDATE_REPEATS)
661
755
 
662
756
  Format normalization:
@@ -697,6 +791,54 @@ See [PLUGINS.md](PLUGINS.md) for the hedberg pattern type reference.
697
791
 
698
792
  ---
699
793
 
794
+ ## 13. Debugging: E2E and Benchmark Results
795
+
796
+ ### E2E test failures
797
+
798
+ E2E tests use a temp DB at `/tmp/rummy_test_<timestamp>_<random>.db` (cleaned up after).
799
+ On failure, `AuditClient.assertRun` calls `dumpRun`, which prints a full turn-by-turn audit
800
+ to stdout. That output is in the background task log:
801
+
802
+ ```
803
+ /tmp/claude-1000/-home-hyzen-repo-rummy-main/<session-id>/tasks/<task-id>.output
804
+ ```
805
+
806
+ If oversized, the harness saves to:
807
+ ```
808
+ /home/hyzen/.claude/projects/-home-hyzen-repo-rummy-main/<session-id>/tool-results/<id>.txt
809
+ ```
810
+
811
+ The dump format is: `scheme:state path {attributes}\n body (120 chars)` grouped by turn.
812
+
813
+ Key things to look for in a dump:
814
+ - **202**: unresolved proposals — model issued `<sh>`, `<rm>`, or `<mv>` that needs approval
815
+ - **413**: budget overflow — assembled context exceeded ceiling before LLM call
816
+ - **BudgetGuard errors**: per-tool rejections mid-turn (`Budget exceeded: N tokens requested`)
817
+ - **`<sh>` in act/panic mode**: model fell back to shell when blocked (doc/prompt gap)
818
+ - Loop sequence: look for `mode` in `instructions://system` attrs to see which loop type ran
819
+
820
+ ### MAB benchmark
821
+
822
+ Results live in `test/mab/results/<ISO-timestamp>/mab.db`. Latest run = most recent dir.
823
+
824
+ ```js
825
+ // Query a MAB result DB directly:
826
+ import { DatabaseSync } from 'node:sqlite';
827
+ const db = new DatabaseSync('test/mab/results/<timestamp>/mab.db');
828
+ db.prepare('SELECT * FROM questions').all(); // all questions + scores
829
+ db.prepare('SELECT * FROM runs').all(); // individual model runs
830
+ ```
831
+
832
+ Run with: `npm run test:mab`
833
+
834
+ ### LME benchmark
835
+
836
+ Results live in `test/lme/results/<ISO-timestamp>/lme.db`. Same structure.
837
+
838
+ Run with: `npm run test:lme`
839
+
840
+ ---
841
+
700
842
  ## 12. Configuration
701
843
 
702
844
  ```env
@@ -704,7 +846,8 @@ RUMMY_HOME=~/.rummy
704
846
  RUMMY_TOKEN_DIVISOR=2
705
847
  RUMMY_MAX_TURNS=99
706
848
  RUMMY_MAX_STALLS=3
707
- RUMMY_MAX_REPETITIONS=3
849
+ RUMMY_MIN_CYCLES=3
850
+ RUMMY_MAX_CYCLE_PERIOD=4
708
851
  RUMMY_MAX_UPDATE_REPEATS=3
709
852
  RUMMY_RETENTION_DAYS=31
710
853
  RUMMY_TEMPERATURE=0.5
@@ -65,7 +65,7 @@ CREATE TABLE IF NOT EXISTS loops (
65
65
  id INTEGER PRIMARY KEY AUTOINCREMENT
66
66
  , run_id INTEGER NOT NULL REFERENCES runs (id) ON DELETE CASCADE
67
67
  , sequence INTEGER NOT NULL CHECK (sequence >= 1)
68
- , mode TEXT NOT NULL CHECK (mode IN ('ask', 'act', 'panic'))
68
+ , mode TEXT NOT NULL CHECK (mode IN ('ask', 'act'))
69
69
  , model TEXT
70
70
  , prompt TEXT NOT NULL DEFAULT ''
71
71
  , status INTEGER NOT NULL DEFAULT 100 CHECK (status BETWEEN 100 AND 599)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@possumtech/rummy",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "description": "Relational Unknowns Memory Management Yoke",
5
5
  "keywords": [
6
6
  "llm"
@@ -43,9 +43,12 @@
43
43
  "test:live": "mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --test-concurrency=1 --test-force-exit --test-reporter=spec --test $(find test/live -name '*.test.js') 2>&1 | tee /tmp/rummy_test_diag/live_$(date +%Y%m%dT%H%M%S).log",
44
44
  "test:clean": "rm -rf test/lme/results test/mab/results test/tmp /tmp/rummy_test_diag /tmp/rummy_test_*.db /tmp/rummy_test_*.db-shm /tmp/rummy_test_*.db-wal && echo 'Test artifacts cleaned.'",
45
45
  "test:mab:get": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/download.js",
46
- "test:mab": "mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/runner.js 2>&1 | tee /tmp/rummy_test_diag/mab_$(date +%Y%m%dT%H%M%S).log",
46
+ "test:mab": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/mab_$(date +%Y%m%dT%H%M%S).log' --",
47
+ "test:grok": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --env-file-if-exists=.env.grok test/mab/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/mab_grok_$(date +%Y%m%dT%H%M%S).log' --",
48
+ "test:mab:taxonomy": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/mab/runner.js --split Conflict_Resolution --row 0 --no-questions 2>&1 | tee /tmp/rummy_test_diag/taxonomy_$(date +%Y%m%dT%H%M%S).log' --",
49
+ "test:grok:taxonomy": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test --env-file-if-exists=.env.grok test/mab/runner.js --split Conflict_Resolution --row 0 --no-questions 2>&1 | tee /tmp/rummy_test_diag/taxonomy_grok_$(date +%Y%m%dT%H%M%S).log' --",
47
50
  "test:lme:get": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/lme/download.js",
48
- "test:lme": "mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/lme/runner.js 2>&1 | tee /tmp/rummy_test_diag/lme_$(date +%Y%m%dT%H%M%S).log",
51
+ "test:lme": "bash -c 'mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.test test/lme/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/lme_$(date +%Y%m%dT%H%M%S).log' --",
49
52
  "test:mab:clean": "rm -rf test/mab/results/*/",
50
53
  "test:lme:clean": "rm -rf test/lme/results/*/",
51
54
  "test:clear": "rm -rf /tmp/rummy_test_diag /tmp/rummy_test_*.db /tmp/rummy_test_*.db-shm /tmp/rummy_test_*.db-wal /tmp/rummy-stories-*"
@@ -56,6 +59,7 @@
56
59
  "dependencies": {
57
60
  "@possumtech/sqlrite": "^3.1.0",
58
61
  "@xmldom/xmldom": "^0.9.9",
62
+ "diff": "^8.0.4",
59
63
  "htmlparser2": "^12.0.0",
60
64
  "picomatch": "^4.0.4",
61
65
  "ws": "^8.19.0",
package/service.js CHANGED
@@ -18,13 +18,13 @@ if (gitCheck.error || gitCheck.status !== 0) {
18
18
  console.warn("[RUMMY] WARNING: 'git' not found. File tracking will use manual activation only.");
19
19
  }
20
20
 
21
- let SqlRite, SocketServer, registerPlugins, createHooks, RpcRegistry;
21
+ let SqlRite, SocketServer, registerPlugins, initPlugins, createHooks, RpcRegistry;
22
22
  try {
23
23
  SqlRite = (await import("@possumtech/sqlrite")).default;
24
24
  SocketServer = (await import("./src/server/SocketServer.js")).default;
25
25
  const pluginIndex = await import("./src/plugins/index.js");
26
26
  registerPlugins = pluginIndex.registerPlugins;
27
- var initPlugins = pluginIndex.initPlugins;
27
+ initPlugins = pluginIndex.initPlugins;
28
28
  createHooks = (await import("./src/hooks/Hooks.js")).default;
29
29
  RpcRegistry = (await import("./src/server/RpcRegistry.js")).default;
30
30
  } catch (err) {
@@ -81,10 +81,12 @@ async function main() {
81
81
  if (!key.startsWith("RUMMY_MODEL_")) continue;
82
82
  const alias = key.replace("RUMMY_MODEL_", "");
83
83
  const actual = process.env[key];
84
+ const contextEnv = process.env[`RUMMY_CONTEXT_${alias}`];
85
+ const context_length = contextEnv ? Number.parseInt(contextEnv, 10) : null;
84
86
  await db.upsert_model.get({
85
87
  alias,
86
88
  actual,
87
- context_length: null,
89
+ context_length,
88
90
  });
89
91
  modelAliases.push(alias);
90
92
  }