@blockrun/franklin 3.15.15 → 3.15.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +15 -0
- package/dist/router/local-elo.js +8 -0
- package/dist/session/storage.d.ts +2 -0
- package/dist/session/storage.js +18 -0
- package/dist/stats/audit.js +7 -0
- package/dist/stats/test-fixture.d.ts +20 -0
- package/dist/stats/test-fixture.js +31 -0
- package/dist/stats/tracker.js +7 -0
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -22,6 +22,8 @@ import { recordSessionUsage } from '../stats/session-tracker.js';
|
|
|
22
22
|
import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
|
|
23
23
|
import { logger, setDebugMode } from '../logger.js';
|
|
24
24
|
import { runDataHygiene } from '../storage/hygiene.js';
|
|
25
|
+
import { isTestFixtureModel } from '../stats/test-fixture.js';
|
|
26
|
+
import { setSessionPersistenceDisabled } from '../session/storage.js';
|
|
25
27
|
import { estimateCost, OPUS_PRICING } from '../pricing.js';
|
|
26
28
|
import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
27
29
|
import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
|
|
@@ -330,6 +332,14 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
330
332
|
// Wire stderr-mirroring of log lines to the same flag the agent already
|
|
331
333
|
// uses to gate verbose console output. File writes happen regardless.
|
|
332
334
|
setDebugMode(!!config.debug);
|
|
335
|
+
// In-process tests run interactiveSession() with model="local/test*"
|
|
336
|
+
// and were creating real session files on the user's machine —
|
|
337
|
+
// verified 19 of 33 metas (57.6%) were polluted on a real install.
|
|
338
|
+
// Gate session persistence at the entry point so the rest of the
|
|
339
|
+
// loop doesn't have to thread the flag through. Tests that genuinely
|
|
340
|
+
// exercise the persistence path use a non-fixture model name like
|
|
341
|
+
// `zai/glm-5.1` (mock-server-backed) so they keep writing.
|
|
342
|
+
setSessionPersistenceDisabled(isTestFixtureModel(config.model));
|
|
333
343
|
const client = new ModelClient({
|
|
334
344
|
apiUrl: config.apiUrl,
|
|
335
345
|
chain: config.chain,
|
|
@@ -1201,6 +1211,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1201
1211
|
inputTokens,
|
|
1202
1212
|
outputTokens: usage.outputTokens,
|
|
1203
1213
|
costUsd: costEstimate,
|
|
1214
|
+
// Any failed model this turn means the model that finally
|
|
1215
|
+
// succeeded was a fallback. Without this, audit log read 0%
|
|
1216
|
+
// fallbacks across 4k entries — useless for diagnosing whether
|
|
1217
|
+
// the routing chain is healthy or hot.
|
|
1218
|
+
fallback: turnFailedModels.size > 0,
|
|
1204
1219
|
source: 'agent',
|
|
1205
1220
|
workDir,
|
|
1206
1221
|
prompt: extractLastUserPrompt(history),
|
package/dist/router/local-elo.js
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import fs from 'node:fs';
|
|
9
9
|
import path from 'node:path';
|
|
10
10
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
11
|
+
import { isTestFixtureModel } from '../stats/test-fixture.js';
|
|
11
12
|
const HISTORY_FILE = path.join(BLOCKRUN_DIR, 'router-history.jsonl');
|
|
12
13
|
const MAX_RECORDS = 2000;
|
|
13
14
|
const K_FACTOR = 32; // Elo K-factor — how much each outcome shifts the rating
|
|
@@ -15,6 +16,13 @@ const K_FACTOR = 32; // Elo K-factor — how much each outcome shifts the rating
|
|
|
15
16
|
* Record a model outcome for local learning.
|
|
16
17
|
*/
|
|
17
18
|
export function recordOutcome(category, model, outcome, toolCalls) {
|
|
19
|
+
// Defensive: same fixture-model gate as appendAudit / recordUsage.
|
|
20
|
+
// router-history.jsonl is currently clean (test runs typically have
|
|
21
|
+
// an empty `lastRoutedCategory` and the agent loop already guards
|
|
22
|
+
// against that), but a future change to category detection would
|
|
23
|
+
// immediately leak. Belt-and-braces.
|
|
24
|
+
if (isTestFixtureModel(model))
|
|
25
|
+
return;
|
|
18
26
|
try {
|
|
19
27
|
fs.mkdirSync(path.dirname(HISTORY_FILE), { recursive: true });
|
|
20
28
|
const record = { ts: Date.now(), category, model, outcome, toolCalls };
|
package/dist/session/storage.d.ts
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
* Saves conversation history as JSONL for resume capability.
|
|
4
4
|
*/
|
|
5
5
|
import type { Dialogue } from '../agent/types.js';
|
|
6
|
+
export declare function setSessionPersistenceDisabled(disabled: boolean): void;
|
|
7
|
+
export declare function isSessionPersistenceDisabled(): boolean;
|
|
6
8
|
export interface SessionMeta {
|
|
7
9
|
id: string;
|
|
8
10
|
model: string;
|
package/dist/session/storage.js
CHANGED
|
@@ -9,6 +9,20 @@ import { randomUUID } from 'node:crypto';
|
|
|
9
9
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
10
10
|
const MAX_SESSIONS = 20; // Keep last 20 sessions
|
|
11
11
|
let resolvedSessionsDir = null;
|
|
12
|
+
// When in-process tests run interactiveSession() with model="local/test*",
|
|
13
|
+
// session writes were creating real .jsonl + .meta.json files in the
|
|
14
|
+
// user's ~/.blockrun/sessions/ — verified 19 of 33 metas (57.6%) on a
|
|
15
|
+
// real machine. Toggled at session start by the agent loop based on the
|
|
16
|
+
// model name; defaults to enabled so production never accidentally goes
|
|
17
|
+
// silent. No-op writes when disabled — reads still work so resume tests
|
|
18
|
+
// can pre-seed state with their own writes if they want to.
|
|
19
|
+
let persistenceDisabled = false;
|
|
20
|
+
export function setSessionPersistenceDisabled(disabled) {
|
|
21
|
+
persistenceDisabled = disabled;
|
|
22
|
+
}
|
|
23
|
+
export function isSessionPersistenceDisabled() {
|
|
24
|
+
return persistenceDisabled;
|
|
25
|
+
}
|
|
12
26
|
function getSessionsDir() {
|
|
13
27
|
if (resolvedSessionsDir)
|
|
14
28
|
return resolvedSessionsDir;
|
|
@@ -69,6 +83,8 @@ export function createSessionId() {
|
|
|
69
83
|
* Save a message to the session transcript (append-only JSONL).
|
|
70
84
|
*/
|
|
71
85
|
export function appendToSession(sessionId, message) {
|
|
86
|
+
if (persistenceDisabled)
|
|
87
|
+
return;
|
|
72
88
|
const line = JSON.stringify(message) + '\n';
|
|
73
89
|
withWritableSessionDir(() => {
|
|
74
90
|
fs.appendFileSync(sessionPath(sessionId), line);
|
|
@@ -78,6 +94,8 @@ export function appendToSession(sessionId, message) {
|
|
|
78
94
|
* Update session metadata.
|
|
79
95
|
*/
|
|
80
96
|
export function updateSessionMeta(sessionId, meta) {
|
|
97
|
+
if (persistenceDisabled)
|
|
98
|
+
return;
|
|
81
99
|
withWritableSessionDir(() => {
|
|
82
100
|
const existing = loadSessionMeta(sessionId);
|
|
83
101
|
const updated = {
|
package/dist/stats/audit.js
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
import fs from 'node:fs';
|
|
12
12
|
import path from 'node:path';
|
|
13
13
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
14
|
+
import { isTestFixtureModel } from './test-fixture.js';
|
|
14
15
|
const AUDIT_FILE = path.join(BLOCKRUN_DIR, 'franklin-audit.jsonl');
|
|
15
16
|
const PROMPT_PREVIEW_CHARS = 240;
|
|
16
17
|
// Cap the audit log at the most recent N entries. Without this the file
|
|
@@ -26,6 +27,12 @@ const TRIM_PROBE_BYTES = MAX_AUDIT_ENTRIES * 200;
|
|
|
26
27
|
const TRIM_CHECK_INTERVAL = 200;
|
|
27
28
|
let appendsSinceCheck = 0;
|
|
28
29
|
export function appendAudit(entry) {
|
|
30
|
+
// Tests run interactiveSession() in-process with model="local/test*"
|
|
31
|
+
// and would otherwise pollute the user's real audit log. Drop the
|
|
32
|
+
// entry before any disk write rather than relying on every test to
|
|
33
|
+
// remember to redirect HOME.
|
|
34
|
+
if (isTestFixtureModel(entry.model))
|
|
35
|
+
return;
|
|
29
36
|
try {
|
|
30
37
|
fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
|
|
31
38
|
const safe = {
|
package/dist/stats/test-fixture.d.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test-fixture model detection.
|
|
3
|
+
*
|
|
4
|
+
* Tests in `test/local.mjs` run `interactiveSession()` in-process with
|
|
5
|
+
* model names like `local/test-model` and `local/test`. The agent loop
|
|
6
|
+
* persists every successful turn to `~/.blockrun/franklin-audit.jsonl`,
|
|
7
|
+
* `franklin-stats.json`, and the session store — which means tests
|
|
8
|
+
* pollute the user's real telemetry. Verified on a real machine:
|
|
9
|
+
* 2326 of 3969 audit entries (58.6%) and 84 of 1000 stats entries
|
|
10
|
+
* (8.4%) were `local/test*` test fixtures.
|
|
11
|
+
*
|
|
12
|
+
* The fix is to skip persistence when the model name follows the
|
|
13
|
+
* convention. Test prefixes are reserved (`local/test*` won't ever ship
|
|
14
|
+
* as a real model on the BlockRun gateway), so this is safe.
|
|
15
|
+
*
|
|
16
|
+
* Local LLMs that real users run (`local/llamafile`, `local/ollama`,
|
|
17
|
+
* `local/lmstudio`, etc.) are intentionally NOT filtered — only the
|
|
18
|
+
* `local/test` prefix.
|
|
19
|
+
*/
|
|
20
|
+
export declare function isTestFixtureModel(model: string | undefined | null): boolean;
|
package/dist/stats/test-fixture.js
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test-fixture model detection.
|
|
3
|
+
*
|
|
4
|
+
* Tests in `test/local.mjs` run `interactiveSession()` in-process with
|
|
5
|
+
* model names like `local/test-model` and `local/test`. The agent loop
|
|
6
|
+
* persists every successful turn to `~/.blockrun/franklin-audit.jsonl`,
|
|
7
|
+
* `franklin-stats.json`, and the session store — which means tests
|
|
8
|
+
* pollute the user's real telemetry. Verified on a real machine:
|
|
9
|
+
* 2326 of 3969 audit entries (58.6%) and 84 of 1000 stats entries
|
|
10
|
+
* (8.4%) were `local/test*` test fixtures.
|
|
11
|
+
*
|
|
12
|
+
* The fix is to skip persistence when the model name follows the
|
|
13
|
+
* convention. Test prefixes are reserved (`local/test*` won't ever ship
|
|
14
|
+
* as a real model on the BlockRun gateway), so this is safe.
|
|
15
|
+
*
|
|
16
|
+
* Local LLMs that real users run (`local/llamafile`, `local/ollama`,
|
|
17
|
+
* `local/lmstudio`, etc.) are intentionally NOT filtered — only the
|
|
18
|
+
* `local/test` prefix.
|
|
19
|
+
*/
|
|
20
|
+
const TEST_FIXTURE_PREFIXES = [
|
|
21
|
+
'local/test',
|
|
22
|
+
];
|
|
23
|
+
export function isTestFixtureModel(model) {
|
|
24
|
+
if (!model)
|
|
25
|
+
return false;
|
|
26
|
+
for (const prefix of TEST_FIXTURE_PREFIXES) {
|
|
27
|
+
if (model.startsWith(prefix))
|
|
28
|
+
return true;
|
|
29
|
+
}
|
|
30
|
+
return false;
|
|
31
|
+
}
|
package/dist/stats/tracker.js
CHANGED
|
@@ -7,6 +7,7 @@ import path from 'node:path';
|
|
|
7
7
|
import os from 'node:os';
|
|
8
8
|
import { OPUS_PRICING } from '../pricing.js';
|
|
9
9
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
10
|
+
import { isTestFixtureModel } from './test-fixture.js';
|
|
10
11
|
let resolvedStatsFile = null;
|
|
11
12
|
function preferredStatsFile() {
|
|
12
13
|
return path.join(BLOCKRUN_DIR, 'franklin-stats.json');
|
|
@@ -156,6 +157,12 @@ export function flushStats() {
|
|
|
156
157
|
* Record a completed request for stats tracking
|
|
157
158
|
*/
|
|
158
159
|
export function recordUsage(model, inputTokens, outputTokens, costUsd, latencyMs, fallback = false) {
|
|
160
|
+
// Same rationale as appendAudit — tests run in-process with
|
|
161
|
+
// local/test* models and would otherwise mix into franklin-stats.json
|
|
162
|
+
// history (verified: 8.4% of a real user's 1000-entry history was
|
|
163
|
+
// test fixtures before this gate).
|
|
164
|
+
if (isTestFixtureModel(model))
|
|
165
|
+
return;
|
|
159
166
|
const stats = getCachedStats();
|
|
160
167
|
const now = Date.now();
|
|
161
168
|
// Update totals
|
package/package.json
CHANGED