@blockrun/franklin 3.15.14 → 3.15.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/loop.js +7 -0
- package/dist/session/storage.js +36 -0
- package/dist/stats/audit.js +7 -0
- package/dist/stats/test-fixture.d.ts +20 -0
- package/dist/stats/test-fixture.js +31 -0
- package/dist/stats/tracker.js +7 -0
- package/dist/storage/hygiene.d.ts +28 -0
- package/dist/storage/hygiene.js +134 -0
- package/package.json +1 -1
package/dist/agent/loop.js
CHANGED
|
@@ -21,6 +21,7 @@ import { recordUsage } from '../stats/tracker.js';
|
|
|
21
21
|
import { recordSessionUsage } from '../stats/session-tracker.js';
|
|
22
22
|
import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
|
|
23
23
|
import { logger, setDebugMode } from '../logger.js';
|
|
24
|
+
import { runDataHygiene } from '../storage/hygiene.js';
|
|
24
25
|
import { estimateCost, OPUS_PRICING } from '../pricing.js';
|
|
25
26
|
import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
26
27
|
import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
|
|
@@ -437,6 +438,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
437
438
|
persistSessionMeta();
|
|
438
439
|
};
|
|
439
440
|
pruneOldSessions(sessionId); // Cleanup old sessions on start, protect current
|
|
441
|
+
runDataHygiene(); // Trim ~/.blockrun/data + cost_log + remove legacy files
|
|
440
442
|
persistSessionMeta();
|
|
441
443
|
// Flush session meta on SIGINT/SIGTERM so mid-stream Ctrl+C doesn't
|
|
442
444
|
// leave a stale .meta.json (wrong turnCount/messageCount/cost).
|
|
@@ -1199,6 +1201,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1199
1201
|
inputTokens,
|
|
1200
1202
|
outputTokens: usage.outputTokens,
|
|
1201
1203
|
costUsd: costEstimate,
|
|
1204
|
+
// Any failed model this turn means the model that finally
|
|
1205
|
+
// succeeded was a fallback. Without this, audit log read 0%
|
|
1206
|
+
// fallbacks across 4k entries — useless for diagnosing whether
|
|
1207
|
+
// the routing chain is healthy or hot.
|
|
1208
|
+
fallback: turnFailedModels.size > 0,
|
|
1202
1209
|
source: 'agent',
|
|
1203
1210
|
workDir,
|
|
1204
1211
|
prompt: extractLastUserPrompt(history),
|
package/dist/session/storage.js
CHANGED
|
@@ -233,4 +233,40 @@ export function pruneOldSessions(activeSessionId) {
|
|
|
233
233
|
catch { /* ok */ }
|
|
234
234
|
}
|
|
235
235
|
}
|
|
236
|
+
// Sweep orphan jsonl files (left over from a session-id format change in
|
|
237
|
+
// earlier releases — meta deleted, jsonl stranded). The pre-3.x naming
|
|
238
|
+
// didn't include the random suffix, so the meta-driven prune above has
|
|
239
|
+
// no record of them and they accumulate forever. Verified on a real
|
|
240
|
+
// user machine: 21 metas, 121 jsonl, 100 orphans = ~1 MB stranded.
|
|
241
|
+
pruneOrphanJsonlFiles(activeSessionId);
|
|
242
|
+
}
|
|
243
|
+
/**
 * Remove session transcripts that have no metadata partner.
 *
 * Lists the sessions directory once, collects the ids of every
 * `<id>.meta.json`, then unlinks each `<id>.jsonl` whose id is neither in
 * that set nor equal to `activeSessionId`. Every filesystem failure is
 * swallowed: this is best-effort cleanup.
 *
 * @param {string} activeSessionId - id of the running session; its jsonl is never deleted.
 * @returns {void}
 */
function pruneOrphanJsonlFiles(activeSessionId) {
    const META_SUFFIX = '.meta.json';
    const LOG_SUFFIX = '.jsonl';
    const sessionsDir = getSessionsDir();
    let names;
    try {
        names = fs.readdirSync(sessionsDir);
    }
    catch {
        // Directory missing or unreadable — nothing to prune.
        return;
    }
    // Ids that still have a meta file; their transcripts are not orphans.
    const idsWithMeta = new Set(names
        .filter((name) => name.endsWith(META_SUFFIX))
        .map((name) => name.slice(0, -META_SUFFIX.length)));
    const orphans = names.filter((name) => {
        if (!name.endsWith(LOG_SUFFIX)) {
            return false;
        }
        const sessionId = name.slice(0, -LOG_SUFFIX.length);
        return sessionId !== activeSessionId && !idsWithMeta.has(sessionId);
    });
    for (const orphan of orphans) {
        try {
            fs.unlinkSync(path.join(sessionsDir, orphan));
        }
        catch { /* ok */ }
    }
}
|
package/dist/stats/audit.js
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
import fs from 'node:fs';
|
|
12
12
|
import path from 'node:path';
|
|
13
13
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
14
|
+
import { isTestFixtureModel } from './test-fixture.js';
|
|
14
15
|
const AUDIT_FILE = path.join(BLOCKRUN_DIR, 'franklin-audit.jsonl');
|
|
15
16
|
const PROMPT_PREVIEW_CHARS = 240;
|
|
16
17
|
// Cap the audit log at the most recent N entries. Without this the file
|
|
@@ -26,6 +27,12 @@ const TRIM_PROBE_BYTES = MAX_AUDIT_ENTRIES * 200;
|
|
|
26
27
|
const TRIM_CHECK_INTERVAL = 200;
|
|
27
28
|
let appendsSinceCheck = 0;
|
|
28
29
|
export function appendAudit(entry) {
|
|
30
|
+
// Tests run interactiveSession() in-process with model="local/test*"
|
|
31
|
+
// and would otherwise pollute the user's real audit log. Drop the
|
|
32
|
+
// entry before any disk write rather than relying on every test to
|
|
33
|
+
// remember to redirect HOME.
|
|
34
|
+
if (isTestFixtureModel(entry.model))
|
|
35
|
+
return;
|
|
29
36
|
try {
|
|
30
37
|
fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
|
|
31
38
|
const safe = {
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test-fixture model detection.
|
|
3
|
+
*
|
|
4
|
+
* Tests in `test/local.mjs` run `interactiveSession()` in-process with
|
|
5
|
+
* model names like `local/test-model` and `local/test`. The agent loop
|
|
6
|
+
* persists every successful turn to `~/.blockrun/franklin-audit.jsonl`,
|
|
7
|
+
* `franklin-stats.json`, and the session store — which means tests
|
|
8
|
+
* pollute the user's real telemetry. Verified on a real machine:
|
|
9
|
+
* 2326 of 3969 audit entries (58.6%) and 84 of 1000 stats entries
|
|
10
|
+
* (8.4%) were `local/test*` test fixtures.
|
|
11
|
+
*
|
|
12
|
+
* The fix is to skip persistence when the model name follows the
|
|
13
|
+
* convention. Test prefixes are reserved (`local/test*` won't ever ship
|
|
14
|
+
* as a real model on the BlockRun gateway), so this is safe.
|
|
15
|
+
*
|
|
16
|
+
* Local LLMs that real users run (`local/llamafile`, `local/ollama`,
|
|
17
|
+
* `local/lmstudio`, etc.) are intentionally NOT filtered — only the
|
|
18
|
+
* `local/test` prefix.
|
|
19
|
+
*/
|
|
20
|
+
export declare function isTestFixtureModel(model: string | undefined | null): boolean;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test-fixture model detection.
|
|
3
|
+
*
|
|
4
|
+
* Tests in `test/local.mjs` run `interactiveSession()` in-process with
|
|
5
|
+
* model names like `local/test-model` and `local/test`. The agent loop
|
|
6
|
+
* persists every successful turn to `~/.blockrun/franklin-audit.jsonl`,
|
|
7
|
+
* `franklin-stats.json`, and the session store — which means tests
|
|
8
|
+
* pollute the user's real telemetry. Verified on a real machine:
|
|
9
|
+
* 2326 of 3969 audit entries (58.6%) and 84 of 1000 stats entries
|
|
10
|
+
* (8.4%) were `local/test*` test fixtures.
|
|
11
|
+
*
|
|
12
|
+
* The fix is to skip persistence when the model name follows the
|
|
13
|
+
* convention. Test prefixes are reserved (`local/test*` won't ever ship
|
|
14
|
+
* as a real model on the BlockRun gateway), so this is safe.
|
|
15
|
+
*
|
|
16
|
+
* Local LLMs that real users run (`local/llamafile`, `local/ollama`,
|
|
17
|
+
* `local/lmstudio`, etc.) are intentionally NOT filtered — only the
|
|
18
|
+
* `local/test` prefix.
|
|
19
|
+
*/
|
|
20
|
+
// Model-name prefixes reserved for in-process test fixtures.
const TEST_FIXTURE_PREFIXES = [
    'local/test',
];
/**
 * Report whether `model` names a test fixture (starts with one of
 * TEST_FIXTURE_PREFIXES).
 *
 * Anything that is not a non-empty string — null, undefined, '', or a
 * value of the wrong type — yields false instead of throwing. Callers use
 * this as an unguarded gate in front of best-effort persistence, so a
 * malformed model value must not turn into a TypeError here.
 *
 * @param {string | undefined | null} model - model identifier to classify.
 * @returns {boolean} true only when `model` is a string with a fixture prefix.
 */
export function isTestFixtureModel(model) {
    if (typeof model !== 'string' || model.length === 0) {
        return false;
    }
    return TEST_FIXTURE_PREFIXES.some((prefix) => model.startsWith(prefix));
}
|
package/dist/stats/tracker.js
CHANGED
|
@@ -7,6 +7,7 @@ import path from 'node:path';
|
|
|
7
7
|
import os from 'node:os';
|
|
8
8
|
import { OPUS_PRICING } from '../pricing.js';
|
|
9
9
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
10
|
+
import { isTestFixtureModel } from './test-fixture.js';
|
|
10
11
|
let resolvedStatsFile = null;
|
|
11
12
|
function preferredStatsFile() {
|
|
12
13
|
return path.join(BLOCKRUN_DIR, 'franklin-stats.json');
|
|
@@ -156,6 +157,12 @@ export function flushStats() {
|
|
|
156
157
|
* Record a completed request for stats tracking
|
|
157
158
|
*/
|
|
158
159
|
export function recordUsage(model, inputTokens, outputTokens, costUsd, latencyMs, fallback = false) {
|
|
160
|
+
// Same rationale as appendAudit — tests run in-process with
|
|
161
|
+
// local/test* models and would otherwise mix into franklin-stats.json
|
|
162
|
+
// history (verified: 8.4% of a real user's 1000-entry history was
|
|
163
|
+
// test fixtures before this gate).
|
|
164
|
+
if (isTestFixtureModel(model))
|
|
165
|
+
return;
|
|
159
166
|
const stats = getCachedStats();
|
|
160
167
|
const now = Date.now();
|
|
161
168
|
// Update totals
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Data hygiene for ~/.blockrun/.
|
|
3
|
+
*
|
|
4
|
+
* Several files in this directory are written by the @blockrun/llm SDK or
|
|
5
|
+
* by older Franklin versions that didn't ship retention. Without periodic
|
|
6
|
+
* trimming they grow unbounded:
|
|
7
|
+
*
|
|
8
|
+
* - ~/.blockrun/data/ — every paid API call gets a JSON blob
|
|
9
|
+
* dropped here for forensic replay. SDK
|
|
10
|
+
* has no rotation; verified 5.7 MB across
|
|
11
|
+
* ~2 months of light use, will be 30 MB
|
|
12
|
+
* by year-end and slow `franklin insights`.
|
|
13
|
+
* - ~/.blockrun/cost_log.jsonl — append-only ledger of every paid call's
|
|
14
|
+
* cost. Same SDK; no rotation.
|
|
15
|
+
* - brcc-debug.log / brcc-stats.json / 0xcode-stats.json
|
|
16
|
+
* — legacy stats / log files from earlier
|
|
17
|
+
* product names. Not written by any
|
|
18
|
+
* current code path.
|
|
19
|
+
*
|
|
20
|
+
* Hygiene runs once per session start (cheap — just stat() + filter +
|
|
21
|
+
* unlinkSync). Best-effort: every operation is wrapped so a single failure
|
|
22
|
+
* never breaks agent boot.
|
|
23
|
+
*/
|
|
24
|
+
/**
|
|
25
|
+
* Top-level entry. Call once at agent session start. Catches its own
|
|
26
|
+
* errors so a bad disk never blocks startup.
|
|
27
|
+
*/
|
|
28
|
+
export declare function runDataHygiene(): void;
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Data hygiene for ~/.blockrun/.
|
|
3
|
+
*
|
|
4
|
+
* Several files in this directory are written by the @blockrun/llm SDK or
|
|
5
|
+
* by older Franklin versions that didn't ship retention. Without periodic
|
|
6
|
+
* trimming they grow unbounded:
|
|
7
|
+
*
|
|
8
|
+
* - ~/.blockrun/data/ — every paid API call gets a JSON blob
|
|
9
|
+
* dropped here for forensic replay. SDK
|
|
10
|
+
* has no rotation; verified 5.7 MB across
|
|
11
|
+
* ~2 months of light use, will be 30 MB
|
|
12
|
+
* by year-end and slow `franklin insights`.
|
|
13
|
+
* - ~/.blockrun/cost_log.jsonl — append-only ledger of every paid call's
|
|
14
|
+
* cost. Same SDK; no rotation.
|
|
15
|
+
* - brcc-debug.log / brcc-stats.json / 0xcode-stats.json
|
|
16
|
+
* — legacy stats / log files from earlier
|
|
17
|
+
* product names. Not written by any
|
|
18
|
+
* current code path.
|
|
19
|
+
*
|
|
20
|
+
* Hygiene runs once per session start (cheap — just stat() + filter +
|
|
21
|
+
* unlinkSync). Best-effort: every operation is wrapped so a single failure
|
|
22
|
+
* never breaks agent boot.
|
|
23
|
+
*/
|
|
24
|
+
import fs from 'node:fs';
|
|
25
|
+
import path from 'node:path';
|
|
26
|
+
import { BLOCKRUN_DIR } from '../config.js';
|
|
27
|
+
// Retention knobs. Tuned conservatively — a power user with 50+ calls/day
|
|
28
|
+
// for 30 days still fits in DATA_DIR_MAX_FILES, and 5000 cost-log entries
|
|
29
|
+
// covers months of normal use without truncating the running totals.
|
|
30
|
+
const DATA_DIR_MAX_AGE_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
|
|
31
|
+
const DATA_DIR_MAX_FILES = 2000;
|
|
32
|
+
const COST_LOG_MAX_ENTRIES = 5000;
|
|
33
|
+
// Cost log entries are tiny (~60 bytes — ts, endpoint, cost only). 40 bytes
|
|
34
|
+
// per entry keeps the probe under the real average so a slightly-overlong
|
|
35
|
+
// file always triggers the rescan rather than silently growing past cap.
|
|
36
|
+
const COST_LOG_PROBE_BYTES = COST_LOG_MAX_ENTRIES * 40;
|
|
37
|
+
// Legacy file names from earlier product iterations. All live directly in
|
|
38
|
+
// BLOCKRUN_DIR. No current code path writes them, so they are safe to remove.
|
|
39
|
+
// `runcode-debug.log` is also handled by logs.ts's migration path; we
|
|
40
|
+
// delete the residual after migration in case it lingered.
|
|
41
|
+
const LEGACY_FILENAMES = [
|
|
42
|
+
'brcc-debug.log',
|
|
43
|
+
'brcc-stats.json',
|
|
44
|
+
'0xcode-stats.json',
|
|
45
|
+
'runcode-debug.log',
|
|
46
|
+
];
|
|
47
|
+
/**
 * Top-level hygiene entry point; call once at agent session start.
 *
 * Runs the three cleanup steps in a fixed order — data dir trim, cost-log
 * trim, legacy-file removal. Each step is isolated in its own try/catch,
 * so one failing step neither stops the others nor throws to the caller.
 *
 * @returns {void}
 */
export function runDataHygiene() {
    const steps = [trimDataDir, trimCostLog, removeLegacyFiles];
    for (const step of steps) {
        try {
            step();
        }
        catch { /* best effort */ }
    }
}
|
|
65
|
+
/**
 * Trim `<BLOCKRUN_DIR>/data` by age, then by count.
 *
 * Every regular file is stat'ed once. Files whose mtime is older than
 * DATA_DIR_MAX_AGE_MS are unlinked; if more than DATA_DIR_MAX_FILES of the
 * remaining files are left, the oldest are unlinked until the cap is met.
 * Entries that cannot be stat'ed or are not regular files are ignored, and
 * individual unlink failures are swallowed.
 *
 * @returns {void}
 */
function trimDataDir() {
    const dataDir = path.join(BLOCKRUN_DIR, 'data');
    if (!fs.existsSync(dataDir)) {
        return;
    }
    const names = fs.readdirSync(dataDir);
    if (names.length === 0) {
        return;
    }
    const cutoff = Date.now() - DATA_DIR_MAX_AGE_MS;
    const removeQuietly = (name) => {
        try {
            fs.unlinkSync(path.join(dataDir, name));
        }
        catch { /* ok */ }
    };
    // Classify every regular file as expired or fresh before deleting anything.
    const expired = [];
    const fresh = [];
    for (const name of names) {
        let info;
        try {
            info = fs.statSync(path.join(dataDir, name));
            if (!info.isFile()) {
                continue;
            }
        }
        catch {
            // Best effort — skip unreadable entries.
            continue;
        }
        const record = { name, mtime: info.mtimeMs };
        if (record.mtime < cutoff) {
            expired.push(record);
        }
        else if (record.mtime >= cutoff) {
            fresh.push(record);
        }
    }
    // Pass 1: age-based delete.
    for (const record of expired) {
        removeQuietly(record.name);
    }
    // Pass 2: file-count cap — drop the oldest fresh files beyond the limit.
    fresh.sort((a, b) => a.mtime - b.mtime); // oldest first
    const excess = fresh.length - DATA_DIR_MAX_FILES;
    for (let i = 0; i < excess; i++) {
        removeQuietly(fresh[i].name);
    }
}
|
|
110
|
+
/**
 * Cap `<BLOCKRUN_DIR>/cost_log.jsonl` at its most recent
 * COST_LOG_MAX_ENTRIES non-empty lines.
 *
 * Returns without touching the file when it is missing, smaller than
 * COST_LOG_PROBE_BYTES, or already within the entry cap. Otherwise the
 * retained tail is written to a temp file beside the log and renamed over
 * it, so an interrupted or failed write leaves the original ledger intact
 * rather than truncated.
 *
 * @returns {void}
 * @throws {Error} any fs error from stat/read/write/rename; the temp file
 *   is removed first.
 */
function trimCostLog() {
    const file = path.join(BLOCKRUN_DIR, 'cost_log.jsonl');
    if (!fs.existsSync(file)) {
        return;
    }
    // Cheap probe — skip the full read+rewrite when the file is small.
    const stat = fs.statSync(file);
    if (stat.size < COST_LOG_PROBE_BYTES) {
        return;
    }
    const lines = fs.readFileSync(file, 'utf-8').split('\n').filter(Boolean);
    if (lines.length <= COST_LOG_MAX_ENTRIES) {
        return;
    }
    const kept = lines.slice(lines.length - COST_LOG_MAX_ENTRIES);
    // Same directory as the target so the rename stays on one filesystem;
    // the pid keeps concurrent processes from sharing a temp file.
    const tmp = `${file}.${process.pid}.tmp`;
    try {
        // Carry over the original permission bits instead of umask defaults.
        fs.writeFileSync(tmp, kept.join('\n') + '\n', { mode: stat.mode & 0o777 });
        fs.renameSync(tmp, file);
    }
    catch (err) {
        try {
            fs.unlinkSync(tmp);
        }
        catch { /* temp file may never have been created */ }
        throw err;
    }
}
|
|
124
|
+
/**
 * Delete each file named in LEGACY_FILENAMES from BLOCKRUN_DIR if present.
 * Unlink failures are swallowed so one stuck file does not stop the rest.
 *
 * @returns {void}
 */
function removeLegacyFiles() {
    for (const legacyName of LEGACY_FILENAMES) {
        const target = path.join(BLOCKRUN_DIR, legacyName);
        if (fs.existsSync(target)) {
            try {
                fs.unlinkSync(target);
            }
            catch { /* ok */ }
        }
    }
}
|
package/package.json
CHANGED