@blockrun/franklin 3.15.43 → 3.15.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brain/extract.js +5 -3
- package/dist/brain/store.d.ts +15 -0
- package/dist/brain/store.js +71 -0
- package/dist/storage/hygiene.d.ts +1 -0
- package/dist/storage/hygiene.js +6 -0
- package/package.json +1 -1
package/dist/brain/extract.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Franklin Brain — entity extraction from session traces.
|
|
3
3
|
* Uses cheap model to detect people, projects, companies from conversation.
|
|
4
4
|
*/
|
|
5
|
-
import { loadEntities, saveEntities, upsertEntity, addObservation, upsertRelation, } from './store.js';
|
|
5
|
+
import { loadEntities, saveEntities, upsertEntity, addObservation, upsertRelation, isJunkEntityName, } from './store.js';
|
|
6
6
|
const EXTRACTION_MODELS = [
|
|
7
7
|
'google/gemini-2.5-flash-lite',
|
|
8
8
|
'google/gemini-2.5-flash',
|
|
@@ -25,8 +25,9 @@ Also extract relationships between entities:
|
|
|
25
25
|
Rules:
|
|
26
26
|
- Only extract entities with CLEAR evidence in the conversation.
|
|
27
27
|
- Do NOT extract the AI agent itself or generic concepts ("TypeScript", "JavaScript").
|
|
28
|
+
- Do NOT extract programmatic strings that happen to appear in the transcript: tool permission patterns like "Bash(git commit:*)", object URIs (gs://, s3://, file://), glob patterns (paths with **), task IDs (t_xxx_xxx), session IDs, or hashes/UUIDs.
|
|
28
29
|
- DO extract specific people, specific projects, specific companies, specific products.
|
|
29
|
-
- Observations
|
|
30
|
+
- Observations must be concrete facts about the entity that would be useful in a future conversation. Do NOT include tautologies that restate the entity name ("This is a task ID for an ETL process") or generic statements that apply to any instance of the type.
|
|
30
31
|
- If no entities are found, return empty arrays.
|
|
31
32
|
|
|
32
33
|
Respond with ONLY a JSON object (no markdown fences):
|
|
@@ -69,7 +70,8 @@ function parseExtraction(raw) {
|
|
|
69
70
|
const parsed = JSON.parse(cleaned);
|
|
70
71
|
const entities = (parsed.entities || [])
|
|
71
72
|
.filter((e) => typeof e.name === 'string' && e.name.length > 1 &&
|
|
72
|
-
typeof e.type === 'string' && VALID_TYPES.has(e.type)
|
|
73
|
+
typeof e.type === 'string' && VALID_TYPES.has(e.type) &&
|
|
74
|
+
!isJunkEntityName(e.name))
|
|
73
75
|
.map((e) => ({
|
|
74
76
|
name: e.name.slice(0, 100),
|
|
75
77
|
type: e.type,
|
package/dist/brain/store.d.ts
CHANGED
|
@@ -3,6 +3,21 @@
|
|
|
3
3
|
* All in-memory with JSONL persistence. No database.
|
|
4
4
|
*/
|
|
5
5
|
import type { Entity, EntityType, Observation, Relation } from './types.js';
|
|
6
|
+
export declare function isJunkEntityName(name: string): boolean;
|
|
7
|
+
/**
|
|
8
|
+
* Remove existing junk entities (and their observations + relations)
|
|
9
|
+
* from disk. Called once per session start by runDataHygiene to clear
|
|
10
|
+
* accumulated low-quality extractions from earlier brain runs that
|
|
11
|
+
* predate the post-extraction filter.
|
|
12
|
+
*
|
|
13
|
+
* Returns counts so the hygiene report can surface the cleanup —
|
|
14
|
+
* silent purges are hard to verify.
|
|
15
|
+
*/
|
|
16
|
+
export declare function pruneJunkBrainEntries(): {
|
|
17
|
+
entitiesRemoved: number;
|
|
18
|
+
observationsRemoved: number;
|
|
19
|
+
relationsRemoved: number;
|
|
20
|
+
};
|
|
6
21
|
export declare function loadEntities(): Entity[];
|
|
7
22
|
export declare function saveEntities(entities: Entity[]): void;
|
|
8
23
|
/**
|
package/dist/brain/store.js
CHANGED
|
@@ -25,6 +25,77 @@ function uid() { return crypto.randomBytes(8).toString('hex'); }
|
|
|
25
25
|
function ensureDir() {
|
|
26
26
|
fs.mkdirSync(BRAIN_DIR, { recursive: true });
|
|
27
27
|
}
|
|
28
|
+
// Names the extractor model emits but that aren't real entities — they're
|
|
29
|
+
// programmatic strings that happened to be in the transcript. Verified
|
|
30
|
+
// 2026-05-04 on a real machine: 7 of 44 entities (16%) were junk by these
|
|
31
|
+
// patterns — `Bash(git commit:*)` (tool permission), `gs://bucket/path/**`
|
|
32
|
+
// (object URI + glob), `t_morkaf83_f03a0b10` (Franklin task runId tagged
|
|
33
|
+
// as "project"). The vacuous observations they then accumulated ("This is
|
|
34
|
+
// a task ID for an ETL process") leaked back into context on every later
|
|
35
|
+
// session. Keep the patterns conservative — anything that looks
|
|
36
|
+
// programmatic rather than nameable.
|
|
37
|
+
const JUNK_ENTITY_NAME_PATTERNS = [
|
|
38
|
+
/^[A-Z][a-zA-Z]*\(.*\)$/, // Tool-permission shape, e.g. Bash(...), Edit(...)
|
|
39
|
+
/^(?:gs|s3|file|https?):\/\//i, // URIs
|
|
40
|
+
/\*\*?(?:\/|$)/, // Glob patterns
|
|
41
|
+
/^t_[a-z0-9]+_[a-z0-9]{6,}$/i, // Franklin task runIds
|
|
42
|
+
/^run_[a-z0-9_-]+$/i, // Generic run/job ids
|
|
43
|
+
/^session-\d{4}-/, // Session ids
|
|
44
|
+
/^[0-9a-f]{16,}$/, // Hex hashes / commit shas / uuids without dashes
|
|
45
|
+
];
|
|
46
|
+
export function isJunkEntityName(name) {
|
|
47
|
+
const trimmed = name.trim();
|
|
48
|
+
if (trimmed.length < 2)
|
|
49
|
+
return true;
|
|
50
|
+
return JUNK_ENTITY_NAME_PATTERNS.some(rx => rx.test(trimmed));
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Remove existing junk entities (and their observations + relations)
|
|
54
|
+
* from disk. Called once per session start by runDataHygiene to clear
|
|
55
|
+
* accumulated low-quality extractions from earlier brain runs that
|
|
56
|
+
* predate the post-extraction filter.
|
|
57
|
+
*
|
|
58
|
+
* Returns counts so the hygiene report can surface the cleanup —
|
|
59
|
+
* silent purges are hard to verify.
|
|
60
|
+
*/
|
|
61
|
+
export function pruneJunkBrainEntries() {
|
|
62
|
+
const result = { entitiesRemoved: 0, observationsRemoved: 0, relationsRemoved: 0 };
|
|
63
|
+
let entities;
|
|
64
|
+
try {
|
|
65
|
+
entities = loadEntities();
|
|
66
|
+
}
|
|
67
|
+
catch {
|
|
68
|
+
return result;
|
|
69
|
+
}
|
|
70
|
+
if (entities.length === 0)
|
|
71
|
+
return result;
|
|
72
|
+
const junkIds = new Set();
|
|
73
|
+
const surviving = [];
|
|
74
|
+
for (const e of entities) {
|
|
75
|
+
if (isJunkEntityName(e.name)) {
|
|
76
|
+
junkIds.add(e.id);
|
|
77
|
+
result.entitiesRemoved++;
|
|
78
|
+
}
|
|
79
|
+
else {
|
|
80
|
+
surviving.push(e);
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
if (junkIds.size === 0)
|
|
84
|
+
return result;
|
|
85
|
+
// Drop observations + relations referencing the junk entities.
|
|
86
|
+
const obs = loadJsonl(OBSERVATIONS_FILE);
|
|
87
|
+
const survivingObs = obs.filter(o => !junkIds.has(o.entity_id));
|
|
88
|
+
result.observationsRemoved = obs.length - survivingObs.length;
|
|
89
|
+
const rels = loadJsonl(RELATIONS_FILE);
|
|
90
|
+
const survivingRels = rels.filter(r => !junkIds.has(r.from_id) && !junkIds.has(r.to_id));
|
|
91
|
+
result.relationsRemoved = rels.length - survivingRels.length;
|
|
92
|
+
// Atomic rewrites — saveJsonl uses tmp + rename so a crash mid-purge
|
|
93
|
+
// leaves the prior state intact.
|
|
94
|
+
saveEntities(surviving);
|
|
95
|
+
saveJsonl(OBSERVATIONS_FILE, survivingObs);
|
|
96
|
+
saveJsonl(RELATIONS_FILE, survivingRels);
|
|
97
|
+
return result;
|
|
98
|
+
}
|
|
28
99
|
// ─── Generic JSONL helpers ────────────────────────────────────────────────
|
|
29
100
|
function loadJsonl(file) {
|
|
30
101
|
try {
|
package/dist/storage/hygiene.js
CHANGED
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
import fs from 'node:fs';
|
|
25
25
|
import path from 'node:path';
|
|
26
26
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
27
|
+
import { pruneJunkBrainEntries } from '../brain/store.js';
|
|
27
28
|
// Retention knobs. Tuned conservatively — a power user with 50+ calls/day
|
|
28
29
|
// for 30 days still fits in DATA_DIR_MAX_FILES, and 5000 cost-log entries
|
|
29
30
|
// covers months of normal use without truncating the running totals.
|
|
@@ -49,6 +50,7 @@ const ZERO_REPORT = {
|
|
|
49
50
|
dataFilesTrimmed: 0,
|
|
50
51
|
costLogRowsTrimmed: 0,
|
|
51
52
|
orphanToolResultsRemoved: 0,
|
|
53
|
+
brainJunkEntitiesRemoved: 0,
|
|
52
54
|
};
|
|
53
55
|
/**
|
|
54
56
|
* Top-level entry. Call once at agent session start. Catches its own
|
|
@@ -75,6 +77,10 @@ export function runDataHygiene() {
|
|
|
75
77
|
report.orphanToolResultsRemoved = sweepOrphanToolResults();
|
|
76
78
|
}
|
|
77
79
|
catch { /* best effort */ }
|
|
80
|
+
try {
|
|
81
|
+
report.brainJunkEntitiesRemoved = pruneJunkBrainEntries().entitiesRemoved;
|
|
82
|
+
}
|
|
83
|
+
catch { /* best effort */ }
|
|
78
84
|
return report;
|
|
79
85
|
}
|
|
80
86
|
function trimDataDir() {
|
package/package.json
CHANGED