@blockrun/franklin 3.15.43 → 3.15.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brain/extract.js +5 -3
- package/dist/brain/store.d.ts +15 -0
- package/dist/brain/store.js +71 -0
- package/dist/storage/hygiene.d.ts +1 -0
- package/dist/storage/hygiene.js +6 -0
- package/dist/tools/imagegen.js +23 -1
- package/dist/tools/videogen.js +23 -2
- package/package.json +1 -1
package/dist/brain/extract.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Franklin Brain — entity extraction from session traces.
|
|
3
3
|
* Uses cheap model to detect people, projects, companies from conversation.
|
|
4
4
|
*/
|
|
5
|
-
import { loadEntities, saveEntities, upsertEntity, addObservation, upsertRelation, } from './store.js';
|
|
5
|
+
import { loadEntities, saveEntities, upsertEntity, addObservation, upsertRelation, isJunkEntityName, } from './store.js';
|
|
6
6
|
const EXTRACTION_MODELS = [
|
|
7
7
|
'google/gemini-2.5-flash-lite',
|
|
8
8
|
'google/gemini-2.5-flash',
|
|
@@ -25,8 +25,9 @@ Also extract relationships between entities:
|
|
|
25
25
|
Rules:
|
|
26
26
|
- Only extract entities with CLEAR evidence in the conversation.
|
|
27
27
|
- Do NOT extract the AI agent itself or generic concepts ("TypeScript", "JavaScript").
|
|
28
|
+
- Do NOT extract programmatic strings that happen to appear in the transcript: tool permission patterns like "Bash(git commit:*)", object URIs (gs://, s3://, file://), glob patterns (paths with **), task IDs (t_xxx_xxx), session IDs, or hashes/UUIDs.
|
|
28
29
|
- DO extract specific people, specific projects, specific companies, specific products.
|
|
29
|
-
- Observations
|
|
30
|
+
- Observations must be concrete facts about the entity that would be useful in a future conversation. Do NOT include tautologies that restate the entity name ("This is a task ID for an ETL process") or generic statements that apply to any instance of the type.
|
|
30
31
|
- If no entities are found, return empty arrays.
|
|
31
32
|
|
|
32
33
|
Respond with ONLY a JSON object (no markdown fences):
|
|
@@ -69,7 +70,8 @@ function parseExtraction(raw) {
|
|
|
69
70
|
const parsed = JSON.parse(cleaned);
|
|
70
71
|
const entities = (parsed.entities || [])
|
|
71
72
|
.filter((e) => typeof e.name === 'string' && e.name.length > 1 &&
|
|
72
|
-
typeof e.type === 'string' && VALID_TYPES.has(e.type)
|
|
73
|
+
typeof e.type === 'string' && VALID_TYPES.has(e.type) &&
|
|
74
|
+
!isJunkEntityName(e.name))
|
|
73
75
|
.map((e) => ({
|
|
74
76
|
name: e.name.slice(0, 100),
|
|
75
77
|
type: e.type,
|
package/dist/brain/store.d.ts
CHANGED
|
@@ -3,6 +3,21 @@
|
|
|
3
3
|
* All in-memory with JSONL persistence. No database.
|
|
4
4
|
*/
|
|
5
5
|
import type { Entity, EntityType, Observation, Relation } from './types.js';
|
|
6
|
+
export declare function isJunkEntityName(name: string): boolean;
|
|
7
|
+
/**
|
|
8
|
+
* Remove existing junk entities (and their observations + relations)
|
|
9
|
+
* from disk. Called once per session start by runDataHygiene to clear
|
|
10
|
+
* accumulated low-quality extractions from earlier brain runs that
|
|
11
|
+
* predate the post-extraction filter.
|
|
12
|
+
*
|
|
13
|
+
* Returns counts so the hygiene report can surface the cleanup —
|
|
14
|
+
* silent purges are hard to verify.
|
|
15
|
+
*/
|
|
16
|
+
export declare function pruneJunkBrainEntries(): {
|
|
17
|
+
entitiesRemoved: number;
|
|
18
|
+
observationsRemoved: number;
|
|
19
|
+
relationsRemoved: number;
|
|
20
|
+
};
|
|
6
21
|
export declare function loadEntities(): Entity[];
|
|
7
22
|
export declare function saveEntities(entities: Entity[]): void;
|
|
8
23
|
/**
|
package/dist/brain/store.js
CHANGED
|
@@ -25,6 +25,77 @@ function uid() { return crypto.randomBytes(8).toString('hex'); }
|
|
|
25
25
|
function ensureDir() {
|
|
26
26
|
fs.mkdirSync(BRAIN_DIR, { recursive: true });
|
|
27
27
|
}
|
|
28
|
+
// Names the extractor model emits but that aren't real entities — they're
|
|
29
|
+
// programmatic strings that happened to be in the transcript. Verified
|
|
30
|
+
// 2026-05-04 on a real machine: 7 of 44 entities (16%) were junk by these
|
|
31
|
+
// patterns — `Bash(git commit:*)` (tool permission), `gs://bucket/path/**`
|
|
32
|
+
// (object URI + glob), `t_morkaf83_f03a0b10` (Franklin task runId tagged
|
|
33
|
+
// as "project"). The vacuous observations they then accumulated ("This is
|
|
34
|
+
// a task ID for an ETL process") leaked back into context on every later
|
|
35
|
+
// session. Keep the patterns conservative — anything that looks
|
|
36
|
+
// programmatic rather than nameable.
|
|
37
|
+
// Each pattern is tested against the trimmed entity name. None of them use
// the `g` or `y` flag, so `RegExp.prototype.test` is stateless and the
// literals can safely be shared across calls.
const JUNK_ENTITY_NAME_PATTERNS = [
    /^[A-Z][a-zA-Z]*\(.*\)$/, // Tool-permission shape, e.g. Bash(...), Edit(...)
    /^(?:gs|s3|file|https?):\/\//i, // URIs
    /\*\*?(?:\/|$)/, // Glob patterns: `*` or `**` followed by `/` or end of string
    /^t_[a-z0-9]+_[a-z0-9]{6,}$/i, // Franklin task runIds
    /^run_[a-z0-9_-]+$/i, // Generic run/job ids
    /^session-\d{4}-/, // Session ids
    /^[0-9a-f]{16,}$/i, // Hex hashes / commit shas / undashed uuids (either case)
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i, // Canonical dashed UUIDs
];
/**
 * Decide whether an extracted entity name is a programmatic string rather
 * than a real, nameable entity.
 *
 * A name is junk when it is not a string, is shorter than two characters
 * after trimming, or matches any entry in JUNK_ENTITY_NAME_PATTERNS.
 *
 * @param name - Candidate entity name as emitted by the extractor or read
 *   back from the persisted store.
 * @returns `true` when the name should be rejected/pruned, `false` otherwise.
 */
export function isJunkEntityName(name) {
    // Persisted rows are not validated here; a missing or non-string name can
    // never be a usable entity, and calling `.trim()` on it would throw.
    if (typeof name !== 'string')
        return true;
    const trimmed = name.trim();
    if (trimmed.length < 2)
        return true;
    return JUNK_ENTITY_NAME_PATTERNS.some((rx) => rx.test(trimmed));
}
|
|
52
|
+
/**
 * Remove existing junk entities (and the observations + relations that
 * reference them) from disk. Intended to be called once per session start
 * by runDataHygiene to clear low-quality extractions that predate the
 * post-extraction filter.
 *
 * Write ordering matters: observations and relations are rewritten first
 * and the entity list last. The junk entity rows are what lets a later run
 * rediscover the ids to purge, so if the process dies part-way through, the
 * next call still sees those entities and finishes the cleanup. Writing
 * entities first would leave orphaned observations/relations that no later
 * run could identify.
 *
 * NOTE(review): the original author states saveJsonl uses tmp + rename, so
 * each individual file rewrite is assumed atomic; the three files are NOT
 * updated as a single transaction.
 *
 * @returns Counts of removed entities, observations and relations so the
 *   hygiene report can surface the cleanup. All zeros when the entity store
 *   cannot be loaded, is empty, or contains no junk names.
 */
export function pruneJunkBrainEntries() {
    const result = { entitiesRemoved: 0, observationsRemoved: 0, relationsRemoved: 0 };
    let entities;
    try {
        entities = loadEntities();
    }
    catch {
        // Best effort: an unreadable store is left untouched.
        return result;
    }
    if (entities.length === 0)
        return result;
    const junkIds = new Set();
    const surviving = [];
    for (const e of entities) {
        if (isJunkEntityName(e.name)) {
            junkIds.add(e.id);
            result.entitiesRemoved++;
        }
        else {
            surviving.push(e);
        }
    }
    if (junkIds.size === 0)
        return result;
    // Drop observations + relations referencing the junk entities.
    const obs = loadJsonl(OBSERVATIONS_FILE);
    const survivingObs = obs.filter((o) => !junkIds.has(o.entity_id));
    result.observationsRemoved = obs.length - survivingObs.length;
    const rels = loadJsonl(RELATIONS_FILE);
    const survivingRels = rels.filter((r) => !junkIds.has(r.from_id) && !junkIds.has(r.to_id));
    result.relationsRemoved = rels.length - survivingRels.length;
    // Only rewrite a dependent file when something was actually filtered out.
    // This avoids needless disk churn and means an empty load result is never
    // written back over an existing file.
    if (result.observationsRemoved > 0)
        saveJsonl(OBSERVATIONS_FILE, survivingObs);
    if (result.relationsRemoved > 0)
        saveJsonl(RELATIONS_FILE, survivingRels);
    // Entities go last so an interrupted purge can be retried (see header).
    saveEntities(surviving);
    return result;
}
|
|
28
99
|
// ─── Generic JSONL helpers ────────────────────────────────────────────────
|
|
29
100
|
function loadJsonl(file) {
|
|
30
101
|
try {
|
package/dist/storage/hygiene.js
CHANGED
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
import fs from 'node:fs';
|
|
25
25
|
import path from 'node:path';
|
|
26
26
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
27
|
+
import { pruneJunkBrainEntries } from '../brain/store.js';
|
|
27
28
|
// Retention knobs. Tuned conservatively — a power user with 50+ calls/day
|
|
28
29
|
// for 30 days still fits in DATA_DIR_MAX_FILES, and 5000 cost-log entries
|
|
29
30
|
// covers months of normal use without truncating the running totals.
|
|
@@ -49,6 +50,7 @@ const ZERO_REPORT = {
|
|
|
49
50
|
dataFilesTrimmed: 0,
|
|
50
51
|
costLogRowsTrimmed: 0,
|
|
51
52
|
orphanToolResultsRemoved: 0,
|
|
53
|
+
brainJunkEntitiesRemoved: 0,
|
|
52
54
|
};
|
|
53
55
|
/**
|
|
54
56
|
* Top-level entry. Call once at agent session start. Catches its own
|
|
@@ -75,6 +77,10 @@ export function runDataHygiene() {
|
|
|
75
77
|
report.orphanToolResultsRemoved = sweepOrphanToolResults();
|
|
76
78
|
}
|
|
77
79
|
catch { /* best effort */ }
|
|
80
|
+
try {
|
|
81
|
+
report.brainJunkEntitiesRemoved = pruneJunkBrainEntries().entitiesRemoved;
|
|
82
|
+
}
|
|
83
|
+
catch { /* best effort */ }
|
|
78
84
|
return report;
|
|
79
85
|
}
|
|
80
86
|
function trimDataDir() {
|
package/dist/tools/imagegen.js
CHANGED
|
@@ -258,7 +258,29 @@ function buildExecute(deps) {
|
|
|
258
258
|
const result = await response.json();
|
|
259
259
|
const imageData = result.data?.[0];
|
|
260
260
|
if (!imageData) {
|
|
261
|
-
|
|
261
|
+
// Some gateways return 200 with an `error` / `message` field for
|
|
262
|
+
// moderation, quota, or upstream-model failures instead of using
|
|
263
|
+
// HTTP error codes. Without surfacing those, the agent sees only
|
|
264
|
+
// "No image data returned from API" and starts guessing — verified
|
|
265
|
+
// 2026-05-04: agent guessed "gpt-image-2 is forced to 1024x1024
|
|
266
|
+
// per the tool docs" and burned a retry on a size param that
|
|
267
|
+
// wasn't the actual cause. Surface the diagnostic so the agent
|
|
268
|
+
// (or user) can react.
|
|
269
|
+
const bits = [];
|
|
270
|
+
if (result.error !== undefined) {
|
|
271
|
+
bits.push(`error=${JSON.stringify(result.error).slice(0, 240)}`);
|
|
272
|
+
}
|
|
273
|
+
if (result.message !== undefined) {
|
|
274
|
+
bits.push(`message=${String(result.message).slice(0, 240)}`);
|
|
275
|
+
}
|
|
276
|
+
if (Array.isArray(result.data) && result.data.length === 0) {
|
|
277
|
+
bits.push('data=[] (empty array — likely content moderation)');
|
|
278
|
+
}
|
|
279
|
+
else if (result.data === undefined) {
|
|
280
|
+
bits.push('data field missing');
|
|
281
|
+
}
|
|
282
|
+
const detail = bits.length > 0 ? ` — ${bits.join('; ')}` : '';
|
|
283
|
+
return { output: `No image data returned from API${detail}`, isError: true };
|
|
262
284
|
}
|
|
263
285
|
// Save image. The /v1/images/image2image endpoint returns Gemini results
|
|
264
286
|
// as a data URI in `url`, so decode those locally instead of going through
|
package/dist/tools/videogen.js
CHANGED
|
@@ -192,7 +192,18 @@ function buildExecute(deps) {
|
|
|
192
192
|
ctx.abortSignal.removeEventListener('abort', submitAbort);
|
|
193
193
|
}
|
|
194
194
|
if (!submitResult.poll_url || !paymentHeaders) {
|
|
195
|
-
|
|
195
|
+
// Surface any diagnostic the body contained — same rationale as
|
|
196
|
+
// imagegen.ts: "missing field" tells the agent nothing about
|
|
197
|
+
// whether it was moderation, quota, or upstream model failure.
|
|
198
|
+
const bits = [];
|
|
199
|
+
if (!paymentHeaders)
|
|
200
|
+
bits.push('payment headers missing');
|
|
201
|
+
if (submitResult?.error !== undefined)
|
|
202
|
+
bits.push(`error=${JSON.stringify(submitResult.error).slice(0, 240)}`);
|
|
203
|
+
if (submitResult?.message !== undefined)
|
|
204
|
+
bits.push(`message=${String(submitResult.message).slice(0, 240)}`);
|
|
205
|
+
const detail = bits.length > 0 ? ` — ${bits.join('; ')}` : '';
|
|
206
|
+
return { output: `API did not return a poll_url for the video job${detail}`, isError: true };
|
|
196
207
|
}
|
|
197
208
|
// Phase 2: poll GET /v1/videos/generations/{id} with the SAME signed
|
|
198
209
|
// x-payment header until the job completes. Server settles on the first
|
|
@@ -218,7 +229,17 @@ function buildExecute(deps) {
|
|
|
218
229
|
const videoData = outcome.data;
|
|
219
230
|
const videoUrl = videoData.url;
|
|
220
231
|
if (!videoUrl) {
|
|
221
|
-
|
|
232
|
+
// Same diagnostic pattern as the submit-side path above.
|
|
233
|
+
const d = videoData;
|
|
234
|
+
const bits = [];
|
|
235
|
+
if (d.error !== undefined)
|
|
236
|
+
bits.push(`error=${JSON.stringify(d.error).slice(0, 240)}`);
|
|
237
|
+
if (d.message !== undefined)
|
|
238
|
+
bits.push(`message=${String(d.message).slice(0, 240)}`);
|
|
239
|
+
if (d.status !== undefined)
|
|
240
|
+
bits.push(`status=${String(d.status).slice(0, 80)}`);
|
|
241
|
+
const detail = bits.length > 0 ? ` — ${bits.join('; ')}` : '';
|
|
242
|
+
return { output: `No video URL returned from API${detail}`, isError: true };
|
|
222
243
|
}
|
|
223
244
|
try {
|
|
224
245
|
// Download the MP4
|
package/package.json
CHANGED