@blockrun/franklin 3.15.43 → 3.15.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * Franklin Brain — entity extraction from session traces.
3
3
  * Uses cheap model to detect people, projects, companies from conversation.
4
4
  */
5
- import { loadEntities, saveEntities, upsertEntity, addObservation, upsertRelation, } from './store.js';
5
+ import { loadEntities, saveEntities, upsertEntity, addObservation, upsertRelation, isJunkEntityName, } from './store.js';
6
6
  const EXTRACTION_MODELS = [
7
7
  'google/gemini-2.5-flash-lite',
8
8
  'google/gemini-2.5-flash',
@@ -25,8 +25,9 @@ Also extract relationships between entities:
25
25
  Rules:
26
26
  - Only extract entities with CLEAR evidence in the conversation.
27
27
  - Do NOT extract the AI agent itself or generic concepts ("TypeScript", "JavaScript").
28
+ - Do NOT extract programmatic strings that happen to appear in the transcript: tool permission patterns like "Bash(git commit:*)", object URIs (gs://, s3://, file://), glob patterns (paths with **), task IDs (t_xxx_xxx), session IDs, or hashes/UUIDs.
28
29
  - DO extract specific people, specific projects, specific companies, specific products.
29
- - Observations should be concrete facts, not vague descriptions.
30
+ - Observations must be concrete facts about the entity that would be useful in a future conversation. Do NOT include tautologies that restate the entity name ("This is a task ID for an ETL process") or generic statements that apply to any instance of the type.
30
31
  - If no entities are found, return empty arrays.
31
32
 
32
33
  Respond with ONLY a JSON object (no markdown fences):
@@ -69,7 +70,8 @@ function parseExtraction(raw) {
69
70
  const parsed = JSON.parse(cleaned);
70
71
  const entities = (parsed.entities || [])
71
72
  .filter((e) => typeof e.name === 'string' && e.name.length > 1 &&
72
- typeof e.type === 'string' && VALID_TYPES.has(e.type))
73
+ typeof e.type === 'string' && VALID_TYPES.has(e.type) &&
74
+ !isJunkEntityName(e.name))
73
75
  .map((e) => ({
74
76
  name: e.name.slice(0, 100),
75
77
  type: e.type,
@@ -3,6 +3,21 @@
3
3
  * All in-memory with JSONL persistence. No database.
4
4
  */
5
5
  import type { Entity, EntityType, Observation, Relation } from './types.js';
6
+ export declare function isJunkEntityName(name: string): boolean;
7
+ /**
8
+ * Remove existing junk entities (and their observations + relations)
9
+ * from disk. Called once per session start by runDataHygiene to clear
10
+ * accumulated low-quality extractions from earlier brain runs that
11
+ * predate the post-extraction filter.
12
+ *
13
+ * Returns counts so the hygiene report can surface the cleanup —
14
+ * silent purges are hard to verify.
15
+ */
16
+ export declare function pruneJunkBrainEntries(): {
17
+ entitiesRemoved: number;
18
+ observationsRemoved: number;
19
+ relationsRemoved: number;
20
+ };
6
21
  export declare function loadEntities(): Entity[];
7
22
  export declare function saveEntities(entities: Entity[]): void;
8
23
  /**
@@ -25,6 +25,77 @@ function uid() { return crypto.randomBytes(8).toString('hex'); }
25
25
  function ensureDir() {
26
26
  fs.mkdirSync(BRAIN_DIR, { recursive: true });
27
27
  }
28
+ // Names the extractor model emits but that aren't real entities — they're
29
+ // programmatic strings that happened to be in the transcript. Verified
30
+ // 2026-05-04 on a real machine: 7 of 44 entities (16%) were junk by these
31
+ // patterns — `Bash(git commit:*)` (tool permission), `gs://bucket/path/**`
32
+ // (object URI + glob), `t_morkaf83_f03a0b10` (Franklin task runId tagged
33
+ // as "project"). The vacuous observations they then accumulated ("This is
34
+ // a task ID for an ETL process") leaked back into context on every later
35
+ // session. Keep the patterns conservative — anything that looks
36
+ // programmatic rather than nameable.
37
+ const JUNK_ENTITY_NAME_PATTERNS = [
38
+ /^[A-Z][a-zA-Z]*\(.*\)$/, // Tool-permission shape, e.g. Bash(...), Edit(...)
39
+ /^(?:gs|s3|file|https?):\/\//i, // URIs
40
+ /\*\*?(?:\/|$)/, // Glob patterns
41
+ /^t_[a-z0-9]+_[a-z0-9]{6,}$/i, // Franklin task runIds
42
+ /^run_[a-z0-9_-]+$/i, // Generic run/job ids
43
+ /^session-\d{4}-/, // Session ids
44
+ /^[0-9a-f]{16,}$/, // Hex hashes / commit shas / uuids without dashes
45
+ ];
46
+ export function isJunkEntityName(name) {
47
+ const trimmed = name.trim();
48
+ if (trimmed.length < 2)
49
+ return true;
50
+ return JUNK_ENTITY_NAME_PATTERNS.some(rx => rx.test(trimmed));
51
+ }
52
/**
 * Remove existing junk entities (and their observations + relations)
 * from disk. Called once per session start by runDataHygiene to clear
 * accumulated low-quality extractions from earlier brain runs that
 * predate the post-extraction filter.
 *
 * Write order matters: observations and relations are rewritten first and
 * the entity list last. The entity list is what identifies junk ids, so if
 * the process dies part-way through, the junk entities are still on disk
 * and the next run detects them again and finishes the cleanup. Writing
 * entities first would leave orphaned observations/relations that no later
 * run could find, because this function returns early when no junk entity
 * exists.
 *
 * Returns counts so the hygiene report can surface the cleanup —
 * silent purges are hard to verify.
 *
 * @returns Number of entities, observations and relations removed; all
 *          zero when entities cannot be loaded or none are junk.
 */
export function pruneJunkBrainEntries() {
    const result = { entitiesRemoved: 0, observationsRemoved: 0, relationsRemoved: 0 };
    let entities;
    try {
        entities = loadEntities();
    }
    catch {
        // Best effort: an unreadable entity store means there is nothing to prune.
        return result;
    }
    if (entities.length === 0)
        return result;
    const junkIds = new Set();
    const surviving = [];
    for (const e of entities) {
        if (isJunkEntityName(e.name)) {
            junkIds.add(e.id);
        }
        else {
            surviving.push(e);
        }
    }
    if (junkIds.size === 0)
        return result;
    // Drop observations + relations referencing the junk entities.
    const obs = loadJsonl(OBSERVATIONS_FILE);
    const survivingObs = obs.filter(o => !junkIds.has(o.entity_id));
    const rels = loadJsonl(RELATIONS_FILE);
    const survivingRels = rels.filter(r => !junkIds.has(r.from_id) && !junkIds.has(r.to_id));
    // Each rewrite is expected to be atomic per file (per the original note,
    // saveJsonl goes through tmp + rename — TODO confirm the same holds for
    // saveEntities), but the three files are NOT updated as one transaction.
    // Dependents therefore go first and entities last; see the doc comment.
    // A file that lost no rows is left untouched, which avoids a needless
    // rewrite and never replaces a file whose load yielded nothing.
    if (survivingObs.length !== obs.length)
        saveJsonl(OBSERVATIONS_FILE, survivingObs);
    if (survivingRels.length !== rels.length)
        saveJsonl(RELATIONS_FILE, survivingRels);
    saveEntities(surviving);
    result.entitiesRemoved = entities.length - surviving.length;
    result.observationsRemoved = obs.length - survivingObs.length;
    result.relationsRemoved = rels.length - survivingRels.length;
    return result;
}
28
99
  // ─── Generic JSONL helpers ────────────────────────────────────────────────
29
100
  function loadJsonl(file) {
30
101
  try {
@@ -32,6 +32,7 @@ export interface HygieneReport {
32
32
  dataFilesTrimmed: number;
33
33
  costLogRowsTrimmed: number;
34
34
  orphanToolResultsRemoved: number;
35
+ brainJunkEntitiesRemoved: number;
35
36
  }
36
37
  /**
37
38
  * Top-level entry. Call once at agent session start. Catches its own
@@ -24,6 +24,7 @@
24
24
  import fs from 'node:fs';
25
25
  import path from 'node:path';
26
26
  import { BLOCKRUN_DIR } from '../config.js';
27
+ import { pruneJunkBrainEntries } from '../brain/store.js';
27
28
  // Retention knobs. Tuned conservatively — a power user with 50+ calls/day
28
29
  // for 30 days still fits in DATA_DIR_MAX_FILES, and 5000 cost-log entries
29
30
  // covers months of normal use without truncating the running totals.
@@ -49,6 +50,7 @@ const ZERO_REPORT = {
49
50
  dataFilesTrimmed: 0,
50
51
  costLogRowsTrimmed: 0,
51
52
  orphanToolResultsRemoved: 0,
53
+ brainJunkEntitiesRemoved: 0,
52
54
  };
53
55
  /**
54
56
  * Top-level entry. Call once at agent session start. Catches its own
@@ -75,6 +77,10 @@ export function runDataHygiene() {
75
77
  report.orphanToolResultsRemoved = sweepOrphanToolResults();
76
78
  }
77
79
  catch { /* best effort */ }
80
+ try {
81
+ report.brainJunkEntitiesRemoved = pruneJunkBrainEntries().entitiesRemoved;
82
+ }
83
+ catch { /* best effort */ }
78
84
  return report;
79
85
  }
80
86
  function trimDataDir() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.43",
3
+ "version": "3.15.44",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {