@feelingmindful/thinking-graph 1.15.2 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
- import type { Node, Edge, Session, SkillRegistryEntry } from '../engine/types.js';
1
+ import type { Node, Edge, Session, SkillRegistryEntry, ScoredNode, NodeType } from '../engine/types.js';
2
2
  import { InMemoryAdapter } from './memory.js';
3
+ import type { HybridSearchOpts } from './adapter.js';
3
4
  export interface JSONLAdapterOpts {
4
5
  dir: string;
5
6
  }
@@ -10,12 +11,40 @@ export declare class JSONLAdapter extends InMemoryAdapter {
10
11
  private readonly skillsPath;
11
12
  private readonly embeddingsPath;
12
13
  private readonly vectorIndex;
14
+ private embedQueue;
15
+ private draining;
16
+ private drainPromise;
17
+ private readonly embedGen;
13
18
  constructor(opts: JSONLAdapterOpts);
14
19
  initialize(): Promise<void>;
15
20
  close(): Promise<void>;
21
+ /** Claim the next embed generation for an id (call at write time). */
22
+ private claimGen;
23
+ private embedAndStore;
24
+ private enqueueEmbed;
25
+ private startDrain;
26
+ private drainEmbedQueue;
27
+ /** Await all pending background embeddings (tests + graceful shutdown). */
28
+ flushEmbeds(): Promise<void>;
29
+ /**
30
+ * Rewrite nodes.jsonl from the in-memory survivors when append churn (from
31
+ * updateNode/reinforcement) has bloated it well past the live node count.
32
+ * Last-line-wins reload makes the dropped lines redundant; this just reclaims
33
+ * the space and keeps startup replay bounded.
34
+ */
35
+ private compactNodesIfBloated;
16
36
  private maybeRunSqliteMigration;
17
37
  insertNode(node: Node): Promise<void>;
38
+ updateNode(id: string, fields: Partial<Node>): Promise<void>;
39
+ findNearest(content: string, opts: {
40
+ type: NodeType;
41
+ projectId?: string;
42
+ }): Promise<{
43
+ id: string;
44
+ score: number;
45
+ } | null>;
18
46
  searchContent(query: string): Promise<Node[]>;
47
+ searchHybrid(opts: HybridSearchOpts): Promise<ScoredNode[]>;
19
48
  insertEdge(edge: Edge): Promise<boolean>;
20
49
  insertSession(session: Session): Promise<void>;
21
50
  updateSession(id: string, fields: Partial<Session>): Promise<void>;
@@ -1,9 +1,37 @@
1
- import { readFileSync, appendFileSync, existsSync, mkdirSync } from 'fs';
1
+ import { readFileSync, appendFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
2
2
  import { join } from 'path';
3
3
  import { InMemoryAdapter } from './memory.js';
4
4
  import { embedText } from './embedding.js';
5
+ import { fuseHybrid } from '../engine/fusion.js';
6
+ import { tokenize } from '../engine/dedup.js';
7
+ import { config } from '../config.js';
5
8
  import { VectorIndex } from './vector-index.js';
6
9
  import { migrateSqliteToJsonl } from './migrate-sqlite.js';
10
+ // Hybrid recall tuning. Dense (semantic) is weighted above lexical so that
11
+ // semantically-related nodes with no shared tokens still surface; lexical keeps
12
+ // exact-term matches competitive. Channel weights are env-overridable via
13
+ // config. DENSE_TOP_K caps the number of dense candidates kept (the cosine scan
14
+ // itself is O(N) over the filtered set). DENSE_MIN_COSINE drops near-orthogonal
15
+ // candidates so unrelated nodes are not pulled in (calibrated for bge-small).
16
+ const DENSE_TOP_K = 50;
17
+ const DENSE_MIN_COSINE = 0.25;
18
+ const DENSE_WEIGHT = config.denseWeight;
19
+ const LEXICAL_WEIGHT = config.lexicalWeight;
20
+ // updateNode (used by node reinforcement on every duplicate learn) appends a new
21
+ // line per write, so a hot-duplicate node accumulates dead lines. On reload, if
22
+ // nodes.jsonl has grown well beyond the live node count, rewrite it from the
23
+ // in-memory survivors to bound both disk size and replay cost.
24
+ const COMPACTION_MIN_LINES = 200;
25
+ const COMPACTION_FACTOR = 3;
26
+ // Async embedding (opt-in). When THINKING_GRAPH_ASYNC_EMBED=true, insertNode
27
+ // returns right after the durable JSONL append and embeds in the background, so
28
+ // a burst of writes is not blocked on model inference. Any vector missing after
29
+ // a crash is re-embedded on the next initialize() (self-healing). Default off
30
+ // preserves synchronous embed-on-write (and keeps read-your-writes semantics).
31
+ const ASYNC_EMBED = process.env.THINKING_GRAPH_ASYNC_EMBED === 'true';
32
+ // When embeddings are disabled, embedText always returns null, so there is
33
+ // nothing to defer or self-heal.
34
+ const EMBEDDINGS_ENABLED = process.env.THINKING_GRAPH_EMBEDDINGS !== 'false';
7
35
  export class JSONLAdapter extends InMemoryAdapter {
8
36
  nodesPath;
9
37
  edgesPath;
@@ -11,6 +39,12 @@ export class JSONLAdapter extends InMemoryAdapter {
11
39
  skillsPath;
12
40
  embeddingsPath;
13
41
  vectorIndex = new VectorIndex();
42
+ embedQueue = [];
43
+ draining = false;
44
+ drainPromise = null;
45
+ // Per-id embed generation, claimed at write time. The latest write for an id
46
+ // wins, so a stale deferred embed can't clobber a newer inline updateNode.
47
+ embedGen = new Map();
14
48
  constructor(opts) {
15
49
  super();
16
50
  this.nodesPath = join(opts.dir, 'nodes.jsonl');
@@ -22,9 +56,12 @@ export class JSONLAdapter extends InMemoryAdapter {
22
56
  async initialize() {
23
57
  mkdirSync(join(this.nodesPath, '..'), { recursive: true });
24
58
  await this.maybeRunSqliteMigration();
59
+ let rawNodeLines = 0;
25
60
  for (const line of readLines(this.nodesPath)) {
26
61
  await super.insertNode(JSON.parse(line));
62
+ rawNodeLines++;
27
63
  }
64
+ await this.compactNodesIfBloated(rawNodeLines);
28
65
  for (const line of readLines(this.edgesPath)) {
29
66
  await super.insertEdge(JSON.parse(line));
30
67
  }
@@ -47,8 +84,100 @@ export class JSONLAdapter extends InMemoryAdapter {
47
84
  await super.insertSkill(sk);
48
85
  }
49
86
  this.vectorIndex.load(this.embeddingsPath);
87
+ // Self-heal: re-embed any node whose vector never landed (e.g. a crash
88
+ // before the async queue drained). No-op in sync mode / fully-embedded
89
+ // stores, and skipped entirely when embeddings are disabled (nothing to do).
90
+ if (ASYNC_EMBED && EMBEDDINGS_ENABLED) {
91
+ const all = (await this.queryNodes({ limit: Number.MAX_SAFE_INTEGER })).items;
92
+ for (const n of all) {
93
+ if (!this.vectorIndex.has(n.id))
94
+ this.enqueueEmbed(n.id, n.content);
95
+ }
96
+ }
97
+ }
98
+ async close() {
99
+ await this.flushEmbeds();
100
+ }
101
+ // ─── Embedding (sync inline, or async background queue) ────────────────
102
+ /** Claim the next embed generation for an id (call at write time). */
103
+ claimGen(id) {
104
+ const gen = (this.embedGen.get(id) ?? 0) + 1;
105
+ this.embedGen.set(id, gen);
106
+ return gen;
107
+ }
108
+ async embedAndStore(id, content, gen) {
109
+ const vec = await embedText(content);
110
+ // Only write if this is still the latest write for the id — a stale deferred
111
+ // embed must not clobber a newer inline updateNode re-embed.
112
+ if (vec && this.embedGen.get(id) === gen) {
113
+ this.vectorIndex.append(this.embeddingsPath, id, vec);
114
+ }
115
+ }
116
+ enqueueEmbed(id, content) {
117
+ // Claim the generation now (write order), not when the drain later runs.
118
+ this.embedQueue.push({ id, content, gen: this.claimGen(id) });
119
+ this.startDrain();
120
+ }
121
+ startDrain() {
122
+ if (this.draining)
123
+ return;
124
+ this.draining = true;
125
+ this.drainPromise = this.drainEmbedQueue().finally(() => {
126
+ this.draining = false;
127
+ // Restart if anything was enqueued during the drain or in the tiny window
128
+ // before this handler ran — otherwise the item would sit undrained and
129
+ // flushEmbeds() could spin. The finally body is synchronous, so this
130
+ // check and the enqueue cannot interleave.
131
+ if (this.embedQueue.length > 0)
132
+ this.startDrain();
133
+ });
134
+ }
135
+ async drainEmbedQueue() {
136
+ // Take the current batch by swapping the array (O(1)) instead of shift()
137
+ // (O(n) each). Items enqueued during the batch land in the fresh array and
138
+ // are picked up by the next loop iteration.
139
+ while (this.embedQueue.length > 0) {
140
+ const batch = this.embedQueue;
141
+ this.embedQueue = [];
142
+ for (const { id, content, gen } of batch) {
143
+ // Skip if a newer write already superseded this one (generation moved on).
144
+ if (this.embedGen.get(id) !== gen)
145
+ continue;
146
+ // Never let a single failed embed reject the un-awaited drain promise
147
+ // (would surface as an unhandledRejection) or abort the rest of the batch.
148
+ try {
149
+ await this.embedAndStore(id, content, gen);
150
+ }
151
+ catch (err) {
152
+ console.error('[thinking-graph] background embed failed:', err);
153
+ }
154
+ }
155
+ }
156
+ }
157
+ /** Await all pending background embeddings (tests + graceful shutdown). */
158
+ async flushEmbeds() {
159
+ while (this.draining || this.embedQueue.length > 0) {
160
+ if (this.drainPromise)
161
+ await this.drainPromise;
162
+ else
163
+ await Promise.resolve();
164
+ }
165
+ }
166
+ /**
167
+ * Rewrite nodes.jsonl from the in-memory survivors when append churn (from
168
+ * updateNode/reinforcement) has bloated it well past the live node count.
169
+ * Last-line-wins reload makes the dropped lines redundant; this just reclaims
170
+ * the space and keeps startup replay bounded.
171
+ */
172
+ async compactNodesIfBloated(rawLineCount) {
173
+ if (rawLineCount <= COMPACTION_MIN_LINES)
174
+ return;
175
+ const nodes = (await this.queryNodes({ limit: Number.MAX_SAFE_INTEGER })).items;
176
+ if (rawLineCount <= COMPACTION_FACTOR * Math.max(1, nodes.length))
177
+ return;
178
+ const body = nodes.map(n => JSON.stringify(n)).join('\n');
179
+ writeFileSync(this.nodesPath, body ? body + '\n' : '', 'utf-8');
50
180
  }
51
- async close() { }
52
181
  async maybeRunSqliteMigration() {
53
182
  const dbPath = process.env.THINKING_GRAPH_PROJECT_DB;
54
183
  if (!dbPath)
@@ -78,9 +207,41 @@ export class JSONLAdapter extends InMemoryAdapter {
78
207
  async insertNode(node) {
79
208
  await super.insertNode(node);
80
209
  appendLine(this.nodesPath, node);
81
- const vec = await embedText(node.content);
82
- if (vec)
83
- this.vectorIndex.append(this.embeddingsPath, node.id, vec);
210
+ // Durable write is done; embed inline (default) or defer to the background
211
+ // queue so the write returns without waiting on model inference. Nothing to
212
+ // do when embeddings are disabled.
213
+ if (!EMBEDDINGS_ENABLED)
214
+ return;
215
+ if (ASYNC_EMBED)
216
+ this.enqueueEmbed(node.id, node.content);
217
+ else
218
+ await this.embedAndStore(node.id, node.content, this.claimGen(node.id));
219
+ }
220
+ async updateNode(id, fields) {
221
+ await super.updateNode(id, fields);
222
+ // Persist the new version; on reload the last line for an id wins.
223
+ const updated = await super.getNode(id);
224
+ if (!updated)
225
+ return;
226
+ appendLine(this.nodesPath, updated);
227
+ // Re-embed when content changed so the vector index does not go stale for
228
+ // findNearest / hybrid recall. (Reinforcement updates metadata only, so this
229
+ // is skipped on the hot duplicate-learn path.) Kept inline even in async mode
230
+ // — content updates are rare, and the inline write claims a newer generation, so the drain skips any stale queued embed.
231
+ if (fields.content !== undefined && EMBEDDINGS_ENABLED) {
232
+ await this.embedAndStore(id, updated.content, this.claimGen(id));
233
+ }
234
+ }
235
+ async findNearest(content, opts) {
236
+ const vec = await embedText(content);
237
+ if (!vec)
238
+ return null;
239
+ const candidates = this.filterNodes({ type: opts.type, projectId: opts.projectId });
240
+ if (candidates.length === 0)
241
+ return null;
242
+ const allow = new Set(candidates.map(n => n.id));
243
+ const [top] = this.vectorIndex.search(vec, 1, allow);
244
+ return top ? { id: top.id, score: top.score } : null;
84
245
  }
85
246
  async searchContent(query) {
86
247
  const vec = await embedText(query);
@@ -97,6 +258,36 @@ export class JSONLAdapter extends InMemoryAdapter {
97
258
  }
98
259
  return nodes;
99
260
  }
261
+ async searchHybrid(opts) {
262
+ const { nodes, lexical } = await this.lexicalCandidates(opts);
263
+ if (nodes.size === 0)
264
+ return [];
265
+ const lexicalOnly = () => Promise.resolve(fuseHybrid({ nodes, channels: [{ scores: lexical, weight: 1 }] }));
266
+ // A query that tokenizes to nothing (punctuation/whitespace only) has no
267
+ // lexical or semantic intent — skip the embed call and the dense channel.
268
+ if (tokenize(opts.query).length === 0)
269
+ return lexicalOnly();
270
+ const qVec = await embedText(opts.query);
271
+ if (!qVec) {
272
+ // Embeddings disabled or model failed to load: degrade to lexical-only.
273
+ // (A cold-but-loadable model blocks until ready, then returns a vector.)
274
+ return lexicalOnly();
275
+ }
276
+ // Filter BEFORE ranking (allow-set) so a scoped recall sees the top-K of
277
+ // in-filter survivors, not the global top-K intersected with the filter.
278
+ const dense = new Map();
279
+ for (const m of this.vectorIndex.search(qVec, DENSE_TOP_K, new Set(nodes.keys()))) {
280
+ if (m.score >= DENSE_MIN_COSINE)
281
+ dense.set(m.id, m.score);
282
+ }
283
+ return fuseHybrid({
284
+ nodes,
285
+ channels: [
286
+ { scores: dense, weight: DENSE_WEIGHT },
287
+ { scores: lexical, weight: LEXICAL_WEIGHT },
288
+ ],
289
+ });
290
+ }
100
291
  // ─── Edges ─────────────────────────────────────────────
101
292
  async insertEdge(edge) {
102
293
  const created = await super.insertEdge(edge);
@@ -1,5 +1,5 @@
1
- import type { Node, Edge, Session, SkillRegistryEntry, NodeQuery, PaginatedResult, EdgeType, ExportOpts, GraphExport, GraphStats } from '../engine/types.js';
2
- import type { StorageAdapter } from './adapter.js';
1
+ import type { Node, Edge, Session, SkillRegistryEntry, NodeQuery, PaginatedResult, EdgeType, ScoredNode, ExportOpts, GraphExport, GraphStats } from '../engine/types.js';
2
+ import type { StorageAdapter, HybridSearchOpts } from './adapter.js';
3
3
  export declare class InMemoryAdapter implements StorageAdapter {
4
4
  private nodes;
5
5
  private edges;
@@ -8,9 +8,22 @@ export declare class InMemoryAdapter implements StorageAdapter {
8
8
  initialize(): Promise<void>;
9
9
  close(): Promise<void>;
10
10
  insertNode(node: Node): Promise<void>;
11
+ updateNode(id: string, fields: Partial<Node>): Promise<void>;
11
12
  getNode(id: string): Promise<Node | null>;
13
+ /** Apply attribute + substring filters without sorting or paginating. */
14
+ protected filterNodes(query: NodeQuery): Node[];
12
15
  queryNodes(query: NodeQuery): Promise<PaginatedResult<Node>>;
13
16
  searchContent(text: string, limit?: number): Promise<Node[]>;
17
+ /**
18
+ * Gather attribute-filtered candidates plus a lexical (token-coverage) score
19
+ * per node. Shared by the in-memory lexical-only hybrid and the JSONL
20
+ * dense+lexical hybrid (which extends this class and adds a vector channel).
21
+ */
22
+ protected lexicalCandidates(opts: HybridSearchOpts): Promise<{
23
+ nodes: Map<string, Node>;
24
+ lexical: Map<string, number>;
25
+ }>;
26
+ searchHybrid(opts: HybridSearchOpts): Promise<ScoredNode[]>;
14
27
  insertEdge(edge: Edge): Promise<boolean>;
15
28
  getEdgesFrom(nodeId: string, type?: EdgeType): Promise<Edge[]>;
16
29
  getEdgesTo(nodeId: string, type?: EdgeType): Promise<Edge[]>;
@@ -1,3 +1,10 @@
1
+ import { lexicalScoreTokens, fuseHybrid } from '../engine/fusion.js';
2
+ import { tokenize } from '../engine/dedup.js';
3
+ // Score for a raw-substring-only match (no exact token overlap). Keeps the
4
+ // hybrid lexical channel a strict superset of the legacy substring filter, so a
5
+ // subword query like "auth" still surfaces "authentication" nodes. Ranked below
6
+ // any genuine token-coverage match.
7
+ const SUBSTRING_FLOOR = 0.5;
1
8
  export class InMemoryAdapter {
2
9
  nodes = new Map();
3
10
  edges = new Map();
@@ -9,10 +16,17 @@ export class InMemoryAdapter {
9
16
  async insertNode(node) {
10
17
  this.nodes.set(node.id, { ...node });
11
18
  }
19
+ async updateNode(id, fields) {
20
+ const node = this.nodes.get(id);
21
+ if (node) {
22
+ this.nodes.set(id, { ...node, ...fields, id: node.id });
23
+ }
24
+ }
12
25
  async getNode(id) {
13
26
  return this.nodes.get(id) ?? null;
14
27
  }
15
- async queryNodes(query) {
28
+ /** Apply attribute + substring filters without sorting or paginating. */
29
+ filterNodes(query) {
16
30
  let results = [...this.nodes.values()];
17
31
  if (query.type) {
18
32
  const types = Array.isArray(query.type) ? query.type : [query.type];
@@ -31,6 +45,10 @@ export class InMemoryAdapter {
31
45
  const q = query.query.toLowerCase();
32
46
  results = results.filter(n => n.content.toLowerCase().includes(q));
33
47
  }
48
+ return results;
49
+ }
50
+ async queryNodes(query) {
51
+ const results = this.filterNodes(query);
34
52
  // Sort by createdAt desc
35
53
  results.sort((a, b) => b.createdAt.localeCompare(a.createdAt));
36
54
  const totalCount = results.length;
@@ -45,6 +63,41 @@ export class InMemoryAdapter {
45
63
  .filter(n => n.content.toLowerCase().includes(q))
46
64
  .slice(0, limit);
47
65
  }
66
+ /**
67
+ * Gather attribute-filtered candidates plus a lexical (token-coverage) score
68
+ * per node. Shared by the in-memory lexical-only hybrid and the JSONL
69
+ * dense+lexical hybrid (which extends this class and adds a vector channel).
70
+ */
71
+ async lexicalCandidates(opts) {
72
+ // Attribute filter only (no substring) and unsorted — fuseHybrid re-ranks.
73
+ const candidates = this.filterNodes({
74
+ type: opts.type,
75
+ sessionId: opts.sessionId,
76
+ projectId: opts.projectId,
77
+ crossProject: opts.crossProject,
78
+ since: opts.since,
79
+ });
80
+ const queryTokens = new Set(tokenize(opts.query));
81
+ const rawQuery = opts.query.toLowerCase().trim();
82
+ const nodes = new Map();
83
+ const lexical = new Map();
84
+ for (const n of candidates) {
85
+ nodes.set(n.id, n);
86
+ let s = lexicalScoreTokens(queryTokens, n.content);
87
+ // Fall back to legacy substring matching so subword/partial-token queries
88
+ // are never a regression from the old substring-only recall.
89
+ if (s === 0 && rawQuery && n.content.toLowerCase().includes(rawQuery)) {
90
+ s = SUBSTRING_FLOOR;
91
+ }
92
+ if (s > 0)
93
+ lexical.set(n.id, s);
94
+ }
95
+ return { nodes, lexical };
96
+ }
97
+ async searchHybrid(opts) {
98
+ const { nodes, lexical } = await this.lexicalCandidates(opts);
99
+ return fuseHybrid({ nodes, channels: [{ scores: lexical, weight: 1 }] });
100
+ }
48
101
  // ─── Edges ─────────────────────────────────────────────
49
102
  async insertEdge(edge) {
50
103
  // Check for duplicate (same source, target, type)
@@ -14,6 +14,7 @@ export declare class SQLiteAdapter implements StorageAdapter {
14
14
  private get;
15
15
  private all;
16
16
  insertNode(node: Node): Promise<void>;
17
+ updateNode(id: string, fields: Partial<Node>): Promise<void>;
17
18
  getNode(id: string): Promise<Node | null>;
18
19
  queryNodes(query: NodeQuery): Promise<PaginatedResult<Node>>;
19
20
  searchContent(text: string, limit?: number): Promise<Node[]>;
@@ -59,6 +59,32 @@ export class SQLiteAdapter {
59
59
  node.revisesThought ?? null,
60
60
  ]);
61
61
  }
62
+ // Supports only these mutable fields: content, metadata, updatedAt.
63
+ // (reinforceNode, the only runtime caller, updates just metadata + updatedAt;
64
+ // content is included for a general updateNode.) Other Node fields are
65
+ // intentionally not mutable here — this adapter is migration-only, and node
66
+ // identity/lineage fields (sessionId, projectId, thought*) are set once at
67
+ // insert. Extend the mapping if a real updateNode caller needs more.
68
+ async updateNode(id, fields) {
69
+ const sets = [];
70
+ const params = [];
71
+ if (fields.content !== undefined) {
72
+ sets.push('content = ?');
73
+ params.push(fields.content);
74
+ }
75
+ if (fields.metadata !== undefined) {
76
+ sets.push('metadata = ?');
77
+ params.push(JSON.stringify(fields.metadata));
78
+ }
79
+ if (fields.updatedAt !== undefined) {
80
+ sets.push('updated_at = ?');
81
+ params.push(fields.updatedAt);
82
+ }
83
+ if (sets.length > 0) {
84
+ params.push(id);
85
+ this.run(`UPDATE nodes SET ${sets.join(', ')} WHERE id = ?`, params);
86
+ }
87
+ }
62
88
  async getNode(id) {
63
89
  const row = this.get('SELECT * FROM nodes WHERE id = ?', [id]);
64
90
  return row ? this.rowToNode(row) : null;
@@ -6,6 +6,13 @@ export declare class VectorIndex {
6
6
  private readonly vectors;
7
7
  load(path: string): void;
8
8
  append(path: string, id: string, vec: Float32Array): void;
9
- search(queryVec: Float32Array, topK: number): VectorMatch[];
9
+ /**
10
+ * Top-K by cosine (vectors are L2-normalized so dot == cosine). When `allow`
11
+ * is given, only those ids are scored — this filters BEFORE ranking, so a
12
+ * scoped recall sees the top-K of the in-filter survivors rather than the
13
+ * global top-K intersected with the filter (which would silently drop
14
+ * in-filter matches ranked beyond K globally).
15
+ */
16
+ search(queryVec: Float32Array, topK: number, allow?: Set<string>): VectorMatch[];
10
17
  has(id: string): boolean;
11
18
  }
@@ -15,9 +15,18 @@ export class VectorIndex {
15
15
  this.vectors.set(id, vec);
16
16
  appendFileSync(path, JSON.stringify({ id, vec: Array.from(vec) }) + '\n', 'utf-8');
17
17
  }
18
- search(queryVec, topK) {
18
+ /**
19
+ * Top-K by cosine (vectors are L2-normalized so dot == cosine). When `allow`
20
+ * is given, only those ids are scored — this filters BEFORE ranking, so a
21
+ * scoped recall sees the top-K of the in-filter survivors rather than the
22
+ * global top-K intersected with the filter (which would silently drop
23
+ * in-filter matches ranked beyond K globally).
24
+ */
25
+ search(queryVec, topK, allow) {
19
26
  const results = [];
20
27
  for (const [id, vec] of this.vectors) {
28
+ if (allow && !allow.has(id))
29
+ continue;
21
30
  results.push({ id, score: dot(queryVec, vec) });
22
31
  }
23
32
  results.sort((a, b) => b.score - a.score);
@@ -32,21 +32,21 @@ export declare const executeSkillsSchema: z.ZodObject<{
32
32
  }, "strip", z.ZodTypeAny, {
33
33
  content: string;
34
34
  metadata?: Record<string, unknown> | undefined;
35
- severity?: "critical" | "high" | "medium" | "low" | undefined;
35
+ severity?: "high" | "critical" | "medium" | "low" | undefined;
36
36
  effort?: string | undefined;
37
37
  impact?: string | undefined;
38
38
  }, {
39
39
  content: string;
40
40
  metadata?: Record<string, unknown> | undefined;
41
- severity?: "critical" | "high" | "medium" | "low" | undefined;
41
+ severity?: "high" | "critical" | "medium" | "low" | undefined;
42
42
  effort?: string | undefined;
43
43
  impact?: string | undefined;
44
44
  }>, "many">>;
45
45
  sessionId: z.ZodOptional<z.ZodString>;
46
46
  projectId: z.ZodOptional<z.ZodString>;
47
47
  }, "strip", z.ZodTypeAny, {
48
- invocation: string;
49
48
  status: "completed" | "failed";
49
+ invocation: string;
50
50
  skill: string;
51
51
  stepNumber: number;
52
52
  plugin: string;
@@ -65,7 +65,7 @@ export declare const executeSkillsSchema: z.ZodObject<{
65
65
  techDebt?: {
66
66
  content: string;
67
67
  metadata?: Record<string, unknown> | undefined;
68
- severity?: "critical" | "high" | "medium" | "low" | undefined;
68
+ severity?: "high" | "critical" | "medium" | "low" | undefined;
69
69
  effort?: string | undefined;
70
70
  impact?: string | undefined;
71
71
  }[] | undefined;
@@ -75,10 +75,10 @@ export declare const executeSkillsSchema: z.ZodObject<{
75
75
  stepNumber: number;
76
76
  plugin: string;
77
77
  resultSummary: string;
78
+ status?: "completed" | "failed" | undefined;
78
79
  sessionId?: string | undefined;
79
80
  projectId?: string | undefined;
80
81
  metadata?: Record<string, unknown> | undefined;
81
- status?: "completed" | "failed" | undefined;
82
82
  purpose?: string | undefined;
83
83
  planId?: string | undefined;
84
84
  runId?: string | undefined;
@@ -90,7 +90,7 @@ export declare const executeSkillsSchema: z.ZodObject<{
90
90
  techDebt?: {
91
91
  content: string;
92
92
  metadata?: Record<string, unknown> | undefined;
93
- severity?: "critical" | "high" | "medium" | "low" | undefined;
93
+ severity?: "high" | "critical" | "medium" | "low" | undefined;
94
94
  effort?: string | undefined;
95
95
  impact?: string | undefined;
96
96
  }[] | undefined;
@@ -32,7 +32,7 @@ export declare const learnSchema: z.ZodObject<{
32
32
  sessionId?: string | undefined;
33
33
  projectId?: string | undefined;
34
34
  metadata?: Record<string, unknown> | undefined;
35
- severity?: "critical" | "high" | "medium" | "low" | undefined;
35
+ severity?: "high" | "critical" | "medium" | "low" | undefined;
36
36
  filePath?: string | undefined;
37
37
  lineRange?: [number, number] | undefined;
38
38
  relates?: {
@@ -49,7 +49,7 @@ export declare const learnSchema: z.ZodObject<{
49
49
  sessionId?: string | undefined;
50
50
  projectId?: string | undefined;
51
51
  metadata?: Record<string, unknown> | undefined;
52
- severity?: "critical" | "high" | "medium" | "low" | undefined;
52
+ severity?: "high" | "critical" | "medium" | "low" | undefined;
53
53
  filePath?: string | undefined;
54
54
  lineRange?: [number, number] | undefined;
55
55
  relates?: {
@@ -20,17 +20,22 @@ export const learnSchema = z.object({
20
20
  metadata: z.record(z.unknown()).optional(),
21
21
  });
22
22
  export async function learnHandler(graph, input, vault, projectSlug) {
23
- // Check for duplicate
24
- const existing = await graph.findSimilar(input.content, input.type, input.projectId);
25
- if (existing) {
23
+ // Banded duplicate detection (normalized-exact → semantic cosine → Jaccard).
24
+ const dup = await graph.findDuplicate(input.content, input.type, input.projectId);
25
+ if (dup && (dup.band === 'exact' || dup.band === 'high')) {
26
+ // High-confidence duplicate: reinforce the existing node (records the merge,
27
+ // preserves the new phrasing) instead of silently dropping the new content.
28
+ await graph.reinforceNode(dup.node.id, input.content);
26
29
  return {
27
30
  content: [{
28
31
  type: 'text',
29
32
  text: JSON.stringify({
30
- nodeId: existing.id,
31
- type: existing.type,
33
+ nodeId: dup.node.id,
34
+ type: dup.node.type,
32
35
  relatedCount: 0,
33
- duplicateOf: existing.id,
36
+ duplicateOf: dup.node.id,
37
+ merged: true,
38
+ band: dup.band,
34
39
  }),
35
40
  }],
36
41
  };
@@ -57,8 +62,19 @@ export async function learnHandler(graph, input, vault, projectSlug) {
57
62
  projectId: input.projectId,
58
63
  metadata,
59
64
  });
60
- // Create relationships
61
- let relatedCount = 0;
65
+ // Borderline near-duplicate: keep the new node but link it to the near-dup so
66
+ // the relationship is captured rather than creating a silent parallel copy.
67
+ if (dup && dup.band === 'borderline') {
68
+ await graph.addEdge({
69
+ sourceId: node.id,
70
+ targetId: dup.node.id,
71
+ type: 'similar_to',
72
+ reasoning: `Near-duplicate (score ${dup.score.toFixed(2)})`,
73
+ });
74
+ }
75
+ // Create relationships. The auto-created similar_to edge (borderline path
76
+ // above) counts as a relationship produced by this call.
77
+ let relatedCount = dup?.band === 'borderline' ? 1 : 0;
62
78
  if (input.relates) {
63
79
  for (const rel of input.relates) {
64
80
  await graph.addEdge({
@@ -113,6 +129,7 @@ export async function learnHandler(graph, input, vault, projectSlug) {
113
129
  type: node.type,
114
130
  relatedCount,
115
131
  duplicateOf: null,
132
+ ...(dup?.band === 'borderline' && { nearDuplicateOf: dup.node.id, band: 'borderline' }),
116
133
  ...(vaultPath && { vaultPath }),
117
134
  }),
118
135
  }],