@feelingmindful/thinking-graph 1.15.2 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +16 -0
- package/dist/config.js +39 -0
- package/dist/engine/dedup.d.ts +10 -0
- package/dist/engine/dedup.js +19 -0
- package/dist/engine/fusion.d.ts +75 -0
- package/dist/engine/fusion.js +144 -0
- package/dist/engine/graph.d.ts +35 -1
- package/dist/engine/graph.js +147 -1
- package/dist/engine/intent.d.ts +14 -0
- package/dist/engine/intent.js +19 -0
- package/dist/engine/types.d.ts +4 -0
- package/dist/storage/adapter.d.ts +30 -1
- package/dist/storage/jsonl.d.ts +30 -1
- package/dist/storage/jsonl.js +196 -5
- package/dist/storage/memory.d.ts +15 -2
- package/dist/storage/memory.js +54 -1
- package/dist/storage/sqlite.d.ts +1 -0
- package/dist/storage/sqlite.js +26 -0
- package/dist/storage/vector-index.d.ts +8 -1
- package/dist/storage/vector-index.js +10 -1
- package/dist/tools/execute-skills.d.ts +6 -6
- package/dist/tools/learn.d.ts +2 -2
- package/dist/tools/learn.js +25 -8
- package/dist/tools/recall.js +67 -14
- package/dist/tools/think.d.ts +1 -1
- package/dist/vault/bridge.d.ts +5 -4
- package/dist/vault/bridge.js +7 -5
- package/package.json +1 -1
package/dist/storage/jsonl.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import type { Node, Edge, Session, SkillRegistryEntry } from '../engine/types.js';
|
|
1
|
+
import type { Node, Edge, Session, SkillRegistryEntry, ScoredNode, NodeType } from '../engine/types.js';
|
|
2
2
|
import { InMemoryAdapter } from './memory.js';
|
|
3
|
+
import type { HybridSearchOpts } from './adapter.js';
|
|
3
4
|
export interface JSONLAdapterOpts {
|
|
4
5
|
dir: string;
|
|
5
6
|
}
|
|
@@ -10,12 +11,40 @@ export declare class JSONLAdapter extends InMemoryAdapter {
|
|
|
10
11
|
private readonly skillsPath;
|
|
11
12
|
private readonly embeddingsPath;
|
|
12
13
|
private readonly vectorIndex;
|
|
14
|
+
private embedQueue;
|
|
15
|
+
private draining;
|
|
16
|
+
private drainPromise;
|
|
17
|
+
private readonly embedGen;
|
|
13
18
|
constructor(opts: JSONLAdapterOpts);
|
|
14
19
|
initialize(): Promise<void>;
|
|
15
20
|
close(): Promise<void>;
|
|
21
|
+
/** Claim the next embed generation for an id (call at write time). */
|
|
22
|
+
private claimGen;
|
|
23
|
+
private embedAndStore;
|
|
24
|
+
private enqueueEmbed;
|
|
25
|
+
private startDrain;
|
|
26
|
+
private drainEmbedQueue;
|
|
27
|
+
/** Await all pending background embeddings (tests + graceful shutdown). */
|
|
28
|
+
flushEmbeds(): Promise<void>;
|
|
29
|
+
/**
|
|
30
|
+
* Rewrite nodes.jsonl from the in-memory survivors when append churn (from
|
|
31
|
+
* updateNode/reinforcement) has bloated it well past the live node count.
|
|
32
|
+
* Last-line-wins reload makes the dropped lines redundant; this just reclaims
|
|
33
|
+
* the space and keeps startup replay bounded.
|
|
34
|
+
*/
|
|
35
|
+
private compactNodesIfBloated;
|
|
16
36
|
private maybeRunSqliteMigration;
|
|
17
37
|
insertNode(node: Node): Promise<void>;
|
|
38
|
+
updateNode(id: string, fields: Partial<Node>): Promise<void>;
|
|
39
|
+
findNearest(content: string, opts: {
|
|
40
|
+
type: NodeType;
|
|
41
|
+
projectId?: string;
|
|
42
|
+
}): Promise<{
|
|
43
|
+
id: string;
|
|
44
|
+
score: number;
|
|
45
|
+
} | null>;
|
|
18
46
|
searchContent(query: string): Promise<Node[]>;
|
|
47
|
+
searchHybrid(opts: HybridSearchOpts): Promise<ScoredNode[]>;
|
|
19
48
|
insertEdge(edge: Edge): Promise<boolean>;
|
|
20
49
|
insertSession(session: Session): Promise<void>;
|
|
21
50
|
updateSession(id: string, fields: Partial<Session>): Promise<void>;
|
package/dist/storage/jsonl.js
CHANGED
|
@@ -1,9 +1,37 @@
|
|
|
1
|
-
import { readFileSync, appendFileSync, existsSync, mkdirSync } from 'fs';
|
|
1
|
+
import { readFileSync, appendFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
2
2
|
import { join } from 'path';
|
|
3
3
|
import { InMemoryAdapter } from './memory.js';
|
|
4
4
|
import { embedText } from './embedding.js';
|
|
5
|
+
import { fuseHybrid } from '../engine/fusion.js';
|
|
6
|
+
import { tokenize } from '../engine/dedup.js';
|
|
7
|
+
import { config } from '../config.js';
|
|
5
8
|
import { VectorIndex } from './vector-index.js';
|
|
6
9
|
import { migrateSqliteToJsonl } from './migrate-sqlite.js';
|
|
10
|
+
// Hybrid recall tuning. Dense (semantic) is weighted above lexical so that
|
|
11
|
+
// semantically-related nodes with no shared tokens still surface; lexical keeps
|
|
12
|
+
// exact-term matches competitive. Channel weights are env-overridable via
|
|
13
|
+
// config. DENSE_TOP_K caps the number of dense candidates kept (the cosine scan
|
|
14
|
+
// itself is O(N) over the filtered set). DENSE_MIN_COSINE drops near-orthogonal
|
|
15
|
+
// candidates so unrelated nodes are not pulled in (calibrated for bge-small).
|
|
16
|
+
const DENSE_TOP_K = 50;
|
|
17
|
+
const DENSE_MIN_COSINE = 0.25;
|
|
18
|
+
const DENSE_WEIGHT = config.denseWeight;
|
|
19
|
+
const LEXICAL_WEIGHT = config.lexicalWeight;
|
|
20
|
+
// updateNode (used by node reinforcement on every duplicate learn) appends a new
|
|
21
|
+
// line per write, so a hot-duplicate node accumulates dead lines. On reload, if
|
|
22
|
+
// nodes.jsonl has grown well beyond the live node count, rewrite it from the
|
|
23
|
+
// in-memory survivors to bound both disk size and replay cost.
|
|
24
|
+
const COMPACTION_MIN_LINES = 200;
|
|
25
|
+
const COMPACTION_FACTOR = 3;
|
|
26
|
+
// Async embedding (opt-in). When THINKING_GRAPH_ASYNC_EMBED=true, insertNode
|
|
27
|
+
// returns right after the durable JSONL append and embeds in the background, so
|
|
28
|
+
// a burst of writes is not blocked on model inference. Any vector missing after
|
|
29
|
+
// a crash is re-embedded on the next initialize() (self-healing). Default off
|
|
30
|
+
// preserves synchronous embed-on-write (and keeps read-your-writes semantics).
|
|
31
|
+
const ASYNC_EMBED = process.env.THINKING_GRAPH_ASYNC_EMBED === 'true';
|
|
32
|
+
// When embeddings are disabled, embedText always returns null, so there is
|
|
33
|
+
// nothing to defer or self-heal.
|
|
34
|
+
const EMBEDDINGS_ENABLED = process.env.THINKING_GRAPH_EMBEDDINGS !== 'false';
|
|
7
35
|
export class JSONLAdapter extends InMemoryAdapter {
|
|
8
36
|
nodesPath;
|
|
9
37
|
edgesPath;
|
|
@@ -11,6 +39,12 @@ export class JSONLAdapter extends InMemoryAdapter {
|
|
|
11
39
|
skillsPath;
|
|
12
40
|
embeddingsPath;
|
|
13
41
|
vectorIndex = new VectorIndex();
|
|
42
|
+
embedQueue = [];
|
|
43
|
+
draining = false;
|
|
44
|
+
drainPromise = null;
|
|
45
|
+
// Per-id embed generation, claimed at write time. The latest write for an id
|
|
46
|
+
// wins, so a stale deferred embed can't clobber a newer inline updateNode.
|
|
47
|
+
embedGen = new Map();
|
|
14
48
|
constructor(opts) {
|
|
15
49
|
super();
|
|
16
50
|
this.nodesPath = join(opts.dir, 'nodes.jsonl');
|
|
@@ -22,9 +56,12 @@ export class JSONLAdapter extends InMemoryAdapter {
|
|
|
22
56
|
async initialize() {
|
|
23
57
|
mkdirSync(join(this.nodesPath, '..'), { recursive: true });
|
|
24
58
|
await this.maybeRunSqliteMigration();
|
|
59
|
+
let rawNodeLines = 0;
|
|
25
60
|
for (const line of readLines(this.nodesPath)) {
|
|
26
61
|
await super.insertNode(JSON.parse(line));
|
|
62
|
+
rawNodeLines++;
|
|
27
63
|
}
|
|
64
|
+
await this.compactNodesIfBloated(rawNodeLines);
|
|
28
65
|
for (const line of readLines(this.edgesPath)) {
|
|
29
66
|
await super.insertEdge(JSON.parse(line));
|
|
30
67
|
}
|
|
@@ -47,8 +84,100 @@ export class JSONLAdapter extends InMemoryAdapter {
|
|
|
47
84
|
await super.insertSkill(sk);
|
|
48
85
|
}
|
|
49
86
|
this.vectorIndex.load(this.embeddingsPath);
|
|
87
|
+
// Self-heal: re-embed any node whose vector never landed (e.g. a crash
|
|
88
|
+
// before the async queue drained). No-op in sync mode / fully-embedded
|
|
89
|
+
// stores, and skipped entirely when embeddings are disabled (nothing to do).
|
|
90
|
+
if (ASYNC_EMBED && EMBEDDINGS_ENABLED) {
|
|
91
|
+
const all = (await this.queryNodes({ limit: Number.MAX_SAFE_INTEGER })).items;
|
|
92
|
+
for (const n of all) {
|
|
93
|
+
if (!this.vectorIndex.has(n.id))
|
|
94
|
+
this.enqueueEmbed(n.id, n.content);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
async close() {
|
|
99
|
+
await this.flushEmbeds();
|
|
100
|
+
}
|
|
101
|
+
// ─── Embedding (sync inline, or async background queue) ────────────────
|
|
102
|
+
/** Claim the next embed generation for an id (call at write time). */
|
|
103
|
+
claimGen(id) {
|
|
104
|
+
const gen = (this.embedGen.get(id) ?? 0) + 1;
|
|
105
|
+
this.embedGen.set(id, gen);
|
|
106
|
+
return gen;
|
|
107
|
+
}
|
|
108
|
+
async embedAndStore(id, content, gen) {
|
|
109
|
+
const vec = await embedText(content);
|
|
110
|
+
// Only write if this is still the latest write for the id — a stale deferred
|
|
111
|
+
// embed must not clobber a newer inline updateNode re-embed.
|
|
112
|
+
if (vec && this.embedGen.get(id) === gen) {
|
|
113
|
+
this.vectorIndex.append(this.embeddingsPath, id, vec);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
enqueueEmbed(id, content) {
|
|
117
|
+
// Claim the generation now (write order), not when the drain later runs.
|
|
118
|
+
this.embedQueue.push({ id, content, gen: this.claimGen(id) });
|
|
119
|
+
this.startDrain();
|
|
120
|
+
}
|
|
121
|
+
startDrain() {
|
|
122
|
+
if (this.draining)
|
|
123
|
+
return;
|
|
124
|
+
this.draining = true;
|
|
125
|
+
this.drainPromise = this.drainEmbedQueue().finally(() => {
|
|
126
|
+
this.draining = false;
|
|
127
|
+
// Restart if anything was enqueued during the drain or in the tiny window
|
|
128
|
+
// before this handler ran — otherwise the item would sit undrained and
|
|
129
|
+
// flushEmbeds() could spin. The finally body is synchronous, so this
|
|
130
|
+
// check and the enqueue cannot interleave.
|
|
131
|
+
if (this.embedQueue.length > 0)
|
|
132
|
+
this.startDrain();
|
|
133
|
+
});
|
|
134
|
+
}
|
|
135
|
+
async drainEmbedQueue() {
|
|
136
|
+
// Take the current batch by swapping the array (O(1)) instead of shift()
|
|
137
|
+
// (O(n) each). Items enqueued during the batch land in the fresh array and
|
|
138
|
+
// are picked up by the next loop iteration.
|
|
139
|
+
while (this.embedQueue.length > 0) {
|
|
140
|
+
const batch = this.embedQueue;
|
|
141
|
+
this.embedQueue = [];
|
|
142
|
+
for (const { id, content, gen } of batch) {
|
|
143
|
+
// Skip if a newer write already superseded this one (generation moved on).
|
|
144
|
+
if (this.embedGen.get(id) !== gen)
|
|
145
|
+
continue;
|
|
146
|
+
// Never let a single failed embed reject the un-awaited drain promise
|
|
147
|
+
// (would surface as an unhandledRejection) or abort the rest of the batch.
|
|
148
|
+
try {
|
|
149
|
+
await this.embedAndStore(id, content, gen);
|
|
150
|
+
}
|
|
151
|
+
catch (err) {
|
|
152
|
+
console.error('[thinking-graph] background embed failed:', err);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
/** Await all pending background embeddings (tests + graceful shutdown). */
|
|
158
|
+
async flushEmbeds() {
|
|
159
|
+
while (this.draining || this.embedQueue.length > 0) {
|
|
160
|
+
if (this.drainPromise)
|
|
161
|
+
await this.drainPromise;
|
|
162
|
+
else
|
|
163
|
+
await Promise.resolve();
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Rewrite nodes.jsonl from the in-memory survivors when append churn (from
|
|
168
|
+
* updateNode/reinforcement) has bloated it well past the live node count.
|
|
169
|
+
* Last-line-wins reload makes the dropped lines redundant; this just reclaims
|
|
170
|
+
* the space and keeps startup replay bounded.
|
|
171
|
+
*/
|
|
172
|
+
async compactNodesIfBloated(rawLineCount) {
|
|
173
|
+
if (rawLineCount <= COMPACTION_MIN_LINES)
|
|
174
|
+
return;
|
|
175
|
+
const nodes = (await this.queryNodes({ limit: Number.MAX_SAFE_INTEGER })).items;
|
|
176
|
+
if (rawLineCount <= COMPACTION_FACTOR * Math.max(1, nodes.length))
|
|
177
|
+
return;
|
|
178
|
+
const body = nodes.map(n => JSON.stringify(n)).join('\n');
|
|
179
|
+
writeFileSync(this.nodesPath, body ? body + '\n' : '', 'utf-8');
|
|
50
180
|
}
|
|
51
|
-
async close() { }
|
|
52
181
|
async maybeRunSqliteMigration() {
|
|
53
182
|
const dbPath = process.env.THINKING_GRAPH_PROJECT_DB;
|
|
54
183
|
if (!dbPath)
|
|
@@ -78,9 +207,41 @@ export class JSONLAdapter extends InMemoryAdapter {
|
|
|
78
207
|
async insertNode(node) {
|
|
79
208
|
await super.insertNode(node);
|
|
80
209
|
appendLine(this.nodesPath, node);
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
210
|
+
// Durable write is done; embed inline (default) or defer to the background
|
|
211
|
+
// queue so the write returns without waiting on model inference. Nothing to
|
|
212
|
+
// do when embeddings are disabled.
|
|
213
|
+
if (!EMBEDDINGS_ENABLED)
|
|
214
|
+
return;
|
|
215
|
+
if (ASYNC_EMBED)
|
|
216
|
+
this.enqueueEmbed(node.id, node.content);
|
|
217
|
+
else
|
|
218
|
+
await this.embedAndStore(node.id, node.content, this.claimGen(node.id));
|
|
219
|
+
}
|
|
220
|
+
async updateNode(id, fields) {
|
|
221
|
+
await super.updateNode(id, fields);
|
|
222
|
+
// Persist the new version; on reload the last line for an id wins.
|
|
223
|
+
const updated = await super.getNode(id);
|
|
224
|
+
if (!updated)
|
|
225
|
+
return;
|
|
226
|
+
appendLine(this.nodesPath, updated);
|
|
227
|
+
// Re-embed when content changed so the vector index does not go stale for
|
|
228
|
+
// findNearest / hybrid recall. (Reinforcement updates metadata only, so this
|
|
229
|
+
// is skipped on the hot duplicate-learn path.) Kept inline even in async mode
|
|
230
|
+
// — content updates are rare, and the inline write claims a newer generation, so the drain skips any stale queued embed for this id.
|
|
231
|
+
if (fields.content !== undefined && EMBEDDINGS_ENABLED) {
|
|
232
|
+
await this.embedAndStore(id, updated.content, this.claimGen(id));
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
async findNearest(content, opts) {
|
|
236
|
+
const vec = await embedText(content);
|
|
237
|
+
if (!vec)
|
|
238
|
+
return null;
|
|
239
|
+
const candidates = this.filterNodes({ type: opts.type, projectId: opts.projectId });
|
|
240
|
+
if (candidates.length === 0)
|
|
241
|
+
return null;
|
|
242
|
+
const allow = new Set(candidates.map(n => n.id));
|
|
243
|
+
const [top] = this.vectorIndex.search(vec, 1, allow);
|
|
244
|
+
return top ? { id: top.id, score: top.score } : null;
|
|
84
245
|
}
|
|
85
246
|
async searchContent(query) {
|
|
86
247
|
const vec = await embedText(query);
|
|
@@ -97,6 +258,36 @@ export class JSONLAdapter extends InMemoryAdapter {
|
|
|
97
258
|
}
|
|
98
259
|
return nodes;
|
|
99
260
|
}
|
|
261
|
+
async searchHybrid(opts) {
|
|
262
|
+
const { nodes, lexical } = await this.lexicalCandidates(opts);
|
|
263
|
+
if (nodes.size === 0)
|
|
264
|
+
return [];
|
|
265
|
+
const lexicalOnly = () => Promise.resolve(fuseHybrid({ nodes, channels: [{ scores: lexical, weight: 1 }] }));
|
|
266
|
+
// A query that tokenizes to nothing (punctuation/whitespace only) has no
|
|
267
|
+
// lexical or semantic intent — skip the embed call and the dense channel.
|
|
268
|
+
if (tokenize(opts.query).length === 0)
|
|
269
|
+
return lexicalOnly();
|
|
270
|
+
const qVec = await embedText(opts.query);
|
|
271
|
+
if (!qVec) {
|
|
272
|
+
// Embeddings disabled or model failed to load: degrade to lexical-only.
|
|
273
|
+
// (A cold-but-loadable model blocks until ready, then returns a vector.)
|
|
274
|
+
return lexicalOnly();
|
|
275
|
+
}
|
|
276
|
+
// Filter BEFORE ranking (allow-set) so a scoped recall sees the top-K of
|
|
277
|
+
// in-filter survivors, not the global top-K intersected with the filter.
|
|
278
|
+
const dense = new Map();
|
|
279
|
+
for (const m of this.vectorIndex.search(qVec, DENSE_TOP_K, new Set(nodes.keys()))) {
|
|
280
|
+
if (m.score >= DENSE_MIN_COSINE)
|
|
281
|
+
dense.set(m.id, m.score);
|
|
282
|
+
}
|
|
283
|
+
return fuseHybrid({
|
|
284
|
+
nodes,
|
|
285
|
+
channels: [
|
|
286
|
+
{ scores: dense, weight: DENSE_WEIGHT },
|
|
287
|
+
{ scores: lexical, weight: LEXICAL_WEIGHT },
|
|
288
|
+
],
|
|
289
|
+
});
|
|
290
|
+
}
|
|
100
291
|
// ─── Edges ─────────────────────────────────────────────
|
|
101
292
|
async insertEdge(edge) {
|
|
102
293
|
const created = await super.insertEdge(edge);
|
package/dist/storage/memory.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { Node, Edge, Session, SkillRegistryEntry, NodeQuery, PaginatedResult, EdgeType, ExportOpts, GraphExport, GraphStats } from '../engine/types.js';
|
|
2
|
-
import type { StorageAdapter } from './adapter.js';
|
|
1
|
+
import type { Node, Edge, Session, SkillRegistryEntry, NodeQuery, PaginatedResult, EdgeType, ScoredNode, ExportOpts, GraphExport, GraphStats } from '../engine/types.js';
|
|
2
|
+
import type { StorageAdapter, HybridSearchOpts } from './adapter.js';
|
|
3
3
|
export declare class InMemoryAdapter implements StorageAdapter {
|
|
4
4
|
private nodes;
|
|
5
5
|
private edges;
|
|
@@ -8,9 +8,22 @@ export declare class InMemoryAdapter implements StorageAdapter {
|
|
|
8
8
|
initialize(): Promise<void>;
|
|
9
9
|
close(): Promise<void>;
|
|
10
10
|
insertNode(node: Node): Promise<void>;
|
|
11
|
+
updateNode(id: string, fields: Partial<Node>): Promise<void>;
|
|
11
12
|
getNode(id: string): Promise<Node | null>;
|
|
13
|
+
/** Apply attribute + substring filters without sorting or paginating. */
|
|
14
|
+
protected filterNodes(query: NodeQuery): Node[];
|
|
12
15
|
queryNodes(query: NodeQuery): Promise<PaginatedResult<Node>>;
|
|
13
16
|
searchContent(text: string, limit?: number): Promise<Node[]>;
|
|
17
|
+
/**
|
|
18
|
+
* Gather attribute-filtered candidates plus a lexical (token-coverage) score
|
|
19
|
+
* per node. Shared by the in-memory lexical-only hybrid and the JSONL
|
|
20
|
+
* dense+lexical hybrid (which extends this class and adds a vector channel).
|
|
21
|
+
*/
|
|
22
|
+
protected lexicalCandidates(opts: HybridSearchOpts): Promise<{
|
|
23
|
+
nodes: Map<string, Node>;
|
|
24
|
+
lexical: Map<string, number>;
|
|
25
|
+
}>;
|
|
26
|
+
searchHybrid(opts: HybridSearchOpts): Promise<ScoredNode[]>;
|
|
14
27
|
insertEdge(edge: Edge): Promise<boolean>;
|
|
15
28
|
getEdgesFrom(nodeId: string, type?: EdgeType): Promise<Edge[]>;
|
|
16
29
|
getEdgesTo(nodeId: string, type?: EdgeType): Promise<Edge[]>;
|
package/dist/storage/memory.js
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
import { lexicalScoreTokens, fuseHybrid } from '../engine/fusion.js';
|
|
2
|
+
import { tokenize } from '../engine/dedup.js';
|
|
3
|
+
// Score for a raw-substring-only match (no exact token overlap). Keeps the
|
|
4
|
+
// hybrid lexical channel a strict superset of the legacy substring filter, so a
|
|
5
|
+
// subword query like "auth" still surfaces "authentication" nodes. Ranked below
|
|
6
|
+
// any genuine token-coverage match.
|
|
7
|
+
const SUBSTRING_FLOOR = 0.5;
|
|
1
8
|
export class InMemoryAdapter {
|
|
2
9
|
nodes = new Map();
|
|
3
10
|
edges = new Map();
|
|
@@ -9,10 +16,17 @@ export class InMemoryAdapter {
|
|
|
9
16
|
async insertNode(node) {
|
|
10
17
|
this.nodes.set(node.id, { ...node });
|
|
11
18
|
}
|
|
19
|
+
async updateNode(id, fields) {
|
|
20
|
+
const node = this.nodes.get(id);
|
|
21
|
+
if (node) {
|
|
22
|
+
this.nodes.set(id, { ...node, ...fields, id: node.id });
|
|
23
|
+
}
|
|
24
|
+
}
|
|
12
25
|
async getNode(id) {
|
|
13
26
|
return this.nodes.get(id) ?? null;
|
|
14
27
|
}
|
|
15
|
-
|
|
28
|
+
/** Apply attribute + substring filters without sorting or paginating. */
|
|
29
|
+
filterNodes(query) {
|
|
16
30
|
let results = [...this.nodes.values()];
|
|
17
31
|
if (query.type) {
|
|
18
32
|
const types = Array.isArray(query.type) ? query.type : [query.type];
|
|
@@ -31,6 +45,10 @@ export class InMemoryAdapter {
|
|
|
31
45
|
const q = query.query.toLowerCase();
|
|
32
46
|
results = results.filter(n => n.content.toLowerCase().includes(q));
|
|
33
47
|
}
|
|
48
|
+
return results;
|
|
49
|
+
}
|
|
50
|
+
async queryNodes(query) {
|
|
51
|
+
const results = this.filterNodes(query);
|
|
34
52
|
// Sort by createdAt desc
|
|
35
53
|
results.sort((a, b) => b.createdAt.localeCompare(a.createdAt));
|
|
36
54
|
const totalCount = results.length;
|
|
@@ -45,6 +63,41 @@ export class InMemoryAdapter {
|
|
|
45
63
|
.filter(n => n.content.toLowerCase().includes(q))
|
|
46
64
|
.slice(0, limit);
|
|
47
65
|
}
|
|
66
|
+
/**
|
|
67
|
+
* Gather attribute-filtered candidates plus a lexical (token-coverage) score
|
|
68
|
+
* per node. Shared by the in-memory lexical-only hybrid and the JSONL
|
|
69
|
+
* dense+lexical hybrid (which extends this class and adds a vector channel).
|
|
70
|
+
*/
|
|
71
|
+
async lexicalCandidates(opts) {
|
|
72
|
+
// Attribute filter only (no substring) and unsorted — fuseHybrid re-ranks.
|
|
73
|
+
const candidates = this.filterNodes({
|
|
74
|
+
type: opts.type,
|
|
75
|
+
sessionId: opts.sessionId,
|
|
76
|
+
projectId: opts.projectId,
|
|
77
|
+
crossProject: opts.crossProject,
|
|
78
|
+
since: opts.since,
|
|
79
|
+
});
|
|
80
|
+
const queryTokens = new Set(tokenize(opts.query));
|
|
81
|
+
const rawQuery = opts.query.toLowerCase().trim();
|
|
82
|
+
const nodes = new Map();
|
|
83
|
+
const lexical = new Map();
|
|
84
|
+
for (const n of candidates) {
|
|
85
|
+
nodes.set(n.id, n);
|
|
86
|
+
let s = lexicalScoreTokens(queryTokens, n.content);
|
|
87
|
+
// Fall back to legacy substring matching so subword/partial-token queries
|
|
88
|
+
// are never a regression from the old substring-only recall.
|
|
89
|
+
if (s === 0 && rawQuery && n.content.toLowerCase().includes(rawQuery)) {
|
|
90
|
+
s = SUBSTRING_FLOOR;
|
|
91
|
+
}
|
|
92
|
+
if (s > 0)
|
|
93
|
+
lexical.set(n.id, s);
|
|
94
|
+
}
|
|
95
|
+
return { nodes, lexical };
|
|
96
|
+
}
|
|
97
|
+
async searchHybrid(opts) {
|
|
98
|
+
const { nodes, lexical } = await this.lexicalCandidates(opts);
|
|
99
|
+
return fuseHybrid({ nodes, channels: [{ scores: lexical, weight: 1 }] });
|
|
100
|
+
}
|
|
48
101
|
// ─── Edges ─────────────────────────────────────────────
|
|
49
102
|
async insertEdge(edge) {
|
|
50
103
|
// Check for duplicate (same source, target, type)
|
package/dist/storage/sqlite.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ export declare class SQLiteAdapter implements StorageAdapter {
|
|
|
14
14
|
private get;
|
|
15
15
|
private all;
|
|
16
16
|
insertNode(node: Node): Promise<void>;
|
|
17
|
+
updateNode(id: string, fields: Partial<Node>): Promise<void>;
|
|
17
18
|
getNode(id: string): Promise<Node | null>;
|
|
18
19
|
queryNodes(query: NodeQuery): Promise<PaginatedResult<Node>>;
|
|
19
20
|
searchContent(text: string, limit?: number): Promise<Node[]>;
|
package/dist/storage/sqlite.js
CHANGED
|
@@ -59,6 +59,32 @@ export class SQLiteAdapter {
|
|
|
59
59
|
node.revisesThought ?? null,
|
|
60
60
|
]);
|
|
61
61
|
}
|
|
62
|
+
// Supports only mutable content fields: content, metadata, updatedAt.
|
|
63
|
+
// (reinforceNode, the only runtime caller, updates just metadata + updatedAt;
|
|
64
|
+
// content is included for a general updateNode.) Other Node fields are
|
|
65
|
+
// intentionally not mutable here — this adapter is migration-only, and node
|
|
66
|
+
// identity/lineage fields (sessionId, projectId, thought*) are set once at
|
|
67
|
+
// insert. Extend the mapping if a real updateNode caller needs more.
|
|
68
|
+
async updateNode(id, fields) {
|
|
69
|
+
const sets = [];
|
|
70
|
+
const params = [];
|
|
71
|
+
if (fields.content !== undefined) {
|
|
72
|
+
sets.push('content = ?');
|
|
73
|
+
params.push(fields.content);
|
|
74
|
+
}
|
|
75
|
+
if (fields.metadata !== undefined) {
|
|
76
|
+
sets.push('metadata = ?');
|
|
77
|
+
params.push(JSON.stringify(fields.metadata));
|
|
78
|
+
}
|
|
79
|
+
if (fields.updatedAt !== undefined) {
|
|
80
|
+
sets.push('updated_at = ?');
|
|
81
|
+
params.push(fields.updatedAt);
|
|
82
|
+
}
|
|
83
|
+
if (sets.length > 0) {
|
|
84
|
+
params.push(id);
|
|
85
|
+
this.run(`UPDATE nodes SET ${sets.join(', ')} WHERE id = ?`, params);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
62
88
|
async getNode(id) {
|
|
63
89
|
const row = this.get('SELECT * FROM nodes WHERE id = ?', [id]);
|
|
64
90
|
return row ? this.rowToNode(row) : null;
|
package/dist/storage/vector-index.d.ts
CHANGED
|
@@ -6,6 +6,13 @@ export declare class VectorIndex {
|
|
|
6
6
|
private readonly vectors;
|
|
7
7
|
load(path: string): void;
|
|
8
8
|
append(path: string, id: string, vec: Float32Array): void;
|
|
9
|
-
|
|
9
|
+
/**
|
|
10
|
+
* Top-K by cosine (vectors are L2-normalized so dot == cosine). When `allow`
|
|
11
|
+
* is given, only those ids are scored — this filters BEFORE ranking, so a
|
|
12
|
+
* scoped recall sees the top-K of the in-filter survivors rather than the
|
|
13
|
+
* global top-K intersected with the filter (which would silently drop
|
|
14
|
+
* in-filter matches ranked beyond K globally).
|
|
15
|
+
*/
|
|
16
|
+
search(queryVec: Float32Array, topK: number, allow?: Set<string>): VectorMatch[];
|
|
10
17
|
has(id: string): boolean;
|
|
11
18
|
}
|
package/dist/storage/vector-index.js
CHANGED
|
@@ -15,9 +15,18 @@ export class VectorIndex {
|
|
|
15
15
|
this.vectors.set(id, vec);
|
|
16
16
|
appendFileSync(path, JSON.stringify({ id, vec: Array.from(vec) }) + '\n', 'utf-8');
|
|
17
17
|
}
|
|
18
|
-
|
|
18
|
+
/**
|
|
19
|
+
* Top-K by cosine (vectors are L2-normalized so dot == cosine). When `allow`
|
|
20
|
+
* is given, only those ids are scored — this filters BEFORE ranking, so a
|
|
21
|
+
* scoped recall sees the top-K of the in-filter survivors rather than the
|
|
22
|
+
* global top-K intersected with the filter (which would silently drop
|
|
23
|
+
* in-filter matches ranked beyond K globally).
|
|
24
|
+
*/
|
|
25
|
+
search(queryVec, topK, allow) {
|
|
19
26
|
const results = [];
|
|
20
27
|
for (const [id, vec] of this.vectors) {
|
|
28
|
+
if (allow && !allow.has(id))
|
|
29
|
+
continue;
|
|
21
30
|
results.push({ id, score: dot(queryVec, vec) });
|
|
22
31
|
}
|
|
23
32
|
results.sort((a, b) => b.score - a.score);
|
package/dist/tools/execute-skills.d.ts
CHANGED
|
@@ -32,21 +32,21 @@ export declare const executeSkillsSchema: z.ZodObject<{
|
|
|
32
32
|
}, "strip", z.ZodTypeAny, {
|
|
33
33
|
content: string;
|
|
34
34
|
metadata?: Record<string, unknown> | undefined;
|
|
35
|
-
severity?: "
|
|
35
|
+
severity?: "high" | "critical" | "medium" | "low" | undefined;
|
|
36
36
|
effort?: string | undefined;
|
|
37
37
|
impact?: string | undefined;
|
|
38
38
|
}, {
|
|
39
39
|
content: string;
|
|
40
40
|
metadata?: Record<string, unknown> | undefined;
|
|
41
|
-
severity?: "
|
|
41
|
+
severity?: "high" | "critical" | "medium" | "low" | undefined;
|
|
42
42
|
effort?: string | undefined;
|
|
43
43
|
impact?: string | undefined;
|
|
44
44
|
}>, "many">>;
|
|
45
45
|
sessionId: z.ZodOptional<z.ZodString>;
|
|
46
46
|
projectId: z.ZodOptional<z.ZodString>;
|
|
47
47
|
}, "strip", z.ZodTypeAny, {
|
|
48
|
-
invocation: string;
|
|
49
48
|
status: "completed" | "failed";
|
|
49
|
+
invocation: string;
|
|
50
50
|
skill: string;
|
|
51
51
|
stepNumber: number;
|
|
52
52
|
plugin: string;
|
|
@@ -65,7 +65,7 @@ export declare const executeSkillsSchema: z.ZodObject<{
|
|
|
65
65
|
techDebt?: {
|
|
66
66
|
content: string;
|
|
67
67
|
metadata?: Record<string, unknown> | undefined;
|
|
68
|
-
severity?: "
|
|
68
|
+
severity?: "high" | "critical" | "medium" | "low" | undefined;
|
|
69
69
|
effort?: string | undefined;
|
|
70
70
|
impact?: string | undefined;
|
|
71
71
|
}[] | undefined;
|
|
@@ -75,10 +75,10 @@ export declare const executeSkillsSchema: z.ZodObject<{
|
|
|
75
75
|
stepNumber: number;
|
|
76
76
|
plugin: string;
|
|
77
77
|
resultSummary: string;
|
|
78
|
+
status?: "completed" | "failed" | undefined;
|
|
78
79
|
sessionId?: string | undefined;
|
|
79
80
|
projectId?: string | undefined;
|
|
80
81
|
metadata?: Record<string, unknown> | undefined;
|
|
81
|
-
status?: "completed" | "failed" | undefined;
|
|
82
82
|
purpose?: string | undefined;
|
|
83
83
|
planId?: string | undefined;
|
|
84
84
|
runId?: string | undefined;
|
|
@@ -90,7 +90,7 @@ export declare const executeSkillsSchema: z.ZodObject<{
|
|
|
90
90
|
techDebt?: {
|
|
91
91
|
content: string;
|
|
92
92
|
metadata?: Record<string, unknown> | undefined;
|
|
93
|
-
severity?: "
|
|
93
|
+
severity?: "high" | "critical" | "medium" | "low" | undefined;
|
|
94
94
|
effort?: string | undefined;
|
|
95
95
|
impact?: string | undefined;
|
|
96
96
|
}[] | undefined;
|
package/dist/tools/learn.d.ts
CHANGED
|
@@ -32,7 +32,7 @@ export declare const learnSchema: z.ZodObject<{
|
|
|
32
32
|
sessionId?: string | undefined;
|
|
33
33
|
projectId?: string | undefined;
|
|
34
34
|
metadata?: Record<string, unknown> | undefined;
|
|
35
|
-
severity?: "
|
|
35
|
+
severity?: "high" | "critical" | "medium" | "low" | undefined;
|
|
36
36
|
filePath?: string | undefined;
|
|
37
37
|
lineRange?: [number, number] | undefined;
|
|
38
38
|
relates?: {
|
|
@@ -49,7 +49,7 @@ export declare const learnSchema: z.ZodObject<{
|
|
|
49
49
|
sessionId?: string | undefined;
|
|
50
50
|
projectId?: string | undefined;
|
|
51
51
|
metadata?: Record<string, unknown> | undefined;
|
|
52
|
-
severity?: "
|
|
52
|
+
severity?: "high" | "critical" | "medium" | "low" | undefined;
|
|
53
53
|
filePath?: string | undefined;
|
|
54
54
|
lineRange?: [number, number] | undefined;
|
|
55
55
|
relates?: {
|
package/dist/tools/learn.js
CHANGED
|
@@ -20,17 +20,22 @@ export const learnSchema = z.object({
|
|
|
20
20
|
metadata: z.record(z.unknown()).optional(),
|
|
21
21
|
});
|
|
22
22
|
export async function learnHandler(graph, input, vault, projectSlug) {
|
|
23
|
-
//
|
|
24
|
-
const
|
|
25
|
-
if (
|
|
23
|
+
// Banded duplicate detection (normalized-exact → semantic cosine → Jaccard).
|
|
24
|
+
const dup = await graph.findDuplicate(input.content, input.type, input.projectId);
|
|
25
|
+
if (dup && (dup.band === 'exact' || dup.band === 'high')) {
|
|
26
|
+
// High-confidence duplicate: reinforce the existing node (records the merge,
|
|
27
|
+
// preserves the new phrasing) instead of silently dropping the new content.
|
|
28
|
+
await graph.reinforceNode(dup.node.id, input.content);
|
|
26
29
|
return {
|
|
27
30
|
content: [{
|
|
28
31
|
type: 'text',
|
|
29
32
|
text: JSON.stringify({
|
|
30
|
-
nodeId:
|
|
31
|
-
type:
|
|
33
|
+
nodeId: dup.node.id,
|
|
34
|
+
type: dup.node.type,
|
|
32
35
|
relatedCount: 0,
|
|
33
|
-
duplicateOf:
|
|
36
|
+
duplicateOf: dup.node.id,
|
|
37
|
+
merged: true,
|
|
38
|
+
band: dup.band,
|
|
34
39
|
}),
|
|
35
40
|
}],
|
|
36
41
|
};
|
|
@@ -57,8 +62,19 @@ export async function learnHandler(graph, input, vault, projectSlug) {
|
|
|
57
62
|
projectId: input.projectId,
|
|
58
63
|
metadata,
|
|
59
64
|
});
|
|
60
|
-
//
|
|
61
|
-
|
|
65
|
+
// Borderline near-duplicate: keep the new node but link it to the near-dup so
|
|
66
|
+
// the relationship is captured rather than creating a silent parallel copy.
|
|
67
|
+
if (dup && dup.band === 'borderline') {
|
|
68
|
+
await graph.addEdge({
|
|
69
|
+
sourceId: node.id,
|
|
70
|
+
targetId: dup.node.id,
|
|
71
|
+
type: 'similar_to',
|
|
72
|
+
reasoning: `Near-duplicate (score ${dup.score.toFixed(2)})`,
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
// Create relationships. The auto-created similar_to edge (borderline path
|
|
76
|
+
// above) counts as a relationship produced by this call.
|
|
77
|
+
let relatedCount = dup?.band === 'borderline' ? 1 : 0;
|
|
62
78
|
if (input.relates) {
|
|
63
79
|
for (const rel of input.relates) {
|
|
64
80
|
await graph.addEdge({
|
|
@@ -113,6 +129,7 @@ export async function learnHandler(graph, input, vault, projectSlug) {
|
|
|
113
129
|
type: node.type,
|
|
114
130
|
relatedCount,
|
|
115
131
|
duplicateOf: null,
|
|
132
|
+
...(dup?.band === 'borderline' && { nearDuplicateOf: dup.node.id, band: 'borderline' }),
|
|
116
133
|
...(vaultPath && { vaultPath }),
|
|
117
134
|
}),
|
|
118
135
|
}],
|