mindgraph-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/embeddings/embedder-interface.d.ts +15 -0
- package/dist/embeddings/embedder-interface.d.ts.map +1 -0
- package/dist/embeddings/embedder-interface.js +2 -0
- package/dist/embeddings/embedder-interface.js.map +1 -0
- package/dist/embeddings/ollama-embedder.d.ts +21 -0
- package/dist/embeddings/ollama-embedder.d.ts.map +1 -0
- package/dist/embeddings/ollama-embedder.js +78 -0
- package/dist/embeddings/ollama-embedder.js.map +1 -0
- package/dist/embeddings/transformers-embedder.d.ts +20 -0
- package/dist/embeddings/transformers-embedder.d.ts.map +1 -0
- package/dist/embeddings/transformers-embedder.js +61 -0
- package/dist/embeddings/transformers-embedder.js.map +1 -0
- package/dist/extraction/anthropic-extractor.d.ts +17 -0
- package/dist/extraction/anthropic-extractor.d.ts.map +1 -0
- package/dist/extraction/anthropic-extractor.js +43 -0
- package/dist/extraction/anthropic-extractor.js.map +1 -0
- package/dist/extraction/basic-extractor.d.ts +17 -0
- package/dist/extraction/basic-extractor.d.ts.map +1 -0
- package/dist/extraction/basic-extractor.js +135 -0
- package/dist/extraction/basic-extractor.js.map +1 -0
- package/dist/extraction/confidence-gate.d.ts +7 -0
- package/dist/extraction/confidence-gate.d.ts.map +1 -0
- package/dist/extraction/confidence-gate.js +13 -0
- package/dist/extraction/confidence-gate.js.map +1 -0
- package/dist/extraction/contradiction-detector.d.ts +46 -0
- package/dist/extraction/contradiction-detector.d.ts.map +1 -0
- package/dist/extraction/contradiction-detector.js +205 -0
- package/dist/extraction/contradiction-detector.js.map +1 -0
- package/dist/extraction/entity-index.d.ts +31 -0
- package/dist/extraction/entity-index.d.ts.map +1 -0
- package/dist/extraction/entity-index.js +90 -0
- package/dist/extraction/entity-index.js.map +1 -0
- package/dist/extraction/entity-resolver.d.ts +28 -0
- package/dist/extraction/entity-resolver.d.ts.map +1 -0
- package/dist/extraction/entity-resolver.js +111 -0
- package/dist/extraction/entity-resolver.js.map +1 -0
- package/dist/extraction/extractor-interface.d.ts +51 -0
- package/dist/extraction/extractor-interface.d.ts.map +1 -0
- package/dist/extraction/extractor-interface.js +2 -0
- package/dist/extraction/extractor-interface.js.map +1 -0
- package/dist/extraction/llm-extractor.d.ts +21 -0
- package/dist/extraction/llm-extractor.d.ts.map +1 -0
- package/dist/extraction/llm-extractor.js +97 -0
- package/dist/extraction/llm-extractor.js.map +1 -0
- package/dist/extraction/ollama-extractor.d.ts +18 -0
- package/dist/extraction/ollama-extractor.d.ts.map +1 -0
- package/dist/extraction/ollama-extractor.js +50 -0
- package/dist/extraction/ollama-extractor.js.map +1 -0
- package/dist/extraction/open-loop-detector.d.ts +24 -0
- package/dist/extraction/open-loop-detector.d.ts.map +1 -0
- package/dist/extraction/open-loop-detector.js +187 -0
- package/dist/extraction/open-loop-detector.js.map +1 -0
- package/dist/extraction/openai-extractor.d.ts +20 -0
- package/dist/extraction/openai-extractor.d.ts.map +1 -0
- package/dist/extraction/openai-extractor.js +44 -0
- package/dist/extraction/openai-extractor.js.map +1 -0
- package/dist/extraction/prompts/entity-extraction.d.ts +2 -0
- package/dist/extraction/prompts/entity-extraction.d.ts.map +1 -0
- package/dist/extraction/prompts/entity-extraction.js +42 -0
- package/dist/extraction/prompts/entity-extraction.js.map +1 -0
- package/dist/extraction/prompts/proposition-extraction.d.ts +2 -0
- package/dist/extraction/prompts/proposition-extraction.d.ts.map +1 -0
- package/dist/extraction/prompts/proposition-extraction.js +39 -0
- package/dist/extraction/prompts/proposition-extraction.js.map +1 -0
- package/dist/extraction/prompts/thought-extraction.d.ts +2 -0
- package/dist/extraction/prompts/thought-extraction.d.ts.map +1 -0
- package/dist/extraction/prompts/thought-extraction.js +41 -0
- package/dist/extraction/prompts/thought-extraction.js.map +1 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +33 -0
- package/dist/index.js.map +1 -0
- package/dist/ingestion/chunk-id.d.ts +27 -0
- package/dist/ingestion/chunk-id.d.ts.map +1 -0
- package/dist/ingestion/chunk-id.js +45 -0
- package/dist/ingestion/chunk-id.js.map +1 -0
- package/dist/ingestion/chunker.d.ts +29 -0
- package/dist/ingestion/chunker.d.ts.map +1 -0
- package/dist/ingestion/chunker.js +182 -0
- package/dist/ingestion/chunker.js.map +1 -0
- package/dist/ingestion/hasher.d.ts +7 -0
- package/dist/ingestion/hasher.d.ts.map +1 -0
- package/dist/ingestion/hasher.js +18 -0
- package/dist/ingestion/hasher.js.map +1 -0
- package/dist/ingestion/pipeline.d.ts +58 -0
- package/dist/ingestion/pipeline.d.ts.map +1 -0
- package/dist/ingestion/pipeline.js +653 -0
- package/dist/ingestion/pipeline.js.map +1 -0
- package/dist/models/citation.d.ts +2 -0
- package/dist/models/citation.d.ts.map +1 -0
- package/dist/models/citation.js +2 -0
- package/dist/models/citation.js.map +1 -0
- package/dist/models/extraction-result.d.ts +2 -0
- package/dist/models/extraction-result.d.ts.map +1 -0
- package/dist/models/extraction-result.js +2 -0
- package/dist/models/extraction-result.js.map +1 -0
- package/dist/models/query-result.d.ts +2 -0
- package/dist/models/query-result.d.ts.map +1 -0
- package/dist/models/query-result.js +2 -0
- package/dist/models/query-result.js.map +1 -0
- package/dist/query/answer-builder.d.ts +15 -0
- package/dist/query/answer-builder.d.ts.map +1 -0
- package/dist/query/answer-builder.js +51 -0
- package/dist/query/answer-builder.js.map +1 -0
- package/dist/query/citation-builder.d.ts +19 -0
- package/dist/query/citation-builder.d.ts.map +1 -0
- package/dist/query/citation-builder.js +54 -0
- package/dist/query/citation-builder.js.map +1 -0
- package/dist/query/graph-data.d.ts +39 -0
- package/dist/query/graph-data.d.ts.map +1 -0
- package/dist/query/graph-data.js +115 -0
- package/dist/query/graph-data.js.map +1 -0
- package/dist/query/graph-search.d.ts +43 -0
- package/dist/query/graph-search.d.ts.map +1 -0
- package/dist/query/graph-search.js +315 -0
- package/dist/query/graph-search.js.map +1 -0
- package/dist/query/query-engine.d.ts +41 -0
- package/dist/query/query-engine.d.ts.map +1 -0
- package/dist/query/query-engine.js +178 -0
- package/dist/query/query-engine.js.map +1 -0
- package/dist/query/semantic-search.d.ts +26 -0
- package/dist/query/semantic-search.d.ts.map +1 -0
- package/dist/query/semantic-search.js +132 -0
- package/dist/query/semantic-search.js.map +1 -0
- package/dist/schema/edge-types.d.ts +95 -0
- package/dist/schema/edge-types.d.ts.map +1 -0
- package/dist/schema/edge-types.js +6 -0
- package/dist/schema/edge-types.js.map +1 -0
- package/dist/schema/node-types.d.ts +100 -0
- package/dist/schema/node-types.d.ts.map +1 -0
- package/dist/schema/node-types.js +6 -0
- package/dist/schema/node-types.js.map +1 -0
- package/dist/schema/types.d.ts +134 -0
- package/dist/schema/types.d.ts.map +1 -0
- package/dist/schema/types.js +52 -0
- package/dist/schema/types.js.map +1 -0
- package/dist/schema/validation.d.ts +6 -0
- package/dist/schema/validation.d.ts.map +1 -0
- package/dist/schema/validation.js +140 -0
- package/dist/schema/validation.js.map +1 -0
- package/dist/storage/export-import.d.ts +28 -0
- package/dist/storage/export-import.d.ts.map +1 -0
- package/dist/storage/export-import.js +189 -0
- package/dist/storage/export-import.js.map +1 -0
- package/dist/storage/memory/memory-adapter.d.ts +36 -0
- package/dist/storage/memory/memory-adapter.d.ts.map +1 -0
- package/dist/storage/memory/memory-adapter.js +231 -0
- package/dist/storage/memory/memory-adapter.js.map +1 -0
- package/dist/storage/sqlite/graph-traversal.d.ts +11 -0
- package/dist/storage/sqlite/graph-traversal.d.ts.map +1 -0
- package/dist/storage/sqlite/graph-traversal.js +79 -0
- package/dist/storage/sqlite/graph-traversal.js.map +1 -0
- package/dist/storage/sqlite/query-builder.d.ts +8 -0
- package/dist/storage/sqlite/query-builder.d.ts.map +1 -0
- package/dist/storage/sqlite/query-builder.js +55 -0
- package/dist/storage/sqlite/query-builder.js.map +1 -0
- package/dist/storage/sqlite/sqlite-adapter.d.ts +53 -0
- package/dist/storage/sqlite/sqlite-adapter.d.ts.map +1 -0
- package/dist/storage/sqlite/sqlite-adapter.js +497 -0
- package/dist/storage/sqlite/sqlite-adapter.js.map +1 -0
- package/dist/storage/storage-interface.d.ts +64 -0
- package/dist/storage/storage-interface.d.ts.map +1 -0
- package/dist/storage/storage-interface.js +2 -0
- package/dist/storage/storage-interface.js.map +1 -0
- package/dist/utils/retry-fetch.d.ts +20 -0
- package/dist/utils/retry-fetch.d.ts.map +1 -0
- package/dist/utils/retry-fetch.js +71 -0
- package/dist/utils/retry-fetch.js.map +1 -0
- package/package.json +28 -0
|
@@ -0,0 +1,653 @@
|
|
|
1
|
+
import { EntityResolver } from '../extraction/entity-resolver.js';
|
|
2
|
+
import { EntityIndex } from '../extraction/entity-index.js';
|
|
3
|
+
import { applyConfidenceGate } from '../extraction/confidence-gate.js';
|
|
4
|
+
import { ContradictionDetector } from '../extraction/contradiction-detector.js';
|
|
5
|
+
import { chunkMarkdown, stripFrontmatter } from './chunker.js';
|
|
6
|
+
import { generateNoteId, generateEntityId, generateConceptId, generatePropositionId, generateThoughtId } from './chunk-id.js';
|
|
7
|
+
import { hashContent, hashStatement } from './hasher.js';
|
|
8
|
+
import { NodeType, RelType } from '../schema/types.js';
|
|
9
|
+
// Node id of the implicit "self" agent (the vault owner); source of the
// BELIEVES / ENDORSES edges created during ingestion.
const DEFAULT_AGENT_ID = 'agent:self';
|
|
10
|
+
/**
 * The ingestion pipeline orchestrates:
 * hash check → chunk → extract → embed → store → detect contradictions
 */
export class IngestionPipeline {
    /**
     * @param storage - Graph storage adapter (nodes, edges, FTS entries, embeddings).
     * @param embedder - Embedding provider; may be falsy (indexNote guards on
     *   `this.embedder` before embedding).
     * @param extractor - Extractor producing entities/concepts/propositions/thoughts.
     * @param options - Optional settings: chunkerOptions, confidenceThreshold,
     *   enableContradictionDetection.
     */
    constructor(storage, embedder, extractor, options = {}) {
        this.storage = storage;
        this.embedder = embedder;
        this.extractor = extractor;
        this.options = options;
        // Accumulates and merges entity/concept mentions across the chunks of one note.
        this.entityResolver = new EntityResolver();
        // Lazily-built name/alias → node-id index; see getEntityIndex().
        this.entityIndex = null;
        this.contradictionDetector = new ContradictionDetector(storage, embedder);
    }
    /**
     * Lazily load the entity index from storage on first use; later calls
     * return the cached instance, which the pipeline keeps current itself.
     */
    async getEntityIndex() {
        if (!this.entityIndex) {
            this.entityIndex = new EntityIndex();
            await this.entityIndex.loadFromStorage(this.storage);
        }
        return this.entityIndex;
    }
    /**
     * Index a single note. Idempotent — skips if content hash unchanged.
     *
     * @param notePath - Vault-relative path of the note (used for ids and titles).
     * @param content - Raw note text, frontmatter included.
     * @returns Per-note ingestion stats; `skipped: true` when the hash matched.
     */
    async indexNote(notePath, content) {
        const stats = {
            notePath,
            chunksCreated: 0,
            entitiesExtracted: 0,
            conceptsExtracted: 0,
            propositionsExtracted: 0,
            thoughtsExtracted: 0,
            embeddingsCreated: 0,
            contradictionsDetected: 0,
            skipped: false,
        };
        const contentHash = hashContent(content);
        const noteId = generateNoteId(notePath);
        // Check if note already indexed with same content
        const existingNote = await this.storage.getNode(noteId);
        if (existingNote && existingNote.data.contentHash === contentHash) {
            stats.skipped = true;
            return stats;
        }
        // Strip frontmatter
        const { frontmatter, body } = stripFrontmatter(content);
        // Extract metadata: frontmatter title wins, then the ".md"-less basename,
        // then the raw path.
        const title = frontmatter.title ??
            notePath.split('/').pop()?.replace(/\.md$/, '') ?? notePath;
        const tags = extractTags(body, frontmatter);
        const aliases = frontmatter.aliases ?? [];
        const now = new Date().toISOString();
        // Upsert Note node (createdAt preserved on re-index)
        const noteNode = {
            id: noteId,
            nodeType: NodeType.Note,
            data: {
                title,
                path: notePath,
                contentHash,
                wordCount: body.split(/\s+/).length,
                tags,
                aliases,
                frontmatter,
            },
            createdAt: existingNote?.createdAt ?? now,
            updatedAt: now,
        };
        await this.storage.upsertNode(noteNode);
        // Clean up old chunks for this note, then remove orphaned entities
        const affectedIds = await this.cleanupNoteChunks(noteId, notePath);
        await this.cleanupOrphanedNodes(affectedIds);
        // Chunk the note
        const chunks = chunkMarkdown(notePath, body, this.options.chunkerOptions);
        stats.chunksCreated = chunks.length;
        // Ensure default agent exists
        await this.ensureDefaultAgent();
        // Process each chunk (resolver is reset so entities merge per-note only)
        this.entityResolver.clear();
        for (const chunk of chunks) {
            await this.processChunk(chunk, noteId, notePath, body, stats);
        }
        // Batch embed all chunks
        if (this.embedder && chunks.length > 0) {
            try {
                const vectors = await this.embedder.embedBatch(chunks.map((c) => c.text));
                const embNow = new Date().toISOString();
                for (let i = 0; i < vectors.length; i++) {
                    await this.storage.upsertEmbedding({
                        chunkId: chunks[i].id,
                        vector: vectors[i],
                        modelId: this.embedder.modelId,
                        dimensions: this.embedder.dimensions,
                        createdAt: embNow,
                    });
                    stats.embeddingsCreated++;
                }
            }
            catch {
                // Embedding failure is non-fatal
            }
        }
        // Store resolved entities and concepts (merged across all chunks)
        await this.storeResolvedEntities(noteId, notePath, stats);
        return stats;
    }
    /**
     * Remove a note and all its derived data.
     * Deletes the note's chunks (plus their FTS/embedding rows via
     * cleanupNoteChunks), the note's edges, the note node itself, and finally
     * any entities/concepts left without MENTIONS edges.
     */
    async forgetNote(notePath) {
        const noteId = generateNoteId(notePath);
        const affectedIds = await this.cleanupNoteChunks(noteId, notePath);
        await this.storage.deleteEdgesByNode(noteId);
        await this.storage.deleteNode(noteId);
        await this.cleanupOrphanedNodes(affectedIds);
    }
    /**
     * Wipe all data.
     */
    async wipeAll() {
        // This is a destructive operation — must be called explicitly
        const allNodes = await this.storage.queryNodes({});
        for (const node of allNodes) {
            await this.storage.deleteEdgesByNode(node.id);
            await this.storage.deleteNode(node.id);
        }
    }
    /**
     * Persist one chunk: upsert the Chunk node, its HAS_CHUNK edge, its FTS
     * entry, then run extraction. Entities/concepts are only collected into the
     * resolver here (stored later, merged, by storeResolvedEntities);
     * propositions and thoughts are stored immediately.
     */
    async processChunk(chunk, noteId, notePath, noteContent, stats) {
        const now = new Date().toISOString();
        // Store chunk node
        const chunkNode = {
            id: chunk.id,
            nodeType: NodeType.Chunk,
            data: {
                notePath: chunk.notePath,
                text: chunk.text,
                heading: chunk.heading,
                headingLevel: chunk.headingLevel,
                startOffset: chunk.startOffset,
                endOffset: chunk.endOffset,
                index: chunk.index,
                contentHash: chunk.contentHash,
            },
            createdAt: now,
            updatedAt: now,
        };
        await this.storage.upsertNode(chunkNode);
        // HAS_CHUNK edge
        await this.storage.upsertEdge({
            id: `edge:has_chunk:${noteId}:${chunk.id}`,
            sourceId: noteId,
            targetId: chunk.id,
            relType: RelType.HAS_CHUNK,
            data: { index: chunk.index },
            createdAt: now,
        });
        // FTS entry
        await this.storage.upsertFTSEntry(chunk.id, chunk.text);
        // Extract entities, concepts, propositions, thoughts
        try {
            let extraction = await this.extractor.extract(chunk, noteContent);
            extraction = applyConfidenceGate(extraction, this.options.confidenceThreshold ?? 0.5);
            // Collect for resolution
            this.entityResolver.addEntities(extraction.entities);
            this.entityResolver.addConcepts(extraction.concepts);
            // Store propositions immediately (they're per-chunk)
            for (const prop of extraction.propositions) {
                await this.storeProposition(prop, chunk, notePath, stats);
            }
            // Store thoughts immediately
            for (const thought of extraction.thoughts) {
                await this.storeThought(thought, chunk, notePath, stats);
            }
        }
        catch {
            // Extraction failure is non-fatal
        }
    }
    /**
     * Store one extracted proposition: node (id derived from the statement
     * hash, so identical statements dedupe), DERIVED_FROM edge with quote
     * provenance, BELIEVES edge from the default agent, ABOUT edges to
     * entities/concepts mentioned in the same chunk, then optional
     * contradiction detection.
     */
    async storeProposition(prop, chunk, notePath, stats) {
        const now = new Date().toISOString();
        const stmtHash = hashStatement(prop.statement);
        const propId = generatePropositionId(stmtHash);
        const propNode = {
            id: propId,
            nodeType: NodeType.Proposition,
            data: {
                statement: prop.statement,
                domain: prop.domain,
                confidence: prop.confidence,
                isNegated: prop.isNegated,
                statementHash: stmtHash,
            },
            createdAt: now,
            updatedAt: now,
        };
        await this.storage.upsertNode(propNode);
        // DERIVED_FROM edge with provenance (quote offsets are chunk-relative,
        // rebased here to note-absolute by adding chunk.startOffset)
        const provenance = {
            notePath,
            chunkId: chunk.id,
            startOffset: prop.quoteStart + chunk.startOffset,
            endOffset: prop.quoteEnd + chunk.startOffset,
            quoteText: prop.quoteText,
            quoteHash: hashContent(prop.quoteText),
        };
        await this.storage.upsertEdge({
            id: `edge:derived:${propId}:${chunk.id}`,
            sourceId: propId,
            targetId: chunk.id,
            relType: RelType.DERIVED_FROM,
            data: {
                provenance,
                extractionMethod: this.extractor.name,
                extractedAt: now,
            },
            createdAt: now,
        });
        // BELIEVES edge from default agent
        await this.storage.upsertEdge({
            id: `edge:believes:${DEFAULT_AGENT_ID}:${propId}`,
            sourceId: DEFAULT_AGENT_ID,
            targetId: propId,
            relType: RelType.BELIEVES,
            data: {
                confidence: prop.confidence,
                asOf: now,
            },
            createdAt: now,
        });
        // ABOUT edges — link proposition to entities/concepts mentioned in same chunk
        await this.createAboutEdgesFromChunkMentions(propId, chunk.id, now);
        stats.propositionsExtracted++;
        // Detect contradictions (enabled by default; opt out with
        // options.enableContradictionDetection === false)
        if (this.options.enableContradictionDetection !== false) {
            try {
                const candidates = await this.contradictionDetector.findCandidates(propId, 5, 0.6);
                if (candidates.length > 0) {
                    const edgeIds = await this.contradictionDetector.createContradictionEdges(candidates);
                    stats.contradictionsDetected += edgeIds.length;
                }
            }
            catch {
                // Non-fatal
            }
        }
    }
    /**
     * Store one extracted thought: node (statement-hash id), DERIVED_FROM edge
     * with quote provenance, ENDORSES edge from the default agent, and ABOUT
     * edges matched by the thought's subject string.
     */
    async storeThought(thought, chunk, notePath, stats) {
        const now = new Date().toISOString();
        const stmtHash = hashStatement(thought.statement);
        const thoughtId = generateThoughtId(stmtHash);
        const thoughtNode = {
            id: thoughtId,
            nodeType: NodeType.Thought,
            data: {
                statement: thought.statement,
                stance: thought.stance,
                subject: thought.subject,
                confidence: thought.confidence,
                implications: thought.implications,
            },
            createdAt: now,
            updatedAt: now,
        };
        await this.storage.upsertNode(thoughtNode);
        // DERIVED_FROM edge (offsets rebased to note-absolute, as for propositions)
        const provenance = {
            notePath,
            chunkId: chunk.id,
            startOffset: thought.quoteStart + chunk.startOffset,
            endOffset: thought.quoteEnd + chunk.startOffset,
            quoteText: thought.quoteText,
            quoteHash: hashContent(thought.quoteText),
        };
        await this.storage.upsertEdge({
            id: `edge:derived:${thoughtId}:${chunk.id}`,
            sourceId: thoughtId,
            targetId: chunk.id,
            relType: RelType.DERIVED_FROM,
            data: {
                provenance,
                extractionMethod: this.extractor.name,
                extractedAt: now,
            },
            createdAt: now,
        });
        // ENDORSES edge from default agent (thoughts use ENDORSES, not BELIEVES)
        await this.storage.upsertEdge({
            id: `edge:endorses:${DEFAULT_AGENT_ID}:${thoughtId}`,
            sourceId: DEFAULT_AGENT_ID,
            targetId: thoughtId,
            relType: RelType.ENDORSES,
            data: {
                confidence: thought.confidence,
                asOf: now,
            },
            createdAt: now,
        });
        // ABOUT edges — link thought to matching entity/concept by subject
        await this.createAboutEdgesForThought(thoughtId, thought.subject, now);
        stats.thoughtsExtracted++;
    }
    /**
     * Create ABOUT edges from a proposition to every Entity/Concept/Event node
     * that the proposition's source chunk MENTIONS. Fixed relevance of 0.7
     * marks these as inferred-by-co-occurrence rather than explicit links.
     */
    async createAboutEdgesFromChunkMentions(propId, chunkId, now) {
        // Find entities/concepts mentioned in the source chunk
        const mentionEdges = await this.storage.queryEdges({
            sourceId: chunkId,
            relType: RelType.MENTIONS,
        });
        for (const mentionEdge of mentionEdges) {
            const targetNode = await this.storage.getNode(mentionEdge.targetId);
            if (targetNode &&
                (targetNode.nodeType === NodeType.Entity ||
                    targetNode.nodeType === NodeType.Concept ||
                    targetNode.nodeType === NodeType.Event)) {
                await this.storage.upsertEdge({
                    id: `edge:about:${propId}:${targetNode.id}`,
                    sourceId: propId,
                    targetId: targetNode.id,
                    relType: RelType.ABOUT,
                    data: { relevance: 0.7 },
                    createdAt: now,
                });
            }
        }
    }
    /**
     * Create ABOUT edges from a thought to every entity/concept whose name
     * contains the thought's subject (case-insensitive substring match).
     * No-op when the subject is empty/missing.
     * NOTE(review): scans all Entity and Concept nodes on every thought —
     * O(total nodes) per call; confirm acceptable at vault scale.
     */
    async createAboutEdgesForThought(thoughtId, subject, now) {
        if (!subject)
            return;
        const subjectLower = subject.toLowerCase();
        // Search entities matching subject
        const entities = await this.storage.queryNodes({ nodeType: NodeType.Entity });
        for (const entity of entities) {
            const name = entity.data.name;
            if (name && name.toLowerCase().includes(subjectLower)) {
                await this.storage.upsertEdge({
                    id: `edge:about:${thoughtId}:${entity.id}`,
                    sourceId: thoughtId,
                    targetId: entity.id,
                    relType: RelType.ABOUT,
                    data: { relevance: 0.8 },
                    createdAt: now,
                });
            }
        }
        // Search concepts matching subject
        const concepts = await this.storage.queryNodes({ nodeType: NodeType.Concept });
        for (const concept of concepts) {
            const name = concept.data.name;
            if (name && name.toLowerCase().includes(subjectLower)) {
                await this.storage.upsertEdge({
                    id: `edge:about:${thoughtId}:${concept.id}`,
                    sourceId: thoughtId,
                    targetId: concept.id,
                    relType: RelType.ABOUT,
                    data: { relevance: 0.8 },
                    createdAt: now,
                });
            }
        }
    }
    /**
     * Persist the entities and concepts accumulated by the resolver during
     * processChunk. Each is either merged into an existing node found via the
     * entity index (union aliases, majority-vote entityType via typeCounts,
     * max confidence) or created fresh. Also creates MENTIONS edges from the
     * note's chunks whose text contains the entity name, and keeps the lazy
     * entity index in sync with what was written.
     */
    async storeResolvedEntities(noteId, notePath, stats) {
        const now = new Date().toISOString();
        const entities = this.entityResolver.getResolvedEntities();
        const concepts = this.entityResolver.getResolvedConcepts();
        const entityIndex = await this.getEntityIndex();
        for (const entity of entities) {
            const existingId = entityIndex.findMatch(entity.name, entity.aliases);
            let entityId;
            let finalNode;
            if (existingId) {
                // Merge with existing entity
                entityId = existingId;
                const existingNode = await this.storage.getNode(existingId);
                if (existingNode) {
                    const existingData = existingNode.data;
                    const existingAliases = existingData.aliases ?? [];
                    const existingTypeCounts = existingData.typeCounts ?? {};
                    // If the existing name was classified under a single type, seed typeCounts
                    if (Object.keys(existingTypeCounts).length === 0 && existingData.entityType) {
                        existingTypeCounts[existingData.entityType] = 1;
                    }
                    // Increment count for the new entity's type
                    const newTypeCounts = { ...existingTypeCounts };
                    newTypeCounts[entity.entityType] = (newTypeCounts[entity.entityType] ?? 0) + 1;
                    // Determine winning type (highest count)
                    let winningType = entity.entityType;
                    let maxCount = 0;
                    for (const [type, count] of Object.entries(newTypeCounts)) {
                        if (count > maxCount) {
                            maxCount = count;
                            winningType = type;
                        }
                    }
                    // Union aliases
                    const allAliases = new Set(existingAliases);
                    for (const alias of entity.aliases)
                        allAliases.add(alias);
                    // Add new name as alias if different from existing name
                    const existingName = existingData.name;
                    if (entity.name.toLowerCase() !== existingName.toLowerCase()) {
                        allAliases.add(entity.name);
                    }
                    // Remove canonical name from aliases
                    allAliases.delete(existingName);
                    finalNode = {
                        id: existingId,
                        nodeType: NodeType.Entity,
                        data: {
                            name: existingName,
                            entityType: winningType,
                            aliases: Array.from(allAliases),
                            description: existingData.description || entity.description,
                            // NOTE(review): Math.max yields NaN if existingData.confidence
                            // is undefined on a legacy node — confirm all stored entities
                            // carry a numeric confidence.
                            confidence: Math.max(existingData.confidence, entity.confidence),
                            typeCounts: newTypeCounts,
                        },
                        createdAt: existingNode.createdAt,
                        updatedAt: now,
                    };
                }
                else {
                    // Existing ID found in index but node missing from storage — create fresh
                    // NOTE(review): entityId was pre-set to existingId above but is
                    // re-pointed at a freshly generated id here; the stale index entry
                    // for existingId is not removed — confirm register() below makes
                    // the new id win on future findMatch calls.
                    entityId = generateEntityId(entity.name);
                    finalNode = {
                        id: entityId,
                        nodeType: NodeType.Entity,
                        data: {
                            name: entity.name,
                            entityType: entity.entityType,
                            aliases: entity.aliases,
                            description: entity.description,
                            confidence: entity.confidence,
                            typeCounts: { [entity.entityType]: 1 },
                        },
                        createdAt: now,
                        updatedAt: now,
                    };
                }
            }
            else {
                // New entity
                entityId = generateEntityId(entity.name);
                finalNode = {
                    id: entityId,
                    nodeType: NodeType.Entity,
                    data: {
                        name: entity.name,
                        entityType: entity.entityType,
                        aliases: entity.aliases,
                        description: entity.description,
                        confidence: entity.confidence,
                        typeCounts: { [entity.entityType]: 1 },
                    },
                    createdAt: now,
                    updatedAt: now,
                };
            }
            await this.storage.upsertNode(finalNode);
            // Keep index current
            const finalData = finalNode.data;
            entityIndex.register(entityId, finalData.name, finalData.aliases ?? []);
            // MENTIONS edges from chunks that mention this entity
            // (matched by case-insensitive substring of the entity name, not by
            // the extractor's spans — spans only gate whether we look at all)
            if (entity.spans.length > 0) {
                const noteChunks = await this.storage.queryEdges({
                    sourceId: noteId,
                    relType: RelType.HAS_CHUNK,
                });
                for (const chunkEdge of noteChunks) {
                    const chunkNode = await this.storage.getNode(chunkEdge.targetId);
                    if (!chunkNode)
                        continue;
                    const chunkText = chunkNode.data.text ?? '';
                    if (chunkText.toLowerCase().includes(entity.name.toLowerCase())) {
                        await this.storage.upsertEdge({
                            id: `edge:mentions:${chunkEdge.targetId}:${entityId}`,
                            sourceId: chunkEdge.targetId,
                            targetId: entityId,
                            relType: RelType.MENTIONS,
                            data: { count: 1, spans: [] },
                            createdAt: now,
                        });
                    }
                }
            }
            stats.entitiesExtracted++;
        }
        for (const concept of concepts) {
            const existingConceptId = entityIndex.findMatch(concept.name, []);
            let conceptId;
            let finalConceptNode;
            if (existingConceptId) {
                // Merge with existing concept
                conceptId = existingConceptId;
                const existingNode = await this.storage.getNode(existingConceptId);
                if (existingNode) {
                    const existingData = existingNode.data;
                    const existingAliases = existingData.aliases ?? [];
                    const allAliases = new Set(existingAliases);
                    finalConceptNode = {
                        id: existingConceptId,
                        nodeType: NodeType.Concept,
                        data: {
                            name: existingData.name,
                            domain: existingData.domain || concept.domain,
                            description: existingData.description || concept.description,
                            aliases: Array.from(allAliases),
                            confidence: Math.max(existingData.confidence, concept.confidence),
                        },
                        createdAt: existingNode.createdAt,
                        updatedAt: now,
                    };
                }
                else {
                    // Index hit but node missing from storage — create fresh
                    conceptId = generateConceptId(concept.name);
                    finalConceptNode = {
                        id: conceptId,
                        nodeType: NodeType.Concept,
                        data: {
                            name: concept.name,
                            domain: concept.domain,
                            description: concept.description,
                            aliases: [],
                            confidence: concept.confidence,
                        },
                        createdAt: now,
                        updatedAt: now,
                    };
                }
            }
            else {
                conceptId = generateConceptId(concept.name);
                finalConceptNode = {
                    id: conceptId,
                    nodeType: NodeType.Concept,
                    data: {
                        name: concept.name,
                        domain: concept.domain,
                        description: concept.description,
                        aliases: [],
                        confidence: concept.confidence,
                    },
                    createdAt: now,
                    updatedAt: now,
                };
            }
            await this.storage.upsertNode(finalConceptNode);
            // Keep index current
            const cData = finalConceptNode.data;
            entityIndex.register(conceptId, cData.name, cData.aliases ?? []);
            stats.conceptsExtracted++;
        }
    }
    /**
     * Full-scan orphan sweep: delete every Entity/Concept node that no chunk
     * MENTIONS any more, and drop it from the entity index.
     * @returns Number of nodes removed.
     */
    async cleanupOrphans() {
        const entityIndex = await this.getEntityIndex();
        let cleaned = 0;
        for (const nodeType of [NodeType.Entity, NodeType.Concept]) {
            const nodes = await this.storage.queryNodes({ nodeType });
            for (const node of nodes) {
                const mentionEdges = await this.storage.queryEdges({
                    targetId: node.id,
                    relType: RelType.MENTIONS,
                });
                if (mentionEdges.length === 0) {
                    await this.storage.deleteEdgesByNode(node.id);
                    await this.storage.deleteNode(node.id);
                    entityIndex.remove(node.id);
                    cleaned++;
                }
            }
        }
        return cleaned;
    }
    /**
     * Delete all chunks of a note (node, edges, FTS entry, embedding) and
     * return the ids of entities/concepts those chunks mentioned, so the
     * caller can check them for orphanhood afterwards.
     */
    async cleanupNoteChunks(noteId, _notePath) {
        const affectedEntityIds = new Set();
        const existingEdges = await this.storage.queryEdges({
            sourceId: noteId,
            relType: RelType.HAS_CHUNK,
        });
        for (const edge of existingEdges) {
            // Collect entities mentioned by this chunk before deleting
            const mentions = await this.storage.queryEdges({
                sourceId: edge.targetId,
                relType: RelType.MENTIONS,
            });
            for (const m of mentions)
                affectedEntityIds.add(m.targetId);
            await this.storage.deleteFTSEntry(edge.targetId);
            await this.storage.deleteEmbedding(edge.targetId);
            await this.storage.deleteEdgesByNode(edge.targetId);
            await this.storage.deleteNode(edge.targetId);
        }
        return affectedEntityIds;
    }
    /**
     * Targeted orphan sweep over a candidate id set (cheaper than
     * cleanupOrphans): delete each candidate Entity/Concept that has no
     * remaining MENTIONS edges, keeping the entity index in sync.
     * @returns Number of nodes removed.
     */
    async cleanupOrphanedNodes(candidateIds) {
        const entityIndex = await this.getEntityIndex();
        let cleaned = 0;
        for (const nodeId of candidateIds) {
            const node = await this.storage.getNode(nodeId);
            if (!node)
                continue;
            if (node.nodeType !== NodeType.Entity && node.nodeType !== NodeType.Concept)
                continue;
            const mentions = await this.storage.queryEdges({ targetId: nodeId, relType: RelType.MENTIONS });
            if (mentions.length === 0) {
                await this.storage.deleteEdgesByNode(nodeId);
                await this.storage.deleteNode(nodeId);
                entityIndex.remove(nodeId);
                cleaned++;
            }
        }
        return cleaned;
    }
    /**
     * Create the default "self" Agent node on first use; no-op if it exists.
     */
    async ensureDefaultAgent() {
        const existing = await this.storage.getNode(DEFAULT_AGENT_ID);
        if (!existing) {
            await this.storage.upsertNode({
                id: DEFAULT_AGENT_ID,
                nodeType: NodeType.Agent,
                data: {
                    name: 'self',
                    agentType: 'self',
                    description: 'The vault owner / note author',
                },
                createdAt: new Date().toISOString(),
                updatedAt: new Date().toISOString(),
            });
        }
    }
}
|
|
637
|
+
/**
 * Collect the unique tags for a note.
 *
 * Tags come from two sources: the frontmatter `tags` array (each value is
 * coerced to a string) and inline `#tag` markers in the body. Insertion
 * order is preserved — frontmatter tags first, then inline tags — and
 * duplicates are removed.
 *
 * @param {string} content - Note body text (frontmatter already stripped).
 * @param {Object} frontmatter - Parsed frontmatter object.
 * @returns {string[]} De-duplicated tag names without the leading '#'.
 */
function extractTags(content, frontmatter) {
    const collected = new Set();
    // Frontmatter tags (only the array form is recognized here)
    const fmTags = frontmatter.tags;
    if (Array.isArray(fmTags)) {
        fmTags.forEach((tag) => collected.add(String(tag)));
    }
    // Inline #tags: a letter followed by word chars, '/' or '-'
    for (const [, name] of content.matchAll(/#([a-zA-Z][\w/-]*)/g)) {
        collected.add(name);
    }
    return [...collected];
}
|
|
653
|
+
//# sourceMappingURL=pipeline.js.map
|