mindgraph-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/dist/embeddings/embedder-interface.d.ts +15 -0
  2. package/dist/embeddings/embedder-interface.d.ts.map +1 -0
  3. package/dist/embeddings/embedder-interface.js +2 -0
  4. package/dist/embeddings/embedder-interface.js.map +1 -0
  5. package/dist/embeddings/ollama-embedder.d.ts +21 -0
  6. package/dist/embeddings/ollama-embedder.d.ts.map +1 -0
  7. package/dist/embeddings/ollama-embedder.js +78 -0
  8. package/dist/embeddings/ollama-embedder.js.map +1 -0
  9. package/dist/embeddings/transformers-embedder.d.ts +20 -0
  10. package/dist/embeddings/transformers-embedder.d.ts.map +1 -0
  11. package/dist/embeddings/transformers-embedder.js +61 -0
  12. package/dist/embeddings/transformers-embedder.js.map +1 -0
  13. package/dist/extraction/anthropic-extractor.d.ts +17 -0
  14. package/dist/extraction/anthropic-extractor.d.ts.map +1 -0
  15. package/dist/extraction/anthropic-extractor.js +43 -0
  16. package/dist/extraction/anthropic-extractor.js.map +1 -0
  17. package/dist/extraction/basic-extractor.d.ts +17 -0
  18. package/dist/extraction/basic-extractor.d.ts.map +1 -0
  19. package/dist/extraction/basic-extractor.js +135 -0
  20. package/dist/extraction/basic-extractor.js.map +1 -0
  21. package/dist/extraction/confidence-gate.d.ts +7 -0
  22. package/dist/extraction/confidence-gate.d.ts.map +1 -0
  23. package/dist/extraction/confidence-gate.js +13 -0
  24. package/dist/extraction/confidence-gate.js.map +1 -0
  25. package/dist/extraction/contradiction-detector.d.ts +46 -0
  26. package/dist/extraction/contradiction-detector.d.ts.map +1 -0
  27. package/dist/extraction/contradiction-detector.js +205 -0
  28. package/dist/extraction/contradiction-detector.js.map +1 -0
  29. package/dist/extraction/entity-index.d.ts +31 -0
  30. package/dist/extraction/entity-index.d.ts.map +1 -0
  31. package/dist/extraction/entity-index.js +90 -0
  32. package/dist/extraction/entity-index.js.map +1 -0
  33. package/dist/extraction/entity-resolver.d.ts +28 -0
  34. package/dist/extraction/entity-resolver.d.ts.map +1 -0
  35. package/dist/extraction/entity-resolver.js +111 -0
  36. package/dist/extraction/entity-resolver.js.map +1 -0
  37. package/dist/extraction/extractor-interface.d.ts +51 -0
  38. package/dist/extraction/extractor-interface.d.ts.map +1 -0
  39. package/dist/extraction/extractor-interface.js +2 -0
  40. package/dist/extraction/extractor-interface.js.map +1 -0
  41. package/dist/extraction/llm-extractor.d.ts +21 -0
  42. package/dist/extraction/llm-extractor.d.ts.map +1 -0
  43. package/dist/extraction/llm-extractor.js +97 -0
  44. package/dist/extraction/llm-extractor.js.map +1 -0
  45. package/dist/extraction/ollama-extractor.d.ts +18 -0
  46. package/dist/extraction/ollama-extractor.d.ts.map +1 -0
  47. package/dist/extraction/ollama-extractor.js +50 -0
  48. package/dist/extraction/ollama-extractor.js.map +1 -0
  49. package/dist/extraction/open-loop-detector.d.ts +24 -0
  50. package/dist/extraction/open-loop-detector.d.ts.map +1 -0
  51. package/dist/extraction/open-loop-detector.js +187 -0
  52. package/dist/extraction/open-loop-detector.js.map +1 -0
  53. package/dist/extraction/openai-extractor.d.ts +20 -0
  54. package/dist/extraction/openai-extractor.d.ts.map +1 -0
  55. package/dist/extraction/openai-extractor.js +44 -0
  56. package/dist/extraction/openai-extractor.js.map +1 -0
  57. package/dist/extraction/prompts/entity-extraction.d.ts +2 -0
  58. package/dist/extraction/prompts/entity-extraction.d.ts.map +1 -0
  59. package/dist/extraction/prompts/entity-extraction.js +42 -0
  60. package/dist/extraction/prompts/entity-extraction.js.map +1 -0
  61. package/dist/extraction/prompts/proposition-extraction.d.ts +2 -0
  62. package/dist/extraction/prompts/proposition-extraction.d.ts.map +1 -0
  63. package/dist/extraction/prompts/proposition-extraction.js +39 -0
  64. package/dist/extraction/prompts/proposition-extraction.js.map +1 -0
  65. package/dist/extraction/prompts/thought-extraction.d.ts +2 -0
  66. package/dist/extraction/prompts/thought-extraction.d.ts.map +1 -0
  67. package/dist/extraction/prompts/thought-extraction.js +41 -0
  68. package/dist/extraction/prompts/thought-extraction.js.map +1 -0
  69. package/dist/index.d.ts +41 -0
  70. package/dist/index.d.ts.map +1 -0
  71. package/dist/index.js +33 -0
  72. package/dist/index.js.map +1 -0
  73. package/dist/ingestion/chunk-id.d.ts +27 -0
  74. package/dist/ingestion/chunk-id.d.ts.map +1 -0
  75. package/dist/ingestion/chunk-id.js +45 -0
  76. package/dist/ingestion/chunk-id.js.map +1 -0
  77. package/dist/ingestion/chunker.d.ts +29 -0
  78. package/dist/ingestion/chunker.d.ts.map +1 -0
  79. package/dist/ingestion/chunker.js +182 -0
  80. package/dist/ingestion/chunker.js.map +1 -0
  81. package/dist/ingestion/hasher.d.ts +7 -0
  82. package/dist/ingestion/hasher.d.ts.map +1 -0
  83. package/dist/ingestion/hasher.js +18 -0
  84. package/dist/ingestion/hasher.js.map +1 -0
  85. package/dist/ingestion/pipeline.d.ts +58 -0
  86. package/dist/ingestion/pipeline.d.ts.map +1 -0
  87. package/dist/ingestion/pipeline.js +653 -0
  88. package/dist/ingestion/pipeline.js.map +1 -0
  89. package/dist/models/citation.d.ts +2 -0
  90. package/dist/models/citation.d.ts.map +1 -0
  91. package/dist/models/citation.js +2 -0
  92. package/dist/models/citation.js.map +1 -0
  93. package/dist/models/extraction-result.d.ts +2 -0
  94. package/dist/models/extraction-result.d.ts.map +1 -0
  95. package/dist/models/extraction-result.js +2 -0
  96. package/dist/models/extraction-result.js.map +1 -0
  97. package/dist/models/query-result.d.ts +2 -0
  98. package/dist/models/query-result.d.ts.map +1 -0
  99. package/dist/models/query-result.js +2 -0
  100. package/dist/models/query-result.js.map +1 -0
  101. package/dist/query/answer-builder.d.ts +15 -0
  102. package/dist/query/answer-builder.d.ts.map +1 -0
  103. package/dist/query/answer-builder.js +51 -0
  104. package/dist/query/answer-builder.js.map +1 -0
  105. package/dist/query/citation-builder.d.ts +19 -0
  106. package/dist/query/citation-builder.d.ts.map +1 -0
  107. package/dist/query/citation-builder.js +54 -0
  108. package/dist/query/citation-builder.js.map +1 -0
  109. package/dist/query/graph-data.d.ts +39 -0
  110. package/dist/query/graph-data.d.ts.map +1 -0
  111. package/dist/query/graph-data.js +115 -0
  112. package/dist/query/graph-data.js.map +1 -0
  113. package/dist/query/graph-search.d.ts +43 -0
  114. package/dist/query/graph-search.d.ts.map +1 -0
  115. package/dist/query/graph-search.js +315 -0
  116. package/dist/query/graph-search.js.map +1 -0
  117. package/dist/query/query-engine.d.ts +41 -0
  118. package/dist/query/query-engine.d.ts.map +1 -0
  119. package/dist/query/query-engine.js +178 -0
  120. package/dist/query/query-engine.js.map +1 -0
  121. package/dist/query/semantic-search.d.ts +26 -0
  122. package/dist/query/semantic-search.d.ts.map +1 -0
  123. package/dist/query/semantic-search.js +132 -0
  124. package/dist/query/semantic-search.js.map +1 -0
  125. package/dist/schema/edge-types.d.ts +95 -0
  126. package/dist/schema/edge-types.d.ts.map +1 -0
  127. package/dist/schema/edge-types.js +6 -0
  128. package/dist/schema/edge-types.js.map +1 -0
  129. package/dist/schema/node-types.d.ts +100 -0
  130. package/dist/schema/node-types.d.ts.map +1 -0
  131. package/dist/schema/node-types.js +6 -0
  132. package/dist/schema/node-types.js.map +1 -0
  133. package/dist/schema/types.d.ts +134 -0
  134. package/dist/schema/types.d.ts.map +1 -0
  135. package/dist/schema/types.js +52 -0
  136. package/dist/schema/types.js.map +1 -0
  137. package/dist/schema/validation.d.ts +6 -0
  138. package/dist/schema/validation.d.ts.map +1 -0
  139. package/dist/schema/validation.js +140 -0
  140. package/dist/schema/validation.js.map +1 -0
  141. package/dist/storage/export-import.d.ts +28 -0
  142. package/dist/storage/export-import.d.ts.map +1 -0
  143. package/dist/storage/export-import.js +189 -0
  144. package/dist/storage/export-import.js.map +1 -0
  145. package/dist/storage/memory/memory-adapter.d.ts +36 -0
  146. package/dist/storage/memory/memory-adapter.d.ts.map +1 -0
  147. package/dist/storage/memory/memory-adapter.js +231 -0
  148. package/dist/storage/memory/memory-adapter.js.map +1 -0
  149. package/dist/storage/sqlite/graph-traversal.d.ts +11 -0
  150. package/dist/storage/sqlite/graph-traversal.d.ts.map +1 -0
  151. package/dist/storage/sqlite/graph-traversal.js +79 -0
  152. package/dist/storage/sqlite/graph-traversal.js.map +1 -0
  153. package/dist/storage/sqlite/query-builder.d.ts +8 -0
  154. package/dist/storage/sqlite/query-builder.d.ts.map +1 -0
  155. package/dist/storage/sqlite/query-builder.js +55 -0
  156. package/dist/storage/sqlite/query-builder.js.map +1 -0
  157. package/dist/storage/sqlite/sqlite-adapter.d.ts +53 -0
  158. package/dist/storage/sqlite/sqlite-adapter.d.ts.map +1 -0
  159. package/dist/storage/sqlite/sqlite-adapter.js +497 -0
  160. package/dist/storage/sqlite/sqlite-adapter.js.map +1 -0
  161. package/dist/storage/storage-interface.d.ts +64 -0
  162. package/dist/storage/storage-interface.d.ts.map +1 -0
  163. package/dist/storage/storage-interface.js +2 -0
  164. package/dist/storage/storage-interface.js.map +1 -0
  165. package/dist/utils/retry-fetch.d.ts +20 -0
  166. package/dist/utils/retry-fetch.d.ts.map +1 -0
  167. package/dist/utils/retry-fetch.js +71 -0
  168. package/dist/utils/retry-fetch.js.map +1 -0
  169. package/package.json +28 -0
@@ -0,0 +1,653 @@
1
+ import { EntityResolver } from '../extraction/entity-resolver.js';
2
+ import { EntityIndex } from '../extraction/entity-index.js';
3
+ import { applyConfidenceGate } from '../extraction/confidence-gate.js';
4
+ import { ContradictionDetector } from '../extraction/contradiction-detector.js';
5
+ import { chunkMarkdown, stripFrontmatter } from './chunker.js';
6
+ import { generateNoteId, generateEntityId, generateConceptId, generatePropositionId, generateThoughtId } from './chunk-id.js';
7
+ import { hashContent, hashStatement } from './hasher.js';
8
+ import { NodeType, RelType } from '../schema/types.js';
// Canonical id of the implicit "self" Agent node (the vault owner / note
// author); BELIEVES and ENDORSES edges are created from this node.
const DEFAULT_AGENT_ID = 'agent:self';
/**
 * The ingestion pipeline orchestrates:
 * hash check → chunk → extract → embed → store → detect contradictions
 *
 * All persistence goes through the injected `storage` adapter. Embedding and
 * extraction failures are deliberately non-fatal: a note is still indexed
 * (chunks + FTS entries) even when those steps fail.
 */
export class IngestionPipeline {
    /**
     * @param storage   Graph storage adapter (nodes, edges, FTS entries, embeddings).
     * @param embedder  Optional batch embedder (`embedBatch`, `modelId`, `dimensions`).
     * @param extractor Extractor producing entities/concepts/propositions/thoughts per chunk.
     * @param options   Pipeline options: `chunkerOptions`, `confidenceThreshold`
     *                  (default 0.5), `enableContradictionDetection` (default on).
     */
    constructor(storage, embedder, extractor, options = {}) {
        this.storage = storage;
        this.embedder = embedder;
        this.extractor = extractor;
        this.options = options;
        this.entityResolver = new EntityResolver();
        // Lazily built index over existing Entity/Concept names — see getEntityIndex().
        this.entityIndex = null;
        this.contradictionDetector = new ContradictionDetector(storage, embedder);
    }
    /**
     * Lazily load the entity name index from storage on first use, then reuse it.
     */
    async getEntityIndex() {
        if (!this.entityIndex) {
            this.entityIndex = new EntityIndex();
            await this.entityIndex.loadFromStorage(this.storage);
        }
        return this.entityIndex;
    }
    /**
     * Index a single note. Idempotent — skips if content hash unchanged.
     *
     * @param notePath Vault-relative path of the note (also used to derive its id).
     * @param content  Full markdown content, frontmatter included.
     * @returns A stats object counting chunks, extractions, embeddings and
     *          detected contradictions; `skipped` is true on a hash match.
     */
    async indexNote(notePath, content) {
        const stats = {
            notePath,
            chunksCreated: 0,
            entitiesExtracted: 0,
            conceptsExtracted: 0,
            propositionsExtracted: 0,
            thoughtsExtracted: 0,
            embeddingsCreated: 0,
            contradictionsDetected: 0,
            skipped: false,
        };
        const contentHash = hashContent(content);
        const noteId = generateNoteId(notePath);
        // Check if note already indexed with same content
        const existingNote = await this.storage.getNode(noteId);
        if (existingNote && existingNote.data.contentHash === contentHash) {
            stats.skipped = true;
            return stats;
        }
        // Strip frontmatter
        const { frontmatter, body } = stripFrontmatter(content);
        // Extract metadata: explicit frontmatter title wins, else file name sans ".md"
        const title = frontmatter.title ??
            notePath.split('/').pop()?.replace(/\.md$/, '') ?? notePath;
        const tags = extractTags(body, frontmatter);
        const aliases = frontmatter.aliases ?? [];
        const now = new Date().toISOString();
        // Upsert Note node (createdAt is preserved across re-indexing)
        const noteNode = {
            id: noteId,
            nodeType: NodeType.Note,
            data: {
                title,
                path: notePath,
                contentHash,
                wordCount: body.split(/\s+/).length,
                tags,
                aliases,
                frontmatter,
            },
            createdAt: existingNote?.createdAt ?? now,
            updatedAt: now,
        };
        await this.storage.upsertNode(noteNode);
        // Clean up old chunks for this note, then remove orphaned entities
        const affectedIds = await this.cleanupNoteChunks(noteId, notePath);
        await this.cleanupOrphanedNodes(affectedIds);
        // Chunk the note
        const chunks = chunkMarkdown(notePath, body, this.options.chunkerOptions);
        stats.chunksCreated = chunks.length;
        // Ensure default agent exists
        await this.ensureDefaultAgent();
        // Process each chunk; the resolver accumulates entities/concepts across
        // chunks so they can be merged once in storeResolvedEntities() below.
        this.entityResolver.clear();
        for (const chunk of chunks) {
            await this.processChunk(chunk, noteId, notePath, body, stats);
        }
        // Batch embed all chunks
        if (this.embedder && chunks.length > 0) {
            try {
                const vectors = await this.embedder.embedBatch(chunks.map((c) => c.text));
                const embNow = new Date().toISOString();
                for (let i = 0; i < vectors.length; i++) {
                    await this.storage.upsertEmbedding({
                        chunkId: chunks[i].id,
                        vector: vectors[i],
                        modelId: this.embedder.modelId,
                        dimensions: this.embedder.dimensions,
                        createdAt: embNow,
                    });
                    stats.embeddingsCreated++;
                }
            }
            catch {
                // Embedding failure is non-fatal — the note remains searchable via FTS.
            }
        }
        // Store resolved entities and concepts (merged across all chunks)
        await this.storeResolvedEntities(noteId, notePath, stats);
        return stats;
    }
    /**
     * Remove a note and all its derived data. Entities/concepts that were only
     * mentioned by this note are garbage-collected afterwards.
     */
    async forgetNote(notePath) {
        const noteId = generateNoteId(notePath);
        const affectedIds = await this.cleanupNoteChunks(noteId, notePath);
        await this.storage.deleteEdgesByNode(noteId);
        await this.storage.deleteNode(noteId);
        await this.cleanupOrphanedNodes(affectedIds);
    }
    /**
     * Wipe all data.
     * NOTE(review): this deletes nodes and their edges only — FTS entries and
     * embeddings are not explicitly removed here (unlike cleanupNoteChunks);
     * confirm the storage adapter cascades those on deleteNode().
     */
    async wipeAll() {
        // This is a destructive operation — must be called explicitly
        const allNodes = await this.storage.queryNodes({});
        for (const node of allNodes) {
            await this.storage.deleteEdgesByNode(node.id);
            await this.storage.deleteNode(node.id);
        }
    }
    /**
     * Persist one chunk (node, HAS_CHUNK edge, FTS entry) and run extraction
     * over it. Propositions/thoughts are stored immediately; entities/concepts
     * are only collected into the resolver for later cross-chunk merging.
     */
    async processChunk(chunk, noteId, notePath, noteContent, stats) {
        const now = new Date().toISOString();
        // Store chunk node
        const chunkNode = {
            id: chunk.id,
            nodeType: NodeType.Chunk,
            data: {
                notePath: chunk.notePath,
                text: chunk.text,
                heading: chunk.heading,
                headingLevel: chunk.headingLevel,
                startOffset: chunk.startOffset,
                endOffset: chunk.endOffset,
                index: chunk.index,
                contentHash: chunk.contentHash,
            },
            createdAt: now,
            updatedAt: now,
        };
        await this.storage.upsertNode(chunkNode);
        // HAS_CHUNK edge
        await this.storage.upsertEdge({
            id: `edge:has_chunk:${noteId}:${chunk.id}`,
            sourceId: noteId,
            targetId: chunk.id,
            relType: RelType.HAS_CHUNK,
            data: { index: chunk.index },
            createdAt: now,
        });
        // FTS entry
        await this.storage.upsertFTSEntry(chunk.id, chunk.text);
        // Extract entities, concepts, propositions, thoughts
        try {
            let extraction = await this.extractor.extract(chunk, noteContent);
            // Drop low-confidence extractions (threshold defaults to 0.5).
            extraction = applyConfidenceGate(extraction, this.options.confidenceThreshold ?? 0.5);
            // Collect for resolution
            this.entityResolver.addEntities(extraction.entities);
            this.entityResolver.addConcepts(extraction.concepts);
            // Store propositions immediately (they're per-chunk)
            for (const prop of extraction.propositions) {
                await this.storeProposition(prop, chunk, notePath, stats);
            }
            // Store thoughts immediately
            for (const thought of extraction.thoughts) {
                await this.storeThought(thought, chunk, notePath, stats);
            }
        }
        catch {
            // Extraction failure is non-fatal — the chunk itself is already stored.
        }
    }
    /**
     * Persist a proposition: node keyed by statement hash (identical statements
     * dedupe to one node), DERIVED_FROM provenance edge, BELIEVES edge from the
     * default agent, ABOUT edges, and optional contradiction detection.
     */
    async storeProposition(prop, chunk, notePath, stats) {
        const now = new Date().toISOString();
        const stmtHash = hashStatement(prop.statement);
        const propId = generatePropositionId(stmtHash);
        const propNode = {
            id: propId,
            nodeType: NodeType.Proposition,
            data: {
                statement: prop.statement,
                domain: prop.domain,
                confidence: prop.confidence,
                isNegated: prop.isNegated,
                statementHash: stmtHash,
            },
            createdAt: now,
            updatedAt: now,
        };
        await this.storage.upsertNode(propNode);
        // DERIVED_FROM edge with provenance; quote offsets are chunk-relative,
        // so chunk.startOffset is added to make them note-relative.
        const provenance = {
            notePath,
            chunkId: chunk.id,
            startOffset: prop.quoteStart + chunk.startOffset,
            endOffset: prop.quoteEnd + chunk.startOffset,
            quoteText: prop.quoteText,
            quoteHash: hashContent(prop.quoteText),
        };
        await this.storage.upsertEdge({
            id: `edge:derived:${propId}:${chunk.id}`,
            sourceId: propId,
            targetId: chunk.id,
            relType: RelType.DERIVED_FROM,
            data: {
                provenance,
                extractionMethod: this.extractor.name,
                extractedAt: now,
            },
            createdAt: now,
        });
        // BELIEVES edge from default agent
        await this.storage.upsertEdge({
            id: `edge:believes:${DEFAULT_AGENT_ID}:${propId}`,
            sourceId: DEFAULT_AGENT_ID,
            targetId: propId,
            relType: RelType.BELIEVES,
            data: {
                confidence: prop.confidence,
                asOf: now,
            },
            createdAt: now,
        });
        // ABOUT edges — link proposition to entities/concepts mentioned in same chunk
        await this.createAboutEdgesFromChunkMentions(propId, chunk.id, now);
        stats.propositionsExtracted++;
        // Detect contradictions (on by default; opt out via options)
        if (this.options.enableContradictionDetection !== false) {
            try {
                // presumably (limit=5, minSimilarity=0.6) — confirm against
                // ContradictionDetector.findCandidates' signature.
                const candidates = await this.contradictionDetector.findCandidates(propId, 5, 0.6);
                if (candidates.length > 0) {
                    const edgeIds = await this.contradictionDetector.createContradictionEdges(candidates);
                    stats.contradictionsDetected += edgeIds.length;
                }
            }
            catch {
                // Non-fatal
            }
        }
    }
    /**
     * Persist a thought: node keyed by statement hash, DERIVED_FROM provenance
     * edge, ENDORSES edge from the default agent, and subject-based ABOUT edges.
     */
    async storeThought(thought, chunk, notePath, stats) {
        const now = new Date().toISOString();
        const stmtHash = hashStatement(thought.statement);
        const thoughtId = generateThoughtId(stmtHash);
        const thoughtNode = {
            id: thoughtId,
            nodeType: NodeType.Thought,
            data: {
                statement: thought.statement,
                stance: thought.stance,
                subject: thought.subject,
                confidence: thought.confidence,
                implications: thought.implications,
            },
            createdAt: now,
            updatedAt: now,
        };
        await this.storage.upsertNode(thoughtNode);
        // DERIVED_FROM edge (quote offsets made note-relative, as for propositions)
        const provenance = {
            notePath,
            chunkId: chunk.id,
            startOffset: thought.quoteStart + chunk.startOffset,
            endOffset: thought.quoteEnd + chunk.startOffset,
            quoteText: thought.quoteText,
            quoteHash: hashContent(thought.quoteText),
        };
        await this.storage.upsertEdge({
            id: `edge:derived:${thoughtId}:${chunk.id}`,
            sourceId: thoughtId,
            targetId: chunk.id,
            relType: RelType.DERIVED_FROM,
            data: {
                provenance,
                extractionMethod: this.extractor.name,
                extractedAt: now,
            },
            createdAt: now,
        });
        // ENDORSES edge from default agent (thoughts use ENDORSES, not BELIEVES)
        await this.storage.upsertEdge({
            id: `edge:endorses:${DEFAULT_AGENT_ID}:${thoughtId}`,
            sourceId: DEFAULT_AGENT_ID,
            targetId: thoughtId,
            relType: RelType.ENDORSES,
            data: {
                confidence: thought.confidence,
                asOf: now,
            },
            createdAt: now,
        });
        // ABOUT edges — link thought to matching entity/concept by subject
        await this.createAboutEdgesForThought(thoughtId, thought.subject, now);
        stats.thoughtsExtracted++;
    }
    /**
     * Link a proposition to every Entity/Concept/Event node mentioned by its
     * source chunk (via existing MENTIONS edges), with a fixed relevance of 0.7.
     */
    async createAboutEdgesFromChunkMentions(propId, chunkId, now) {
        // Find entities/concepts mentioned in the source chunk
        const mentionEdges = await this.storage.queryEdges({
            sourceId: chunkId,
            relType: RelType.MENTIONS,
        });
        for (const mentionEdge of mentionEdges) {
            const targetNode = await this.storage.getNode(mentionEdge.targetId);
            if (targetNode &&
                (targetNode.nodeType === NodeType.Entity ||
                    targetNode.nodeType === NodeType.Concept ||
                    targetNode.nodeType === NodeType.Event)) {
                await this.storage.upsertEdge({
                    id: `edge:about:${propId}:${targetNode.id}`,
                    sourceId: propId,
                    targetId: targetNode.id,
                    relType: RelType.ABOUT,
                    data: { relevance: 0.7 },
                    createdAt: now,
                });
            }
        }
    }
    /**
     * Link a thought to every entity/concept whose name contains its subject
     * (case-insensitive substring match). Scans all Entity and Concept nodes,
     * so this is O(total entities + concepts) per thought.
     */
    async createAboutEdgesForThought(thoughtId, subject, now) {
        if (!subject)
            return;
        const subjectLower = subject.toLowerCase();
        // Search entities matching subject
        const entities = await this.storage.queryNodes({ nodeType: NodeType.Entity });
        for (const entity of entities) {
            const name = entity.data.name;
            if (name && name.toLowerCase().includes(subjectLower)) {
                await this.storage.upsertEdge({
                    id: `edge:about:${thoughtId}:${entity.id}`,
                    sourceId: thoughtId,
                    targetId: entity.id,
                    relType: RelType.ABOUT,
                    data: { relevance: 0.8 },
                    createdAt: now,
                });
            }
        }
        // Search concepts matching subject
        const concepts = await this.storage.queryNodes({ nodeType: NodeType.Concept });
        for (const concept of concepts) {
            const name = concept.data.name;
            if (name && name.toLowerCase().includes(subjectLower)) {
                await this.storage.upsertEdge({
                    id: `edge:about:${thoughtId}:${concept.id}`,
                    sourceId: thoughtId,
                    targetId: concept.id,
                    relType: RelType.ABOUT,
                    data: { relevance: 0.8 },
                    createdAt: now,
                });
            }
        }
    }
    /**
     * Flush the entities/concepts accumulated by the resolver during chunk
     * processing. Each entity is merged into an existing node when the entity
     * index finds a name/alias match (alias union, majority vote on entityType
     * via typeCounts, max confidence), otherwise created fresh. Also creates
     * MENTIONS edges from each chunk whose text contains the entity name.
     */
    async storeResolvedEntities(noteId, notePath, stats) {
        const now = new Date().toISOString();
        const entities = this.entityResolver.getResolvedEntities();
        const concepts = this.entityResolver.getResolvedConcepts();
        const entityIndex = await this.getEntityIndex();
        for (const entity of entities) {
            const existingId = entityIndex.findMatch(entity.name, entity.aliases);
            let entityId;
            let finalNode;
            if (existingId) {
                // Merge with existing entity
                entityId = existingId;
                const existingNode = await this.storage.getNode(existingId);
                if (existingNode) {
                    const existingData = existingNode.data;
                    const existingAliases = existingData.aliases ?? [];
                    const existingTypeCounts = existingData.typeCounts ?? {};
                    // If the existing name was classified under a single type, seed typeCounts
                    if (Object.keys(existingTypeCounts).length === 0 && existingData.entityType) {
                        existingTypeCounts[existingData.entityType] = 1;
                    }
                    // Increment count for the new entity's type
                    const newTypeCounts = { ...existingTypeCounts };
                    newTypeCounts[entity.entityType] = (newTypeCounts[entity.entityType] ?? 0) + 1;
                    // Determine winning type (highest count)
                    let winningType = entity.entityType;
                    let maxCount = 0;
                    for (const [type, count] of Object.entries(newTypeCounts)) {
                        if (count > maxCount) {
                            maxCount = count;
                            winningType = type;
                        }
                    }
                    // Union aliases
                    const allAliases = new Set(existingAliases);
                    for (const alias of entity.aliases)
                        allAliases.add(alias);
                    // Add new name as alias if different from existing name
                    const existingName = existingData.name;
                    if (entity.name.toLowerCase() !== existingName.toLowerCase()) {
                        allAliases.add(entity.name);
                    }
                    // Remove canonical name from aliases
                    allAliases.delete(existingName);
                    finalNode = {
                        id: existingId,
                        nodeType: NodeType.Entity,
                        data: {
                            name: existingName,
                            entityType: winningType,
                            aliases: Array.from(allAliases),
                            description: existingData.description || entity.description,
                            // NOTE(review): Math.max yields NaN if existingData.confidence
                            // is undefined — confirm stored entities always carry confidence.
                            confidence: Math.max(existingData.confidence, entity.confidence),
                            typeCounts: newTypeCounts,
                        },
                        createdAt: existingNode.createdAt,
                        updatedAt: now,
                    };
                }
                else {
                    // Existing ID found in index but node missing from storage — create fresh
                    entityId = generateEntityId(entity.name);
                    finalNode = {
                        id: entityId,
                        nodeType: NodeType.Entity,
                        data: {
                            name: entity.name,
                            entityType: entity.entityType,
                            aliases: entity.aliases,
                            description: entity.description,
                            confidence: entity.confidence,
                            typeCounts: { [entity.entityType]: 1 },
                        },
                        createdAt: now,
                        updatedAt: now,
                    };
                }
            }
            else {
                // New entity
                entityId = generateEntityId(entity.name);
                finalNode = {
                    id: entityId,
                    nodeType: NodeType.Entity,
                    data: {
                        name: entity.name,
                        entityType: entity.entityType,
                        aliases: entity.aliases,
                        description: entity.description,
                        confidence: entity.confidence,
                        typeCounts: { [entity.entityType]: 1 },
                    },
                    createdAt: now,
                    updatedAt: now,
                };
            }
            await this.storage.upsertNode(finalNode);
            // Keep index current
            const finalData = finalNode.data;
            entityIndex.register(entityId, finalData.name, finalData.aliases ?? []);
            // MENTIONS edges from chunks that mention this entity
            if (entity.spans.length > 0) {
                const noteChunks = await this.storage.queryEdges({
                    sourceId: noteId,
                    relType: RelType.HAS_CHUNK,
                });
                for (const chunkEdge of noteChunks) {
                    const chunkNode = await this.storage.getNode(chunkEdge.targetId);
                    if (!chunkNode)
                        continue;
                    const chunkText = chunkNode.data.text ?? '';
                    // Case-insensitive substring test; per-chunk spans are not
                    // re-localized here (edge stores an empty spans array).
                    if (chunkText.toLowerCase().includes(entity.name.toLowerCase())) {
                        await this.storage.upsertEdge({
                            id: `edge:mentions:${chunkEdge.targetId}:${entityId}`,
                            sourceId: chunkEdge.targetId,
                            targetId: entityId,
                            relType: RelType.MENTIONS,
                            data: { count: 1, spans: [] },
                            createdAt: now,
                        });
                    }
                }
            }
            stats.entitiesExtracted++;
        }
        for (const concept of concepts) {
            const existingConceptId = entityIndex.findMatch(concept.name, []);
            let conceptId;
            let finalConceptNode;
            if (existingConceptId) {
                // Merge with existing concept
                conceptId = existingConceptId;
                const existingNode = await this.storage.getNode(existingConceptId);
                if (existingNode) {
                    const existingData = existingNode.data;
                    const existingAliases = existingData.aliases ?? [];
                    const allAliases = new Set(existingAliases);
                    finalConceptNode = {
                        id: existingConceptId,
                        nodeType: NodeType.Concept,
                        data: {
                            name: existingData.name,
                            domain: existingData.domain || concept.domain,
                            description: existingData.description || concept.description,
                            aliases: Array.from(allAliases),
                            confidence: Math.max(existingData.confidence, concept.confidence),
                        },
                        createdAt: existingNode.createdAt,
                        updatedAt: now,
                    };
                }
                else {
                    // Index hit but node missing from storage — create fresh
                    conceptId = generateConceptId(concept.name);
                    finalConceptNode = {
                        id: conceptId,
                        nodeType: NodeType.Concept,
                        data: {
                            name: concept.name,
                            domain: concept.domain,
                            description: concept.description,
                            aliases: [],
                            confidence: concept.confidence,
                        },
                        createdAt: now,
                        updatedAt: now,
                    };
                }
            }
            else {
                // New concept
                conceptId = generateConceptId(concept.name);
                finalConceptNode = {
                    id: conceptId,
                    nodeType: NodeType.Concept,
                    data: {
                        name: concept.name,
                        domain: concept.domain,
                        description: concept.description,
                        aliases: [],
                        confidence: concept.confidence,
                    },
                    createdAt: now,
                    updatedAt: now,
                };
            }
            await this.storage.upsertNode(finalConceptNode);
            // Keep index current
            const cData = finalConceptNode.data;
            entityIndex.register(conceptId, cData.name, cData.aliases ?? []);
            stats.conceptsExtracted++;
        }
    }
    /**
     * Full sweep: delete every Entity/Concept node that has no incoming
     * MENTIONS edge. Returns the number of nodes removed.
     * (cleanupOrphanedNodes is the targeted variant used while indexing.)
     */
    async cleanupOrphans() {
        const entityIndex = await this.getEntityIndex();
        let cleaned = 0;
        for (const nodeType of [NodeType.Entity, NodeType.Concept]) {
            const nodes = await this.storage.queryNodes({ nodeType });
            for (const node of nodes) {
                const mentionEdges = await this.storage.queryEdges({
                    targetId: node.id,
                    relType: RelType.MENTIONS,
                });
                if (mentionEdges.length === 0) {
                    await this.storage.deleteEdgesByNode(node.id);
                    await this.storage.deleteNode(node.id);
                    entityIndex.remove(node.id);
                    cleaned++;
                }
            }
        }
        return cleaned;
    }
    /**
     * Delete all chunks of a note (including their FTS entries, embeddings and
     * edges). Returns the ids of entities/concepts those chunks mentioned so
     * the caller can garbage-collect any that became orphaned.
     */
    async cleanupNoteChunks(noteId, _notePath) {
        const affectedEntityIds = new Set();
        const existingEdges = await this.storage.queryEdges({
            sourceId: noteId,
            relType: RelType.HAS_CHUNK,
        });
        for (const edge of existingEdges) {
            // Collect entities mentioned by this chunk before deleting
            const mentions = await this.storage.queryEdges({
                sourceId: edge.targetId,
                relType: RelType.MENTIONS,
            });
            for (const m of mentions)
                affectedEntityIds.add(m.targetId);
            await this.storage.deleteFTSEntry(edge.targetId);
            await this.storage.deleteEmbedding(edge.targetId);
            await this.storage.deleteEdgesByNode(edge.targetId);
            await this.storage.deleteNode(edge.targetId);
        }
        return affectedEntityIds;
    }
    /**
     * Delete any of the candidate Entity/Concept nodes that no longer have
     * incoming MENTIONS edges. Returns the number of nodes removed.
     */
    async cleanupOrphanedNodes(candidateIds) {
        const entityIndex = await this.getEntityIndex();
        let cleaned = 0;
        for (const nodeId of candidateIds) {
            const node = await this.storage.getNode(nodeId);
            if (!node)
                continue;
            // Only Entity/Concept nodes are subject to orphan collection.
            if (node.nodeType !== NodeType.Entity && node.nodeType !== NodeType.Concept)
                continue;
            const mentions = await this.storage.queryEdges({ targetId: nodeId, relType: RelType.MENTIONS });
            if (mentions.length === 0) {
                await this.storage.deleteEdgesByNode(nodeId);
                await this.storage.deleteNode(nodeId);
                entityIndex.remove(nodeId);
                cleaned++;
            }
        }
        return cleaned;
    }
    /**
     * Create the default "self" Agent node if it does not exist yet.
     */
    async ensureDefaultAgent() {
        const existing = await this.storage.getNode(DEFAULT_AGENT_ID);
        if (!existing) {
            await this.storage.upsertNode({
                id: DEFAULT_AGENT_ID,
                nodeType: NodeType.Agent,
                data: {
                    name: 'self',
                    agentType: 'self',
                    description: 'The vault owner / note author',
                },
                createdAt: new Date().toISOString(),
                updatedAt: new Date().toISOString(),
            });
        }
    }
}
/**
 * Collect unique tags for a note from frontmatter and inline `#tag` syntax.
 *
 * @param {string} content     Note body (frontmatter already stripped).
 * @param {object} frontmatter Parsed frontmatter; `tags` may be an array or —
 *                             as commonly written in Obsidian — a single
 *                             comma/space-separated string.
 * @returns {string[]} Tags in first-seen order, deduplicated.
 */
function extractTags(content, frontmatter) {
    const tags = new Set();
    const fmTags = frontmatter.tags;
    // Frontmatter tags, array form (`tags: [a, b]`) — unchanged behavior.
    if (Array.isArray(fmTags)) {
        for (const tag of fmTags) {
            tags.add(String(tag));
        }
    }
    // String form (`tags: a, b` or `tags: "#a #b"`) — previously ignored;
    // split on commas/whitespace and strip a leading '#'.
    else if (typeof fmTags === 'string') {
        for (const part of fmTags.split(/[,\s]+/)) {
            const tag = part.replace(/^#/, '');
            if (tag) {
                tags.add(tag);
            }
        }
    }
    // Inline #tags (must start with a letter; then word chars, '/', '-')
    const tagRegex = /#([a-zA-Z][\w/-]*)/g;
    let match;
    while ((match = tagRegex.exec(content)) !== null) {
        tags.add(match[1]);
    }
    return Array.from(tags);
}
653
+ //# sourceMappingURL=pipeline.js.map