mcard-js 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -3
- package/dist/model/ContentTypeInterpreter.d.ts +16 -0
- package/dist/model/ContentTypeInterpreter.d.ts.map +1 -0
- package/dist/model/ContentTypeInterpreter.js +78 -0
- package/dist/model/ContentTypeInterpreter.js.map +1 -0
- package/dist/model/GTime.d.ts +15 -0
- package/dist/model/GTime.d.ts.map +1 -1
- package/dist/model/GTime.js +44 -2
- package/dist/model/GTime.js.map +1 -1
- package/dist/model/detectors/BaseDetector.d.ts +26 -0
- package/dist/model/detectors/BaseDetector.d.ts.map +1 -0
- package/dist/model/detectors/BaseDetector.js +5 -0
- package/dist/model/detectors/BaseDetector.js.map +1 -0
- package/dist/model/detectors/BinaryDetector.d.ts +12 -0
- package/dist/model/detectors/BinaryDetector.d.ts.map +1 -0
- package/dist/model/detectors/BinaryDetector.js +82 -0
- package/dist/model/detectors/BinaryDetector.js.map +1 -0
- package/dist/model/detectors/DataFormatDetectors.d.ts +29 -0
- package/dist/model/detectors/DataFormatDetectors.d.ts.map +1 -0
- package/dist/model/detectors/DataFormatDetectors.js +223 -0
- package/dist/model/detectors/DataFormatDetectors.js.map +1 -0
- package/dist/model/detectors/LanguageDetector.d.ts +11 -0
- package/dist/model/detectors/LanguageDetector.d.ts.map +1 -0
- package/dist/model/detectors/LanguageDetector.js +148 -0
- package/dist/model/detectors/LanguageDetector.js.map +1 -0
- package/dist/model/detectors/MarkupDetectors.d.ts +22 -0
- package/dist/model/detectors/MarkupDetectors.d.ts.map +1 -0
- package/dist/model/detectors/MarkupDetectors.js +132 -0
- package/dist/model/detectors/MarkupDetectors.js.map +1 -0
- package/dist/model/detectors/OBJDetector.d.ts +8 -0
- package/dist/model/detectors/OBJDetector.d.ts.map +1 -0
- package/dist/model/detectors/OBJDetector.js +48 -0
- package/dist/model/detectors/OBJDetector.js.map +1 -0
- package/dist/model/detectors/registry.d.ts +13 -0
- package/dist/model/detectors/registry.d.ts.map +1 -0
- package/dist/model/detectors/registry.js +67 -0
- package/dist/model/detectors/registry.js.map +1 -0
- package/dist/ptr/llm/providers/OllamaEmbeddingProvider.d.ts +20 -0
- package/dist/ptr/llm/providers/OllamaEmbeddingProvider.d.ts.map +1 -0
- package/dist/ptr/llm/providers/OllamaEmbeddingProvider.js +42 -0
- package/dist/ptr/llm/providers/OllamaEmbeddingProvider.js.map +1 -0
- package/dist/ptr/node/CLMLoader.d.ts.map +1 -1
- package/dist/ptr/node/CLMLoader.js +8 -0
- package/dist/ptr/node/CLMLoader.js.map +1 -1
- package/dist/ptr/node/CLMRunner.d.ts.map +1 -1
- package/dist/ptr/node/CLMRunner.js +4 -3
- package/dist/ptr/node/CLMRunner.js.map +1 -1
- package/dist/ptr/node/Runtimes.d.ts +7 -0
- package/dist/ptr/node/Runtimes.d.ts.map +1 -1
- package/dist/ptr/node/Runtimes.js +94 -3
- package/dist/ptr/node/Runtimes.js.map +1 -1
- package/dist/rag/HandleVectorStore.d.ts +201 -0
- package/dist/rag/HandleVectorStore.d.ts.map +1 -0
- package/dist/rag/HandleVectorStore.js +527 -0
- package/dist/rag/HandleVectorStore.js.map +1 -0
- package/dist/rag/PersistentIndexer.d.ts +144 -0
- package/dist/rag/PersistentIndexer.d.ts.map +1 -0
- package/dist/rag/PersistentIndexer.js +275 -0
- package/dist/rag/PersistentIndexer.js.map +1 -0
- package/dist/rag/embeddings/VisionEmbeddingProvider.d.ts +87 -0
- package/dist/rag/embeddings/VisionEmbeddingProvider.d.ts.map +1 -0
- package/dist/rag/embeddings/VisionEmbeddingProvider.js +164 -0
- package/dist/rag/embeddings/VisionEmbeddingProvider.js.map +1 -0
- package/dist/rag/graph/community.d.ts +56 -0
- package/dist/rag/graph/community.d.ts.map +1 -0
- package/dist/rag/graph/community.js +247 -0
- package/dist/rag/graph/community.js.map +1 -0
- package/dist/rag/graph/extractor.d.ts +99 -0
- package/dist/rag/graph/extractor.d.ts.map +1 -0
- package/dist/rag/graph/extractor.js +210 -0
- package/dist/rag/graph/extractor.js.map +1 -0
- package/dist/rag/graph/store.d.ts +176 -0
- package/dist/rag/graph/store.d.ts.map +1 -0
- package/dist/rag/graph/store.js +504 -0
- package/dist/rag/graph/store.js.map +1 -0
- package/dist/rag/index.d.ts +19 -0
- package/dist/rag/index.d.ts.map +1 -0
- package/dist/rag/index.js +24 -0
- package/dist/rag/index.js.map +1 -0
- package/dist/rag/semanticVersioning.d.ts +187 -0
- package/dist/rag/semanticVersioning.d.ts.map +1 -0
- package/dist/rag/semanticVersioning.js +253 -0
- package/dist/rag/semanticVersioning.js.map +1 -0
- package/dist/storage/IndexedDBEngine.d.ts.map +1 -1
- package/dist/storage/IndexedDBEngine.js +9 -1
- package/dist/storage/IndexedDBEngine.js.map +1 -1
- package/dist/storage/SqliteNodeEngine.d.ts.map +1 -1
- package/dist/storage/SqliteNodeEngine.js +26 -4
- package/dist/storage/SqliteNodeEngine.js.map +1 -1
- package/dist/storage/SqliteWasmEngine.d.ts.map +1 -1
- package/dist/storage/SqliteWasmEngine.js +9 -1
- package/dist/storage/SqliteWasmEngine.js.map +1 -1
- package/dist/storage/StorageAdapter.d.ts +2 -0
- package/dist/storage/StorageAdapter.d.ts.map +1 -1
- package/dist/storage/VectorStore.d.ts +28 -5
- package/dist/storage/VectorStore.d.ts.map +1 -1
- package/dist/storage/VectorStore.js +110 -50
- package/dist/storage/VectorStore.js.map +1 -1
- package/dist/storage/schema.d.ts +123 -80
- package/dist/storage/schema.d.ts.map +1 -1
- package/dist/storage/schema.js +305 -217
- package/dist/storage/schema.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision Embedding Provider
|
|
3
|
+
*
|
|
4
|
+
* Multimodal embedding provider that uses vision models to describe images,
|
|
5
|
+
* then embeds the descriptions for vector search.
|
|
6
|
+
*
|
|
7
|
+
* Mirrors Python: mcard/rag/embeddings/vision.py
|
|
8
|
+
*/
|
|
9
|
+
import { OllamaEmbeddingProvider } from '../../ptr/llm/providers/OllamaEmbeddingProvider';
|
|
10
|
+
/**
 * Catalogue of the vision models this module knows about, keyed by model name.
 * Each entry carries a human-readable description and a size label.
 * Key order is significant: `getInfo()` reports `Object.keys(VISION_MODELS)`.
 */
export const VISION_MODELS = {
    moondream: {
        description: 'Moondream - Tiny, high-performance vision language model',
        size: '1.7GB',
    },
    'llama3.2-vision': {
        description: 'Llama 3.2 Vision - 11B multimodal model',
        size: '7.9GB',
    },
    llava: {
        description: 'LLaVA - Large Language and Vision Assistant',
        size: '4.7GB',
    },
    'minicpm-v': {
        description: 'MiniCPM-V - Efficient vision-language model',
        size: '5.6GB',
    },
};
// Vision model used when the provider config does not name one.
const DEFAULT_VISION_MODEL = 'moondream';
// Prompt sent along with the image when the caller supplies no prompt of their own.
const DEFAULT_DESCRIPTION_PROMPT = [
    'Describe this image in detail for semantic search.',
    'Include:',
    '- Main subject and objects visible',
    '- Colors, textures, and visual elements',
    '- Any text visible in the image',
    '- Context, setting, or environment',
    '- Actions or relationships between elements',
    '',
    'Be comprehensive but concise. Focus on searchable details.',
].join('\n');
|
|
38
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
39
|
+
// VisionEmbeddingProvider Class
|
|
40
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
41
|
+
/**
 * Multimodal embedding provider for images.
 *
 * Uses a two-stage approach:
 *   1. A vision model generates a text description of the image.
 *   2. A text embedding model converts that description to a vector.
 *
 * This enables semantic search over images using existing vector infrastructure.
 *
 * Usage:
 *   const provider = new VisionEmbeddingProvider();
 *
 *   // Embed an image given as bytes (Uint8Array / ArrayBuffer), a base64
 *   // string, or a `data:...;base64,` URL.
 *   const embedding = await provider.embedImage(imageBytes);
 *
 * Note: file paths are NOT read by this class; load the file yourself and
 * pass its bytes.
 */
export class VisionEmbeddingProvider {
    visionModel;
    baseUrl;
    descriptionPrompt;
    textEmbedder;
    /**
     * @param config - Optional settings:
     *   - `visionModel`: vision model name (default `DEFAULT_VISION_MODEL`)
     *   - `ollamaBaseUrl`: server base URL (default `http://localhost:11434`);
     *     one trailing slash is removed
     *   - `descriptionPrompt`: prompt sent with each image
     *   - `embeddingModel`: text embedding model (default `nomic-embed-text`)
     */
    constructor(config = {}) {
        this.visionModel = config.visionModel || DEFAULT_VISION_MODEL;
        this.baseUrl = (config.ollamaBaseUrl || 'http://localhost:11434').replace(/\/$/, '');
        this.descriptionPrompt = config.descriptionPrompt || DEFAULT_DESCRIPTION_PROMPT;
        this.textEmbedder = new OllamaEmbeddingProvider(config.embeddingModel || 'nomic-embed-text', this.baseUrl);
    }
    /** Composite identifier: `vision:<visionModel>+<text embedder model name>`. */
    get modelName() {
        return `vision:${this.visionModel}+${this.textEmbedder.modelName}`;
    }
    /** Fixed provider identifier. */
    get providerName() {
        return 'ollama-vision';
    }
    /** Embedding dimensionality (currently a hard-coded value). */
    get dimensions() {
        return 768; // TODO: Should get this dynamically from textEmbedder, but interface assumes sync access
    }
    /**
     * Generate a text description of an image.
     *
     * POSTs `{ model, prompt, images, stream: false }` to `<baseUrl>/api/generate`.
     *
     * @param imageData - Image as a base64 string (optionally a
     *   `data:...;base64,` URL), a Uint8Array / other ArrayBuffer view, or an
     *   ArrayBuffer
     * @param prompt - Optional custom prompt; falls back to `descriptionPrompt`
     * @returns The `response` field of the JSON reply, or `''` when it is absent
     * @throws TypeError when `imageData` is none of the supported kinds
     * @throws Error when the HTTP response is not OK
     */
    async describeImage(imageData, prompt) {
        let imageB64;
        if (typeof imageData === 'string') {
            // The request carries bare base64, so drop a data-URL header if present.
            // A bare base64 string can never start with "data:" (':' is not in its alphabet).
            imageB64 = imageData.replace(/^data:[^,]*;base64,/i, '');
        }
        else if (imageData instanceof ArrayBuffer || ArrayBuffer.isView(imageData)) {
            imageB64 = this.arrayBufferToBase64(imageData);
        }
        else {
            throw new TypeError('imageData must be a base64 string, a Uint8Array or an ArrayBuffer');
        }
        const url = `${this.baseUrl}/api/generate`;
        const payload = {
            model: this.visionModel,
            prompt: prompt || this.descriptionPrompt,
            images: [imageB64],
            stream: false
        };
        const response = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });
        if (!response.ok) {
            throw new Error(`Vision model call failed: ${response.status} ${response.statusText}`);
        }
        const result = await response.json();
        return result.response || '';
    }
    /**
     * Generate an embedding for an image.
     *
     * @returns Whatever the text embedder returns for the image description
     * @throws Error when the vision model returns an empty description
     */
    async embedImage(imageData, prompt) {
        const description = await this.describeImage(imageData, prompt);
        if (!description) {
            throw new Error('Vision model returned empty description');
        }
        console.debug(`Image description: ${description.slice(0, 100)}...`);
        return this.textEmbedder.embed(description);
    }
    /**
     * Generate an embedding and also return the description it was built from.
     *
     * @returns `{ embedding, description }`
     * @throws Error when the vision model returns an empty description
     *   (same rule as `embedImage`, so an empty string is never embedded)
     */
    async embedImageWithDescription(imageData, prompt) {
        const description = await this.describeImage(imageData, prompt);
        if (!description) {
            throw new Error('Vision model returned empty description');
        }
        const embedding = await this.textEmbedder.embed(description);
        return { embedding, description };
    }
    // ─────────────────────────────────────────────────────────────────────────
    // EmbeddingProvider Implementation
    // ─────────────────────────────────────────────────────────────────────────
    /** Embed plain text by delegating to the text embedder. */
    async embed(text) {
        return this.textEmbedder.embed(text);
    }
    /** Embed several texts by delegating to the text embedder. */
    async embedBatch(texts) {
        return this.textEmbedder.embedBatch(texts);
    }
    // ─────────────────────────────────────────────────────────────────────────
    // Utility
    // ─────────────────────────────────────────────────────────────────────────
    /**
     * Convert raw bytes to a base64 string.
     *
     * @param buffer - Uint8Array, any other ArrayBuffer view, or an ArrayBuffer
     */
    arrayBufferToBase64(buffer) {
        let bytes;
        if (buffer instanceof Uint8Array) {
            bytes = buffer;
        }
        else if (ArrayBuffer.isView(buffer)) {
            // Respect the view's window instead of encoding the whole backing buffer.
            bytes = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength);
        }
        else {
            // A bare ArrayBuffer has no indexed access; wrap it first.
            bytes = new Uint8Array(buffer);
        }
        // Build the binary string in chunks: avoids one concatenation per byte
        // while keeping each call's argument count well below engine limits.
        const chunkSize = 0x8000;
        let binary = '';
        for (let offset = 0; offset < bytes.length; offset += chunkSize) {
            binary += String.fromCharCode(...bytes.subarray(offset, offset + chunkSize));
        }
        return btoa(binary);
    }
    /**
     * Get provider information: provider name, vision model, embedding model
     * and the names listed in `VISION_MODELS`.
     */
    getInfo() {
        return {
            provider: this.providerName,
            visionModel: this.visionModel,
            embeddingModel: this.textEmbedder.modelName,
            availableModels: Object.keys(VISION_MODELS)
        };
    }
}
|
|
164
|
+
//# sourceMappingURL=VisionEmbeddingProvider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"VisionEmbeddingProvider.js","sourceRoot":"","sources":["../../../src/rag/embeddings/VisionEmbeddingProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,uBAAuB,EAAE,MAAM,iDAAiD,CAAC;AAa1F,MAAM,CAAC,MAAM,aAAa,GAAG;IACzB,WAAW,EAAE;QACT,WAAW,EAAE,0DAA0D;QACvE,IAAI,EAAE,OAAO;KAChB;IACD,iBAAiB,EAAE;QACf,WAAW,EAAE,yCAAyC;QACtD,IAAI,EAAE,OAAO;KAChB;IACD,OAAO,EAAE;QACL,WAAW,EAAE,6CAA6C;QAC1D,IAAI,EAAE,OAAO;KAChB;IACD,WAAW,EAAE;QACT,WAAW,EAAE,6CAA6C;QAC1D,IAAI,EAAE,OAAO;KAChB;CACJ,CAAC;AAEF,MAAM,oBAAoB,GAAG,WAAW,CAAC;AAEzC,MAAM,0BAA0B,GAAG;;;;;;;;2DAQwB,CAAC;AAE5D,gFAAgF;AAChF,gCAAgC;AAChC,gFAAgF;AAEhF;;;;;;;;;;;;;;GAcG;AACH,MAAM,OAAO,uBAAuB;IACxB,WAAW,CAAS;IACpB,OAAO,CAAS;IAChB,iBAAiB,CAAS;IAC1B,YAAY,CAA0B;IAE9C,YAAY,SAA+B,EAAE;QACzC,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,oBAAoB,CAAC;QAC9D,IAAI,CAAC,OAAO,GAAG,CAAC,MAAM,CAAC,aAAa,IAAI,wBAAwB,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACrF,IAAI,CAAC,iBAAiB,GAAG,MAAM,CAAC,iBAAiB,IAAI,0BAA0B,CAAC;QAEhF,IAAI,CAAC,YAAY,GAAG,IAAI,uBAAuB,CAC3C,MAAM,CAAC,cAAc,IAAI,kBAAkB,EAC3C,IAAI,CAAC,OAAO,CACf,CAAC;IACN,CAAC;IAED,IAAI,SAAS;QACT,OAAO,UAAU,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,YAAY,CAAC,SAAS,EAAE,CAAC;IACvE,CAAC;IAED,IAAI,YAAY;QACZ,OAAO,eAAe,CAAC;IAC3B,CAAC;IAED,IAAI,UAAU;QACV,OAAO,GAAG,CAAC,CAAC,yFAAyF;IACzG,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,aAAa,CAAC,SAA8B,EAAE,MAAe;QAC/D,IAAI,QAAgB,CAAC;QAErB,IAAI,SAAS,YAAY,UAAU,EAAE,CAAC;YAClC,QAAQ,GAAG,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QACnD,CAAC;aAAM,CAAC;YACJ,yCAAyC;YACzC,wEAAwE;YACxE,QAAQ,GAAG,SAAS,CAAC;QACzB,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,eAAe,CAAC;QAE3C,MAAM,OAAO,GAAG;YACZ,KAAK,EAAE,IAAI,CAAC,WAAW;YACvB,MAAM,EAAE,MAAM,IAAI,IAAI,CAAC,iBAAiB;YACxC,MAAM,EAAE,CAAC,QAAQ,CAAC;YAClB,MAAM,EAAE,KAAK;SAChB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC9B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;SAChC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,QA
AQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QAC3F,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACrC,OAAO,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,SAA8B,EAAE,MAAe;QAC5D,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAEhE,IAAI,CAAC,WAAW,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,sBAAsB,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;QACpE,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,yBAAyB,CAC3B,SAA8B,EAC9B,MAAe;QAEf,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAChE,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAC7D,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;IACtC,CAAC;IAED,4EAA4E;IAC5E,mCAAmC;IACnC,4EAA4E;IAE5E,KAAK,CAAC,KAAK,CAAC,IAAY;QACpB,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC5B,OAAO,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;IAC/C,CAAC;IAED,4EAA4E;IAC5E,UAAU;IACV,4EAA4E;IAE5E;;OAEG;IACK,mBAAmB,CAAC,MAAkB;QAC1C,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,MAAM,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3B,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,OAAO;QACH,OAAO;YACH,QAAQ,EAAE,IAAI,CAAC,YAAY;YAC3B,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,cAAc,EAAE,IAAI,CAAC,YAAY,CAAC,SAAS;YAC3C,eAAe,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC;SAC9C,CAAC;IACN,CAAC;CACJ"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Community Detection and Summarization
|
|
3
|
+
*
|
|
4
|
+
* Label Propagation Algorithm (LPA) for community detection
|
|
5
|
+
* and LLM-based hierarchical summarization.
|
|
6
|
+
*
|
|
7
|
+
* Mirrors Python: mcard/rag/graph/community.py
|
|
8
|
+
*/
|
|
9
|
+
import { GraphStore } from './store';
|
|
10
|
+
/**
 * Runs asynchronous Label Propagation over the graph held by `store` and
 * groups entities by their final label.
 *
 * @param store - GraphStore instance to read relationships from
 * @param maxIter - Upper bound on propagation passes
 * @returns One array of entity IDs per detected community
 */
export declare function detectCommunities(
    store: GraphStore,
    maxIter?: number
): number[][];
|
|
18
|
+
/** Settings for the LLM-backed community summarizer. */
export interface CommunitySummarizerConfig {
    /** Base URL of the Ollama server the summarizer calls. */
    ollamaBaseUrl: string;
    /** Name of the model used to generate summaries. */
    model: string;
}
|
|
22
|
+
/**
 * Produces LLM-written summaries for graph communities.
 */
export declare class CommunitySummarizer {
    private store;
    private config;
    /**
     * @param store - GraphStore used to look up entities and persist communities
     * @param config - Optional overrides for the summarizer settings
     */
    constructor(store: GraphStore, config?: Partial<CommunitySummarizerConfig>);
    /**
     * Summarizes each community and stores the result in the DB.
     *
     * @param communities - Communities as arrays of entity IDs
     * @returns Number of summaries generated
     */
    summarizeAndStore(communities: number[][]): Promise<number>;
    /** Builds the entity context text handed to the LLM. */
    private prepareContext;
    /** Asks the LLM for a summary. */
    private generateSummary;
}
|
|
44
|
+
/**
 * Detects communities and, on request, summarizes them with an LLM.
 *
 * @param store - GraphStore instance
 * @param summarize - When true, LLM summaries are generated as well
 * @param config - Summarizer configuration overrides
 * @returns The detected communities together with the number of summaries generated
 */
export declare function detectAndSummarizeCommunities(
    store: GraphStore,
    summarize?: boolean,
    config?: Partial<CommunitySummarizerConfig>
): Promise<{
    communities: number[][];
    summaryCount: number;
}>;
|
|
56
|
+
//# sourceMappingURL=community.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"community.d.ts","sourceRoot":"","sources":["../../../src/rag/graph/community.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAMrC;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,UAAU,EAAE,OAAO,GAAE,MAAW,GAAG,MAAM,EAAE,EAAE,CAwGrF;AAyCD,MAAM,WAAW,yBAAyB;IACtC,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,MAAM,CAAC;CACzB;AAOD;;GAEG;AACH,qBAAa,mBAAmB;IAC5B,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,MAAM,CAA4B;gBAE9B,KAAK,EAAE,UAAU,EAAE,MAAM,GAAE,OAAO,CAAC,yBAAyB,CAAM;IAK9E;;;;OAIG;IACG,iBAAiB,CAAC,WAAW,EAAE,MAAM,EAAE,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAqBjE;;OAEG;IACH,OAAO,CAAC,cAAc;IActB;;OAEG;YACW,eAAe;CAyChC;AAMD;;;;;;;GAOG;AACH,wBAAsB,6BAA6B,CAC/C,KAAK,EAAE,UAAU,EACjB,SAAS,GAAE,OAAe,EAC1B,MAAM,CAAC,EAAE,OAAO,CAAC,yBAAyB,CAAC,GAC5C,OAAO,CAAC;IAAE,WAAW,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,CAAC,CAU5D"}
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Community Detection and Summarization
|
|
3
|
+
*
|
|
4
|
+
* Label Propagation Algorithm (LPA) for community detection
|
|
5
|
+
* and LLM-based hierarchical summarization.
|
|
6
|
+
*
|
|
7
|
+
* Mirrors Python: mcard/rag/graph/community.py
|
|
8
|
+
*/
|
|
9
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
10
|
+
// Community Detection
|
|
11
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
12
|
+
/**
 * Detect communities using asynchronous Label Propagation (LPA).
 *
 * Reads every row of `graph_relationships` through `store.db`, treats the
 * relationships as undirected edges, and repeatedly gives each node the label
 * that is most frequent among its neighbours (ties broken at random) until a
 * full pass changes nothing or `maxIter` passes have run. Only entities that
 * appear in at least one relationship take part.
 *
 * @param store - GraphStore instance (its `db.prepare(sql).all()` is used)
 * @param maxIter - Maximum iterations
 * @returns List of communities, where each community is a list of entity IDs,
 *   ordered by size (largest first) and then by smallest member ID
 */
export function detectCommunities(store, maxIter = 20) {
    console.info('Starting community detection (LPA)...');
    // 1. Build the undirected adjacency list from the database.
    //    Map insertion order doubles as the node order (source first, then target).
    const adj = new Map();
    const neighborsOf = (id) => {
        let list = adj.get(id);
        if (list === undefined) {
            list = [];
            adj.set(id, list);
        }
        return list;
    };
    const rows = store.db
        .prepare('SELECT source_entity_id, target_entity_id FROM graph_relationships')
        .all();
    for (const { source_entity_id: src, target_entity_id: tgt } of rows) {
        // Touch both endpoints before pushing so first-seen order is src, tgt.
        const fromSrc = neighborsOf(src);
        const fromTgt = neighborsOf(tgt);
        // Repeated rows add repeated entries, so they weigh more in the vote below.
        fromSrc.push(tgt);
        fromTgt.push(src);
    }
    const nodeList = Array.from(adj.keys());
    if (nodeList.length === 0) {
        console.warn('No nodes found for community detection');
        return [];
    }
    console.debug(`Graph size: ${adj.size} nodes, ${rows.length} edges`);
    // 2. Initialize labels (each node starts in its own community).
    const labels = new Map();
    for (const node of nodeList) {
        labels.set(node, node);
    }
    // 3. Propagate labels.
    for (let i = 0; i < maxIter; i++) {
        let changes = 0;
        // Visit nodes in a fresh random order each pass (asynchronous update).
        shuffleArray(nodeList);
        for (const node of nodeList) {
            const neighbors = adj.get(node) || [];
            if (neighbors.length === 0)
                continue;
            // Tally the current labels of the neighbours and track the top count.
            const counts = new Map();
            let maxFreq = 0;
            for (const neighbor of neighbors) {
                const label = labels.get(neighbor);
                const seen = (counts.get(label) || 0) + 1;
                counts.set(label, seen);
                if (seen > maxFreq)
                    maxFreq = seen;
            }
            // Most frequent label wins; ties are broken randomly.
            const bestLabels = [];
            for (const [label, count] of counts.entries()) {
                if (count === maxFreq)
                    bestLabels.push(label);
            }
            const newLabel = bestLabels[Math.floor(Math.random() * bestLabels.length)];
            if (labels.get(node) !== newLabel) {
                labels.set(node, newLabel);
                changes++;
            }
        }
        console.debug(`LPA Iteration ${i + 1}: ${changes} changes`);
        if (changes === 0) {
            console.info(`LPA converged after ${i + 1} iterations`);
            break;
        }
    }
    // 4. Group nodes by their final label.
    const communities = new Map();
    for (const [node, label] of labels.entries()) {
        let members = communities.get(label);
        if (members === undefined) {
            members = [];
            communities.set(label, members);
        }
        members.push(node);
    }
    console.info(`Detected ${communities.size} communities`);
    // Sort for deterministic output (by size desc, then smallest ID).
    // The smallest ID is computed once per community with a plain loop:
    // spreading a large community into Math.min() can exceed the engine's
    // argument limit, and doing it inside the comparator rescans members on
    // every comparison.
    const ranked = Array.from(communities.values(), (members) => {
        let minId = members[0];
        for (const id of members) {
            if (id < minId)
                minId = id;
        }
        return { members, minId };
    });
    ranked.sort((a, b) => {
        if (b.members.length !== a.members.length)
            return b.members.length - a.members.length;
        return a.minId - b.minId;
    });
    return ranked.map((entry) => entry.members);
}
|
|
105
|
+
/**
 * Shuffles `array` in place (Fisher-Yates): walking from the end towards the
 * front, each position is swapped with a randomly chosen position at or
 * before it. Nothing is returned.
 */
function shuffleArray(array) {
    let remaining = array.length;
    while (remaining > 1) {
        // Choose among the first `remaining` slots, then fill the last of them.
        const pick = Math.floor(Math.random() * remaining);
        remaining -= 1;
        const held = array[remaining];
        array[remaining] = array[pick];
        array[pick] = held;
    }
}
|
|
114
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
115
|
+
// Community Summarization Prompts
|
|
116
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
117
|
+
// System-style preamble placed in front of every summarization request.
const SUMMARIZE_SYSTEM_PROMPT = `You are an expert graph analyst.
Your task is to summarize a "community" of related entities from a knowledge graph.
Focus on the common themes, purposes, or technologies that connect these entities.
Synthesize the descriptions into a cohesive whole.`;
// User prompt template; `{entity_text}` is replaced with the entity list.
const SUMMARIZE_USER_PROMPT = `Analyze the following list of entities and their descriptions which form a community in a knowledge graph.
Create a Title and valid JSON Summary.

--- BEGIN ENTITY LIST ---
{entity_text}
--- END ENTITY LIST ---

Requirement:
- Provide a short Title.
- Provide a detailed Summary of common themes.
- Output MUST be valid JSON in the format:
{
"title": "Community Title",
"summary": "Detailed summary..."
}`;
// Settings used for any option the caller does not provide.
const DEFAULT_SUMMARIZER_CONFIG = {
    model: 'gemma3:latest',
    ollamaBaseUrl: 'http://localhost:11434',
};
/**
 * Summarizes graph communities using LLM.
 */
export class CommunitySummarizer {
    store;
    config;
    /**
     * @param store - GraphStore; `getEntityById` and `addCommunity` are used
     * @param config - Optional overrides for `model` / `ollamaBaseUrl`.
     *   Options that are missing or explicitly `undefined` keep their default.
     */
    constructor(store, config = {}) {
        this.store = store;
        // Drop `undefined` values first so `{ model: undefined }` cannot wipe out a default.
        const overrides = Object.fromEntries(Object.entries(config ?? {}).filter(([, value]) => value !== undefined));
        this.config = { ...DEFAULT_SUMMARIZER_CONFIG, ...overrides };
    }
    /**
     * Summarize communities and store them in the DB.
     *
     * Communities are processed one at a time. A community with no resolvable
     * entities is skipped; a failure while summarizing or storing one
     * community is logged and does not stop the remaining ones.
     *
     * @param communities - Communities as arrays of entity IDs
     * @returns Count of summaries generated
     */
    async summarizeAndStore(communities) {
        let count = 0;
        for (const commIds of communities) {
            // Prepare context
            const entityText = this.prepareContext(commIds);
            if (!entityText)
                continue;
            try {
                const [title, summary] = await this.generateSummary(entityText);
                this.store.addCommunity(title, summary, commIds);
                count++;
                console.info(`Generated community: ${title}`);
            }
            catch (error) {
                console.error(`Failed to summarize community: ${error}`);
            }
        }
        return count;
    }
    /**
     * Prepare entity context for summarization: one
     * `- name (type): description` line per entity that the store can resolve.
     *
     * @param ids - Entity IDs of one community
     * @returns The lines joined by newlines (empty string when nothing resolves)
     */
    prepareContext(ids) {
        const lines = [];
        // Limit context size to first 30 entities
        for (const eid of ids.slice(0, 30)) {
            const ent = this.store.getEntityById(eid);
            if (ent) {
                // A missing description becomes empty text rather than the word "undefined".
                lines.push(`- ${ent.name} (${ent.type}): ${ent.description ?? ''}`);
            }
        }
        return lines.join('\n');
    }
    /**
     * Generate summary using LLM.
     *
     * POSTs the combined system + user prompt to `<ollamaBaseUrl>/api/generate`
     * and extracts the first-`{` to last-`}` span of the reply as JSON.
     *
     * @param entityText - Entity list produced by `prepareContext`
     * @returns `[title, summary]`; when no parseable JSON is found, a generic
     *   title plus the first 500 characters of the raw reply
     * @throws Error when the HTTP response is not OK
     */
    async generateSummary(entityText) {
        // Function replacer: entity text is inserted verbatim, so sequences such
        // as `$&` or `$1` in it are not treated as replacement patterns.
        const prompt = SUMMARIZE_USER_PROMPT.replace('{entity_text}', () => entityText);
        // Strip trailing slashes so a base URL ending in "/" does not yield "//api/generate".
        const url = `${String(this.config.ollamaBaseUrl).replace(/\/+$/, '')}/api/generate`;
        const payload = {
            model: this.config.model,
            prompt: `${SUMMARIZE_SYSTEM_PROMPT}\n\n${prompt}`,
            stream: false,
            options: {
                temperature: 0.3,
            }
        };
        const response = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload),
        });
        if (!response.ok) {
            throw new Error(`LLM call failed: ${response.status}`);
        }
        const result = await response.json();
        const content = result.response || '';
        // Parse JSON from response
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
            try {
                const data = JSON.parse(jsonMatch[0]);
                return [data.title || 'Unknown Community', data.summary || ''];
            }
            catch {
                // Not valid JSON after all: fall through to the fallback below.
            }
        }
        // Fallback if no JSON found
        return ['Community Summary', content.slice(0, 500)];
    }
}
|
|
227
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
228
|
+
// Convenience Function
|
|
229
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
230
|
+
/**
 * Runs community detection and, when asked, LLM summarization on top of it.
 *
 * @param store - GraphStore instance
 * @param summarize - When true (and at least one community was found),
 *   summaries are generated and stored
 * @param config - Summarizer configuration
 * @returns `{ communities, summaryCount }`; `summaryCount` is 0 when no
 *   summarization took place
 */
export async function detectAndSummarizeCommunities(store, summarize = false, config) {
    const communities = detectCommunities(store);
    // Nothing to summarize, or summaries were not requested.
    if (!summarize || communities.length === 0) {
        return { communities, summaryCount: 0 };
    }
    const summaryCount = await new CommunitySummarizer(store, config).summarizeAndStore(communities);
    return { communities, summaryCount };
}
|
|
247
|
+
//# sourceMappingURL=community.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"community.js","sourceRoot":"","sources":["../../../src/rag/graph/community.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,gFAAgF;AAChF,sBAAsB;AACtB,gFAAgF;AAEhF;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,KAAiB,EAAE,UAAkB,EAAE;IACrE,OAAO,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;IAEtD,wCAAwC;IACxC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAoB,CAAC;IACxC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,4DAA4D;IAC5D,MAAM,IAAI,GAAI,KAAa,CAAC,EAAE,CAAC,OAAO,CAClC,oEAAoE,CACvE,CAAC;IACF,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAmE,CAAC;IAEzF,KAAK,MAAM,EAAE,gBAAgB,EAAE,GAAG,EAAE,gBAAgB,EAAE,GAAG,EAAE,IAAI,IAAI,EAAE,CAAC;QAClE,2CAA2C;QAC3C,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACpC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QAEpC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACxB,GAAG,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAExB,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACf,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;QACvD,OAAO,EAAE,CAAC;IACd,CAAC;IAED,OAAO,CAAC,KAAK,CAAC,eAAe,KAAK,CAAC,IAAI,WAAW,IAAI,CAAC,MAAM,QAAQ,CAAC,CAAC;IAEvE,+DAA+D;IAC/D,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,sBAAsB;IACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/B,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,wCAAwC;QACxC,YAAY,CAAC,QAAQ,CAAC,CAAC;QAEvB,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC1B,MAAM,SAAS,GAAG,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAErC,wBAAwB;YACxB,MAAM,cAAc,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,CAAC;YAC1D,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;YAEzC,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;gBACjC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,MAAM,C
AAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACpD,CAAC;YAED,kDAAkD;YAClD,IAAI,OAAO,GAAG,CAAC,CAAC;YAChB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;gBAClC,IAAI,KAAK,GAAG,OAAO;oBAAE,OAAO,GAAG,KAAK,CAAC;YACzC,CAAC;YAED,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;gBAC5C,IAAI,KAAK,KAAK,OAAO;oBAAE,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClD,CAAC;YAED,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;YAE3E,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,QAAQ,EAAE,CAAC;gBAChC,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;gBAC3B,OAAO,EAAE,CAAC;YACd,CAAC;QACL,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,iBAAiB,CAAC,GAAG,CAAC,KAAK,OAAO,UAAU,CAAC,CAAC;QAE5D,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;YAChB,OAAO,CAAC,IAAI,CAAC,uBAAuB,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;YACxD,MAAM;QACV,CAAC;IACL,CAAC;IAED,uBAAuB;IACvB,MAAM,WAAW,GAAG,IAAI,GAAG,EAAoB,CAAC;IAChD,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;QAC3C,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1B,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC/B,CAAC;QACD,WAAW,CAAC,GAAG,CAAC,KAAK,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACvC,CAAC;IAED,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,IAAI,CAAC,YAAY,MAAM,CAAC,MAAM,cAAc,CAAC,CAAC;IAEtD,8DAA8D;IAC9D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACjB,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;YAAE,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC;QACtD,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAI,KAAU;IAC/B,KAAK,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC9C,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAA
C;IAChD,CAAC;AACL,CAAC;AAED,gFAAgF;AAChF,kCAAkC;AAClC,gFAAgF;AAEhF,MAAM,uBAAuB,GAAG;;;mDAGmB,CAAC;AAEpD,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;EAc5B,CAAC;AAWH,MAAM,yBAAyB,GAA8B;IACzD,KAAK,EAAE,eAAe;IACtB,aAAa,EAAE,wBAAwB;CAC1C,CAAC;AAEF;;GAEG;AACH,MAAM,OAAO,mBAAmB;IACpB,KAAK,CAAa;IAClB,MAAM,CAA4B;IAE1C,YAAY,KAAiB,EAAE,SAA6C,EAAE;QAC1E,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,yBAAyB,EAAE,GAAG,MAAM,EAAE,CAAC;IAC9D,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,iBAAiB,CAAC,WAAuB;QAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,MAAM,OAAO,IAAI,WAAW,EAAE,CAAC;YAChC,kBAAkB;YAClB,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAChD,IAAI,CAAC,UAAU;gBAAE,SAAS;YAE1B,IAAI,CAAC;gBACD,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;gBAChE,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;gBACjD,KAAK,EAAE,CAAC;gBACR,OAAO,CAAC,IAAI,CAAC,wBAAwB,KAAK,EAAE,CAAC,CAAC;YAClD,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,OAAO,CAAC,KAAK,CAAC,kCAAkC,KAAK,EAAE,CAAC,CAAC;YAC7D,CAAC;QACL,CAAC;QAED,OAAO,KAAK,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,GAAa;QAChC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,0CAA0C;QAC1C,KAAK,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAC1C,IAAI,GAAG,EAAE,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,IAAI,KAAK,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;YAClE,CAAC;QACL,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,eAAe,CAAC,UAAkB;QAC5C,MAAM,MAAM,GAAG,qBAAqB,CAAC,OAAO,CAAC,eAAe,EAAE,UAAU,CAAC,CAAC;QAE1E,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,eAAe,CAAC;QAExD,MAAM,OAAO,GAAG;YACZ,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YACxB,MAAM,EAAE,GAAG,uBAAuB,OAAO,MAAM,EAAE;YACjD,MAAM,EAAE,KAAK;YACb,OAAO,EAAE;gBACL,WAAW,EAAE,GAAG;aACnB;SACJ,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC9B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;SAChC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,C
AAC,EAAE,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACrC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC;QAEtC,2BAA2B;QAC3B,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC/C,IAAI,SAAS,EAAE,CAAC;YACZ,IAAI,CAAC;gBACD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBACtC,OAAO,CAAC,IAAI,CAAC,KAAK,IAAI,mBAAmB,EAAE,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;YACnE,CAAC;YAAC,MAAM,CAAC;gBACL,2BAA2B;YAC/B,CAAC;QACL,CAAC;QAED,4BAA4B;QAC5B,OAAO,CAAC,mBAAmB,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;IACxD,CAAC;CACJ;AAED,gFAAgF;AAChF,uBAAuB;AACvB,gFAAgF;AAEhF;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,6BAA6B,CAC/C,KAAiB,EACjB,YAAqB,KAAK,EAC1B,MAA2C;IAE3C,MAAM,WAAW,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAE7C,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,SAAS,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtC,MAAM,UAAU,GAAG,IAAI,mBAAmB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;QAC1D,YAAY,GAAG,MAAM,UAAU,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;IACnE,CAAC;IAED,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC;AACzC,CAAC"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Graph Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts entities and relationships from MCard content using LLM.
|
|
5
|
+
*
|
|
6
|
+
* Mirrors Python: mcard/rag/graph/extractor.py
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* Represents an entity extracted from text.
*
* The `name`, `type` and `description` fields correspond to the entity
* objects requested in the JSON format described by EXTRACTION_SYSTEM_PROMPT.
|
|
10
|
+
*/
|
|
11
|
+
export interface Entity {
|
|
12
|
+
/** Entity name; the extraction prompt asks for names that are concise but unique. */
name: string;
|
|
13
|
+
/** Category of the entity, one of the EntityType literals. */
type: EntityType;
|
|
14
|
+
/** Brief description of the entity. */
description: string;
|
|
15
|
+
/**
* Optional numeric identifier. It is not part of the JSON format requested
* from the LLM; presumably assigned elsewhere (e.g. on storage) - TODO confirm.
*/
id?: number;
|
|
16
|
+
}
|
|
17
|
+
/**
* Category assigned to an extracted entity. The five literals match the
* "Entity types" list in EXTRACTION_SYSTEM_PROMPT.
*/
export type EntityType = 'CONCEPT' | 'TECHNOLOGY' | 'PERSON' | 'ORGANIZATION' | 'OTHER';
|
|
18
|
+
/**
|
|
19
|
+
* Represents a relationship between two entities.
*
* `source`, `target`, `relationship` and `description` correspond to the
* relationship objects requested in the JSON format of EXTRACTION_SYSTEM_PROMPT.
|
|
20
|
+
*/
|
|
21
|
+
export interface Relationship {
|
|
22
|
+
/** First entity of the relationship ("Entity1" in the prompt's format); presumably an Entity name - confirm. */
source: string;
|
|
23
|
+
/** Second entity of the relationship ("Entity2" in the prompt's format); presumably an Entity name - confirm. */
target: string;
|
|
24
|
+
/** Verb phrase describing how source relates to target; the prompt asks for present tense. */
relationship: string;
|
|
25
|
+
/** Context for the relationship (marked "Optional context" in the prompt). */
description: string;
|
|
26
|
+
/**
* Numeric weight of the relationship. Not part of the JSON format requested
* from the LLM; its meaning and range are not visible here - TODO confirm.
*/
weight: number;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Result from entity/relationship extraction.
*
* This is the value that GraphExtractor.extract() resolves to.
|
|
30
|
+
*/
|
|
31
|
+
export interface ExtractionResult {
|
|
32
|
+
/** Entities found in the text. */
entities: Entity[];
|
|
33
|
+
/** Relationships found between entities. */
relationships: Relationship[];
|
|
34
|
+
/** Whether the extraction succeeded. */
success: boolean;
|
|
35
|
+
/** Optional error message; presumably set only when `success` is false - TODO confirm. */
error?: string;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Create an Entity object
*
* @param name - Entity name
* @param type - Optional entity category; the default used when omitted is not visible in this declaration
* @param description - Optional description; the default used when omitted is not visible in this declaration
* @returns The new Entity
|
|
39
|
+
*/
|
|
40
|
+
export declare function createEntity(name: string, type?: EntityType, description?: string): Entity;
|
|
41
|
+
/**
|
|
42
|
+
* Create a Relationship object
*
* @param source - Value for the `source` field
* @param target - Value for the `target` field
* @param relationship - Value for the `relationship` field
* @param description - Optional description; the default used when omitted is not visible in this declaration
* @param weight - Optional weight; the default used when omitted is not visible in this declaration
* @returns The new Relationship
|
|
43
|
+
*/
|
|
44
|
+
export declare function createRelationship(source: string, target: string, relationship: string, description?: string, weight?: number): Relationship;
|
|
45
|
+
/**
|
|
46
|
+
* Create an ExtractionResult object
*
* Every parameter is optional; the defaults applied when a parameter is
* omitted are not visible in this declaration.
*
* @param entities - Optional list of entities
* @param relationships - Optional list of relationships
* @param success - Optional success flag
* @param error - Optional error message
* @returns The new ExtractionResult
|
|
47
|
+
*/
|
|
48
|
+
export declare function createExtractionResult(entities?: Entity[], relationships?: Relationship[], success?: boolean, error?: string): ExtractionResult;
|
|
49
|
+
/**
* System prompt for extraction. It instructs the model to respond only with
* JSON containing an `entities` array (name, type, description) and a
* `relationships` array (source, target, relationship, description), and
* lists the entity types that EntityType enumerates.
*/
export declare const EXTRACTION_SYSTEM_PROMPT = "You are an expert at extracting structured information from text.\nGiven a text, identify:\n1. ENTITIES: Named concepts, technologies, people, organizations, or things\n2. RELATIONSHIPS: How entities relate to each other\n\nRespond ONLY with valid JSON in this format:\n{\n \"entities\": [\n {\"name\": \"EntityName\", \"type\": \"CONCEPT|TECHNOLOGY|PERSON|ORGANIZATION|OTHER\", \"description\": \"Brief description\"}\n ],\n \"relationships\": [\n {\"source\": \"Entity1\", \"target\": \"Entity2\", \"relationship\": \"verb phrase\", \"description\": \"Optional context\"}\n ]\n}\n\nEntity types:\n- CONCEPT: Abstract ideas, methodologies, patterns (e.g., \"content-addressable storage\")\n- TECHNOLOGY: Systems, libraries, frameworks (e.g., \"SQLite\", \"Python\") \n- PERSON: People names\n- ORGANIZATION: Companies, groups\n- OTHER: Anything else\n\nKeep entity names concise but unique. Use present tense for relationships.";
|
|
50
|
+
/**
* User prompt template for extraction. It contains a `{content}` placeholder
* between `---` delimiters; presumably the extractor substitutes the input
* text for it before sending the request - TODO confirm in the implementation.
*/
export declare const EXTRACTION_USER_PROMPT = "Extract entities and relationships from this text:\n\n---\n{content}\n---\n\nRemember: Return ONLY valid JSON.";
|
|
51
|
+
/**
* Configuration for GraphExtractor. The GraphExtractor constructor accepts a
* Partial of this interface, so callers may supply any subset of the fields.
*/
export interface GraphExtractorConfig {
|
|
52
|
+
/** Model identifier; the GraphExtractor usage example passes 'gemma3:latest'. */
model: string;
|
|
53
|
+
/** Presumably the LLM sampling temperature - TODO confirm. */
temperature: number;
|
|
54
|
+
/** Presumably the maximum number of retries for an extraction - TODO confirm how retries are counted. */
maxRetries: number;
|
|
55
|
+
/** Presumably the base URL of the Ollama server that is called - TODO confirm. */
ollamaBaseUrl: string;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Extracts entities and relationships from text using LLM.
*
* Configuration is supplied as a Partial<GraphExtractorConfig>; fields that
* are omitted presumably fall back to built-in defaults, which are not
* visible in this declaration.
|
|
59
|
+
*
|
|
60
|
+
* Usage:
|
|
61
|
+
* const extractor = new GraphExtractor({ model: 'gemma3:latest' });
|
|
62
|
+
* const result = await extractor.extract("MCard is a TypeScript library...");
|
|
63
|
+
*
|
|
64
|
+
* for (const entity of result.entities) {
|
|
65
|
+
* console.log(`${entity.name} (${entity.type})`);
|
|
66
|
+
* }
|
|
67
|
+
*
|
|
68
|
+
* for (const rel of result.relationships) {
|
|
69
|
+
* console.log(`${rel.source} --${rel.relationship}--> ${rel.target}`);
|
|
70
|
+
* }
|
|
71
|
+
*/
|
|
72
|
+
export declare class GraphExtractor {
|
|
73
|
+
/** Configuration held by the instance; its type is not shown in this declaration. */
private config;
|
|
74
|
+
/**
* @param config - Optional partial configuration; any subset of GraphExtractorConfig fields
*/
constructor(config?: Partial<GraphExtractorConfig>);
|
|
75
|
+
/**
|
|
76
|
+
* Extract entities and relationships from content.
*
* NOTE(review): whether failures reject the promise or are reported through
* the result's `success`/`error` fields is not visible here - confirm.
|
|
77
|
+
*
|
|
78
|
+
* @param content - Text to extract from
|
|
79
|
+
* @returns ExtractionResult with entities and relationships
|
|
80
|
+
*/
|
|
81
|
+
extract(content: string): Promise<ExtractionResult>;
|
|
82
|
+
/**
|
|
83
|
+
* Call LLM for extraction
|
|
84
|
+
*/
|
|
85
|
+
private callLLM;
|
|
86
|
+
/**
|
|
87
|
+
* Parse LLM response into structured data
|
|
88
|
+
*/
|
|
89
|
+
private parseResponse;
|
|
90
|
+
/**
|
|
91
|
+
* Try to clean up malformed JSON
|
|
92
|
+
*/
|
|
93
|
+
private cleanJson;
|
|
94
|
+
/**
|
|
95
|
+
* Extract from multiple texts
*
* @param contents - Texts to extract from
* @returns Array of ExtractionResult; presumably one per input, in input order - TODO confirm
|
|
96
|
+
*/
|
|
97
|
+
extractBatch(contents: string[]): Promise<ExtractionResult[]>;
|
|
98
|
+
}
|
|
99
|
+
//# sourceMappingURL=extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../../src/rag/graph/extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAMH;;GAEG;AACH,MAAM,WAAW,MAAM;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,UAAU,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,MAAM,UAAU,GAAG,SAAS,GAAG,YAAY,GAAG,QAAQ,GAAG,cAAc,GAAG,OAAO,CAAC;AAExF;;GAEG;AACH,MAAM,WAAW,YAAY;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC7B,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAClB;AAMD;;GAEG;AACH,wBAAgB,YAAY,CACxB,IAAI,EAAE,MAAM,EACZ,IAAI,GAAE,UAAoB,EAC1B,WAAW,GAAE,MAAW,GACzB,MAAM,CAER;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAC9B,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,WAAW,GAAE,MAAW,EACxB,MAAM,GAAE,MAAY,GACrB,YAAY,CAEd;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAClC,QAAQ,GAAE,MAAM,EAAO,EACvB,aAAa,GAAE,YAAY,EAAO,EAClC,OAAO,GAAE,OAAc,EACvB,KAAK,CAAC,EAAE,MAAM,GACf,gBAAgB,CAElB;AAMD,eAAO,MAAM,wBAAwB,g7BAsBsC,CAAC;AAE5E,eAAO,MAAM,sBAAsB,mHAMD,CAAC;AAMnC,MAAM,WAAW,oBAAoB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;CACzB;AASD;;;;;;;;;;;;;;GAcG;AACH,qBAAa,cAAc;IACvB,OAAO,CAAC,MAAM,CAAuB;gBAEzB,MAAM,GAAE,OAAO,CAAC,oBAAoB,CAAM;IAItD;;;;;OAKG;IACG,OAAO,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;IA+BzD;;OAEG;YACW,OAAO;IA4BrB;;OAEG;IACH,OAAO,CAAC,aAAa;IA6CrB;;OAEG;IACH,OAAO,CAAC,SAAS;IAQjB;;OAEG;IACG,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;CAOtE"}
|