@cmdoss/memwal-sdk 0.6.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +547 -547
- package/BENCHMARKS.md +238 -238
- package/README.md +310 -181
- package/dist/ai-sdk/tools.d.ts +2 -2
- package/dist/ai-sdk/tools.js +2 -2
- package/dist/client/ClientMemoryManager.js +2 -2
- package/dist/client/ClientMemoryManager.js.map +1 -1
- package/dist/client/PersonalDataWallet.d.ts.map +1 -1
- package/dist/client/SimplePDWClient.d.ts +29 -1
- package/dist/client/SimplePDWClient.d.ts.map +1 -1
- package/dist/client/SimplePDWClient.js +45 -13
- package/dist/client/SimplePDWClient.js.map +1 -1
- package/dist/client/namespaces/EmbeddingsNamespace.d.ts +1 -1
- package/dist/client/namespaces/EmbeddingsNamespace.js +1 -1
- package/dist/client/namespaces/MemoryNamespace.d.ts +31 -0
- package/dist/client/namespaces/MemoryNamespace.d.ts.map +1 -1
- package/dist/client/namespaces/MemoryNamespace.js +272 -39
- package/dist/client/namespaces/MemoryNamespace.js.map +1 -1
- package/dist/client/namespaces/consolidated/AINamespace.d.ts +2 -2
- package/dist/client/namespaces/consolidated/AINamespace.js +2 -2
- package/dist/client/namespaces/consolidated/BlockchainNamespace.d.ts +12 -2
- package/dist/client/namespaces/consolidated/BlockchainNamespace.d.ts.map +1 -1
- package/dist/client/namespaces/consolidated/BlockchainNamespace.js +62 -4
- package/dist/client/namespaces/consolidated/BlockchainNamespace.js.map +1 -1
- package/dist/client/namespaces/consolidated/StorageNamespace.d.ts +67 -2
- package/dist/client/namespaces/consolidated/StorageNamespace.d.ts.map +1 -1
- package/dist/client/namespaces/consolidated/StorageNamespace.js +549 -16
- package/dist/client/namespaces/consolidated/StorageNamespace.js.map +1 -1
- package/dist/config/ConfigurationHelper.js +61 -61
- package/dist/config/defaults.js +2 -2
- package/dist/config/defaults.js.map +1 -1
- package/dist/graph/GraphService.js +21 -21
- package/dist/graph/GraphService.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/infrastructure/seal/EncryptionService.d.ts +9 -5
- package/dist/infrastructure/seal/EncryptionService.d.ts.map +1 -1
- package/dist/infrastructure/seal/EncryptionService.js +37 -15
- package/dist/infrastructure/seal/EncryptionService.js.map +1 -1
- package/dist/infrastructure/seal/SealService.d.ts +13 -5
- package/dist/infrastructure/seal/SealService.d.ts.map +1 -1
- package/dist/infrastructure/seal/SealService.js +36 -34
- package/dist/infrastructure/seal/SealService.js.map +1 -1
- package/dist/langchain/createPDWRAG.js +30 -30
- package/dist/retrieval/MemoryDecryptionPipeline.d.ts.map +1 -1
- package/dist/retrieval/MemoryDecryptionPipeline.js +2 -1
- package/dist/retrieval/MemoryDecryptionPipeline.js.map +1 -1
- package/dist/retrieval/MemoryRetrievalService.d.ts +31 -0
- package/dist/retrieval/MemoryRetrievalService.d.ts.map +1 -1
- package/dist/retrieval/MemoryRetrievalService.js +44 -4
- package/dist/retrieval/MemoryRetrievalService.js.map +1 -1
- package/dist/services/CapabilityService.d.ts.map +1 -1
- package/dist/services/CapabilityService.js +30 -14
- package/dist/services/CapabilityService.js.map +1 -1
- package/dist/services/CrossContextPermissionService.d.ts.map +1 -1
- package/dist/services/CrossContextPermissionService.js +9 -7
- package/dist/services/CrossContextPermissionService.js.map +1 -1
- package/dist/services/EmbeddingService.d.ts +28 -1
- package/dist/services/EmbeddingService.d.ts.map +1 -1
- package/dist/services/EmbeddingService.js +54 -0
- package/dist/services/EmbeddingService.js.map +1 -1
- package/dist/services/EncryptionService.d.ts.map +1 -1
- package/dist/services/EncryptionService.js +6 -5
- package/dist/services/EncryptionService.js.map +1 -1
- package/dist/services/GeminiAIService.js +309 -309
- package/dist/services/IndexManager.d.ts +5 -1
- package/dist/services/IndexManager.d.ts.map +1 -1
- package/dist/services/IndexManager.js +17 -40
- package/dist/services/IndexManager.js.map +1 -1
- package/dist/services/QueryService.js +1 -1
- package/dist/services/QueryService.js.map +1 -1
- package/dist/services/StorageService.d.ts +11 -0
- package/dist/services/StorageService.d.ts.map +1 -1
- package/dist/services/StorageService.js +73 -10
- package/dist/services/StorageService.js.map +1 -1
- package/dist/services/TransactionService.d.ts +20 -0
- package/dist/services/TransactionService.d.ts.map +1 -1
- package/dist/services/TransactionService.js +43 -0
- package/dist/services/TransactionService.js.map +1 -1
- package/dist/services/ViewService.js +2 -2
- package/dist/services/ViewService.js.map +1 -1
- package/dist/services/storage/QuiltBatchManager.d.ts +101 -1
- package/dist/services/storage/QuiltBatchManager.d.ts.map +1 -1
- package/dist/services/storage/QuiltBatchManager.js +410 -20
- package/dist/services/storage/QuiltBatchManager.js.map +1 -1
- package/dist/services/storage/index.d.ts +1 -1
- package/dist/services/storage/index.d.ts.map +1 -1
- package/dist/services/storage/index.js.map +1 -1
- package/dist/utils/LRUCache.d.ts +106 -0
- package/dist/utils/LRUCache.d.ts.map +1 -0
- package/dist/utils/LRUCache.js +281 -0
- package/dist/utils/LRUCache.js.map +1 -0
- package/dist/utils/index.d.ts +1 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +2 -0
- package/dist/utils/index.js.map +1 -1
- package/dist/utils/memoryIndexOnChain.d.ts +212 -0
- package/dist/utils/memoryIndexOnChain.d.ts.map +1 -0
- package/dist/utils/memoryIndexOnChain.js +312 -0
- package/dist/utils/memoryIndexOnChain.js.map +1 -0
- package/dist/utils/rebuildIndexNode.d.ts +29 -0
- package/dist/utils/rebuildIndexNode.d.ts.map +1 -1
- package/dist/utils/rebuildIndexNode.js +366 -98
- package/dist/utils/rebuildIndexNode.js.map +1 -1
- package/dist/vector/HnswWasmService.d.ts +20 -5
- package/dist/vector/HnswWasmService.d.ts.map +1 -1
- package/dist/vector/HnswWasmService.js +73 -40
- package/dist/vector/HnswWasmService.js.map +1 -1
- package/dist/vector/IHnswService.d.ts +10 -1
- package/dist/vector/IHnswService.d.ts.map +1 -1
- package/dist/vector/IHnswService.js.map +1 -1
- package/dist/vector/NodeHnswService.d.ts +16 -0
- package/dist/vector/NodeHnswService.d.ts.map +1 -1
- package/dist/vector/NodeHnswService.js +84 -5
- package/dist/vector/NodeHnswService.js.map +1 -1
- package/dist/vector/createHnswService.d.ts +1 -1
- package/dist/vector/createHnswService.js +1 -1
- package/dist/vector/index.d.ts +1 -1
- package/dist/vector/index.js +1 -1
- package/package.json +157 -157
- package/src/access/PermissionService.ts +635 -635
- package/src/aggregation/AggregationService.ts +389 -389
- package/src/ai-sdk/PDWVectorStore.ts +715 -715
- package/src/ai-sdk/index.ts +65 -65
- package/src/ai-sdk/tools.ts +460 -460
- package/src/ai-sdk/types.ts +404 -404
- package/src/batch/BatchManager.ts +597 -597
- package/src/batch/BatchingService.ts +429 -429
- package/src/batch/MemoryProcessingCache.ts +492 -492
- package/src/batch/index.ts +30 -30
- package/src/browser.ts +200 -200
- package/src/client/ClientMemoryManager.ts +987 -987
- package/src/client/PersonalDataWallet.ts +345 -345
- package/src/client/SimplePDWClient.ts +1289 -1222
- package/src/client/factory.ts +154 -154
- package/src/client/namespaces/AnalyticsNamespace.ts +377 -377
- package/src/client/namespaces/BatchNamespace.ts +356 -356
- package/src/client/namespaces/CacheNamespace.ts +123 -123
- package/src/client/namespaces/CapabilityNamespace.ts +217 -217
- package/src/client/namespaces/ClassifyNamespace.ts +169 -169
- package/src/client/namespaces/ContextNamespace.ts +297 -297
- package/src/client/namespaces/EmbeddingsNamespace.ts +99 -99
- package/src/client/namespaces/EncryptionNamespace.ts +221 -221
- package/src/client/namespaces/GraphNamespace.ts +468 -468
- package/src/client/namespaces/IndexNamespace.ts +361 -361
- package/src/client/namespaces/MemoryNamespace.ts +1422 -1135
- package/src/client/namespaces/PermissionsNamespace.ts +254 -254
- package/src/client/namespaces/PipelineNamespace.ts +220 -220
- package/src/client/namespaces/SearchNamespace.ts +1049 -1049
- package/src/client/namespaces/StorageNamespace.ts +458 -458
- package/src/client/namespaces/TxNamespace.ts +260 -260
- package/src/client/namespaces/WalletNamespace.ts +243 -243
- package/src/client/namespaces/consolidated/AINamespace.ts +449 -449
- package/src/client/namespaces/consolidated/BlockchainNamespace.ts +607 -546
- package/src/client/namespaces/consolidated/SecurityNamespace.ts +648 -648
- package/src/client/namespaces/consolidated/StorageNamespace.ts +1141 -497
- package/src/client/namespaces/consolidated/index.ts +39 -39
- package/src/client/signers/KeypairSigner.ts +108 -108
- package/src/client/signers/UnifiedSigner.ts +110 -110
- package/src/client/signers/WalletAdapterSigner.ts +159 -159
- package/src/client/signers/index.ts +26 -26
- package/src/config/ConfigurationHelper.ts +412 -412
- package/src/config/defaults.ts +51 -51
- package/src/config/index.ts +8 -8
- package/src/config/validation.ts +70 -70
- package/src/core/index.ts +14 -14
- package/src/core/interfaces/IService.ts +307 -307
- package/src/core/interfaces/index.ts +8 -8
- package/src/core/types/capability.ts +297 -297
- package/src/core/types/index.ts +870 -870
- package/src/core/types/wallet.ts +270 -270
- package/src/core/types.ts +9 -9
- package/src/core/wallet.ts +222 -222
- package/src/embedding/index.ts +19 -19
- package/src/embedding/types.ts +357 -357
- package/src/errors/index.ts +602 -602
- package/src/errors/recovery.ts +461 -461
- package/src/errors/validation.ts +567 -567
- package/src/generated/pdw/capability.ts +319 -319
- package/src/graph/GraphService.ts +887 -887
- package/src/graph/KnowledgeGraphManager.ts +728 -728
- package/src/graph/index.ts +25 -25
- package/src/index.ts +498 -474
- package/src/infrastructure/index.ts +22 -22
- package/src/infrastructure/seal/EncryptionService.ts +628 -603
- package/src/infrastructure/seal/SealService.ts +613 -615
- package/src/infrastructure/seal/index.ts +9 -9
- package/src/infrastructure/sui/BlockchainManager.ts +627 -627
- package/src/infrastructure/sui/SuiService.ts +888 -888
- package/src/infrastructure/sui/index.ts +9 -9
- package/src/infrastructure/walrus/StorageManager.ts +604 -604
- package/src/infrastructure/walrus/WalrusStorageService.ts +612 -612
- package/src/infrastructure/walrus/index.ts +9 -9
- package/src/langchain/PDWEmbeddings.ts +145 -145
- package/src/langchain/PDWVectorStore.ts +456 -456
- package/src/langchain/createPDWRAG.ts +303 -303
- package/src/langchain/index.ts +47 -47
- package/src/permissions/ConsentRepository.browser.ts +249 -249
- package/src/permissions/ConsentRepository.ts +364 -364
- package/src/pipeline/MemoryPipeline.ts +862 -862
- package/src/pipeline/PipelineManager.ts +683 -683
- package/src/pipeline/index.ts +26 -26
- package/src/retrieval/AdvancedSearchService.ts +629 -629
- package/src/retrieval/MemoryAnalyticsService.ts +711 -711
- package/src/retrieval/MemoryDecryptionPipeline.ts +825 -824
- package/src/retrieval/MemoryRetrievalService.ts +904 -830
- package/src/retrieval/index.ts +42 -42
- package/src/services/BatchService.ts +352 -352
- package/src/services/CapabilityService.ts +464 -448
- package/src/services/ClassifierService.ts +465 -465
- package/src/services/CrossContextPermissionService.ts +486 -484
- package/src/services/EmbeddingService.ts +771 -706
- package/src/services/EncryptionService.ts +712 -711
- package/src/services/GeminiAIService.ts +753 -753
- package/src/services/IndexManager.ts +977 -1004
- package/src/services/MemoryIndexService.ts +1003 -1003
- package/src/services/MemoryService.ts +369 -369
- package/src/services/QueryService.ts +890 -890
- package/src/services/StorageService.ts +1182 -1111
- package/src/services/TransactionService.ts +838 -790
- package/src/services/VectorService.ts +462 -462
- package/src/services/ViewService.ts +484 -484
- package/src/services/index.ts +25 -25
- package/src/services/storage/BlobAttributesManager.ts +333 -333
- package/src/services/storage/KnowledgeGraphManager.ts +425 -425
- package/src/services/storage/MemorySearchManager.ts +387 -387
- package/src/services/storage/QuiltBatchManager.ts +1130 -660
- package/src/services/storage/WalrusMetadataManager.ts +268 -268
- package/src/services/storage/WalrusStorageManager.ts +287 -287
- package/src/services/storage/index.ts +57 -52
- package/src/types/index.ts +13 -13
- package/src/utils/LRUCache.ts +378 -0
- package/src/utils/index.ts +76 -68
- package/src/utils/memoryIndexOnChain.ts +507 -0
- package/src/utils/rebuildIndex.ts +290 -290
- package/src/utils/rebuildIndexNode.ts +771 -424
- package/src/vector/BrowserHnswIndexService.ts +758 -758
- package/src/vector/HnswWasmService.ts +731 -679
- package/src/vector/IHnswService.ts +233 -224
- package/src/vector/NodeHnswService.ts +833 -735
- package/src/vector/VectorManager.ts +478 -478
- package/src/vector/createHnswService.ts +135 -135
- package/src/vector/index.ts +56 -56
- package/src/wallet/ContextWalletService.ts +656 -656
- package/src/wallet/MainWalletService.ts +317 -317
|
@@ -1,888 +1,888 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* GraphService - Knowledge Graph Extraction and Management
|
|
3
|
-
*
|
|
4
|
-
* Ports sophisticated knowledge graph logic from the backend with AI-powered
|
|
5
|
-
* entity/relationship extraction, graph traversal, and intelligent updates.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import { EmbeddingService } from '../services/EmbeddingService';
|
|
9
|
-
import { GeminiAIService, type GeminiConfig } from '../services/GeminiAIService';
|
|
10
|
-
|
|
11
|
-
/**
 * A node of the knowledge graph.
 */
export interface Entity {
  /** Identifier used for exact-match deduplication and as the endpoint key of relationships. */
  id: string;

  /** Label of the entity; entities without a label are rejected when added to a graph. */
  label: string;

  /** Category of the entity; compared against `entityTypes` filters in graph queries. */
  type: string;

  /** Optional free-form attributes. */
  properties?: Record<string, any>;

  /** Optional extraction confidence; a missing value is treated as 0 when filtering by threshold. */
  confidence?: number;

  /** Ids of the memories this entity was extracted from. */
  sourceMemoryIds?: string[];

  /** Timestamp set when the entity is first recorded. */
  createdAt?: Date;

  /** Timestamp set whenever the entity is recorded or updated. */
  lastUpdated?: Date;
}
|
|
21
|
-
|
|
22
|
-
/**
 * A directed edge of the knowledge graph, connecting two entities by id.
 */
export interface Relationship {
  /** Optional identifier; code in this file falls back to `generateRelationshipId` when it is absent. */
  id?: string;

  /** Id of the entity the edge starts from. */
  source: string;

  /** Id of the entity the edge points to. */
  target: string;

  /** Label of the edge; together with `source` and `target` it forms the deduplication key. */
  label: string;

  /** Optional edge category; type filters compare against `type`, or `label` when `type` is empty. */
  type?: string;

  /** Optional free-form attributes. */
  properties?: Record<string, any>;

  /** Optional extraction confidence; a missing value is treated as 0 when filtering by threshold. */
  confidence?: number;

  /** Ids of the memories this relationship was extracted from. */
  sourceMemoryIds?: string[];

  /** Timestamp set when the relationship is first recorded. */
  createdAt?: Date;

  /** Timestamp set whenever the relationship is recorded or updated. */
  lastUpdated?: Date;
}
|
|
34
|
-
|
|
35
|
-
/**
 * A complete knowledge graph: its nodes, its edges and bookkeeping metadata.
 */
export interface KnowledgeGraph {
  /** All entities (nodes) of the graph. */
  entities: Entity[];

  /** All relationships (edges) of the graph. */
  relationships: Relationship[];

  /** Bookkeeping data about the graph as a whole. */
  metadata: {
    /** Version string of the graph. */
    version: string;

    /** When the graph was created. */
    createdAt: Date;

    /** When the graph was last modified. */
    lastUpdated: Date;

    /** Number of entries in `entities`. */
    totalEntities: number;

    /** Number of entries in `relationships`. */
    totalRelationships: number;

    /** Ids of the memories that contributed to the graph. */
    sourceMemories: string[];
  };
}
|
|
47
|
-
|
|
48
|
-
/**
 * Outcome of extracting entities and relationships from one memory.
 */
export interface GraphExtractionResult {
  /** Extracted entities that met the confidence threshold. */
  entities: Entity[];

  /** Extracted relationships that met the confidence threshold. */
  relationships: Relationship[];

  /** Overall confidence of the extraction; 0 when extraction failed. */
  confidence: number;

  /** Elapsed wall-clock time of the extraction, in milliseconds. */
  processingTimeMs: number;

  /** Id of the memory the content came from. */
  extractedFromMemory: string;
}
|
|
55
|
-
|
|
56
|
-
/**
 * Result of a graph query or traversal.
 */
export interface GraphQueryResult {
  /** Entities matched by the query. */
  entities: Entity[];

  /** Relationships matched by the query. */
  relationships: Relationship[];

  /** Optional traversal paths; omitted unless the caller asked for them. */
  paths?: {
    /** Entity ids along the path, in order. */
    entities: string[];

    /** Relationship ids along the path, in order. */
    relationships: string[];

    /** Score assigned to the path. */
    score: number;
  }[];

  /** Number of results reported by the query. */
  totalResults: number;
}
|
|
66
|
-
|
|
67
|
-
/**
 * Options accepted by `GraphService`. Every field is optional; the service
 * constructor fills in a default for each one that is left out.
 */
export interface GraphConfig {
  /** Model name handed to the Gemini AI service. */
  extractionModel?: string;

  /** Minimum confidence an extracted entity or relationship needs to be kept. */
  confidenceThreshold?: number;

  /** Default number of hops for graph traversal. */
  maxHops?: number;

  /** Treated as enabled unless explicitly set to `false`. */
  enableEmbeddings?: boolean;

  /** Similarity a new entity must exceed to be merged into an existing, similar one. */
  deduplicationThreshold?: number;

  /** API key for the Gemini AI service; without one the service uses mock extraction. */
  geminiApiKey?: string;

  /** Extra settings spread into the Gemini AI service configuration. */
  geminiConfig?: Partial<GeminiConfig>;

  /** Forces the mock extraction path. For testing purposes. */
  useMockAI?: boolean;
}
|
|
77
|
-
|
|
78
|
-
/**
|
|
79
|
-
* Advanced knowledge graph service with AI extraction and intelligent management
|
|
80
|
-
*/
|
|
81
|
-
export class GraphService {
|
|
82
|
-
private embeddingService?: EmbeddingService;
|
|
83
|
-
private geminiAI?: GeminiAIService;
|
|
84
|
-
private readonly config: Required<GraphConfig>;
|
|
85
|
-
private graphs = new Map<string, KnowledgeGraph>(); // User graphs cache
|
|
86
|
-
|
|
87
|
-
private extractionStats = {
|
|
88
|
-
totalExtractions: 0,
|
|
89
|
-
averageEntities: 0,
|
|
90
|
-
averageRelationships: 0,
|
|
91
|
-
averageConfidence: 0,
|
|
92
|
-
processingTime: 0
|
|
93
|
-
};
|
|
94
|
-
|
|
95
|
-
/**
 * Build the service and resolve every configuration option to a concrete value.
 *
 * Numeric options fall back to their default only when they are missing or
 * NaN, so an explicit `0` (for example `confidenceThreshold: 0` to keep every
 * extraction) is honoured instead of being replaced by the default.
 *
 * @param config - Partial configuration; omitted fields receive defaults.
 * @param embeddingService - Optional embedding service stored on the instance.
 */
constructor(
  config: Partial<GraphConfig> = {},
  embeddingService?: EmbeddingService
) {
  // Use the supplied number unless it is absent or NaN (0 is a legitimate value).
  const numberOr = (value: number | undefined, fallback: number): number =>
    typeof value === 'number' && !Number.isNaN(value) ? value : fallback;

  this.config = {
    // `||` is intentional for strings: an empty string should fall through to the next source.
    extractionModel: config.extractionModel || process.env.AI_CHAT_MODEL || 'google/gemini-2.5-flash',
    confidenceThreshold: numberOr(config.confidenceThreshold, 0.5),
    maxHops: numberOr(config.maxHops, 3),
    enableEmbeddings: config.enableEmbeddings !== false,
    deduplicationThreshold: numberOr(config.deduplicationThreshold, 0.85),
    geminiApiKey: config.geminiApiKey || process.env.GOOGLE_AI_API_KEY || process.env.GEMINI_API_KEY || '',
    geminiConfig: config.geminiConfig || {},
    useMockAI: config.useMockAI || false
  };

  this.embeddingService = embeddingService;

  // Initialize Gemini AI service if API key is provided and not using mock
  if (this.config.geminiApiKey && !this.config.useMockAI) {
    try {
      this.geminiAI = new GeminiAIService({
        apiKey: this.config.geminiApiKey,
        model: this.config.extractionModel,
        // Spread last so caller-supplied Gemini settings override the two above.
        ...this.config.geminiConfig
      });
    } catch (error) {
      console.warn('Failed to initialize Gemini AI service, falling back to mock:', error);
      // Switch the instance to the mock path so later extractions do not touch Gemini.
      this.config.useMockAI = true;
    }
  }
}
|
|
126
|
-
|
|
127
|
-
// ==================== GRAPH CREATION & MANAGEMENT ====================
|
|
128
|
-
|
|
129
|
-
/**
|
|
130
|
-
* Create empty knowledge graph
|
|
131
|
-
*/
|
|
132
|
-
/**
 * Create a new, empty knowledge graph.
 *
 * @param userId - When given (and non-empty), the new graph is also stored in
 *   the per-user cache under this id, replacing any graph cached there.
 * @returns The freshly created graph.
 */
createGraph(userId?: string): KnowledgeGraph {
  const metadata: KnowledgeGraph['metadata'] = {
    version: '1.0',
    createdAt: new Date(),
    lastUpdated: new Date(),
    totalEntities: 0,
    totalRelationships: 0,
    sourceMemories: []
  };
  const emptyGraph: KnowledgeGraph = { entities: [], relationships: [], metadata };

  // Nothing to cache without a user id.
  if (!userId) {
    return emptyGraph;
  }

  this.graphs.set(userId, emptyGraph);
  return emptyGraph;
}
|
|
152
|
-
|
|
153
|
-
/**
|
|
154
|
-
* Get cached graph for user
|
|
155
|
-
*/
|
|
156
|
-
/**
 * Look up the graph cached for a user.
 *
 * @param userId - Cache key.
 * @returns The cached graph, or `undefined` when nothing is cached under that id.
 */
getUserGraph(userId: string): KnowledgeGraph | undefined {
  const cachedGraph = this.graphs.get(userId);
  return cachedGraph;
}
|
|
159
|
-
|
|
160
|
-
/**
|
|
161
|
-
* Cache graph for user
|
|
162
|
-
*/
|
|
163
|
-
/**
 * Store a graph in the per-user cache, replacing any graph already cached
 * under the same id.
 *
 * @param userId - Cache key.
 * @param graph - Graph to cache.
 */
setUserGraph(userId: string, graph: KnowledgeGraph): void {
  const cache = this.graphs;
  cache.set(userId, graph);
}
|
|
166
|
-
|
|
167
|
-
// ==================== ENTITY & RELATIONSHIP EXTRACTION ====================
|
|
168
|
-
|
|
169
|
-
/**
|
|
170
|
-
* Extract entities and relationships from memory content using AI
|
|
171
|
-
*/
|
|
172
|
-
/**
 * Extract entities and relationships from a piece of memory content.
 *
 * Uses the Gemini AI service when one was initialized and mock mode is off;
 * otherwise falls back to the mock extraction path. Results below the
 * confidence threshold are dropped, and the extraction statistics are updated.
 *
 * @param content - Text to analyse.
 * @param memoryId - Id of the memory the text belongs to; recorded on every
 *   AI-extracted item and echoed back in the result.
 * @param options - `confidenceThreshold` overrides the configured threshold for
 *   this call; an explicit `0` is honoured. `includeEmbeddings` is accepted but
 *   not read by this method.
 * @returns The extraction result. This method does not throw: on failure it
 *   logs the error and returns an empty result with `confidence: 0`.
 */
async extractEntitiesAndRelationships(
  content: string,
  memoryId: string,
  options: {
    includeEmbeddings?: boolean;
    confidenceThreshold?: number;
  } = {}
): Promise<GraphExtractionResult> {
  const startTime = Date.now();

  try {
    // Resolve the threshold once. Only a missing/NaN override falls back to the
    // configured value, so a caller can pass 0 to keep every extracted item.
    const requestedThreshold = options.confidenceThreshold;
    const confidenceThreshold =
      typeof requestedThreshold === 'number' && !Number.isNaN(requestedThreshold)
        ? requestedThreshold
        : this.config.confidenceThreshold;

    let entities: Entity[] = [];
    let relationships: Relationship[] = [];

    if (this.geminiAI && !this.config.useMockAI) {
      // Use real Gemini AI service
      const aiResult = await this.geminiAI.extractEntitiesAndRelationships({
        content,
        confidenceThreshold
      });

      // Convert AI service format to GraphService format
      entities = aiResult.entities.map(e => ({
        id: e.id,
        label: e.label,
        type: e.type,
        confidence: e.confidence,
        properties: e.properties,
        sourceMemoryIds: [memoryId],
        createdAt: new Date(),
        lastUpdated: new Date()
      }));

      relationships = aiResult.relationships.map(r => ({
        id: this.generateRelationshipId(r),
        source: r.source,
        target: r.target,
        label: r.label,
        type: r.type,
        confidence: r.confidence,
        sourceMemoryIds: [memoryId],
        createdAt: new Date(),
        lastUpdated: new Date()
      }));
    } else {
      // Fall back to mock implementation
      console.warn('Using mock AI extraction - configure Gemini API key for real AI processing');
      const response = await this.mockGeminiResponse(content);
      const extracted = this.parseExtractionResponse(response, memoryId);
      entities = extracted.entities;
      relationships = extracted.relationships;
    }

    const processingTime = Date.now() - startTime;

    // Filter by confidence threshold; items without a confidence count as 0.
    entities = entities.filter(e => (e.confidence || 0) >= confidenceThreshold);
    relationships = relationships.filter(r => (r.confidence || 0) >= confidenceThreshold);

    // Calculate overall confidence
    const confidence = this.calculateExtractionConfidence(entities, relationships);

    // Update statistics
    this.updateExtractionStats(entities, relationships, confidence, processingTime);

    return {
      entities,
      relationships,
      confidence,
      processingTimeMs: processingTime,
      extractedFromMemory: memoryId
    };
  } catch (error) {
    console.error('Entity extraction failed:', error);

    // Best-effort contract: report an empty extraction instead of rejecting.
    return {
      entities: [],
      relationships: [],
      confidence: 0,
      processingTimeMs: Date.now() - startTime,
      extractedFromMemory: memoryId
    };
  }
}
|
|
260
|
-
|
|
261
|
-
/**
|
|
262
|
-
* Extract entities from multiple memories in batch
|
|
263
|
-
*/
|
|
264
|
-
/**
 * Run extraction over many memories, a batch at a time.
 *
 * Memories inside one batch are extracted concurrently; batches run one after
 * another with a pause in between to avoid rate limiting.
 *
 * @param memories - Memories to process, as `{ id, content }` pairs.
 * @param options - `batchSize`: memories per batch (default 5). Values that are
 *   missing, NaN or below 1 use the default, and fractions are rounded down,
 *   so the loop always advances. `delayMs`: pause between batches (default
 *   1000). An explicit `0` disables the pause; negative values count as 0.
 * @returns One extraction result per memory, in input order.
 */
async extractFromMemoriesBatch(
  memories: Array<{ id: string; content: string }>,
  options: {
    batchSize?: number;
    delayMs?: number;
  } = {}
): Promise<GraphExtractionResult[]> {
  const requestedSize = options.batchSize;
  // A step below 1 (or a fractional one) would stall the loop or revisit items.
  const batchSize =
    typeof requestedSize === 'number' && requestedSize >= 1
      ? Math.floor(requestedSize)
      : 5;

  const requestedDelay = options.delayMs;
  const delayMs =
    typeof requestedDelay === 'number' && !Number.isNaN(requestedDelay)
      ? Math.max(0, requestedDelay)
      : 1000;

  const results: GraphExtractionResult[] = [];

  // Process in batches to avoid rate limiting
  for (let i = 0; i < memories.length; i += batchSize) {
    const batch = memories.slice(i, i + batchSize);

    const batchResults = await Promise.all(
      batch.map(memory => this.extractEntitiesAndRelationships(memory.content, memory.id))
    );
    results.push(...batchResults);

    // Delay between batches, but not after the last one.
    if (delayMs > 0 && i + batchSize < memories.length) {
      await this.delay(delayMs);
    }
  }

  return results;
}
|
|
294
|
-
|
|
295
|
-
// ==================== GRAPH OPERATIONS ====================
|
|
296
|
-
|
|
297
|
-
/**
|
|
298
|
-
* Add extracted data to knowledge graph with intelligent deduplication
|
|
299
|
-
*/
|
|
300
|
-
/**
 * Merge newly extracted entities and relationships into a graph.
 *
 * The input graph is not modified; a new graph object is returned.
 *
 * Entities:
 *  - an incoming entity whose id is already present is merged into that entity
 *    (always the latest merged version, including ones added earlier in this call);
 *  - otherwise, if a similar entity scores above `deduplicationThreshold`, the
 *    incoming entity is merged into it and its id is remembered as an alias;
 *  - otherwise it is appended as a new entity.
 *
 * Relationships:
 *  - endpoints that refer to an entity merged away above are redirected to the
 *    entity it was merged into, so the edge is kept instead of being dropped;
 *  - relationships are deduplicated on `source|target|label`, both against the
 *    graph and against relationships added earlier in this call;
 *  - a new relationship is only added when both endpoints exist in the graph.
 *
 * @param graph - Graph to extend.
 * @param newEntities - Candidate entities; null entries and entries without
 *   `id` or `label` are ignored.
 * @param newRelationships - Candidate relationships; null entries and entries
 *   without `source`, `target` or `label` are ignored.
 * @param sourceMemoryId - Optional memory id recorded as provenance.
 * @returns The updated graph, or the original `graph` unchanged if an error
 *   occurs (the error is only logged when `NODE_ENV` is `development`).
 */
addToGraph(
  graph: KnowledgeGraph,
  newEntities: Entity[],
  newRelationships: Relationship[],
  sourceMemoryId?: string
): KnowledgeGraph {
  try {
    const now = new Date();
    const provenance = sourceMemoryId ? [sourceMemoryId] : [];

    // Filter out null/undefined and incomplete candidates
    const validNewEntities = newEntities.filter(e => e != null && e.id && e.label);
    const validNewRelationships = newRelationships.filter(r => r != null && r.source && r.target && r.label);

    // ---- Entities ----
    const processedEntities = [...graph.entities];
    // Incoming entity id -> id of the entity it was merged into.
    // Assumes mergeEntities keeps the id of its first argument - TODO confirm.
    const mergedInto = new Map<string, string>();

    for (const newEntity of validNewEntities) {
      // Exact id match against the working list, so repeated ids merge cumulatively.
      const sameIdIndex = processedEntities.findIndex(e => e.id === newEntity.id);
      if (sameIdIndex >= 0) {
        processedEntities[sameIdIndex] = this.mergeEntities(
          processedEntities[sameIdIndex],
          newEntity,
          sourceMemoryId
        );
        continue;
      }

      // Check for similar entities (fuzzy matching)
      const similar = this.findSimilarEntity(newEntity, processedEntities);
      if (similar && this.calculateEntitySimilarity(newEntity, similar) > this.config.deduplicationThreshold) {
        const similarIndex = processedEntities.findIndex(e => e.id === similar.id);
        // Guard against -1 so a failed lookup can never write to index -1.
        if (similarIndex >= 0) {
          processedEntities[similarIndex] = this.mergeEntities(similar, newEntity, sourceMemoryId);
          mergedInto.set(newEntity.id, similar.id);
          continue;
        }
      }

      // Add as new entity
      processedEntities.push({
        ...newEntity,
        createdAt: now,
        lastUpdated: now,
        sourceMemoryIds: [...provenance]
      });
    }

    // ---- Relationships ----
    const relationshipKey = (r: Relationship) => `${r.source}|${r.target}|${r.label}`;
    const processedRelationships = [...graph.relationships];

    // Key -> index of the first relationship with that key in the working list.
    const indexByKey = new Map<string, number>();
    processedRelationships.forEach((r, index) => {
      const key = relationshipKey(r);
      if (!indexByKey.has(key)) {
        indexByKey.set(key, index);
      }
    });

    const knownEntityIds = new Set(processedEntities.map(e => e.id));

    for (const incoming of validNewRelationships) {
      // Redirect endpoints that point at an entity merged away above.
      const source = mergedInto.get(incoming.source) ?? incoming.source;
      const target = mergedInto.get(incoming.target) ?? incoming.target;
      const newRel: Relationship =
        source === incoming.source && target === incoming.target
          ? incoming
          : { ...incoming, source, target };

      const key = relationshipKey(newRel);
      const existingIndex = indexByKey.get(key);

      if (existingIndex === undefined) {
        // Verify entities exist before adding the edge
        if (knownEntityIds.has(source) && knownEntityIds.has(target)) {
          // Register the key so a duplicate later in this batch updates instead of re-adding.
          indexByKey.set(key, processedRelationships.length);
          processedRelationships.push({
            ...newRel,
            id: this.generateRelationshipId(newRel),
            createdAt: now,
            lastUpdated: now,
            sourceMemoryIds: [...provenance]
          });
        }
        continue;
      }

      // Update existing relationship
      const existing = processedRelationships[existingIndex];
      processedRelationships[existingIndex] = {
        ...existing,
        lastUpdated: now,
        confidence: Math.max(existing.confidence || 0, newRel.confidence || 0),
        // De-duplicate so re-processing the same memory does not grow the list.
        sourceMemoryIds: [...new Set([...(existing.sourceMemoryIds || []), ...provenance])]
      };
    }

    // ---- Assemble the updated graph ----
    return {
      ...graph,
      entities: processedEntities,
      relationships: processedRelationships,
      metadata: {
        ...graph.metadata,
        lastUpdated: now,
        totalEntities: processedEntities.length,
        totalRelationships: processedRelationships.length,
        sourceMemories: sourceMemoryId
          ? [...new Set([...graph.metadata.sourceMemories, sourceMemoryId])]
          : graph.metadata.sourceMemories
      }
    };
  } catch (error) {
    // Only log detailed errors in development mode
    if (process.env.NODE_ENV === 'development') {
      console.error('Error adding to graph:', error);
    }
    return graph; // Return original graph on error
  }
}
|
|
417
|
-
|
|
418
|
-
/**
|
|
419
|
-
* Find related entities using graph traversal
|
|
420
|
-
*/
|
|
421
|
-
/**
 * Find entities related to a set of seed entities by breadth-first traversal.
 *
 * Relationships are followed in both directions. Traversal stops after
 * `maxHops` levels or when no new entities are reached.
 *
 * @param graph - Graph to traverse. A missing graph, or one without
 *   `entities`/`relationships`, yields an empty result.
 * @param seedEntityIds - Ids to start from; seeds are part of the result.
 * @param options - `maxHops`: traversal depth for this call (defaults to the
 *   configured value; an explicit `0` returns only the seed entities).
 *   `relationshipTypes`: when given, only relationships whose `type` (or
 *   `label`, when `type` is empty) is listed are followed. `includeWeights`:
 *   when true, the result carries one two-entity path per discovered entity,
 *   scored with the relationship's confidence (0.5 when absent).
 * @returns Matching entities and the relationships through which each new
 *   entity was first reached. On an unexpected error the error is logged and
 *   an empty result is returned.
 */
findRelatedEntities(
  graph: KnowledgeGraph,
  seedEntityIds: string[],
  options: {
    maxHops?: number;
    relationshipTypes?: string[];
    includeWeights?: boolean;
  } = {}
): GraphQueryResult {
  // Only a missing/NaN override falls back to the configured depth, so 0 is honoured.
  const requestedHops = options.maxHops;
  const maxHops =
    typeof requestedHops === 'number' && !Number.isNaN(requestedHops)
      ? requestedHops
      : this.config.maxHops;
  const relationshipTypes = options.relationshipTypes;

  try {
    // Same guard as queryGraph: treat a missing graph as empty rather than as an error.
    if (!graph || !graph.entities || !graph.relationships) {
      return {
        entities: [],
        relationships: [],
        totalResults: 0
      };
    }

    const relationshipIdOf = (r: Relationship) => r.id || this.generateRelationshipId(r);

    // Index type-matching relationships by endpoint once, instead of rescanning
    // the whole relationship list for every frontier entity.
    const adjacency = new Map<string, Relationship[]>();
    const link = (entityId: string, relationship: Relationship) => {
      const bucket = adjacency.get(entityId);
      if (bucket) {
        bucket.push(relationship);
      } else {
        adjacency.set(entityId, [relationship]);
      }
    };
    for (const relationship of graph.relationships) {
      if (relationshipTypes && !relationshipTypes.includes(relationship.type || relationship.label)) {
        continue;
      }
      link(relationship.source, relationship);
      if (relationship.target !== relationship.source) {
        link(relationship.target, relationship);
      }
    }

    // BFS traversal to find related entities. Every visited entity is part of the result.
    const relatedEntityIds = new Set<string>(seedEntityIds);
    const discoveredRelationships = new Set<string>();
    const paths: Array<{ entities: string[]; relationships: string[]; score: number }> = [];

    let frontier = seedEntityIds;
    for (let hop = 0; hop < maxHops && frontier.length > 0; hop++) {
      const nextFrontier: string[] = [];

      for (const entityId of frontier) {
        for (const relationship of adjacency.get(entityId) ?? []) {
          const neighborId = relationship.source === entityId ? relationship.target : relationship.source;

          // NOTE(review): edges between two already-visited entities are not
          // reported; only the edge that first reaches an entity is.
          if (relatedEntityIds.has(neighborId)) {
            continue;
          }

          relatedEntityIds.add(neighborId);
          nextFrontier.push(neighborId);

          const relationshipId = relationshipIdOf(relationship);
          discoveredRelationships.add(relationshipId);

          // Track path
          if (options.includeWeights) {
            paths.push({
              entities: [entityId, neighborId],
              relationships: [relationshipId],
              score: relationship.confidence || 0.5
            });
          }
        }
      }

      frontier = nextFrontier;
    }

    // Get entity and relationship objects, in graph order
    const relatedEntities = graph.entities.filter(e => relatedEntityIds.has(e.id));
    const relatedRelationships = graph.relationships.filter(r =>
      discoveredRelationships.has(relationshipIdOf(r))
    );

    return {
      entities: relatedEntities,
      relationships: relatedRelationships,
      paths: options.includeWeights ? paths : undefined,
      totalResults: relatedEntities.length
    };
  } catch (error) {
    console.error('Error finding related entities:', error);
    return {
      entities: [],
      relationships: [],
      totalResults: 0
    };
  }
}
|
|
501
|
-
|
|
502
|
-
/**
 * Query graph by entity type or relationship patterns.
 *
 * Entities and relationships are filtered independently of each other: the
 * entity-type filter only touches entities, the relationship-type filter only
 * touches relationships, and the text search is applied to both lists.
 *
 * @param graph - Graph to search. A missing graph, or one without `entities`
 *   or `relationships`, yields an empty result.
 * @param query - Filters to apply. `entityTypes` / `relationshipTypes` are
 *   ignored when empty; `searchText` is matched case-insensitively against the
 *   label and the JSON-serialized properties; `limit` caps each list
 *   separately and is ignored unless it is a positive number.
 * @returns The matching entities and relationships. `totalResults` is the
 *   combined length of the two returned (already limited) lists. Any error
 *   raised while filtering produces an empty result.
 */
queryGraph(
  graph: KnowledgeGraph,
  query: {
    entityTypes?: string[];
    relationshipTypes?: string[];
    searchText?: string;
    limit?: number;
  }
): GraphQueryResult {
  const emptyResult = (): GraphQueryResult => ({
    entities: [],
    relationships: [],
    totalResults: 0
  });

  try {
    // Handle null/undefined graph
    if (!graph || !graph.entities || !graph.relationships) {
      return emptyResult();
    }

    let entities = graph.entities;
    let relationships = graph.relationships;

    // Filter by entity types
    const entityTypes = query.entityTypes;
    if (entityTypes && entityTypes.length > 0) {
      entities = entities.filter(e => entityTypes.includes(e.type));
    }

    // Filter by relationship types (falls back to the label when no type is set)
    const relationshipTypes = query.relationshipTypes;
    if (relationshipTypes && relationshipTypes.length > 0) {
      relationships = relationships.filter(r => relationshipTypes.includes(r.type || r.label));
    }

    // Text search in labels and properties
    if (query.searchText) {
      const needle = query.searchText.toLowerCase();
      // A missing label is treated as an empty string so that one malformed
      // item cannot throw and blank out the whole result.
      const matches = (item: { label?: string; properties?: Record<string, unknown> }): boolean =>
        (item.label ?? '').toLowerCase().includes(needle) ||
        JSON.stringify(item.properties || {}).toLowerCase().includes(needle);

      entities = entities.filter(matches);
      relationships = relationships.filter(matches);
    }

    // Apply limit; a negative value would make slice() drop trailing items,
    // so only positive limits are honored.
    const limit = query.limit;
    if (typeof limit === 'number' && limit > 0) {
      entities = entities.slice(0, limit);
      relationships = relationships.slice(0, limit);
    }

    return {
      entities,
      relationships,
      totalResults: entities.length + relationships.length
    };
  } catch (error) {
    // Only log detailed errors in development mode
    if (process.env.NODE_ENV === 'development') {
      console.error('Error querying graph:', error);
    }
    return emptyResult();
  }
}
|
|
578
|
-
|
|
579
|
-
// ==================== STATISTICS & MONITORING ====================
|
|
580
|
-
|
|
581
|
-
/**
 * Get graph statistics.
 *
 * @param graph - Graph to summarize.
 * @returns Entity and relationship totals, per-type counts (a relationship
 *   without a `type` is counted under its `label`), the average number of
 *   relationship endpoints per entity, the graph density
 *   (`2 * relationships / (n * (n - 1))`, reported as 0 for graphs with fewer
 *   than two entities), the service's extraction statistics object, and the
 *   graph's `metadata.lastUpdated`.
 */
getGraphStats(graph: KnowledgeGraph) {
  const entityTypes = new Map<string, number>();
  const relationshipTypes = new Map<string, number>();
  const increment = (counts: Map<string, number>, key: string): void => {
    counts.set(key, (counts.get(key) || 0) + 1);
  };

  // Count entity types
  for (const entity of graph.entities) {
    increment(entityTypes, entity.type);
  }

  // Count relationship types
  for (const relationship of graph.relationships) {
    increment(relationshipTypes, relationship.type || relationship.label);
  }

  // Every relationship contributes two endpoints (source and target), so the
  // sum of all per-entity connection counts is twice the relationship count.
  const entityCount = graph.entities.length;
  const totalEndpoints = graph.relationships.length * 2;

  const avgConnections = entityCount > 0 ? totalEndpoints / entityCount : 0;

  // n * (n - 1) is zero for a single-entity graph; guard against 0/0 (NaN)
  // and x/0 (Infinity).
  const graphDensity = entityCount > 1
    ? totalEndpoints / (entityCount * (entityCount - 1))
    : 0;

  return {
    totalEntities: entityCount,
    totalRelationships: graph.relationships.length,
    entityTypes: Object.fromEntries(entityTypes),
    relationshipTypes: Object.fromEntries(relationshipTypes),
    averageConnections: avgConnections,
    graphDensity,
    extractionStats: this.extractionStats,
    lastUpdated: graph.metadata.lastUpdated
  };
}
|
|
623
|
-
|
|
624
|
-
// ==================== PRIVATE METHODS ====================
|
|
625
|
-
|
|
626
|
-
/**
 * Build the prompt text asking for entities and relationships as JSON.
 *
 * @param content - Text to analyze; inserted verbatim after the `TEXT:` line.
 * @returns The prompt: a leading newline, the instructions, the content and a
 *   trailing `JSON:` cue.
 */
private buildExtractionPrompt(content: string): string {
  const promptLines = [
    '',
    'Extract entities and relationships from the following text. Focus on meaningful entities (people, places, concepts, organizations) and clear relationships between them.',
    '',
    'Format your response as valid JSON with "entities" and "relationships" arrays.',
    '',
    'For entities:',
    '- "id": unique identifier using meaningful names with underscores (e.g., "john_doe", "machine_learning")',
    '- "label": display name (e.g., "John Doe", "Machine Learning")',
    '- "type": entity type (person, concept, organization, location, event, skill, technology, etc.)',
    '- "confidence": confidence score 0.0-1.0',
    '',
    'For relationships:',
    '- "source": source entity id',
    '- "target": target entity id',
    '- "label": relationship description (e.g., "works at", "uses", "located in")',
    '- "confidence": confidence score 0.0-1.0',
    '',
    'TEXT:',
    `${content}`,
    '',
    'JSON:'
  ];
  return promptLines.join('\n');
}
|
|
649
|
-
|
|
650
|
-
/**
 * Produce a mock extraction response without calling an AI service.
 *
 * Runs the heuristic entity and relationship extractors on the content and
 * serializes the result as JSON, assigning a fixed confidence of 0.8 to every
 * entity and 0.7 to every relationship.
 *
 * @param content - Text to extract from.
 * @returns JSON string with `entities` and `relationships` arrays.
 */
private async mockGeminiResponse(content: string): Promise<string> {
  // Mock response for development - replace with actual AI service
  const heuristicEntities = this.extractEntitiesHeuristic(content);
  const heuristicRelationships = this.extractRelationshipsHeuristic(content, heuristicEntities);

  const payload = {
    entities: heuristicEntities.map(({ id, label, type }) => ({
      id,
      label,
      type,
      confidence: 0.8
    })),
    relationships: heuristicRelationships.map(({ source, target, label }) => ({
      source,
      target,
      label,
      confidence: 0.7
    }))
  };

  return JSON.stringify(payload);
}
|
|
670
|
-
|
|
671
|
-
/**
 * Heuristic entity extraction used by the mock AI path.
 *
 * Splits the content on whitespace and turns the first five distinct
 * capitalized words (an uppercase letter followed by lowercase letters, longer
 * than two characters) into `concept` entities with confidence 0.6.
 * Punctuation surrounding a word is stripped before it is used, and a word
 * that maps to an already-emitted id is skipped so ids stay unique.
 *
 * @param content - Text to scan.
 * @returns At most five entities, in order of first appearance.
 */
private extractEntitiesHeuristic(content: string): Entity[] {
  const MAX_ENTITIES = 5;
  const entities: Entity[] = [];
  const seenIds = new Set<string>();

  // Simple heuristic extraction (replace with actual AI)
  for (const token of content.split(/\s+/)) {
    // Drop leading/trailing punctuation so "Bob." and "Bob" are the same word.
    const word = token.replace(/^[^A-Za-z0-9]+|[^A-Za-z0-9]+$/g, '');
    if (word.length <= 2 || !/^[A-Z][a-z]+/.test(word)) {
      continue;
    }

    const id = word.toLowerCase().replace(/[^\w]/g, '_');
    if (seenIds.has(id)) {
      continue;
    }
    seenIds.add(id);

    entities.push({
      id,
      label: word,
      type: 'concept',
      confidence: 0.6
    });

    if (entities.length === MAX_ENTITIES) {
      break;
    }
  }

  return entities;
}
|
|
691
|
-
|
|
692
|
-
/**
 * Heuristic relationship extraction used by the mock AI path.
 *
 * Links the first two entities with a single "related to" relationship of
 * confidence 0.5. The content itself is not inspected.
 *
 * @param content - Source text (unused by this heuristic).
 * @param entities - Entities previously extracted from the content.
 * @returns One relationship when at least two entities exist, otherwise none.
 */
private extractRelationshipsHeuristic(content: string, entities: Entity[]): Relationship[] {
  // Simple relationship extraction
  if (entities.length < 2) {
    return [];
  }

  const first = entities[0];
  const second = entities[1];
  const link: Relationship = {
    source: first.id,
    target: second.id,
    label: 'related to',
    confidence: 0.5
  };
  return [link];
}
|
|
707
|
-
|
|
708
|
-
/**
 * Parse a JSON extraction response into entities and relationships.
 *
 * Entity ids are sanitized; an entity without an id receives a random
 * `entity_…` id. Relationships are kept only when both endpoints refer to an
 * id present in the response, and their endpoints are rewritten to the
 * sanitized ids. Entries that are not objects are skipped.
 *
 * @param response - JSON text with `entities` and `relationships` arrays.
 * @param memoryId - Memory id recorded as the source of every parsed item.
 * @returns The parsed items, or two empty arrays when the response is not
 *   valid JSON or lacks either array (the error is logged).
 */
private parseExtractionResponse(response: string, memoryId: string): { entities: Entity[]; relationships: Relationship[] } {
  try {
    const parsed = JSON.parse(response);

    if (!parsed || !Array.isArray(parsed.entities) || !Array.isArray(parsed.relationships)) {
      throw new Error('Invalid response format');
    }

    // Only a missing/non-numeric confidence falls back to 0.5; an explicit 0
    // is a legitimate score and must not be promoted.
    const toConfidence = (value: unknown): number =>
      typeof value === 'number' && Number.isFinite(value) ? value : 0.5;
    const isRecord = (value: unknown): boolean => value != null && typeof value === 'object';

    // Skip malformed entries instead of letting one of them fail the whole parse.
    const rawEntities: any[] = parsed.entities.filter(isRecord);

    const entities: Entity[] = rawEntities.map((e: any) => ({
      id: this.sanitizeId(String(e.id || `entity_${Math.random().toString(36).substring(2, 10)}`)),
      label: e.label || 'Unnamed Entity',
      type: e.type || 'concept',
      confidence: toConfidence(e.confidence),
      sourceMemoryIds: [memoryId]
    }));

    // Map the ids as they appeared in the response to the sanitized ids.
    const idMap = new Map<string, string>();
    rawEntities.forEach((e: any, i: number) => {
      idMap.set(e.id || '', entities[i].id);
    });

    const relationships: Relationship[] = parsed.relationships
      .filter((r: any) => isRecord(r) && r.source && r.target && idMap.has(r.source) && idMap.has(r.target))
      .map((r: any) => ({
        source: idMap.get(r.source) || '',
        target: idMap.get(r.target) || '',
        label: r.label || 'related to',
        confidence: toConfidence(r.confidence),
        sourceMemoryIds: [memoryId]
      }));

    return { entities, relationships };
  } catch (error) {
    console.error('Failed to parse extraction response:', error);
    return { entities: [], relationships: [] };
  }
}
|
|
747
|
-
|
|
748
|
-
/**
 * Normalize an identifier: every character other than an ASCII letter, digit,
 * underscore or hyphen becomes `_`, and the result is lower-cased.
 *
 * @param id - Raw identifier.
 * @returns The sanitized identifier.
 */
private sanitizeId(id: string): string {
  // Replace first, then lower-case: the order matters for characters whose
  // lower-case form is an ASCII letter.
  const replaced = id.replace(/[^\w_-]/g, '_');
  return replaced.toLowerCase();
}
|
|
751
|
-
|
|
752
|
-
/**
 * Average the confidence of all extracted entities and relationships.
 *
 * An item whose confidence is missing (or 0) counts as 0.5.
 *
 * @param entities - Extracted entities.
 * @param relationships - Extracted relationships.
 * @returns The mean confidence, or 0 when both lists are empty.
 */
private calculateExtractionConfidence(entities: Entity[], relationships: Relationship[]): number {
  const itemCount = entities.length + relationships.length;
  if (itemCount === 0) {
    return 0;
  }

  let total = 0;
  for (const entity of entities) {
    total += entity.confidence || 0.5;
  }
  for (const relationship of relationships) {
    total += relationship.confidence || 0.5;
  }

  return total / itemCount;
}
|
|
761
|
-
|
|
762
|
-
/**
 * Merge a newly extracted entity into an existing one.
 *
 * The existing entity supplies the base (including its id); the new entity's
 * label and type win when present, the higher confidence is kept, properties
 * are shallow-merged with the new entity's values taking precedence, and
 * `lastUpdated` is set to the current time.
 *
 * @param existing - Entity already stored in the graph.
 * @param newEntity - Incoming entity describing the same thing.
 * @param sourceMemoryId - Memory the incoming entity was extracted from.
 * @returns A new entity object; neither argument is modified. Its
 *   `sourceMemoryIds` is the de-duplicated union of both entities' source ids
 *   and `sourceMemoryId`.
 */
private mergeEntities(existing: Entity, newEntity: Entity, sourceMemoryId?: string): Entity {
  // A Set keeps first-seen order while preventing the list from growing each
  // time the same memory is merged again.
  const sourceMemoryIds = [
    ...new Set([
      ...(existing.sourceMemoryIds || []),
      ...(newEntity.sourceMemoryIds || []),
      ...(sourceMemoryId ? [sourceMemoryId] : [])
    ])
  ];

  return {
    ...existing,
    label: newEntity.label || existing.label,
    type: newEntity.type || existing.type,
    confidence: Math.max(existing.confidence || 0, newEntity.confidence || 0),
    properties: { ...existing.properties, ...newEntity.properties },
    sourceMemoryIds,
    lastUpdated: new Date()
  };
}
|
|
776
|
-
|
|
777
|
-
/**
 * Find the first entity whose similarity to the given one exceeds the
 * configured deduplication threshold.
 *
 * @param entity - Entity to match.
 * @param entities - Candidates, checked in array order.
 * @returns The first candidate above the threshold, or `undefined`.
 */
private findSimilarEntity(entity: Entity, entities: Entity[]): Entity | undefined {
  return entities.find(
    candidate => this.calculateEntitySimilarity(entity, candidate) > this.config.deduplicationThreshold
  );
}
|
|
785
|
-
|
|
786
|
-
/**
 * Score how alike two entities are.
 *
 * The score is 80% case-insensitive label similarity plus 20% for an exact
 * type match.
 *
 * @param a - First entity.
 * @param b - Second entity.
 * @returns The weighted similarity score.
 */
private calculateEntitySimilarity(a: Entity, b: Entity): number {
  const labelWeight = 0.8;
  const typeWeight = 0.2;

  // Simple similarity based on label and type
  const labelScore = this.stringSimilarity(a.label.toLowerCase(), b.label.toLowerCase());
  const typeScore = a.type === b.type ? 1.0 : 0.0;

  return (labelScore * labelWeight) + (typeScore * typeWeight);
}
|
|
793
|
-
|
|
794
|
-
/**
 * Similarity of two strings derived from their edit distance.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns `(longerLength - editDistance) / longerLength`, or 1.0 when both
 *   strings are empty.
 */
private stringSimilarity(a: string, b: string): number {
  const aIsLonger = a.length > b.length;
  const [longer, shorter] = aIsLonger ? [a, b] : [b, a];

  if (longer.length === 0) {
    return 1.0;
  }

  const edits = this.levenshteinDistance(longer, shorter);
  return (longer.length - edits) / longer.length;
}
|
|
803
|
-
|
|
804
|
-
/**
 * Levenshtein edit distance between two strings.
 *
 * Counts the minimum number of single UTF-16 code unit insertions, deletions
 * and substitutions needed to turn one string into the other. Only two rows
 * of the dynamic-programming table are kept at a time.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance (0 for identical strings).
 */
private levenshteinDistance(a: string, b: string): number {
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // previous[i] holds the distance between the first i units of `a` and the
  // prefix of `b` handled by the previous outer iteration.
  let previous: number[] = Array.from({ length: a.length + 1 }, (_, i) => i);
  let current: number[] = new Array<number>(a.length + 1).fill(0);

  for (let j = 1; j <= b.length; j++) {
    current[0] = j;
    for (let i = 1; i <= a.length; i++) {
      const substitutionCost = a[i - 1] === b[j - 1] ? 0 : 1;
      current[i] = Math.min(
        current[i - 1] + 1, // insertion
        previous[i] + 1, // deletion
        previous[i - 1] + substitutionCost // substitution or match
      );
    }
    // Reuse the two buffers instead of allocating a new row.
    [previous, current] = [current, previous];
  }

  return previous[a.length];
}
|
|
823
|
-
|
|
824
|
-
/**
 * Derive an id for a relationship from its source, target and label.
 *
 * @param relationship - Relationship to identify.
 * @returns The sanitized form of `source_target_label`.
 */
private generateRelationshipId(relationship: Relationship): string {
  const { source, target, label } = relationship;
  const rawId = `${source}_${target}_${label}`;
  return this.sanitizeId(rawId);
}
|
|
828
|
-
|
|
829
|
-
/**
 * Fold one extraction run into the running extraction statistics.
 *
 * Increments `totalExtractions` and updates each average as a true running
 * mean: `newMean = (oldMean * previousCount + sample) / newCount`.
 *
 * @param entities - Entities produced by the run.
 * @param relationships - Relationships produced by the run.
 * @param confidence - Overall confidence of the run.
 * @param processingTime - Duration of the run in milliseconds.
 */
private updateExtractionStats(entities: Entity[], relationships: Relationship[], confidence: number, processingTime: number): void {
  const stats = this.extractionStats;
  const previousCount = stats.totalExtractions;
  const newCount = previousCount + 1;

  // The old mean must be weighted by the number of samples it represents;
  // dividing (oldMean + sample) by the count under-reports every average.
  const runningMean = (oldMean: number, sample: number): number =>
    (oldMean * previousCount + sample) / newCount;

  stats.totalExtractions = newCount;
  stats.averageEntities = runningMean(stats.averageEntities, entities.length);
  stats.averageRelationships = runningMean(stats.averageRelationships, relationships.length);
  stats.averageConfidence = runningMean(stats.averageConfidence, confidence);
  stats.processingTime = runningMean(stats.processingTime, processingTime);
}
|
|
840
|
-
|
|
841
|
-
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - Timer duration passed to `setTimeout`.
 * @returns A promise that resolves when the timer fires.
 */
private delay(ms: number): Promise<void> {
  return new Promise<void>(done => {
    setTimeout(done, ms);
  });
}
|
|
844
|
-
|
|
845
|
-
// ==================== SERVICE MANAGEMENT ====================
|
|
846
|
-
|
|
847
|
-
/**
 * Test AI service connectivity.
 *
 * @returns `{ connected: false, usingMock: true, service: 'mock' }` when mock
 *   mode is enabled or no Gemini service was created; otherwise the result of
 *   the Gemini service's own connection test, with `connected: false` if that
 *   test throws (the error is logged).
 */
async testAIConnection(): Promise<{ connected: boolean; usingMock: boolean; service: string }> {
  const ai = this.geminiAI;
  if (!ai || this.config.useMockAI) {
    return { connected: false, usingMock: true, service: 'mock' };
  }

  let connected: boolean;
  try {
    connected = await ai.testConnection();
  } catch (error) {
    console.error('AI connection test failed:', error);
    return { connected: false, usingMock: false, service: 'gemini' };
  }
  return { connected, usingMock: false, service: 'gemini' };
}
|
|
863
|
-
|
|
864
|
-
/**
 * Get service configuration (without sensitive data).
 *
 * @returns Every resolved configuration field except `geminiApiKey`, plus
 *   `aiConfigured`, which is true when an API key is set and mock mode is off.
 *   `geminiConfig` is returned by reference.
 */
getConfig(): Omit<Required<GraphConfig>, 'geminiApiKey'> & { aiConfigured: boolean } {
  const {
    extractionModel,
    confidenceThreshold,
    maxHops,
    enableEmbeddings,
    deduplicationThreshold,
    geminiConfig,
    useMockAI,
    geminiApiKey
  } = this.config;

  const aiConfigured = !!geminiApiKey && !useMockAI;

  return {
    extractionModel,
    confidenceThreshold,
    maxHops,
    enableEmbeddings,
    deduplicationThreshold,
    geminiConfig,
    useMockAI,
    aiConfigured
  };
}
|
|
879
|
-
|
|
880
|
-
/**
 * Get extraction statistics.
 *
 * @returns A shallow copy of the statistics, so callers cannot modify the
 *   service's own counters through it.
 */
getExtractionStats() {
  const snapshot = { ...this.extractionStats };
  return snapshot;
}
|
|
886
|
-
}
|
|
887
|
-
|
|
1
|
+
/**
|
|
2
|
+
* GraphService - Knowledge Graph Extraction and Management
|
|
3
|
+
*
|
|
4
|
+
* Ports sophisticated knowledge graph logic from the backend with AI-powered
|
|
5
|
+
* entity/relationship extraction, graph traversal, and intelligent updates.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { EmbeddingService } from '../services/EmbeddingService';
|
|
9
|
+
import { GeminiAIService, type GeminiConfig } from '../services/GeminiAIService';
|
|
10
|
+
|
|
11
|
+
/** A node of the knowledge graph. */
export interface Entity {
  /** Identifier of the entity within a graph. */
  id: string;
  /** Display name. */
  label: string;
  /** Category of the entity. */
  type: string;
  /** Free-form additional attributes. */
  properties?: Record<string, any>;
  /** Confidence score attached to the entity. */
  confidence?: number;
  /** Ids of the memories this entity was extracted from. */
  sourceMemoryIds?: string[];
  /** When the entity was created. */
  createdAt?: Date;
  /** When the entity was last changed. */
  lastUpdated?: Date;
}
|
|
21
|
+
|
|
22
|
+
/** An edge of the knowledge graph connecting two entities. */
export interface Relationship {
  /** Identifier of the relationship, when one has been assigned. */
  id?: string;
  /** Id of the entity the relationship starts from. */
  source: string;
  /** Id of the entity the relationship points to. */
  target: string;
  /** Human-readable description of the relationship. */
  label: string;
  /** Optional relationship category. */
  type?: string;
  /** Free-form additional attributes. */
  properties?: Record<string, any>;
  /** Confidence score attached to the relationship. */
  confidence?: number;
  /** Ids of the memories this relationship was extracted from. */
  sourceMemoryIds?: string[];
  /** When the relationship was created. */
  createdAt?: Date;
  /** When the relationship was last changed. */
  lastUpdated?: Date;
}
|
|
34
|
+
|
|
35
|
+
/** A knowledge graph: its entities, its relationships and bookkeeping metadata. */
export interface KnowledgeGraph {
  /** All entities (nodes) in the graph. */
  entities: Entity[];
  /** All relationships (edges) in the graph. */
  relationships: Relationship[];
  /** Bookkeeping information about the graph as a whole. */
  metadata: {
    /** Version string of the graph. */
    version: string;
    /** When the graph was created. */
    createdAt: Date;
    /** When the graph was last changed. */
    lastUpdated: Date;
    /** Number of entities recorded for the graph. */
    totalEntities: number;
    /** Number of relationships recorded for the graph. */
    totalRelationships: number;
    /** Ids of the memories that contributed to the graph. */
    sourceMemories: string[];
  };
}
|
|
47
|
+
|
|
48
|
+
/** Outcome of extracting entities and relationships from one memory. */
export interface GraphExtractionResult {
  /** Entities found in the memory. */
  entities: Entity[];
  /** Relationships found in the memory. */
  relationships: Relationship[];
  /** Overall confidence of the extraction. */
  confidence: number;
  /** How long the extraction took, in milliseconds. */
  processingTimeMs: number;
  /** Id of the memory the items were extracted from. */
  extractedFromMemory: string;
}
|
|
55
|
+
|
|
56
|
+
/** Result of a graph query or traversal. */
export interface GraphQueryResult {
  /** Entities matched by the query. */
  entities: Entity[];
  /** Relationships matched by the query. */
  relationships: Relationship[];
  /** Optional traversal paths, each with the ids it visits and a score. */
  paths?: Array<{
    /** Entity ids along the path. */
    entities: string[];
    /** Relationship ids along the path. */
    relationships: string[];
    /** Score assigned to the path. */
    score: number;
  }>;
  /** Result count reported by the query. */
  totalResults: number;
}
|
|
66
|
+
|
|
67
|
+
/** Options accepted by the graph service; every field is optional. */
export interface GraphConfig {
  /** Name of the model used for extraction. */
  extractionModel?: string;
  /** Minimum confidence an extracted item needs in order to be kept. */
  confidenceThreshold?: number;
  /** Maximum number of hops for graph traversal. */
  maxHops?: number;
  /** Whether embeddings are enabled. */
  enableEmbeddings?: boolean;
  /** Similarity above which two entities are treated as duplicates. */
  deduplicationThreshold?: number;
  /** API key for the AI service. */
  geminiApiKey?: string;
  /** Extra settings forwarded to the Gemini AI service. */
  geminiConfig?: Partial<GeminiConfig>;
  /** For testing purposes */
  useMockAI?: boolean;
}
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Advanced knowledge graph service with AI extraction and intelligent management
|
|
80
|
+
*/
|
|
81
|
+
export class GraphService {
|
|
82
|
+
private embeddingService?: EmbeddingService;
|
|
83
|
+
private geminiAI?: GeminiAIService;
|
|
84
|
+
private readonly config: Required<GraphConfig>;
|
|
85
|
+
private graphs = new Map<string, KnowledgeGraph>(); // User graphs cache
|
|
86
|
+
|
|
87
|
+
private extractionStats = {
|
|
88
|
+
totalExtractions: 0,
|
|
89
|
+
averageEntities: 0,
|
|
90
|
+
averageRelationships: 0,
|
|
91
|
+
averageConfidence: 0,
|
|
92
|
+
processingTime: 0
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
/**
 * Create the service and resolve its configuration.
 *
 * Missing options fall back to environment variables (`AI_CHAT_MODEL` for the
 * model; `OPENROUTER_API_KEY`, `GOOGLE_AI_API_KEY`, then `GEMINI_API_KEY` for
 * the key) and then to built-in defaults. Numeric options fall back only when
 * they are `undefined`/`null`, so an explicit `0` is honored. A Gemini AI
 * service is created when a key is available and mock mode is off; if its
 * construction throws, the service switches itself to mock mode.
 *
 * @param config - Partial configuration overrides.
 * @param embeddingService - Optional embedding service kept for later use.
 */
constructor(
  config: Partial<GraphConfig> = {},
  embeddingService?: EmbeddingService
) {
  // `process` does not exist in every runtime; reading it unguarded throws a
  // ReferenceError there.
  const env: Record<string, string | undefined> =
    typeof process !== 'undefined' && process.env ? process.env : {};

  this.config = {
    extractionModel: config.extractionModel || env.AI_CHAT_MODEL || 'google/gemini-2.5-flash',
    confidenceThreshold: config.confidenceThreshold ?? 0.5,
    maxHops: config.maxHops ?? 3,
    enableEmbeddings: config.enableEmbeddings !== false,
    deduplicationThreshold: config.deduplicationThreshold ?? 0.85,
    geminiApiKey: config.geminiApiKey || env.OPENROUTER_API_KEY || env.GOOGLE_AI_API_KEY || env.GEMINI_API_KEY || '',
    geminiConfig: config.geminiConfig || {},
    useMockAI: config.useMockAI || false
  };

  this.embeddingService = embeddingService;

  // Initialize Gemini AI service if API key is provided and not using mock
  if (this.config.geminiApiKey && !this.config.useMockAI) {
    try {
      this.geminiAI = new GeminiAIService({
        apiKey: this.config.geminiApiKey,
        model: this.config.extractionModel,
        ...this.config.geminiConfig
      });
    } catch (error) {
      console.warn('Failed to initialize Gemini AI service, falling back to mock:', error);
      this.config.useMockAI = true;
    }
  }
}
|
|
126
|
+
|
|
127
|
+
// ==================== GRAPH CREATION & MANAGEMENT ====================
|
|
128
|
+
|
|
129
|
+
/**
 * Create empty knowledge graph.
 *
 * @param userId - When given, the new graph is also cached under this id,
 *   replacing any graph cached for it before.
 * @returns A graph with no entities or relationships, version `'1.0'` and
 *   both timestamps set to the current time.
 */
createGraph(userId?: string): KnowledgeGraph {
  const metadata: KnowledgeGraph['metadata'] = {
    version: '1.0',
    createdAt: new Date(),
    lastUpdated: new Date(),
    totalEntities: 0,
    totalRelationships: 0,
    sourceMemories: []
  };
  const emptyGraph: KnowledgeGraph = { entities: [], relationships: [], metadata };

  if (userId) {
    this.graphs.set(userId, emptyGraph);
  }
  return emptyGraph;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Get cached graph for user.
 *
 * @param userId - Key the graph was cached under.
 * @returns The cached graph, or `undefined` when none is stored for the user.
 */
getUserGraph(userId: string): KnowledgeGraph | undefined {
  const cached = this.graphs.get(userId);
  return cached;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Cache graph for user.
 *
 * @param userId - Key to store the graph under; an existing entry is replaced.
 * @param graph - Graph to cache (stored by reference).
 */
setUserGraph(userId: string, graph: KnowledgeGraph): void {
  const cache = this.graphs;
  cache.set(userId, graph);
}
|
|
166
|
+
|
|
167
|
+
// ==================== ENTITY & RELATIONSHIP EXTRACTION ====================
|
|
168
|
+
|
|
169
|
+
/**
 * Extract entities and relationships from memory content using AI.
 *
 * Uses the Gemini AI service when one is available and mock mode is off;
 * otherwise falls back to the heuristic mock extraction (and logs a warning).
 * Items below the confidence threshold are dropped, the overall confidence is
 * computed from what remains, and the extraction statistics are updated.
 *
 * @param content - Text of the memory.
 * @param memoryId - Id of the memory, recorded as the source of every item.
 * @param options - `confidenceThreshold` overrides the configured threshold
 *   for this call (an explicit `0` is honored). `includeEmbeddings` is
 *   accepted but not read by this method.
 * @returns The extraction result. On any error the error is logged and an
 *   empty result with confidence 0 is returned instead of throwing.
 */
async extractEntitiesAndRelationships(
  content: string,
  memoryId: string,
  options: {
    includeEmbeddings?: boolean;
    confidenceThreshold?: number;
  } = {}
): Promise<GraphExtractionResult> {
  const startTime = Date.now();
  // Resolve once; `??` keeps an explicit 0 instead of replacing it with the default.
  const confidenceThreshold = options.confidenceThreshold ?? this.config.confidenceThreshold;

  try {
    // Use real Gemini AI if available, otherwise fall back to mock
    let entities: Entity[] = [];
    let relationships: Relationship[] = [];

    if (this.geminiAI && !this.config.useMockAI) {
      // Use real Gemini AI service
      const aiResult = await this.geminiAI.extractEntitiesAndRelationships({
        content,
        confidenceThreshold
      });

      // Convert AI service format to GraphService format
      entities = aiResult.entities.map(e => ({
        id: e.id,
        label: e.label,
        type: e.type,
        confidence: e.confidence,
        properties: e.properties,
        sourceMemoryIds: [memoryId],
        createdAt: new Date(),
        lastUpdated: new Date()
      }));

      relationships = aiResult.relationships.map(r => ({
        id: this.generateRelationshipId(r),
        source: r.source,
        target: r.target,
        label: r.label,
        type: r.type,
        confidence: r.confidence,
        sourceMemoryIds: [memoryId],
        createdAt: new Date(),
        lastUpdated: new Date()
      }));
    } else {
      // Fall back to mock implementation
      console.warn('Using mock AI extraction - configure Gemini API key for real AI processing');
      const response = await this.mockGeminiResponse(content);
      const extracted = this.parseExtractionResponse(response, memoryId);
      entities = extracted.entities;
      relationships = extracted.relationships;
    }

    const processingTime = Date.now() - startTime;

    // Filter by confidence threshold
    entities = entities.filter(e => (e.confidence || 0) >= confidenceThreshold);
    relationships = relationships.filter(r => (r.confidence || 0) >= confidenceThreshold);

    // Calculate overall confidence
    const confidence = this.calculateExtractionConfidence(entities, relationships);

    // Update statistics
    this.updateExtractionStats(entities, relationships, confidence, processingTime);

    return {
      entities,
      relationships,
      confidence,
      processingTimeMs: processingTime,
      extractedFromMemory: memoryId
    };
  } catch (error) {
    console.error('Entity extraction failed:', error);

    return {
      entities: [],
      relationships: [],
      confidence: 0,
      processingTimeMs: Date.now() - startTime,
      extractedFromMemory: memoryId
    };
  }
}
|
|
260
|
+
|
|
261
|
+
/**
 * Extract entities from multiple memories in batch.
 *
 * Memories are processed in consecutive batches; the memories inside a batch
 * are extracted concurrently, and the method waits between batches.
 *
 * @param memories - Memories to process.
 * @param options - `batchSize` is the number of memories per batch (default 5;
 *   values that are not at least 1 fall back to the default, fractions are
 *   rounded down). `delayMs` is the pause between batches (default 1000;
 *   `0` disables the pause, negative values fall back to the default).
 * @returns One extraction result per memory, in input order.
 */
async extractFromMemoriesBatch(
  memories: Array<{ id: string; content: string }>,
  options: {
    batchSize?: number;
    delayMs?: number;
  } = {}
): Promise<GraphExtractionResult[]> {
  const DEFAULT_BATCH_SIZE = 5;
  const DEFAULT_DELAY_MS = 1000;

  // A non-positive step would never advance the loop below, and a fractional
  // one would overlap slices, so normalize to a positive integer.
  const requestedBatchSize = Math.floor(options.batchSize ?? DEFAULT_BATCH_SIZE);
  const batchSize = requestedBatchSize >= 1 ? requestedBatchSize : DEFAULT_BATCH_SIZE;

  const requestedDelay = options.delayMs;
  const delayMs = typeof requestedDelay === 'number' && requestedDelay >= 0
    ? requestedDelay
    : DEFAULT_DELAY_MS;

  const results: GraphExtractionResult[] = [];

  // Process in batches to avoid rate limiting
  for (let i = 0; i < memories.length; i += batchSize) {
    const batch = memories.slice(i, i + batchSize);

    const batchResults = await Promise.all(
      batch.map(memory => this.extractEntitiesAndRelationships(memory.content, memory.id))
    );
    results.push(...batchResults);

    // Delay between batches (not after the last one)
    if (delayMs > 0 && i + batchSize < memories.length) {
      await this.delay(delayMs);
    }
  }

  return results;
}
|
|
294
|
+
|
|
295
|
+
// ==================== GRAPH OPERATIONS ====================
|
|
296
|
+
|
|
297
|
+
/**
 * Add extracted data to knowledge graph with intelligent deduplication.
 *
 * Entities: an incoming entity whose id already exists is merged into the
 * stored one; otherwise it is merged into the first sufficiently similar
 * entity, if any; otherwise it is appended. Entities added earlier in the same
 * call take part in both checks.
 *
 * Relationships: endpoints that referred to an entity which was merged into a
 * similar one are rewritten to the surviving entity's id. A relationship is
 * appended only when both endpoints exist in the graph; a relationship with
 * the same source, target and label as a stored one refreshes that one
 * instead (higher confidence, merged source memory ids).
 *
 * @param graph - Graph to extend; it is not modified.
 * @param newEntities - Extracted entities; entries without id or label are ignored.
 * @param newRelationships - Extracted relationships; entries without source,
 *   target or label are ignored.
 * @param sourceMemoryId - Memory the data came from, recorded on touched items
 *   and in the graph metadata.
 * @returns A new graph object with updated entities, relationships and
 *   metadata. If anything throws, the original graph is returned unchanged
 *   (the error is only logged when `NODE_ENV` is `development`).
 */
addToGraph(
  graph: KnowledgeGraph,
  newEntities: Entity[],
  newRelationships: Relationship[],
  sourceMemoryId?: string
): KnowledgeGraph {
  try {
    const now = new Date();
    const freshSourceIds = (): string[] => (sourceMemoryId ? [sourceMemoryId] : []);

    // Filter out null/undefined entities
    const validNewEntities = newEntities.filter(e => e != null && e.id && e.label);
    const validNewRelationships = newRelationships.filter(r => r != null && r.source && r.target && r.label);

    // Index entities by id (first occurrence wins) and keep the index current
    // as entities are appended, so duplicates inside one batch are detected.
    const processedEntities = [...graph.entities];
    const entityIndexById = new Map<string, number>();
    processedEntities.forEach((e, i) => {
      if (!entityIndexById.has(e.id)) entityIndexById.set(e.id, i);
    });

    // Incoming id -> id of the stored entity it was fuzzy-merged into.
    const mergedIntoId = new Map<string, string>();

    // Process entities with intelligent merging
    for (const newEntity of validNewEntities) {
      const exactIndex = entityIndexById.get(newEntity.id);
      if (exactIndex !== undefined) {
        // Merge with existing entity
        processedEntities[exactIndex] = this.mergeEntities(processedEntities[exactIndex], newEntity, sourceMemoryId);
        continue;
      }

      // Check for similar entities (fuzzy matching); findSimilarEntity already
      // applies the deduplication threshold.
      const similar = this.findSimilarEntity(newEntity, processedEntities);
      const similarIndex = similar ? entityIndexById.get(similar.id) : undefined;

      if (similar && similarIndex !== undefined) {
        // Merge with similar entity and remember the id substitution
        processedEntities[similarIndex] = this.mergeEntities(similar, newEntity, sourceMemoryId);
        mergedIntoId.set(newEntity.id, similar.id);
      } else {
        // Add as new entity
        entityIndexById.set(newEntity.id, processedEntities.length);
        processedEntities.push({
          ...newEntity,
          createdAt: now,
          lastUpdated: now,
          sourceMemoryIds: freshSourceIds()
        });
      }
    }

    // Process relationships with deduplication
    const relationshipKey = (r: Relationship) => `${r.source}|${r.target}|${r.label}`;
    const processedRelationships = [...graph.relationships];
    const relationshipIndexByKey = new Map<string, number>();
    processedRelationships.forEach((r, i) => {
      const key = relationshipKey(r);
      if (!relationshipIndexByKey.has(key)) relationshipIndexByKey.set(key, i);
    });
    const knownEntityIds = new Set(processedEntities.map(e => e.id));

    for (const incoming of validNewRelationships) {
      // Follow fuzzy merges so the relationship points at the surviving entity.
      const source = mergedIntoId.get(incoming.source) ?? incoming.source;
      const target = mergedIntoId.get(incoming.target) ?? incoming.target;

      // Two distinct endpoints that collapsed into one entity would become a
      // self-loop that was never extracted; skip it.
      if (source === target && incoming.source !== incoming.target) {
        continue;
      }

      const newRel: Relationship = { ...incoming, source, target };
      const key = relationshipKey(newRel);
      const existingIndex = relationshipIndexByKey.get(key);

      if (existingIndex === undefined) {
        // Verify entities exist
        if (knownEntityIds.has(source) && knownEntityIds.has(target)) {
          relationshipIndexByKey.set(key, processedRelationships.length);
          processedRelationships.push({
            ...newRel,
            id: this.generateRelationshipId(newRel),
            createdAt: now,
            lastUpdated: now,
            sourceMemoryIds: freshSourceIds()
          });
        }
      } else {
        // Update existing relationship
        const existing = processedRelationships[existingIndex];
        processedRelationships[existingIndex] = {
          ...existing,
          lastUpdated: now,
          confidence: Math.max(existing.confidence || 0, newRel.confidence || 0),
          sourceMemoryIds: [...new Set([...(existing.sourceMemoryIds || []), ...freshSourceIds()])]
        };
      }
    }

    // Update graph metadata
    return {
      ...graph,
      entities: processedEntities,
      relationships: processedRelationships,
      metadata: {
        ...graph.metadata,
        lastUpdated: now,
        totalEntities: processedEntities.length,
        totalRelationships: processedRelationships.length,
        sourceMemories: sourceMemoryId
          ? [...new Set([...graph.metadata.sourceMemories, sourceMemoryId])]
          : graph.metadata.sourceMemories
      }
    };
  } catch (error) {
    // Only log detailed errors in development mode
    if (process.env.NODE_ENV === 'development') {
      console.error('Error adding to graph:', error);
    }
    return graph; // Return original graph on error
  }
}
|
|
417
|
+
|
|
418
|
+
/**
 * Find entities related to a set of seed entities using breadth-first traversal.
 *
 * Relationships are followed in both directions (an entity matches when it is
 * either the source or the target). A relationship is reported only when it is
 * the edge through which a previously unseen entity was first reached.
 *
 * @param graph - Graph whose `entities` and `relationships` are traversed.
 * @param seedEntityIds - Entity ids to start from; they are always part of the result set.
 * @param options - Traversal options:
 *   - `maxHops`: maximum number of traversal rounds. Falls back to `this.config.maxHops`
 *     only when omitted; an explicit `0` limits the result to the seed entities.
 *   - `relationshipTypes`: when provided, only relationships whose `type` (or `label`
 *     when `type` is falsy) is listed are followed.
 *   - `includeWeights`: when truthy, single-hop path segments with a score are returned.
 * @returns Matching entities and relationships; `totalResults` is the entity count.
 *   On any error the failure is logged and an empty result is returned.
 */
findRelatedEntities(
  graph: KnowledgeGraph,
  seedEntityIds: string[],
  options: {
    maxHops?: number;
    relationshipTypes?: string[];
    includeWeights?: boolean;
  } = {}
): GraphQueryResult {
  // `??` rather than `||`: a caller-supplied maxHops of 0 must not be replaced by the default.
  const maxHops = options.maxHops ?? this.config.maxHops;
  const relationshipTypes = options.relationshipTypes;

  try {
    // Seeds count as already reached so the traversal never walks back onto them.
    const reachedEntityIds = new Set<string>(seedEntityIds);
    const discoveredRelationships = new Set<string>();
    const paths: Array<{ entities: string[]; relationships: string[]; score: number }> = [];

    let currentHop = 0;
    let frontier = seedEntityIds;

    while (currentHop < maxHops && frontier.length > 0) {
      const nextFrontier: string[] = [];

      for (const entityId of frontier) {
        for (const relationship of graph.relationships) {
          const isInvolved = relationship.source === entityId || relationship.target === entityId;
          if (!isInvolved) {
            continue;
          }
          if (relationshipTypes && !relationshipTypes.includes(relationship.type || relationship.label)) {
            continue;
          }

          const neighborId = relationship.source === entityId ? relationship.target : relationship.source;
          if (reachedEntityIds.has(neighborId)) {
            continue;
          }

          // Relationships without a stored id get a deterministic one derived from their content.
          const relationshipId = relationship.id || this.generateRelationshipId(relationship);

          reachedEntityIds.add(neighborId);
          nextFrontier.push(neighborId);
          discoveredRelationships.add(relationshipId);

          if (options.includeWeights) {
            paths.push({
              entities: [entityId, neighborId],
              relationships: [relationshipId],
              score: relationship.confidence || 0.5
            });
          }
        }
      }

      frontier = nextFrontier;
      currentHop++;
    }

    // Resolve the collected ids back to the graph's entity and relationship objects.
    const relatedEntities = graph.entities.filter(e => reachedEntityIds.has(e.id));
    const relatedRelationships = graph.relationships.filter(r =>
      discoveredRelationships.has(r.id || this.generateRelationshipId(r))
    );

    return {
      entities: relatedEntities,
      relationships: relatedRelationships,
      paths: options.includeWeights ? paths : undefined,
      totalResults: relatedEntities.length
    };

  } catch (error) {
    console.error('Error finding related entities:', error);
    return {
      entities: [],
      relationships: [],
      totalResults: 0
    };
  }
}
|
|
501
|
+
|
|
502
|
+
/**
 * Filter a graph's entities and relationships by type, free text and limit.
 *
 * Entity and relationship filters are applied independently of each other.
 * The text search is case-insensitive and matches against the label or the
 * JSON-serialised properties. A missing graph (or one without `entities` /
 * `relationships`) and any thrown error both yield an empty result.
 *
 * @param graph - Graph to query.
 * @param query - Optional `entityTypes`, `relationshipTypes`, `searchText` and `limit`
 *   (the limit is applied to entities and relationships separately).
 * @returns The filtered lists; `totalResults` is the sum of both list lengths.
 */
queryGraph(
  graph: KnowledgeGraph,
  query: {
    entityTypes?: string[];
    relationshipTypes?: string[];
    searchText?: string;
    limit?: number;
  }
): GraphQueryResult {
  try {
    // Nothing to search when the graph or either of its collections is absent.
    if (!graph || !graph.entities || !graph.relationships) {
      return { entities: [], relationships: [], totalResults: 0 };
    }

    const { entityTypes, relationshipTypes, searchText, limit } = query;

    let entities = graph.entities;
    let relationships = graph.relationships;

    // Restrict entities to the requested types (an empty list means "no filter").
    if (entityTypes && entityTypes.length > 0) {
      entities = entities.filter(candidate => entityTypes.includes(candidate.type));
    }

    // Restrict relationships to the requested types, falling back to the label.
    if (relationshipTypes && relationshipTypes.length > 0) {
      relationships = relationships.filter(candidate =>
        relationshipTypes.includes(candidate.type || candidate.label)
      );
    }

    // Case-insensitive substring match on label, then on serialised properties.
    if (searchText) {
      const needle = searchText.toLowerCase();
      const matchesText = (label: string, properties: unknown): boolean =>
        label.toLowerCase().includes(needle) ||
        JSON.stringify(properties || {}).toLowerCase().includes(needle);

      entities = entities.filter(candidate => matchesText(candidate.label, candidate.properties));
      relationships = relationships.filter(candidate => matchesText(candidate.label, candidate.properties));
    }

    // Truncate each list to the first `limit` items.
    if (limit) {
      entities = entities.slice(0, limit);
      relationships = relationships.slice(0, limit);
    }

    const totalResults = entities.length + relationships.length;
    return { entities, relationships, totalResults };

  } catch (error) {
    // Detailed error output is reserved for development builds.
    if (process.env.NODE_ENV === 'development') {
      console.error('Error querying graph:', error);
    }
    return { entities: [], relationships: [], totalResults: 0 };
  }
}
|
|
578
|
+
|
|
579
|
+
// ==================== STATISTICS & MONITORING ====================
|
|
580
|
+
|
|
581
|
+
/**
 * Compute summary statistics for a graph.
 *
 * @param graph - Graph to summarise.
 * @returns Entity/relationship totals, per-type counts, the average number of
 *   relationship endpoints per entity, the graph density (0 when fewer than two
 *   entities exist, since no pair of distinct entities is possible), the
 *   service's extraction statistics and the graph's `metadata.lastUpdated`.
 */
getGraphStats(graph: KnowledgeGraph) {
  const entityCount = graph.entities.length;
  const relationshipCount = graph.relationships.length;

  // Count entities per type.
  const entityTypes = new Map<string, number>();
  for (const entity of graph.entities) {
    entityTypes.set(entity.type, (entityTypes.get(entity.type) || 0) + 1);
  }

  // Count relationships per type, falling back to the label when no type is set.
  const relationshipTypes = new Map<string, number>();
  for (const relationship of graph.relationships) {
    const type = relationship.type || relationship.label;
    relationshipTypes.set(type, (relationshipTypes.get(type) || 0) + 1);
  }

  // Every relationship contributes exactly two endpoints (source and target),
  // so the summed per-entity connection count is always 2 * relationshipCount.
  const totalEndpoints = relationshipCount * 2;
  const averageConnections = entityCount > 0 ? totalEndpoints / entityCount : 0;

  // Guard on the pair count rather than the entity count: with a single entity
  // n * (n - 1) is 0 and the division would produce NaN or Infinity.
  const possiblePairs = entityCount * (entityCount - 1);
  const graphDensity = possiblePairs > 0 ? totalEndpoints / possiblePairs : 0;

  return {
    totalEntities: entityCount,
    totalRelationships: relationshipCount,
    entityTypes: Object.fromEntries(entityTypes),
    relationshipTypes: Object.fromEntries(relationshipTypes),
    averageConnections,
    graphDensity,
    extractionStats: this.extractionStats,
    lastUpdated: graph.metadata.lastUpdated
  };
}
|
|
623
|
+
|
|
624
|
+
// ==================== PRIVATE METHODS ====================
|
|
625
|
+
|
|
626
|
+
/**
 * Build the prompt asking for entities and relationships as JSON.
 *
 * The prompt describes the expected "entities" and "relationships" arrays and
 * their fields, then appends the text to analyse.
 *
 * NOTE(review): `content` is interpolated verbatim into the prompt, so any
 * instructions contained in the text reach the model unmodified.
 *
 * @param content - Text to extract entities and relationships from.
 * @returns The complete prompt string, ending with a `JSON:` cue.
 */
private buildExtractionPrompt(content: string): string {
  return `
Extract entities and relationships from the following text. Focus on meaningful entities (people, places, concepts, organizations) and clear relationships between them.

Format your response as valid JSON with "entities" and "relationships" arrays.

For entities:
- "id": unique identifier using meaningful names with underscores (e.g., "john_doe", "machine_learning")
- "label": display name (e.g., "John Doe", "Machine Learning")
- "type": entity type (person, concept, organization, location, event, skill, technology, etc.)
- "confidence": confidence score 0.0-1.0

For relationships:
- "source": source entity id
- "target": target entity id
- "label": relationship description (e.g., "works at", "uses", "located in")
- "confidence": confidence score 0.0-1.0

TEXT:
${content}

JSON:`;
}
|
|
649
|
+
|
|
650
|
+
/**
 * Produce a stand-in for the AI extraction response using the local heuristics.
 *
 * @param content - Text to run the heuristic extraction on.
 * @returns A JSON string with `entities` (confidence fixed at 0.8) and
 *   `relationships` (confidence fixed at 0.7).
 */
private async mockGeminiResponse(content: string): Promise<string> {
  // Development-only substitute for a real AI service call.
  const heuristicEntities = this.extractEntitiesHeuristic(content);
  const heuristicRelationships = this.extractRelationshipsHeuristic(content, heuristicEntities);

  const payload = {
    entities: heuristicEntities.map(({ id, label, type }) => ({
      id,
      label,
      type,
      confidence: 0.8
    })),
    relationships: heuristicRelationships.map(({ source, target, label }) => ({
      source,
      target,
      label,
      confidence: 0.7
    }))
  };

  return JSON.stringify(payload);
}
|
|
670
|
+
|
|
671
|
+
/**
 * Heuristic entity extraction used by the mock AI path.
 *
 * Treats capitalised words (an uppercase letter followed by lowercase letters,
 * longer than two characters) as `concept` entities. Punctuation surrounding a
 * word is removed before matching so that `"Paris,"` yields the label `Paris`,
 * and words that map to an id already emitted are skipped so ids stay unique.
 *
 * @param content - Text to scan.
 * @returns Up to five distinct entities, each with confidence 0.6.
 */
private extractEntitiesHeuristic(content: string): Entity[] {
  const maxEntities = 5;
  const edgePunctuation = /^[.,;:!?"'()\[\]{}]+|[.,;:!?"'()\[\]{}]+$/g;

  const entities: Entity[] = [];
  const seenIds = new Set<string>();

  // Simple heuristic extraction (replace with actual AI)
  for (const rawWord of content.split(/\s+/)) {
    const word = rawWord.replace(edgePunctuation, '');
    if (!/^[A-Z][a-z]+/.test(word) || word.length <= 2) {
      continue;
    }

    const id = word.toLowerCase().replace(/[^\w]/g, '_');
    if (seenIds.has(id)) {
      // The same word appearing twice must not create two entities with one id.
      continue;
    }
    seenIds.add(id);

    entities.push({
      id,
      label: word,
      type: 'concept',
      confidence: 0.6
    });

    if (entities.length >= maxEntities) {
      break;
    }
  }

  return entities;
}
|
|
691
|
+
|
|
692
|
+
/**
 * Heuristic relationship extraction used by the mock AI path.
 *
 * @param content - Source text (not consulted by this heuristic).
 * @param entities - Entities previously extracted from the text.
 * @returns A single "related to" relationship (confidence 0.5) linking the first
 *   two entities, or an empty list when fewer than two entities are given.
 */
private extractRelationshipsHeuristic(content: string, entities: Entity[]): Relationship[] {
  // A relationship needs two endpoints.
  if (entities.length < 2) {
    return [];
  }

  const [first, second] = entities;
  return [
    {
      source: first.id,
      target: second.id,
      label: 'related to',
      confidence: 0.5
    }
  ];
}
|
|
707
|
+
|
|
708
|
+
/**
 * Parse the JSON extraction response into entities and relationships.
 *
 * Entity ids are sanitised (a random `entity_…` id is generated when missing)
 * and relationship endpoints are rewritten to the sanitised ids. Relationships
 * whose source or target is missing or does not refer to a returned entity are
 * dropped. Missing labels, types and confidences receive defaults.
 *
 * @param response - Raw JSON text expected to hold `entities` and `relationships` arrays.
 * @param memoryId - Id recorded as the sole `sourceMemoryIds` entry on every item.
 * @returns The parsed items, or empty lists when parsing or validation fails
 *   (the failure is logged).
 */
private parseExtractionResponse(response: string, memoryId: string): { entities: Entity[]; relationships: Relationship[] } {
  try {
    const parsed = JSON.parse(response);

    const hasEntityList = parsed.entities && Array.isArray(parsed.entities);
    const hasRelationshipList = parsed.relationships && Array.isArray(parsed.relationships);
    if (!(hasEntityList && hasRelationshipList)) {
      throw new Error('Invalid response format');
    }

    // Build the entity list and the raw-id -> sanitised-id lookup in one pass.
    const entities: Entity[] = [];
    const idMap = new Map<string, string>();
    for (const rawEntity of parsed.entities) {
      const entity: Entity = {
        id: this.sanitizeId(rawEntity.id || `entity_${Math.random().toString(36).substring(2, 10)}`),
        label: rawEntity.label || 'Unnamed Entity',
        type: rawEntity.type || 'concept',
        confidence: rawEntity.confidence || 0.5,
        sourceMemoryIds: [memoryId]
      };
      entities.push(entity);
      idMap.set(rawEntity.id || '', entity.id);
    }

    // Keep only relationships whose endpoints both resolve to a known entity.
    const relationships: Relationship[] = [];
    for (const rawRelationship of parsed.relationships) {
      if (!rawRelationship.source || !rawRelationship.target) {
        continue;
      }
      if (!idMap.has(rawRelationship.source) || !idMap.has(rawRelationship.target)) {
        continue;
      }
      relationships.push({
        source: idMap.get(rawRelationship.source) || '',
        target: idMap.get(rawRelationship.target) || '',
        label: rawRelationship.label || 'related to',
        confidence: rawRelationship.confidence || 0.5,
        sourceMemoryIds: [memoryId]
      });
    }

    return { entities, relationships };

  } catch (error) {
    console.error('Failed to parse extraction response:', error);
    return { entities: [], relationships: [] };
  }
}
|
|
747
|
+
|
|
748
|
+
/**
 * Normalise an identifier: every character other than a word character,
 * underscore or hyphen becomes `_`, and the result is lower-cased.
 *
 * @param id - Raw identifier.
 * @returns The sanitised, lower-case identifier.
 */
private sanitizeId(id: string): string {
  // Replace first, then lower-case, so the substitution sees the original characters.
  const replaced = id.replace(/[^\w_-]/g, '_');
  return replaced.toLowerCase();
}
|
|
751
|
+
|
|
752
|
+
/**
 * Average the confidence of all extracted entities and relationships.
 *
 * Items with a falsy confidence count as 0.5.
 *
 * @param entities - Extracted entities.
 * @param relationships - Extracted relationships.
 * @returns The mean confidence, or 0 when both lists are empty.
 */
private calculateExtractionConfidence(entities: Entity[], relationships: Relationship[]): number {
  const sampleCount = entities.length + relationships.length;
  if (sampleCount === 0) {
    return 0;
  }

  // Accumulate entities first, then relationships.
  let total = 0;
  for (const entity of entities) {
    total += entity.confidence || 0.5;
  }
  for (const relationship of relationships) {
    total += relationship.confidence || 0.5;
  }

  return total / sampleCount;
}
|
|
761
|
+
|
|
762
|
+
/**
 * Merge a newly extracted entity into an existing one.
 *
 * The new entity's label and type win when present, the higher confidence is
 * kept, properties are shallow-merged with the new entity's values taking
 * precedence, and `lastUpdated` is set to the current time.
 *
 * @param existing - Entity already present in the graph.
 * @param newEntity - Freshly extracted entity describing the same thing.
 * @param sourceMemoryId - Optional id of the memory the new entity came from;
 *   appended to `sourceMemoryIds` only if not already recorded.
 * @returns A new merged entity object; neither input is mutated.
 */
private mergeEntities(existing: Entity, newEntity: Entity, sourceMemoryId?: string): Entity {
  // De-duplicate so that merging the same memory repeatedly does not grow the list.
  const mergedSourceMemoryIds = [
    ...new Set([
      ...(existing.sourceMemoryIds || []),
      ...(sourceMemoryId ? [sourceMemoryId] : [])
    ])
  ];

  return {
    ...existing,
    label: newEntity.label || existing.label,
    type: newEntity.type || existing.type,
    confidence: Math.max(existing.confidence || 0, newEntity.confidence || 0),
    properties: { ...existing.properties, ...newEntity.properties },
    sourceMemoryIds: mergedSourceMemoryIds,
    lastUpdated: new Date()
  };
}
|
|
776
|
+
|
|
777
|
+
/**
 * Locate the first entity considered a duplicate of the given one.
 *
 * @param entity - Entity to compare.
 * @param entities - Candidates to compare against, checked in order.
 * @returns The first candidate whose similarity exceeds
 *   `this.config.deduplicationThreshold`, or `undefined` when none does.
 */
private findSimilarEntity(entity: Entity, entities: Entity[]): Entity | undefined {
  return entities.find(
    candidate => this.calculateEntitySimilarity(entity, candidate) > this.config.deduplicationThreshold
  );
}
|
|
785
|
+
|
|
786
|
+
/**
 * Score how alike two entities are.
 *
 * @param a - First entity.
 * @param b - Second entity.
 * @returns 80% case-insensitive label similarity plus 20% for an exact type match.
 */
private calculateEntitySimilarity(a: Entity, b: Entity): number {
  const labelWeight = 0.8;
  const typeWeight = 0.2;

  const labelScore = this.stringSimilarity(a.label.toLowerCase(), b.label.toLowerCase());

  // Types either match exactly or contribute nothing.
  let typeScore = 0.0;
  if (a.type === b.type) {
    typeScore = 1.0;
  }

  return (labelScore * labelWeight) + (typeScore * typeWeight);
}
|
|
793
|
+
|
|
794
|
+
/**
 * Normalised edit-distance similarity between two strings.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns `(longerLength - editDistance) / longerLength`; 1.0 when both strings are empty.
 */
private stringSimilarity(a: string, b: string): number {
  // On equal lengths `b` is treated as the longer string.
  const [shorter, longer] = a.length > b.length ? [b, a] : [a, b];

  if (longer.length === 0) {
    return 1.0;
  }

  const edits = this.levenshteinDistance(longer, shorter);
  return (longer.length - edits) / longer.length;
}
|
|
803
|
+
|
|
804
|
+
/**
 * Levenshtein edit distance between two strings, where insertions, deletions
 * and substitutions each cost 1.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The minimum number of single-character edits turning `a` into `b`.
 */
private levenshteinDistance(a: string, b: string): number {
  // Only the previous row of the dynamic-programming table is needed to fill the next one.
  let previousRow: number[] = Array.from({ length: a.length + 1 }, (_, col) => col);

  for (let row = 1; row <= b.length; row++) {
    // First column: cost of deleting the first `row` characters of `b`.
    const currentRow: number[] = [row];

    for (let col = 1; col <= a.length; col++) {
      const substitutionCost = a[col - 1] === b[row - 1] ? 0 : 1;
      currentRow.push(
        Math.min(
          currentRow[col - 1] + 1,
          previousRow[col] + 1,
          previousRow[col - 1] + substitutionCost
        )
      );
    }

    previousRow = currentRow;
  }

  return previousRow[a.length];
}
|
|
823
|
+
|
|
824
|
+
/**
 * Derive a deterministic id for a relationship from its source, target and label.
 *
 * @param relationship - Relationship to identify.
 * @returns The sanitised `source_target_label` string.
 */
private generateRelationshipId(relationship: Relationship): string {
  const { source, target, label } = relationship;
  const rawKey = `${source}_${target}_${label}`;
  return this.sanitizeId(rawKey);
}
|
|
828
|
+
|
|
829
|
+
/**
 * Fold the outcome of one extraction into the running statistics.
 *
 * Increments `totalExtractions` and updates each stored average with the
 * incremental-mean formula `mean += (sample - mean) / n`, so every value is the
 * true arithmetic mean over all extractions recorded so far.
 *
 * @param entities - Entities produced by the extraction.
 * @param relationships - Relationships produced by the extraction.
 * @param confidence - Overall confidence of the extraction.
 * @param processingTime - Time the extraction took (same unit as the stored value).
 */
private updateExtractionStats(entities: Entity[], relationships: Relationship[], confidence: number, processingTime: number): void {
  const stats = this.extractionStats;
  stats.totalExtractions++;
  const sampleCount = stats.totalExtractions;

  // Incremental mean: equivalent to (previousMean * (n - 1) + sample) / n.
  const rollingMean = (previousMean: number, sample: number): number =>
    previousMean + (sample - previousMean) / sampleCount;

  stats.averageEntities = rollingMean(stats.averageEntities, entities.length);
  stats.averageRelationships = rollingMean(stats.averageRelationships, relationships.length);
  stats.averageConfidence = rollingMean(stats.averageConfidence, confidence);
  stats.processingTime = rollingMean(stats.processingTime, processingTime);
}
|
|
840
|
+
|
|
841
|
+
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`.
 * @returns A promise that resolves once the timer fires.
 */
private delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
844
|
+
|
|
845
|
+
// ==================== SERVICE MANAGEMENT ====================
|
|
846
|
+
|
|
847
|
+
/**
 * Check whether the AI service can be reached.
 *
 * @returns `{ connected: false, usingMock: true, service: 'mock' }` when mock mode
 *   is enabled or no Gemini client exists; otherwise the outcome of the client's
 *   `testConnection()` call, with `connected: false` (and a logged error) if it throws.
 */
async testAIConnection(): Promise<{ connected: boolean; usingMock: boolean; service: string }> {
  const ai = this.geminiAI;

  // Without a real client (or with mock mode forced) there is nothing to probe.
  if (this.config.useMockAI || !ai) {
    return { connected: false, usingMock: true, service: 'mock' };
  }

  let connected = false;
  try {
    connected = await ai.testConnection();
  } catch (error) {
    console.error('AI connection test failed:', error);
  }

  return { connected, usingMock: false, service: 'gemini' };
}
|
|
863
|
+
|
|
864
|
+
/**
 * Expose the service configuration without the API key.
 *
 * @returns The configuration fields other than `geminiApiKey`, plus `aiConfigured`,
 *   which is true when an API key is set and mock mode is off.
 */
getConfig(): Omit<Required<GraphConfig>, 'geminiApiKey'> & { aiConfigured: boolean } {
  const {
    extractionModel,
    confidenceThreshold,
    maxHops,
    enableEmbeddings,
    deduplicationThreshold,
    geminiConfig,
    useMockAI,
    geminiApiKey
  } = this.config;

  // The key itself is only used to derive a boolean; it is never returned.
  const aiConfigured = !!geminiApiKey && !useMockAI;

  return {
    extractionModel,
    confidenceThreshold,
    maxHops,
    enableEmbeddings,
    deduplicationThreshold,
    geminiConfig,
    useMockAI,
    aiConfigured
  };
}
|
|
879
|
+
|
|
880
|
+
/**
 * Return the current extraction statistics.
 *
 * @returns A shallow copy, so callers cannot modify the service's own counters.
 */
getExtractionStats() {
  const snapshot = { ...this.extractionStats };
  return snapshot;
}
|
|
886
|
+
}
|
|
887
|
+
|
|
888
888
|
export default GraphService;
|