@soulcraft/brainy 2.15.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +249 -152
- package/dist/api/ConfigAPI.d.ts +67 -0
- package/dist/api/ConfigAPI.js +166 -0
- package/dist/api/DataAPI.d.ts +123 -0
- package/dist/api/DataAPI.js +391 -0
- package/dist/api/SecurityAPI.d.ts +50 -0
- package/dist/api/SecurityAPI.js +139 -0
- package/dist/api/UniversalImportAPI.d.ts +134 -0
- package/dist/api/UniversalImportAPI.js +615 -0
- package/dist/augmentationManager.js +12 -7
- package/dist/augmentationPipeline.d.ts +0 -61
- package/dist/augmentationPipeline.js +0 -87
- package/dist/augmentationRegistry.d.ts +1 -1
- package/dist/augmentationRegistry.js +1 -1
- package/dist/augmentations/apiServerAugmentation.d.ts +27 -1
- package/dist/augmentations/apiServerAugmentation.js +288 -7
- package/dist/augmentations/auditLogAugmentation.d.ts +109 -0
- package/dist/augmentations/auditLogAugmentation.js +358 -0
- package/dist/augmentations/batchProcessingAugmentation.d.ts +3 -2
- package/dist/augmentations/batchProcessingAugmentation.js +123 -22
- package/dist/augmentations/brainyAugmentation.d.ts +87 -8
- package/dist/augmentations/brainyAugmentation.js +159 -2
- package/dist/augmentations/cacheAugmentation.d.ts +6 -5
- package/dist/augmentations/cacheAugmentation.js +113 -17
- package/dist/augmentations/conduitAugmentations.d.ts +2 -2
- package/dist/augmentations/conduitAugmentations.js +2 -2
- package/dist/augmentations/configResolver.d.ts +122 -0
- package/dist/augmentations/configResolver.js +440 -0
- package/dist/augmentations/connectionPoolAugmentation.d.ts +3 -1
- package/dist/augmentations/connectionPoolAugmentation.js +37 -12
- package/dist/augmentations/defaultAugmentations.d.ts +9 -11
- package/dist/augmentations/defaultAugmentations.js +4 -11
- package/dist/augmentations/discovery/catalogDiscovery.d.ts +142 -0
- package/dist/augmentations/discovery/catalogDiscovery.js +249 -0
- package/dist/augmentations/discovery/localDiscovery.d.ts +84 -0
- package/dist/augmentations/discovery/localDiscovery.js +246 -0
- package/dist/augmentations/discovery/runtimeLoader.d.ts +97 -0
- package/dist/augmentations/discovery/runtimeLoader.js +337 -0
- package/dist/augmentations/discovery.d.ts +152 -0
- package/dist/augmentations/discovery.js +441 -0
- package/dist/augmentations/display/intelligentComputation.d.ts +1 -1
- package/dist/augmentations/display/intelligentComputation.js +4 -4
- package/dist/augmentations/entityRegistryAugmentation.d.ts +3 -1
- package/dist/augmentations/entityRegistryAugmentation.js +5 -1
- package/dist/augmentations/indexAugmentation.d.ts +3 -3
- package/dist/augmentations/indexAugmentation.js +2 -2
- package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +22 -6
- package/dist/augmentations/intelligentVerbScoringAugmentation.js +106 -23
- package/dist/augmentations/manifest.d.ts +176 -0
- package/dist/augmentations/manifest.js +8 -0
- package/dist/augmentations/marketplace/AugmentationMarketplace.d.ts +168 -0
- package/dist/augmentations/marketplace/AugmentationMarketplace.js +329 -0
- package/dist/augmentations/marketplace/cli.d.ts +47 -0
- package/dist/augmentations/marketplace/cli.js +265 -0
- package/dist/augmentations/metricsAugmentation.d.ts +3 -3
- package/dist/augmentations/metricsAugmentation.js +2 -2
- package/dist/augmentations/monitoringAugmentation.d.ts +3 -3
- package/dist/augmentations/monitoringAugmentation.js +2 -2
- package/dist/augmentations/neuralImport.d.ts +1 -1
- package/dist/augmentations/rateLimitAugmentation.d.ts +82 -0
- package/dist/augmentations/rateLimitAugmentation.js +321 -0
- package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +2 -2
- package/dist/augmentations/requestDeduplicatorAugmentation.js +1 -1
- package/dist/augmentations/storageAugmentation.d.ts +1 -1
- package/dist/augmentations/storageAugmentation.js +2 -2
- package/dist/augmentations/storageAugmentations.d.ts +37 -8
- package/dist/augmentations/storageAugmentations.js +204 -15
- package/dist/augmentations/synapseAugmentation.d.ts +1 -1
- package/dist/augmentations/synapseAugmentation.js +35 -16
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +39 -59
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +103 -389
- package/dist/augmentations/universalDisplayAugmentation.d.ts +2 -2
- package/dist/augmentations/universalDisplayAugmentation.js +2 -2
- package/dist/brainy-unified.d.ts +106 -0
- package/dist/brainy-unified.js +327 -0
- package/dist/brainy.d.ts +273 -0
- package/dist/brainy.js +1181 -0
- package/dist/brainyData.d.ts +29 -72
- package/dist/brainyData.js +350 -304
- package/dist/brainyDataV3.d.ts +186 -0
- package/dist/brainyDataV3.js +337 -0
- package/dist/browserFramework.d.ts +6 -6
- package/dist/browserFramework.js +11 -8
- package/dist/browserFramework.minimal.d.ts +5 -5
- package/dist/browserFramework.minimal.js +11 -8
- package/dist/config/index.d.ts +2 -2
- package/dist/config/index.js +3 -3
- package/dist/config/modelAutoConfig.d.ts +6 -7
- package/dist/config/modelAutoConfig.js +17 -76
- package/dist/cortex/backupRestore.d.ts +2 -2
- package/dist/cortex/backupRestore.js +85 -27
- package/dist/cortex/healthCheck.d.ts +2 -2
- package/dist/cortex/neuralImport.d.ts +2 -2
- package/dist/cortex/neuralImport.js +18 -13
- package/dist/cortex/performanceMonitor.d.ts +2 -2
- package/dist/critical/model-guardian.d.ts +4 -0
- package/dist/critical/model-guardian.js +31 -11
- package/dist/demo.d.ts +4 -4
- package/dist/demo.js +7 -7
- package/dist/distributed/cacheSync.d.ts +112 -0
- package/dist/distributed/cacheSync.js +265 -0
- package/dist/distributed/coordinator.d.ts +193 -0
- package/dist/distributed/coordinator.js +548 -0
- package/dist/distributed/httpTransport.d.ts +120 -0
- package/dist/distributed/httpTransport.js +446 -0
- package/dist/distributed/index.d.ts +8 -0
- package/dist/distributed/index.js +5 -0
- package/dist/distributed/networkTransport.d.ts +132 -0
- package/dist/distributed/networkTransport.js +633 -0
- package/dist/distributed/queryPlanner.d.ts +104 -0
- package/dist/distributed/queryPlanner.js +327 -0
- package/dist/distributed/readWriteSeparation.d.ts +134 -0
- package/dist/distributed/readWriteSeparation.js +350 -0
- package/dist/distributed/shardManager.d.ts +114 -0
- package/dist/distributed/shardManager.js +357 -0
- package/dist/distributed/shardMigration.d.ts +110 -0
- package/dist/distributed/shardMigration.js +289 -0
- package/dist/distributed/storageDiscovery.d.ts +160 -0
- package/dist/distributed/storageDiscovery.js +551 -0
- package/dist/embeddings/EmbeddingManager.d.ts +0 -4
- package/dist/embeddings/EmbeddingManager.js +21 -26
- package/dist/errors/brainyError.d.ts +5 -1
- package/dist/errors/brainyError.js +12 -0
- package/dist/examples/basicUsage.js +3 -3
- package/dist/graph/graphAdjacencyIndex.d.ts +96 -0
- package/dist/graph/graphAdjacencyIndex.js +288 -0
- package/dist/graph/pathfinding.js +4 -2
- package/dist/hnsw/scaledHNSWSystem.js +11 -2
- package/dist/importManager.js +6 -3
- package/dist/index.d.ts +12 -21
- package/dist/index.js +14 -22
- package/dist/mcp/brainyMCPAdapter.d.ts +4 -4
- package/dist/mcp/brainyMCPAdapter.js +5 -5
- package/dist/mcp/brainyMCPService.d.ts +3 -3
- package/dist/mcp/brainyMCPService.js +3 -11
- package/dist/mcp/mcpAugmentationToolset.js +20 -30
- package/dist/neural/embeddedPatterns.d.ts +1 -1
- package/dist/neural/embeddedPatterns.js +2 -2
- package/dist/neural/entityExtractor.d.ts +65 -0
- package/dist/neural/entityExtractor.js +316 -0
- package/dist/neural/improvedNeuralAPI.js +90 -79
- package/dist/neural/naturalLanguageProcessor.d.ts +155 -10
- package/dist/neural/naturalLanguageProcessor.js +941 -66
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +2 -2
- package/dist/neural/naturalLanguageProcessorStatic.js +3 -3
- package/dist/neural/neuralAPI.js +8 -2
- package/dist/neural/patternLibrary.d.ts +57 -3
- package/dist/neural/patternLibrary.js +348 -13
- package/dist/neural/staticPatternMatcher.d.ts +2 -2
- package/dist/neural/staticPatternMatcher.js +2 -2
- package/dist/shared/default-augmentations.d.ts +3 -3
- package/dist/shared/default-augmentations.js +5 -5
- package/dist/storage/adapters/fileSystemStorage.d.ts +4 -0
- package/dist/storage/adapters/fileSystemStorage.js +54 -1
- package/dist/storage/adapters/memoryStorage.js +13 -8
- package/dist/storage/backwardCompatibility.d.ts +10 -78
- package/dist/storage/backwardCompatibility.js +17 -132
- package/dist/storage/baseStorage.d.ts +6 -0
- package/dist/storage/baseStorage.js +17 -0
- package/dist/storage/cacheManager.js +2 -2
- package/dist/storage/readOnlyOptimizations.js +8 -3
- package/dist/streaming/pipeline.d.ts +154 -0
- package/dist/streaming/pipeline.js +551 -0
- package/dist/triple/TripleIntelligence.d.ts +25 -110
- package/dist/triple/TripleIntelligence.js +4 -574
- package/dist/triple/TripleIntelligenceSystem.d.ts +159 -0
- package/dist/triple/TripleIntelligenceSystem.js +519 -0
- package/dist/types/apiTypes.d.ts +278 -0
- package/dist/types/apiTypes.js +33 -0
- package/dist/types/brainy.types.d.ts +308 -0
- package/dist/types/brainy.types.js +8 -0
- package/dist/types/brainyDataInterface.d.ts +3 -3
- package/dist/types/brainyDataInterface.js +2 -2
- package/dist/types/graphTypes.js +2 -2
- package/dist/utils/cacheAutoConfig.d.ts +3 -3
- package/dist/utils/embedding.js +8 -14
- package/dist/utils/enhancedLogger.d.ts +104 -0
- package/dist/utils/enhancedLogger.js +232 -0
- package/dist/utils/index.d.ts +1 -1
- package/dist/utils/index.js +1 -1
- package/dist/utils/intelligentTypeMapper.d.ts +60 -0
- package/dist/utils/intelligentTypeMapper.js +349 -0
- package/dist/utils/metadataIndex.d.ts +118 -1
- package/dist/utils/metadataIndex.js +539 -16
- package/dist/utils/paramValidation.d.ts +39 -0
- package/dist/utils/paramValidation.js +192 -0
- package/dist/utils/rateLimiter.d.ts +160 -0
- package/dist/utils/rateLimiter.js +271 -0
- package/dist/utils/statistics.d.ts +4 -4
- package/dist/utils/statistics.js +3 -3
- package/dist/utils/structuredLogger.d.ts +146 -0
- package/dist/utils/structuredLogger.js +394 -0
- package/dist/utils/textEncoding.js +2 -1
- package/dist/utils/typeValidation.d.ts +34 -0
- package/dist/utils/typeValidation.js +247 -0
- package/package.json +14 -6
- package/scripts/download-models.cjs +6 -15
- package/dist/augmentations/walAugmentation.d.ts +0 -111
- package/dist/augmentations/walAugmentation.js +0 -519
- package/dist/chat/BrainyChat.d.ts +0 -121
- package/dist/chat/BrainyChat.js +0 -396
- package/dist/chat/ChatCLI.d.ts +0 -61
- package/dist/chat/ChatCLI.js +0 -351
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Neural Entity Extractor using Brainy's NounTypes
|
|
3
|
+
* Uses embeddings and similarity matching for accurate type detection
|
|
4
|
+
*/
|
|
5
|
+
import { NounType } from '../types/graphTypes.js';
|
|
6
|
+
import { Vector } from '../coreTypes.js';
|
|
7
|
+
import type { Brainy } from '../brainy.js';
|
|
8
|
+
/**
 * One entity returned by `NeuralEntityExtractor.extract`.
 */
export interface ExtractedEntity {
    /** The captured substring of the input text. */
    text: string;
    /** The NounType selected for this entity. */
    type: NounType;
    /** Character offsets of the match within the input text. */
    position: { start: number; end: number; };
    /** Score assigned by the classifier; entities below the requested minimum are not returned. */
    confidence: number;
    /** Embedding of `text`; populated only when `includeVectors` is requested. */
    vector?: Vector;
    /** Free-form extra data. NOTE(review): the extractor does not appear to populate this — confirm intended use. */
    metadata?: any;
}
|
|
19
|
+
/**
 * Extracts entities from free text and assigns each one a NounType.
 */
export declare class NeuralEntityExtractor {
    private brain;
    private typeEmbeddings;
    private initialized;
    constructor(brain: Brainy | Brainy<any>);
    /**
     * Build one reference embedding per NounType (runs once).
     */
    private initializeTypeEmbeddings;
    /**
     * Find entities in `text`.
     *
     * @param text - Input to scan.
     * @param options - Optional settings:
     *   `types` restricts the NounTypes considered,
     *   `confidence` is the minimum score an entity needs to be returned,
     *   `includeVectors` attaches the embedding to each entity,
     *   `neuralMatching` set to `false` switches to rule-based classification.
     * @returns The accepted entities with overlapping spans removed.
     */
    extract(text: string, options?: {
        types?: NounType[];
        confidence?: number;
        includeVectors?: boolean;
        neuralMatching?: boolean;
    }): Promise<ExtractedEntity[]>;
    /**
     * Collect candidate spans using regular-expression patterns.
     */
    private extractCandidates;
    /**
     * Confidence boost derived from clue words in the surrounding context.
     */
    private getContextBoost;
    /**
     * Rule-based classification used when neural matching is disabled.
     */
    private classifyByRules;
    /**
     * Produce an embedding vector for a piece of text.
     */
    private getEmbedding;
    /**
     * Cosine similarity of two vectors.
     */
    private cosineSimilarity;
    /**
     * Simple string hash for fallback use.
     */
    private simpleHash;
    /**
     * Drop duplicate and overlapping entities.
     */
    private deduplicateEntities;
}
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Neural Entity Extractor using Brainy's NounTypes
|
|
3
|
+
* Uses embeddings and similarity matching for accurate type detection
|
|
4
|
+
*/
|
|
5
|
+
import { NounType } from '../types/graphTypes.js';
|
|
6
|
+
export class NeuralEntityExtractor {
    /**
     * @param brain - Host object. Only its optional `embed(text)` method is used
     *   here; when it is absent a hash-based fallback vector is generated instead.
     */
    constructor(brain) {
        // Type embeddings for similarity matching (NounType value -> vector)
        this.typeEmbeddings = new Map();
        this.initialized = false;
        this.brain = brain;
    }
    /**
     * Initialize type embeddings for neural matching.
     *
     * Embeds a short list of example words for every NounType and caches the
     * result in `typeEmbeddings`. Subsequent calls return immediately.
     */
    async initializeTypeEmbeddings() {
        if (this.initialized) {
            return;
        }
        // Representative example phrases for each NounType
        const typeExamples = {
            [NounType.Person]: ['John Smith', 'Jane Doe', 'person', 'individual', 'human'],
            [NounType.Organization]: ['Microsoft Corporation', 'company', 'organization', 'business', 'enterprise'],
            [NounType.Location]: ['New York City', 'location', 'place', 'address', 'geography'],
            [NounType.Document]: ['document', 'file', 'report', 'paper', 'text'],
            [NounType.Event]: ['conference', 'meeting', 'event', 'occurrence', 'happening'],
            [NounType.Product]: ['iPhone', 'product', 'item', 'merchandise', 'goods'],
            [NounType.Service]: ['consulting', 'service', 'offering', 'provision'],
            [NounType.Concept]: ['idea', 'concept', 'theory', 'principle', 'notion'],
            [NounType.Media]: ['image', 'video', 'audio', 'media', 'content'],
            [NounType.Message]: ['email', 'message', 'communication', 'note'],
            [NounType.Task]: ['task', 'todo', 'assignment', 'job', 'work'],
            [NounType.Project]: ['project', 'initiative', 'program', 'endeavor'],
            [NounType.Process]: ['workflow', 'process', 'procedure', 'method'],
            [NounType.User]: ['user', 'account', 'profile', 'member'],
            [NounType.Role]: ['manager', 'role', 'position', 'title', 'responsibility'],
            [NounType.Topic]: ['subject', 'topic', 'theme', 'matter'],
            [NounType.Language]: ['English', 'language', 'tongue', 'dialect'],
            [NounType.Currency]: ['dollar', 'currency', 'money', 'USD', 'EUR'],
            [NounType.Measurement]: ['meter', 'measurement', 'unit', 'quantity'],
            [NounType.Contract]: ['agreement', 'contract', 'deal', 'treaty'],
            [NounType.Regulation]: ['law', 'regulation', 'rule', 'policy'],
            [NounType.Resource]: ['resource', 'asset', 'material', 'supply'],
            [NounType.Dataset]: ['database', 'dataset', 'data', 'records'],
            [NounType.Interface]: ['API', 'interface', 'endpoint', 'connection'],
            [NounType.Thing]: ['thing', 'object', 'item', 'entity'],
            [NounType.Content]: ['content', 'material', 'information'],
            [NounType.Collection]: ['collection', 'group', 'set', 'list'],
            [NounType.File]: ['file', 'document', 'archive'],
            [NounType.State]: ['state', 'status', 'condition'],
            [NounType.Hypothesis]: ['hypothesis', 'theory', 'assumption'],
            [NounType.Experiment]: ['experiment', 'test', 'trial', 'study']
        };
        // One embedding per type, built from the space-joined example list
        for (const [type, examples] of Object.entries(typeExamples)) {
            const combinedText = examples.join(' ');
            const embedding = await this.getEmbedding(combinedText);
            this.typeEmbeddings.set(type, embedding);
        }
        this.initialized = true;
    }
    /**
     * Extract entities from text using neural matching.
     *
     * @param text - Text to scan.
     * @param options - Optional settings:
     *   - `types`: NounTypes to consider (defaults to every NounType value).
     *   - `confidence`: minimum score to keep an entity (defaults to 0.6; an
     *     explicit 0 is honoured).
     *   - `includeVectors`: attach the candidate embedding to each entity.
     *   - `neuralMatching`: pass `false` to use rule-based classification.
     * @returns Accepted entities with overlapping spans removed.
     */
    async extract(text, options) {
        await this.initializeTypeEmbeddings();
        const entities = [];
        // `??` rather than `||` so that a caller-supplied threshold of 0 is respected
        const minConfidence = options?.confidence ?? 0.6;
        const targetTypes = options?.types ?? Object.values(NounType);
        const useNeuralMatching = options?.neuralMatching !== false; // Default true
        // Step 1: Extract potential entities using patterns
        const candidates = await this.extractCandidates(text);
        // Step 2: Classify each candidate
        for (const candidate of candidates) {
            let bestType = NounType.Thing;
            let bestConfidence = 0;
            // Kept so includeVectors can reuse it instead of embedding twice
            let candidateVector;
            if (useNeuralMatching) {
                candidateVector = await this.getEmbedding(candidate.text);
                // Find best matching NounType
                for (const type of targetTypes) {
                    const typeVector = this.typeEmbeddings.get(type);
                    if (!typeVector) {
                        continue;
                    }
                    const similarity = this.cosineSimilarity(candidateVector, typeVector);
                    // Apply context-based boosting
                    const contextBoost = this.getContextBoost(candidate.text, candidate.context, type);
                    const adjustedConfidence = similarity * (1 + contextBoost);
                    if (adjustedConfidence > bestConfidence) {
                        bestConfidence = adjustedConfidence;
                        bestType = type;
                    }
                }
            }
            else {
                // Fallback to rule-based classification
                const classification = this.classifyByRules(candidate);
                bestType = classification.type;
                bestConfidence = classification.confidence;
            }
            if (bestConfidence >= minConfidence) {
                const entity = {
                    text: candidate.text,
                    type: bestType,
                    position: candidate.position,
                    confidence: bestConfidence
                };
                if (options?.includeVectors) {
                    entity.vector = candidateVector ?? (await this.getEmbedding(candidate.text));
                }
                entities.push(entity);
            }
        }
        // Remove duplicates and overlaps
        return this.deduplicateEntities(entities);
    }
    /**
     * Extract candidate entities using patterns.
     *
     * @param text - Text to scan.
     * @returns Candidates of the form `{ text, position: { start, end }, context }`,
     *   where `position` delimits exactly `text` inside the input and `context`
     *   is the match plus up to 30 characters on either side.
     */
    async extractCandidates(text) {
        const candidates = [];
        // Enhanced patterns for entity detection
        const patterns = [
            // Capitalized words (potential names, places, organizations)
            /\b([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)\b/g,
            // Email addresses
            /\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b/g,
            // URLs
            /\b(https?:\/\/[^\s]+|www\.[^\s]+)\b/g,
            // Phone numbers
            /\b(\+?\d{1,3}?[- .]?\(?\d{1,4}\)?[- .]?\d{1,4}[- .]?\d{1,4})\b/g,
            // Dates
            /\b(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}|\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2})\b/g,
            // Money amounts. No \b before "$": "$" is a non-word character, so a
            // leading \b would only match when "$" directly follows a letter/digit.
            /(\$[\d,]+(?:\.\d{2})?|\b[\d,]+(?:\.\d{2})?\s*(?:USD|EUR|GBP|JPY|CNY))\b/gi,
            // Percentages. No \b after "%" for the same reason ("50% " has no
            // word boundary between "%" and the space).
            /\b(\d+(?:\.\d+)?%)/g,
            // Hashtags and mentions
            /([#@][a-zA-Z0-9_]+)/g,
            // Product versions
            /\b([A-Z][a-zA-Z0-9]+\s+v?\d+(?:\.\d+)*)\b/g,
            // Quoted strings (potential names, titles)
            /"([^"]+)"/g,
            /'([^']+)'/g
        ];
        for (const pattern of patterns) {
            for (const match of text.matchAll(pattern)) {
                const extractedText = match[1] || match[0];
                // Skip too short or too long
                if (extractedText.length < 2 || extractedText.length > 100) {
                    continue;
                }
                // Context window is measured around the full match
                const contextStart = Math.max(0, match.index - 30);
                const contextEnd = Math.min(text.length, match.index + match[0].length + 30);
                const context = text.substring(contextStart, contextEnd);
                // The capture group can start inside the full match (quoted strings
                // exclude their quotes), so offset the position to the captured text.
                const start = match.index + Math.max(0, match[0].indexOf(extractedText));
                candidates.push({
                    text: extractedText,
                    position: {
                        start,
                        end: start + extractedText.length
                    },
                    context
                });
            }
        }
        return candidates;
    }
    /**
     * Get context-based confidence boost for type matching.
     *
     * @param text - Candidate text (currently unused).
     * @param context - Text surrounding the candidate.
     * @param type - NounType being scored.
     * @returns 0.1 per clue found in the lower-cased context, capped at 0.3;
     *   0 for types without a clue list.
     */
    getContextBoost(text, context, type) {
        const contextLower = context.toLowerCase();
        let boost = 0;
        // Context clues for each type
        const contextClues = {
            [NounType.Person]: ['mr', 'ms', 'mrs', 'dr', 'prof', 'said', 'told', 'wrote'],
            [NounType.Organization]: ['inc', 'corp', 'llc', 'ltd', 'company', 'announced'],
            [NounType.Location]: ['in', 'at', 'from', 'to', 'near', 'located', 'city', 'country'],
            [NounType.Document]: ['file', 'document', 'report', 'paper', 'pdf', 'doc'],
            [NounType.Event]: ['event', 'conference', 'meeting', 'summit', 'on', 'at'],
            [NounType.Product]: ['product', 'version', 'release', 'model', 'buy', 'sell'],
            [NounType.Currency]: ['$', '€', '£', '¥', 'usd', 'eur', 'price', 'cost'],
            [NounType.Message]: ['email', 'message', 'sent', 'received', 'wrote', 'reply'],
            // Add more context clues as needed
        };
        const clues = contextClues[type] || [];
        // NOTE(review): this is a plain substring test, so short clues such as
        // 'in', 'at', 'to' or 'on' also match inside longer words.
        for (const clue of clues) {
            if (contextLower.includes(clue)) {
                boost += 0.1;
            }
        }
        return Math.min(boost, 0.3); // Cap boost at 0.3
    }
    /**
     * Rule-based classification fallback.
     *
     * @param candidate - Object with a `text` property.
     * @returns `{ type, confidence }` chosen by the first rule that matches.
     */
    classifyByRules(candidate) {
        const text = candidate.text;
        // Mention. Must be tested before the email rule: a mention also contains
        // '@', so the broader email check would otherwise always win.
        if (text.startsWith('@')) {
            return { type: NounType.User, confidence: 0.8 };
        }
        // Email
        if (text.includes('@')) {
            return { type: NounType.Message, confidence: 0.9 };
        }
        // URL
        if (text.startsWith('http') || text.startsWith('www.')) {
            return { type: NounType.Resource, confidence: 0.9 };
        }
        // Money
        if (text.startsWith('$') || /\d+\.\d{2}/.test(text)) {
            return { type: NounType.Currency, confidence: 0.85 };
        }
        // Percentage
        if (text.endsWith('%')) {
            return { type: NounType.Measurement, confidence: 0.85 };
        }
        // Date pattern
        if (/\d{1,2}[\/\-]\d{1,2}/.test(text)) {
            return { type: NounType.Event, confidence: 0.7 };
        }
        // Hashtag
        if (text.startsWith('#')) {
            return { type: NounType.Topic, confidence: 0.8 };
        }
        // Capitalized words (likely proper nouns)
        if (/^[A-Z]/.test(text)) {
            // Multiple words - likely organization or person
            const words = text.split(/\s+/);
            if (words.length > 1) {
                // Check for organization suffixes
                if (/\b(Inc|Corp|LLC|Ltd|Co|Group|Foundation|University)\b/i.test(text)) {
                    return { type: NounType.Organization, confidence: 0.75 };
                }
                // Likely a person's name
                return { type: NounType.Person, confidence: 0.65 };
            }
            // Single capitalized word - could be location
            return { type: NounType.Location, confidence: 0.5 };
        }
        // Default to Thing with low confidence
        return { type: NounType.Thing, confidence: 0.3 };
    }
    /**
     * Get embedding for text.
     *
     * Delegates to `brain.embed(text)` when that method exists. Otherwise builds
     * a 384-element vector by accumulating scaled character codes into slots
     * (index modulo 384) and dividing by the text length.
     *
     * @param text - Text to embed.
     * @returns The embedding vector; an all-zero vector for empty text in
     *   fallback mode.
     */
    async getEmbedding(text) {
        if (typeof this.brain?.embed === 'function') {
            return await this.brain.embed(text);
        }
        // Fallback - create simple hash-based vector
        const vector = new Array(384).fill(0);
        if (text.length === 0) {
            // Avoid 0 / 0 = NaN in the normalization below
            return vector;
        }
        for (let i = 0; i < text.length; i++) {
            vector[i % 384] += text.charCodeAt(i) / 255;
        }
        return vector.map((v) => v / text.length);
    }
    /**
     * Calculate cosine similarity between vectors.
     *
     * @param a - First vector; its length drives the iteration.
     * @param b - Second vector.
     * @returns Dot product divided by the product of the norms, or 0 when
     *   either norm is 0.
     */
    cosineSimilarity(a, b) {
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        normA = Math.sqrt(normA);
        normB = Math.sqrt(normB);
        if (normA === 0 || normB === 0) {
            return 0;
        }
        return dotProduct / (normA * normB);
    }
    /**
     * Simple hash function for fallback.
     *
     * Not called by any other method of this class.
     *
     * @param text - String to hash.
     * @returns Non-negative integer hash.
     */
    simpleHash(text) {
        let hash = 0;
        for (let i = 0; i < text.length; i++) {
            const char = text.charCodeAt(i);
            hash = ((hash << 5) - hash) + char;
            hash = hash & hash; // Convert to 32-bit integer
        }
        return Math.abs(hash);
    }
    /**
     * Remove duplicate and overlapping entities.
     *
     * Entities are ordered by start offset (ties: higher confidence first) and
     * accepted greedily; an entity is dropped when its span intersects one that
     * was already accepted. The input array is not modified.
     *
     * @param entities - Entities with `position` and `confidence`.
     * @returns The non-overlapping subset, in start-offset order.
     */
    deduplicateEntities(entities) {
        // Sort a copy by position, then confidence (higher first)
        const ordered = [...entities].sort((a, b) => {
            if (a.position.start !== b.position.start) {
                return a.position.start - b.position.start;
            }
            return b.confidence - a.confidence;
        });
        const result = [];
        for (const entity of ordered) {
            // Two half-open spans intersect when each starts before the other ends
            const hasOverlap = result.some((existing) => entity.position.start < existing.position.end &&
                entity.position.end > existing.position.start);
            if (!hasOverlap) {
                result.push(entity);
            }
        }
        return result;
    }
}
|
|
316
|
+
//# sourceMappingURL=entityExtractor.js.map
|
|
@@ -394,13 +394,9 @@ export class ImprovedNeuralAPI {
|
|
|
394
394
|
let offset = 0;
|
|
395
395
|
while (hasMoreVerbs && processedCount < maxRelationships) {
|
|
396
396
|
// Get batch of verbs using proper pagination API
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
limit: batchSize
|
|
401
|
-
}
|
|
402
|
-
});
|
|
403
|
-
const verbBatch = verbResult.data;
|
|
397
|
+
// Get all items and process in chunks (simplified approach)
|
|
398
|
+
const allItems = await this.brain.find({ query: '', limit: Math.min(1000, maxRelationships) });
|
|
399
|
+
const verbBatch = allItems.slice(offset, offset + batchSize);
|
|
404
400
|
if (verbBatch.length === 0) {
|
|
405
401
|
hasMoreVerbs = false;
|
|
406
402
|
break;
|
|
@@ -523,10 +519,10 @@ export class ImprovedNeuralAPI {
|
|
|
523
519
|
const limit = options.limit || 10;
|
|
524
520
|
const minSimilarity = options.minSimilarity || 0.1;
|
|
525
521
|
// Use HNSW index for efficient neighbor search
|
|
526
|
-
const searchResults = await this.brain.
|
|
527
|
-
|
|
522
|
+
const searchResults = await this.brain.find({
|
|
523
|
+
query: '',
|
|
528
524
|
limit: limit * 2, // Get more than needed for filtering
|
|
529
|
-
|
|
525
|
+
where: options.includeMetadata ? {} : undefined
|
|
530
526
|
});
|
|
531
527
|
// Filter and sort neighbors
|
|
532
528
|
const neighbors = [];
|
|
@@ -574,7 +570,7 @@ export class ImprovedNeuralAPI {
|
|
|
574
570
|
return this.hierarchyCache.get(cacheKey);
|
|
575
571
|
}
|
|
576
572
|
// Get item data
|
|
577
|
-
const item = await this.brain.
|
|
573
|
+
const item = await this.brain.get(id);
|
|
578
574
|
if (!item) {
|
|
579
575
|
throw new Error(`Item with ID ${id} not found`);
|
|
580
576
|
}
|
|
@@ -1134,8 +1130,8 @@ export class ImprovedNeuralAPI {
|
|
|
1134
1130
|
// Similarity implementation methods
|
|
1135
1131
|
async _similarityById(id1, id2, options) {
|
|
1136
1132
|
// Get vectors for both items
|
|
1137
|
-
const item1 = await this.brain.
|
|
1138
|
-
const item2 = await this.brain.
|
|
1133
|
+
const item1 = await this.brain.get(id1);
|
|
1134
|
+
const item2 = await this.brain.get(id2);
|
|
1139
1135
|
if (!item1 || !item2) {
|
|
1140
1136
|
return 0;
|
|
1141
1137
|
}
|
|
@@ -1189,7 +1185,7 @@ export class ImprovedNeuralAPI {
|
|
|
1189
1185
|
return input;
|
|
1190
1186
|
}
|
|
1191
1187
|
else if (this._isId(input)) {
|
|
1192
|
-
const item = await this.brain.
|
|
1188
|
+
const item = await this.brain.get(input);
|
|
1193
1189
|
return item?.vector || [];
|
|
1194
1190
|
}
|
|
1195
1191
|
else if (typeof input === 'string') {
|
|
@@ -1283,7 +1279,7 @@ export class ImprovedNeuralAPI {
|
|
|
1283
1279
|
};
|
|
1284
1280
|
// Get all verbs connecting the items
|
|
1285
1281
|
for (const sourceId of itemIds) {
|
|
1286
|
-
const sourceVerbs = await this.brain.
|
|
1282
|
+
const sourceVerbs = await this.brain.getRelations(sourceId);
|
|
1287
1283
|
for (const verb of sourceVerbs) {
|
|
1288
1284
|
const targetId = verb.target;
|
|
1289
1285
|
if (nodes.has(targetId) && sourceId !== targetId) {
|
|
@@ -1416,17 +1412,20 @@ export class ImprovedNeuralAPI {
|
|
|
1416
1412
|
*/
|
|
1417
1413
|
async _getItemsWithMetadata(itemIds) {
|
|
1418
1414
|
const items = await Promise.all(itemIds.map(async (id) => {
|
|
1419
|
-
const noun = await this.brain.
|
|
1415
|
+
const noun = await this.brain.get(id);
|
|
1416
|
+
if (!noun) {
|
|
1417
|
+
return null;
|
|
1418
|
+
}
|
|
1420
1419
|
return {
|
|
1421
1420
|
id,
|
|
1422
|
-
vector: noun
|
|
1423
|
-
metadata: noun
|
|
1424
|
-
nounType: noun?.noun || '
|
|
1425
|
-
label: noun?.label || id,
|
|
1426
|
-
data: noun
|
|
1421
|
+
vector: noun.vector || [],
|
|
1422
|
+
metadata: noun.metadata || {},
|
|
1423
|
+
nounType: noun.metadata?.noun || noun.metadata?.nounType || 'content',
|
|
1424
|
+
label: noun.metadata?.label || noun.metadata?.data || id,
|
|
1425
|
+
data: noun.metadata
|
|
1427
1426
|
};
|
|
1428
1427
|
}));
|
|
1429
|
-
return items.filter(item => item
|
|
1428
|
+
return items.filter((item) => item !== null);
|
|
1430
1429
|
}
|
|
1431
1430
|
/**
|
|
1432
1431
|
* Group items by their semantic noun types
|
|
@@ -1445,18 +1444,25 @@ export class ImprovedNeuralAPI {
|
|
|
1445
1444
|
// Placeholder implementations for complex operations
|
|
1446
1445
|
async _getAllItemIds() {
|
|
1447
1446
|
// Get all noun IDs from the brain
|
|
1448
|
-
|
|
1449
|
-
|
|
1447
|
+
// Get total item count using find with empty query
|
|
1448
|
+
const allItems = await this.brain.find({ query: '', limit: Number.MAX_SAFE_INTEGER });
|
|
1449
|
+
const stats = { totalNouns: allItems.length || 0 };
|
|
1450
|
+
if (!stats.totalNouns || stats.totalNouns === 0) {
|
|
1450
1451
|
return [];
|
|
1451
1452
|
}
|
|
1452
|
-
//
|
|
1453
|
-
|
|
1454
|
-
const
|
|
1455
|
-
|
|
1453
|
+
// Get nouns with pagination (limit to 10000 for performance)
|
|
1454
|
+
const limit = Math.min(stats.totalNouns, 10000);
|
|
1455
|
+
const result = await this.brain.find({
|
|
1456
|
+
query: '',
|
|
1457
|
+
limit
|
|
1458
|
+
});
|
|
1459
|
+
return result.map((item) => item.id).filter((id) => id);
|
|
1456
1460
|
}
|
|
1457
1461
|
async _getTotalItemCount() {
|
|
1458
|
-
|
|
1459
|
-
|
|
1462
|
+
// Get total item count using find with empty query
|
|
1463
|
+
const allItems = await this.brain.find({ query: '', limit: Number.MAX_SAFE_INTEGER });
|
|
1464
|
+
const stats = { totalNouns: allItems.length || 0 };
|
|
1465
|
+
return stats.totalNouns || 0;
|
|
1460
1466
|
}
|
|
1461
1467
|
// ===== GRAPH ALGORITHM SUPPORTING METHODS =====
|
|
1462
1468
|
_calculateTotalWeight(edges) {
|
|
@@ -1582,13 +1588,13 @@ export class ImprovedNeuralAPI {
|
|
|
1582
1588
|
*/
|
|
1583
1589
|
async _getItemsWithVectors(itemIds) {
|
|
1584
1590
|
const items = await Promise.all(itemIds.map(async (id) => {
|
|
1585
|
-
const noun = await this.brain.
|
|
1591
|
+
const noun = await this.brain.get(id);
|
|
1586
1592
|
return {
|
|
1587
1593
|
id,
|
|
1588
1594
|
vector: noun?.vector || []
|
|
1589
1595
|
};
|
|
1590
1596
|
}));
|
|
1591
|
-
return items.filter(item => item.vector.length > 0);
|
|
1597
|
+
return items.filter((item) => item !== null && item.vector.length > 0);
|
|
1592
1598
|
}
|
|
1593
1599
|
/**
|
|
1594
1600
|
* Calculate centroid from items using existing distance functions
|
|
@@ -1844,47 +1850,8 @@ export class ImprovedNeuralAPI {
|
|
|
1844
1850
|
async _generateIntelligentClusterLabel(members, algorithm) {
|
|
1845
1851
|
if (members.length === 0)
|
|
1846
1852
|
return `${algorithm}-cluster`;
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
const TripleIntelligenceEngine = await import('../triple/TripleIntelligence.js')
|
|
1850
|
-
.then(m => m.TripleIntelligenceEngine)
|
|
1851
|
-
.catch(() => null);
|
|
1852
|
-
if (!TripleIntelligenceEngine) {
|
|
1853
|
-
return this._generateClusterLabel(members, algorithm);
|
|
1854
|
-
}
|
|
1855
|
-
const intelligence = new TripleIntelligenceEngine(this.brain);
|
|
1856
|
-
// Extract key features from cluster members
|
|
1857
|
-
const memberData = members.map(m => ({
|
|
1858
|
-
id: m.id,
|
|
1859
|
-
type: m.nounType,
|
|
1860
|
-
label: m.label,
|
|
1861
|
-
data: m.data
|
|
1862
|
-
}));
|
|
1863
|
-
// Use Triple Intelligence to analyze the cluster and generate label
|
|
1864
|
-
const prompt = `Analyze this cluster of ${memberData.length} related items and provide a concise, descriptive label (2-4 words):
|
|
1865
|
-
|
|
1866
|
-
Items:
|
|
1867
|
-
${memberData.map(item => `- ${item.label || item.id} (${item.type})`).join('\n')}
|
|
1868
|
-
|
|
1869
|
-
The items were grouped using ${algorithm} clustering. What is the most appropriate label that captures their common theme or relationship?`;
|
|
1870
|
-
const response = await intelligence.find({
|
|
1871
|
-
like: prompt,
|
|
1872
|
-
limit: 1
|
|
1873
|
-
});
|
|
1874
|
-
// Extract clean label from response
|
|
1875
|
-
const firstResult = response[0];
|
|
1876
|
-
const label = (firstResult?.metadata?.content || firstResult?.id || `${algorithm}-cluster`)
|
|
1877
|
-
.toString()
|
|
1878
|
-
.replace(/^(Label:|Cluster:|Theme:)/i, '')
|
|
1879
|
-
.trim()
|
|
1880
|
-
.replace(/['"]/g, '')
|
|
1881
|
-
.slice(0, 50);
|
|
1882
|
-
return label || `${algorithm}-cluster`;
|
|
1883
|
-
}
|
|
1884
|
-
catch (error) {
|
|
1885
|
-
// Fallback to simple labeling
|
|
1886
|
-
return this._generateClusterLabel(members, algorithm);
|
|
1887
|
-
}
|
|
1853
|
+
// Use simple labeling - Triple Intelligence doesn't generate labels from prompts
|
|
1854
|
+
return this._generateClusterLabel(members, algorithm);
|
|
1888
1855
|
}
|
|
1889
1856
|
/**
|
|
1890
1857
|
* Generate simple cluster labels based on semantic analysis
|
|
@@ -2148,7 +2115,7 @@ The items were grouped using ${algorithm} clustering. What is the most appropria
|
|
|
2148
2115
|
*/
|
|
2149
2116
|
async _getRecentSample(itemIds, sampleSize) {
|
|
2150
2117
|
const items = await Promise.all(itemIds.map(async (id) => {
|
|
2151
|
-
const noun = await this.brain.
|
|
2118
|
+
const noun = await this.brain.get(id);
|
|
2152
2119
|
return {
|
|
2153
2120
|
id,
|
|
2154
2121
|
createdAt: noun?.createdAt || new Date(0)
|
|
@@ -2163,8 +2130,8 @@ The items were grouped using ${algorithm} clustering. What is the most appropria
|
|
|
2163
2130
|
*/
|
|
2164
2131
|
async _getImportantSample(itemIds, sampleSize) {
|
|
2165
2132
|
const items = await Promise.all(itemIds.map(async (id) => {
|
|
2166
|
-
const verbs = await this.brain.
|
|
2167
|
-
const noun = await this.brain.
|
|
2133
|
+
const verbs = await this.brain.getRelations(id);
|
|
2134
|
+
const noun = await this.brain.get(id);
|
|
2168
2135
|
// Calculate importance score
|
|
2169
2136
|
const connectionScore = verbs.length;
|
|
2170
2137
|
const dataScore = noun?.data ? Object.keys(noun.data).length : 0;
|
|
@@ -2246,7 +2213,15 @@ The items were grouped using ${algorithm} clustering. What is the most appropria
|
|
|
2246
2213
|
}
|
|
2247
2214
|
_calculateDomainConfidence(cluster, domainItems) {
|
|
2248
2215
|
// Calculate how well this cluster represents the domain
|
|
2249
|
-
|
|
2216
|
+
// Based on cluster density and coherence
|
|
2217
|
+
const density = cluster.members.length / (cluster.members.length + 10); // Normalize
|
|
2218
|
+
const coherence = cluster.cohesion || 0.5; // Use cluster's cohesion if available
|
|
2219
|
+
// Domain relevance: what fraction of cluster members are from this domain
|
|
2220
|
+
const domainMemberCount = cluster.members.filter(id => domainItems.some(item => item.id === id)).length;
|
|
2221
|
+
const domainRelevance = cluster.members.length > 0
|
|
2222
|
+
? domainMemberCount / cluster.members.length
|
|
2223
|
+
: 0;
|
|
2224
|
+
return (density * 0.3 + coherence * 0.3 + domainRelevance * 0.4); // Weighted average
|
|
2250
2225
|
}
|
|
2251
2226
|
async _findCrossDomainMembers(cluster, threshold) {
|
|
2252
2227
|
// Find members that might belong to multiple domains
|
|
@@ -2282,11 +2257,47 @@ The items were grouped using ${algorithm} clustering. What is the most appropria
|
|
|
2282
2257
|
}
|
|
2283
2258
|
async _calculateItemToClusterSimilarity(itemId, cluster) {
|
|
2284
2259
|
// Calculate similarity between an item and a cluster centroid
|
|
2285
|
-
|
|
2260
|
+
const item = await this.brain.get(itemId);
|
|
2261
|
+
if (!item || !item.vector || !cluster.centroid) {
|
|
2262
|
+
return 0; // No similarity if vectors missing
|
|
2263
|
+
}
|
|
2264
|
+
// Calculate cosine similarity
|
|
2265
|
+
const dotProduct = item.vector.reduce((sum, val, i) => sum + val * cluster.centroid[i], 0);
|
|
2266
|
+
const itemMagnitude = Math.sqrt(item.vector.reduce((sum, val) => sum + val * val, 0));
|
|
2267
|
+
const centroidMagnitude = Math.sqrt(cluster.centroid.reduce((sum, val) => sum + val * val, 0));
|
|
2268
|
+
if (itemMagnitude === 0 || centroidMagnitude === 0) {
|
|
2269
|
+
return 0;
|
|
2270
|
+
}
|
|
2271
|
+
return dotProduct / (itemMagnitude * centroidMagnitude);
|
|
2286
2272
|
}
|
|
2287
2273
|
async _recalculateClusterCentroid(cluster) {
|
|
2288
2274
|
// Recalculate centroid after adding new members
|
|
2289
|
-
|
|
2275
|
+
if (cluster.members.length === 0) {
|
|
2276
|
+
return cluster.centroid; // Keep existing if no members
|
|
2277
|
+
}
|
|
2278
|
+
// Get all member vectors
|
|
2279
|
+
const memberVectors = [];
|
|
2280
|
+
for (const memberId of cluster.members) {
|
|
2281
|
+
const member = await this.brain.get(memberId);
|
|
2282
|
+
if (member && member.vector) {
|
|
2283
|
+
memberVectors.push(member.vector);
|
|
2284
|
+
}
|
|
2285
|
+
}
|
|
2286
|
+
if (memberVectors.length === 0) {
|
|
2287
|
+
return cluster.centroid; // Keep existing if no valid vectors
|
|
2288
|
+
}
|
|
2289
|
+
// Calculate mean vector (centroid)
|
|
2290
|
+
const dimensions = memberVectors[0].length;
|
|
2291
|
+
const newCentroid = new Array(dimensions).fill(0);
|
|
2292
|
+
for (const vector of memberVectors) {
|
|
2293
|
+
for (let i = 0; i < dimensions; i++) {
|
|
2294
|
+
newCentroid[i] += vector[i];
|
|
2295
|
+
}
|
|
2296
|
+
}
|
|
2297
|
+
for (let i = 0; i < dimensions; i++) {
|
|
2298
|
+
newCentroid[i] /= memberVectors.length;
|
|
2299
|
+
}
|
|
2300
|
+
return newCentroid;
|
|
2290
2301
|
}
|
|
2291
2302
|
async _calculateSimilarity(id1, id2) {
|
|
2292
2303
|
return await this.similar(id1, id2);
|