@soulcraft/brainy 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +53 -3
- package/README.md +427 -111
- package/bin/brainy.js +340 -62
- package/dist/api/ConfigAPI.d.ts +67 -0
- package/dist/api/ConfigAPI.js +166 -0
- package/dist/api/DataAPI.d.ts +123 -0
- package/dist/api/DataAPI.js +391 -0
- package/dist/api/SecurityAPI.d.ts +50 -0
- package/dist/api/SecurityAPI.js +139 -0
- package/dist/api/UniversalImportAPI.d.ts +134 -0
- package/dist/api/UniversalImportAPI.js +615 -0
- package/dist/augmentationManager.js +12 -7
- package/dist/augmentationPipeline.d.ts +0 -61
- package/dist/augmentationPipeline.js +0 -87
- package/dist/augmentationRegistry.d.ts +1 -1
- package/dist/augmentationRegistry.js +1 -1
- package/dist/augmentations/apiServerAugmentation.d.ts +27 -1
- package/dist/augmentations/apiServerAugmentation.js +290 -9
- package/dist/augmentations/auditLogAugmentation.d.ts +109 -0
- package/dist/augmentations/auditLogAugmentation.js +358 -0
- package/dist/augmentations/batchProcessingAugmentation.d.ts +3 -2
- package/dist/augmentations/batchProcessingAugmentation.js +123 -22
- package/dist/augmentations/brainyAugmentation.d.ts +142 -8
- package/dist/augmentations/brainyAugmentation.js +179 -2
- package/dist/augmentations/cacheAugmentation.d.ts +8 -5
- package/dist/augmentations/cacheAugmentation.js +116 -17
- package/dist/augmentations/conduitAugmentations.d.ts +2 -2
- package/dist/augmentations/conduitAugmentations.js +2 -2
- package/dist/augmentations/configResolver.d.ts +122 -0
- package/dist/augmentations/configResolver.js +440 -0
- package/dist/augmentations/connectionPoolAugmentation.d.ts +3 -1
- package/dist/augmentations/connectionPoolAugmentation.js +37 -12
- package/dist/augmentations/defaultAugmentations.d.ts +14 -10
- package/dist/augmentations/defaultAugmentations.js +16 -11
- package/dist/augmentations/discovery/catalogDiscovery.d.ts +142 -0
- package/dist/augmentations/discovery/catalogDiscovery.js +249 -0
- package/dist/augmentations/discovery/localDiscovery.d.ts +84 -0
- package/dist/augmentations/discovery/localDiscovery.js +246 -0
- package/dist/augmentations/discovery/runtimeLoader.d.ts +97 -0
- package/dist/augmentations/discovery/runtimeLoader.js +337 -0
- package/dist/augmentations/discovery.d.ts +152 -0
- package/dist/augmentations/discovery.js +441 -0
- package/dist/augmentations/display/cache.d.ts +130 -0
- package/dist/augmentations/display/cache.js +319 -0
- package/dist/augmentations/display/fieldPatterns.d.ts +52 -0
- package/dist/augmentations/display/fieldPatterns.js +393 -0
- package/dist/augmentations/display/iconMappings.d.ts +57 -0
- package/dist/augmentations/display/iconMappings.js +68 -0
- package/dist/augmentations/display/intelligentComputation.d.ts +109 -0
- package/dist/augmentations/display/intelligentComputation.js +462 -0
- package/dist/augmentations/display/types.d.ts +203 -0
- package/dist/augmentations/display/types.js +7 -0
- package/dist/augmentations/entityRegistryAugmentation.d.ts +3 -1
- package/dist/augmentations/entityRegistryAugmentation.js +5 -1
- package/dist/augmentations/indexAugmentation.d.ts +5 -3
- package/dist/augmentations/indexAugmentation.js +5 -2
- package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +24 -7
- package/dist/augmentations/intelligentVerbScoringAugmentation.js +111 -27
- package/dist/augmentations/manifest.d.ts +176 -0
- package/dist/augmentations/manifest.js +8 -0
- package/dist/augmentations/marketplace/AugmentationMarketplace.d.ts +168 -0
- package/dist/augmentations/marketplace/AugmentationMarketplace.js +329 -0
- package/dist/augmentations/marketplace/cli.d.ts +47 -0
- package/dist/augmentations/marketplace/cli.js +265 -0
- package/dist/augmentations/metricsAugmentation.d.ts +3 -3
- package/dist/augmentations/metricsAugmentation.js +2 -2
- package/dist/augmentations/monitoringAugmentation.d.ts +3 -3
- package/dist/augmentations/monitoringAugmentation.js +2 -2
- package/dist/augmentations/neuralImport.d.ts +1 -1
- package/dist/augmentations/neuralImport.js +4 -4
- package/dist/augmentations/rateLimitAugmentation.d.ts +82 -0
- package/dist/augmentations/rateLimitAugmentation.js +321 -0
- package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +2 -2
- package/dist/augmentations/requestDeduplicatorAugmentation.js +1 -1
- package/dist/augmentations/storageAugmentation.d.ts +1 -1
- package/dist/augmentations/storageAugmentation.js +2 -2
- package/dist/augmentations/storageAugmentations.d.ts +37 -8
- package/dist/augmentations/storageAugmentations.js +204 -15
- package/dist/augmentations/synapseAugmentation.d.ts +1 -1
- package/dist/augmentations/synapseAugmentation.js +35 -16
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +83 -0
- package/dist/augmentations/typeMatching/brainyTypes.js +425 -0
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +39 -59
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +103 -389
- package/dist/augmentations/universalDisplayAugmentation.d.ts +191 -0
- package/dist/augmentations/universalDisplayAugmentation.js +371 -0
- package/dist/brainy-unified.d.ts +106 -0
- package/dist/brainy-unified.js +327 -0
- package/dist/brainy.d.ts +277 -0
- package/dist/brainy.js +1241 -0
- package/dist/brainyData.d.ts +56 -111
- package/dist/brainyData.js +912 -756
- package/dist/brainyDataV3.d.ts +186 -0
- package/dist/brainyDataV3.js +337 -0
- package/dist/config/distributedPresets-new.d.ts +118 -0
- package/dist/config/distributedPresets-new.js +318 -0
- package/dist/config/distributedPresets.d.ts +118 -0
- package/dist/config/distributedPresets.js +318 -0
- package/dist/config/extensibleConfig.d.ts +99 -0
- package/dist/config/extensibleConfig.js +268 -0
- package/dist/config/index.d.ts +17 -0
- package/dist/config/index.js +35 -0
- package/dist/config/modelAutoConfig.d.ts +32 -0
- package/dist/config/modelAutoConfig.js +139 -0
- package/dist/config/modelPrecisionManager.d.ts +42 -0
- package/dist/config/modelPrecisionManager.js +98 -0
- package/dist/config/sharedConfigManager.d.ts +67 -0
- package/dist/config/sharedConfigManager.js +215 -0
- package/dist/config/storageAutoConfig.d.ts +41 -0
- package/dist/config/storageAutoConfig.js +328 -0
- package/dist/config/zeroConfig.d.ts +68 -0
- package/dist/config/zeroConfig.js +301 -0
- package/dist/cortex/backupRestore.d.ts +2 -2
- package/dist/cortex/backupRestore.js +85 -27
- package/dist/cortex/healthCheck.d.ts +2 -2
- package/dist/cortex/neuralImport.d.ts +2 -2
- package/dist/cortex/neuralImport.js +18 -13
- package/dist/cortex/performanceMonitor.d.ts +2 -2
- package/dist/critical/model-guardian.d.ts +4 -0
- package/dist/critical/model-guardian.js +31 -11
- package/dist/demo.d.ts +4 -4
- package/dist/demo.js +7 -7
- package/dist/distributed/cacheSync.d.ts +112 -0
- package/dist/distributed/cacheSync.js +265 -0
- package/dist/distributed/coordinator.d.ts +193 -0
- package/dist/distributed/coordinator.js +548 -0
- package/dist/distributed/httpTransport.d.ts +120 -0
- package/dist/distributed/httpTransport.js +446 -0
- package/dist/distributed/index.d.ts +8 -0
- package/dist/distributed/index.js +5 -0
- package/dist/distributed/networkTransport.d.ts +132 -0
- package/dist/distributed/networkTransport.js +633 -0
- package/dist/distributed/queryPlanner.d.ts +104 -0
- package/dist/distributed/queryPlanner.js +327 -0
- package/dist/distributed/readWriteSeparation.d.ts +134 -0
- package/dist/distributed/readWriteSeparation.js +350 -0
- package/dist/distributed/shardManager.d.ts +114 -0
- package/dist/distributed/shardManager.js +357 -0
- package/dist/distributed/shardMigration.d.ts +110 -0
- package/dist/distributed/shardMigration.js +289 -0
- package/dist/distributed/storageDiscovery.d.ts +160 -0
- package/dist/distributed/storageDiscovery.js +551 -0
- package/dist/embeddings/CachedEmbeddings.d.ts +40 -0
- package/dist/embeddings/CachedEmbeddings.js +146 -0
- package/dist/embeddings/EmbeddingManager.d.ts +102 -0
- package/dist/embeddings/EmbeddingManager.js +291 -0
- package/dist/embeddings/SingletonModelManager.d.ts +95 -0
- package/dist/embeddings/SingletonModelManager.js +220 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/lightweight-embedder.d.ts +0 -1
- package/dist/embeddings/lightweight-embedder.js +4 -12
- package/dist/embeddings/model-manager.d.ts +11 -0
- package/dist/embeddings/model-manager.js +43 -7
- package/dist/embeddings/universal-memory-manager.d.ts +1 -1
- package/dist/embeddings/universal-memory-manager.js +27 -67
- package/dist/embeddings/worker-embedding.js +4 -8
- package/dist/errors/brainyError.d.ts +5 -1
- package/dist/errors/brainyError.js +12 -0
- package/dist/examples/basicUsage.js +7 -4
- package/dist/graph/graphAdjacencyIndex.d.ts +96 -0
- package/dist/graph/graphAdjacencyIndex.js +288 -0
- package/dist/graph/pathfinding.js +4 -2
- package/dist/hnsw/scaledHNSWSystem.js +11 -2
- package/dist/importManager.js +8 -5
- package/dist/index.d.ts +17 -22
- package/dist/index.js +37 -23
- package/dist/mcp/brainyMCPAdapter.d.ts +4 -4
- package/dist/mcp/brainyMCPAdapter.js +5 -5
- package/dist/mcp/brainyMCPService.d.ts +3 -3
- package/dist/mcp/brainyMCPService.js +3 -11
- package/dist/mcp/mcpAugmentationToolset.js +20 -30
- package/dist/neural/embeddedPatterns.d.ts +1 -1
- package/dist/neural/embeddedPatterns.js +2 -2
- package/dist/neural/entityExtractor.d.ts +65 -0
- package/dist/neural/entityExtractor.js +316 -0
- package/dist/neural/improvedNeuralAPI.d.ts +357 -0
- package/dist/neural/improvedNeuralAPI.js +2628 -0
- package/dist/neural/naturalLanguageProcessor.d.ts +155 -10
- package/dist/neural/naturalLanguageProcessor.js +941 -66
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +2 -2
- package/dist/neural/naturalLanguageProcessorStatic.js +3 -3
- package/dist/neural/neuralAPI.js +8 -2
- package/dist/neural/patternLibrary.d.ts +57 -3
- package/dist/neural/patternLibrary.js +348 -13
- package/dist/neural/staticPatternMatcher.d.ts +2 -2
- package/dist/neural/staticPatternMatcher.js +2 -2
- package/dist/neural/types.d.ts +287 -0
- package/dist/neural/types.js +24 -0
- package/dist/shared/default-augmentations.d.ts +3 -3
- package/dist/shared/default-augmentations.js +5 -5
- package/dist/storage/adapters/baseStorageAdapter.d.ts +42 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +26 -2
- package/dist/storage/adapters/fileSystemStorage.js +218 -15
- package/dist/storage/adapters/memoryStorage.d.ts +4 -4
- package/dist/storage/adapters/memoryStorage.js +17 -12
- package/dist/storage/adapters/opfsStorage.d.ts +2 -2
- package/dist/storage/adapters/opfsStorage.js +2 -2
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -2
- package/dist/storage/adapters/s3CompatibleStorage.js +2 -2
- package/dist/storage/backwardCompatibility.d.ts +10 -78
- package/dist/storage/backwardCompatibility.js +17 -132
- package/dist/storage/baseStorage.d.ts +18 -2
- package/dist/storage/baseStorage.js +74 -3
- package/dist/storage/cacheManager.js +2 -2
- package/dist/storage/readOnlyOptimizations.js +8 -3
- package/dist/streaming/pipeline.d.ts +154 -0
- package/dist/streaming/pipeline.js +551 -0
- package/dist/triple/TripleIntelligence.d.ts +25 -110
- package/dist/triple/TripleIntelligence.js +4 -574
- package/dist/triple/TripleIntelligenceSystem.d.ts +159 -0
- package/dist/triple/TripleIntelligenceSystem.js +519 -0
- package/dist/types/apiTypes.d.ts +278 -0
- package/dist/types/apiTypes.js +33 -0
- package/dist/types/brainy.types.d.ts +308 -0
- package/dist/types/brainy.types.js +8 -0
- package/dist/types/brainyDataInterface.d.ts +5 -8
- package/dist/types/brainyDataInterface.js +2 -2
- package/dist/types/graphTypes.js +2 -2
- package/dist/universal/crypto.d.ts +11 -1
- package/dist/universal/crypto.js +24 -93
- package/dist/universal/events.d.ts +3 -2
- package/dist/universal/events.js +6 -75
- package/dist/universal/fs.d.ts +2 -3
- package/dist/universal/fs.js +5 -211
- package/dist/universal/path.d.ts +3 -2
- package/dist/universal/path.js +22 -78
- package/dist/universal/uuid.d.ts +1 -1
- package/dist/universal/uuid.js +1 -1
- package/dist/utils/brainyTypes.d.ts +217 -0
- package/dist/utils/brainyTypes.js +261 -0
- package/dist/utils/cacheAutoConfig.d.ts +3 -3
- package/dist/utils/embedding.d.ts +9 -4
- package/dist/utils/embedding.js +89 -26
- package/dist/utils/enhancedLogger.d.ts +104 -0
- package/dist/utils/enhancedLogger.js +232 -0
- package/dist/utils/hybridModelManager.d.ts +19 -28
- package/dist/utils/hybridModelManager.js +36 -200
- package/dist/utils/index.d.ts +1 -1
- package/dist/utils/index.js +1 -1
- package/dist/utils/intelligentTypeMapper.d.ts +60 -0
- package/dist/utils/intelligentTypeMapper.js +349 -0
- package/dist/utils/metadataIndex.d.ts +118 -1
- package/dist/utils/metadataIndex.js +539 -16
- package/dist/utils/nodeVersionCheck.d.ts +24 -0
- package/dist/utils/nodeVersionCheck.js +65 -0
- package/dist/utils/paramValidation.d.ts +39 -0
- package/dist/utils/paramValidation.js +192 -0
- package/dist/utils/rateLimiter.d.ts +160 -0
- package/dist/utils/rateLimiter.js +271 -0
- package/dist/utils/statistics.d.ts +4 -4
- package/dist/utils/statistics.js +3 -3
- package/dist/utils/structuredLogger.d.ts +146 -0
- package/dist/utils/structuredLogger.js +394 -0
- package/dist/utils/textEncoding.js +2 -1
- package/dist/utils/typeValidation.d.ts +59 -0
- package/dist/utils/typeValidation.js +374 -0
- package/dist/utils/version.js +19 -3
- package/package.json +15 -17
- package/scripts/download-models.cjs +94 -20
- package/dist/augmentations/walAugmentation.d.ts +0 -109
- package/dist/augmentations/walAugmentation.js +0 -516
- package/dist/browserFramework.d.ts +0 -15
- package/dist/browserFramework.js +0 -31
- package/dist/browserFramework.minimal.d.ts +0 -14
- package/dist/browserFramework.minimal.js +0 -31
- package/dist/chat/BrainyChat.d.ts +0 -121
- package/dist/chat/BrainyChat.js +0 -396
- package/dist/chat/ChatCLI.d.ts +0 -61
- package/dist/chat/ChatCLI.js +0 -351
|
@@ -9,29 +9,369 @@
|
|
|
9
9
|
* - Progressive learning from usage
|
|
10
10
|
*/
|
|
11
11
|
import { PatternLibrary } from './patternLibrary.js';
|
|
12
|
+
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
12
13
|
export class NaturalLanguageProcessor {
|
|
13
14
|
constructor(brain) {
|
|
14
15
|
this.initialized = false;
|
|
16
|
+
this.embeddingCache = new Map();
|
|
17
|
+
// Field discovery with semantic matching
|
|
18
|
+
this.fieldEmbeddings = new Map();
|
|
19
|
+
this.fieldNames = [];
|
|
20
|
+
this.lastFieldRefresh = 0;
|
|
21
|
+
this.FIELD_REFRESH_INTERVAL = 60000; // Refresh every minute
|
|
22
|
+
// Type embeddings for NounType and VerbType matching
|
|
23
|
+
this.nounTypeEmbeddings = new Map();
|
|
24
|
+
this.verbTypeEmbeddings = new Map();
|
|
25
|
+
this.typeEmbeddingsInitialized = false;
|
|
15
26
|
this.brain = brain;
|
|
16
27
|
this.patternLibrary = new PatternLibrary(brain);
|
|
17
28
|
this.queryHistory = [];
|
|
18
29
|
}
|
|
30
|
+
/**
|
|
31
|
+
* Get embedding directly using brain's embed method
|
|
32
|
+
*/
|
|
33
|
+
async getEmbedding(text) {
|
|
34
|
+
// Check cache first
|
|
35
|
+
if (this.embeddingCache.has(text)) {
|
|
36
|
+
return this.embeddingCache.get(text);
|
|
37
|
+
}
|
|
38
|
+
// Use brain's embed method directly to avoid recursion
|
|
39
|
+
const embedding = await this.brain.embed(text);
|
|
40
|
+
// Cache the embedding
|
|
41
|
+
this.embeddingCache.set(text, embedding);
|
|
42
|
+
return embedding;
|
|
43
|
+
}
|
|
19
44
|
/**
|
|
20
45
|
* Initialize the pattern library (lazy loading)
|
|
21
46
|
*/
|
|
22
47
|
async ensureInitialized() {
|
|
23
48
|
if (!this.initialized) {
|
|
24
49
|
await this.patternLibrary.init();
|
|
50
|
+
await this.initializeTypeEmbeddings(); // Embed all noun/verb types
|
|
51
|
+
await this.refreshFieldEmbeddings(); // Load field embeddings
|
|
25
52
|
this.initialized = true;
|
|
26
53
|
}
|
|
27
54
|
}
|
|
55
|
+
/**
|
|
56
|
+
* Initialize embeddings for all NounTypes and VerbTypes
|
|
57
|
+
* These are fixed types that never change - perfect for caching
|
|
58
|
+
*/
|
|
59
|
+
async initializeTypeEmbeddings() {
|
|
60
|
+
if (this.typeEmbeddingsInitialized)
|
|
61
|
+
return;
|
|
62
|
+
// Embed all NounTypes (30+ types)
|
|
63
|
+
for (const [key, value] of Object.entries(NounType)) {
|
|
64
|
+
if (typeof value === 'string') {
|
|
65
|
+
// Embed both the key (Person) and value (person)
|
|
66
|
+
const keyEmbedding = await this.getEmbedding(key);
|
|
67
|
+
const valueEmbedding = await this.getEmbedding(value);
|
|
68
|
+
this.nounTypeEmbeddings.set(key, keyEmbedding);
|
|
69
|
+
this.nounTypeEmbeddings.set(value, valueEmbedding);
|
|
70
|
+
// Also embed common variations
|
|
71
|
+
const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
|
|
72
|
+
if (spaceSeparated !== value) {
|
|
73
|
+
const variantEmbedding = await this.getEmbedding(spaceSeparated);
|
|
74
|
+
this.nounTypeEmbeddings.set(spaceSeparated, variantEmbedding);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
// Embed all VerbTypes (40+ types)
|
|
79
|
+
for (const [key, value] of Object.entries(VerbType)) {
|
|
80
|
+
if (typeof value === 'string') {
|
|
81
|
+
const keyEmbedding = await this.getEmbedding(key);
|
|
82
|
+
const valueEmbedding = await this.getEmbedding(value);
|
|
83
|
+
this.verbTypeEmbeddings.set(key, keyEmbedding);
|
|
84
|
+
this.verbTypeEmbeddings.set(value, valueEmbedding);
|
|
85
|
+
// Common variations for verbs
|
|
86
|
+
const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
|
|
87
|
+
if (spaceSeparated !== value) {
|
|
88
|
+
const variantEmbedding = await this.getEmbedding(spaceSeparated);
|
|
89
|
+
this.verbTypeEmbeddings.set(spaceSeparated, variantEmbedding);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
this.typeEmbeddingsInitialized = true;
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Find best matching NounType using semantic similarity
|
|
97
|
+
*/
|
|
98
|
+
async findBestNounType(term) {
|
|
99
|
+
const termEmbedding = await this.getEmbedding(term);
|
|
100
|
+
let bestMatch = null;
|
|
101
|
+
let bestScore = 0;
|
|
102
|
+
for (const [typeName, typeEmbedding] of this.nounTypeEmbeddings) {
|
|
103
|
+
const similarity = this.cosineSimilarity(termEmbedding, typeEmbedding);
|
|
104
|
+
if (similarity > bestScore && similarity > 0.75) { // Higher threshold for types
|
|
105
|
+
bestScore = similarity;
|
|
106
|
+
bestMatch = typeName;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
// Map back to the actual NounType value
|
|
110
|
+
if (bestMatch) {
|
|
111
|
+
for (const [key, value] of Object.entries(NounType)) {
|
|
112
|
+
if (key === bestMatch || value === bestMatch ||
|
|
113
|
+
key.toLowerCase() === bestMatch.toLowerCase()) {
|
|
114
|
+
return { type: value, confidence: bestScore };
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
return { type: null, confidence: 0 };
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* Find best matching VerbType using semantic similarity
|
|
122
|
+
*/
|
|
123
|
+
async findBestVerbType(term) {
|
|
124
|
+
const termEmbedding = await this.getEmbedding(term);
|
|
125
|
+
let bestMatch = null;
|
|
126
|
+
let bestScore = 0;
|
|
127
|
+
for (const [typeName, typeEmbedding] of this.verbTypeEmbeddings) {
|
|
128
|
+
const similarity = this.cosineSimilarity(termEmbedding, typeEmbedding);
|
|
129
|
+
if (similarity > bestScore && similarity > 0.75) {
|
|
130
|
+
bestScore = similarity;
|
|
131
|
+
bestMatch = typeName;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
// Map back to the actual VerbType value
|
|
135
|
+
if (bestMatch) {
|
|
136
|
+
for (const [key, value] of Object.entries(VerbType)) {
|
|
137
|
+
if (key === bestMatch || value === bestMatch ||
|
|
138
|
+
key.toLowerCase() === bestMatch.toLowerCase()) {
|
|
139
|
+
return { type: value, confidence: bestScore };
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
return { type: null, confidence: 0 };
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Refresh field embeddings from metadata index
|
|
147
|
+
* Creates embeddings for all indexed fields for semantic matching
|
|
148
|
+
*/
|
|
149
|
+
async refreshFieldEmbeddings() {
|
|
150
|
+
const now = Date.now();
|
|
151
|
+
if (now - this.lastFieldRefresh < this.FIELD_REFRESH_INTERVAL) {
|
|
152
|
+
return; // Skip if recently refreshed
|
|
153
|
+
}
|
|
154
|
+
try {
|
|
155
|
+
// Get actual indexed fields from metadata
|
|
156
|
+
this.fieldNames = await this.brain.getAvailableFields();
|
|
157
|
+
// Create embeddings for each field name for semantic matching
|
|
158
|
+
for (const field of this.fieldNames) {
|
|
159
|
+
if (!this.fieldEmbeddings.has(field)) {
|
|
160
|
+
// Embed the field name itself
|
|
161
|
+
const fieldEmbedding = await this.getEmbedding(field);
|
|
162
|
+
this.fieldEmbeddings.set(field, fieldEmbedding);
|
|
163
|
+
// Also embed common variations
|
|
164
|
+
const variations = this.getFieldVariations(field);
|
|
165
|
+
for (const variant of variations) {
|
|
166
|
+
const variantEmbedding = await this.getEmbedding(variant);
|
|
167
|
+
this.fieldEmbeddings.set(variant, variantEmbedding);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
this.lastFieldRefresh = now;
|
|
172
|
+
}
|
|
173
|
+
catch (error) {
|
|
174
|
+
console.warn('Failed to refresh field embeddings:', error);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
/**
|
|
178
|
+
* Generate linguistic variations of field names from naming conventions
* (case styles plus a small fixed list of common prefixes and suffixes)
|
|
179
|
+
* Uses algorithmic patterns to create natural variations
|
|
180
|
+
*/
|
|
181
|
+
getFieldVariations(field) {
|
|
182
|
+
const variations = [];
|
|
183
|
+
// camelCase to space separated: publishDate -> publish date
|
|
184
|
+
const spaceSeparated = field.replace(/([A-Z])/g, ' $1').toLowerCase().trim();
|
|
185
|
+
if (spaceSeparated !== field.toLowerCase()) {
|
|
186
|
+
variations.push(spaceSeparated);
|
|
187
|
+
}
|
|
188
|
+
// snake_case to space separated: created_at -> created at
|
|
189
|
+
const underscoreRemoved = field.replace(/_/g, ' ').toLowerCase();
|
|
190
|
+
if (underscoreRemoved !== field.toLowerCase()) {
|
|
191
|
+
variations.push(underscoreRemoved);
|
|
192
|
+
}
|
|
193
|
+
// kebab-case to space separated: publish-date -> publish date
|
|
194
|
+
const dashRemoved = field.replace(/-/g, ' ').toLowerCase();
|
|
195
|
+
if (dashRemoved !== field.toLowerCase()) {
|
|
196
|
+
variations.push(dashRemoved);
|
|
197
|
+
}
|
|
198
|
+
// Generate suffix variations (remove common suffixes)
|
|
199
|
+
const suffixes = ['At', 'Date', 'Time', 'Id', 'Ref', 'Name', 'Value', 'Count', 'Number'];
|
|
200
|
+
for (const suffix of suffixes) {
|
|
201
|
+
if (field.endsWith(suffix) && field.length > suffix.length) {
|
|
202
|
+
const withoutSuffix = field.slice(0, -suffix.length).toLowerCase();
|
|
203
|
+
variations.push(withoutSuffix);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
// Generate prefix variations (remove common prefixes)
|
|
207
|
+
const prefixes = ['is', 'has', 'can', 'get', 'set'];
|
|
208
|
+
for (const prefix of prefixes) {
|
|
209
|
+
if (field.toLowerCase().startsWith(prefix) && field.length > prefix.length) {
|
|
210
|
+
const withoutPrefix = field.slice(prefix.length).toLowerCase();
|
|
211
|
+
variations.push(withoutPrefix);
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
return [...new Set(variations)]; // Remove duplicates
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* Find best matching field using semantic similarity
|
|
218
|
+
* Returns field name and confidence score
|
|
219
|
+
*/
|
|
220
|
+
async findBestMatchingField(term) {
|
|
221
|
+
// Ensure fields are loaded
|
|
222
|
+
await this.refreshFieldEmbeddings();
|
|
223
|
+
if (this.fieldNames.length === 0) {
|
|
224
|
+
return { field: null, confidence: 0 };
|
|
225
|
+
}
|
|
226
|
+
// Get embedding for the search term
|
|
227
|
+
const termEmbedding = await this.getEmbedding(term);
|
|
228
|
+
// Find most similar field using cosine similarity
|
|
229
|
+
let bestMatch = null;
|
|
230
|
+
let bestScore = 0;
|
|
231
|
+
for (const [fieldName, fieldEmbedding] of this.fieldEmbeddings) {
|
|
232
|
+
const similarity = this.cosineSimilarity(termEmbedding, fieldEmbedding);
|
|
233
|
+
if (similarity > bestScore && similarity > 0.7) { // 0.7 threshold for semantic match
|
|
234
|
+
bestScore = similarity;
|
|
235
|
+
bestMatch = fieldName;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
// Map back to actual field name if it was a variation
|
|
239
|
+
if (bestMatch && !this.fieldNames.includes(bestMatch)) {
|
|
240
|
+
// Find the original field this variation belongs to
|
|
241
|
+
for (const field of this.fieldNames) {
|
|
242
|
+
const variations = this.getFieldVariations(field);
|
|
243
|
+
if (variations.includes(bestMatch)) {
|
|
244
|
+
bestMatch = field;
|
|
245
|
+
break;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
return { field: bestMatch, confidence: bestScore };
|
|
250
|
+
}
|
|
251
|
+
/**
|
|
252
|
+
* Find best matching field with type context prioritization
|
|
253
|
+
* Fields with high type affinity get boosted scores
|
|
254
|
+
*/
|
|
255
|
+
async findBestMatchingFieldWithTypeContext(term, typeSpecificFields) {
|
|
256
|
+
// First do normal field matching
|
|
257
|
+
const normalMatch = await this.findBestMatchingField(term);
|
|
258
|
+
if (!normalMatch.field || typeSpecificFields.length === 0) {
|
|
259
|
+
return normalMatch;
|
|
260
|
+
}
|
|
261
|
+
// Check if the matched field has type affinity
|
|
262
|
+
const typeField = typeSpecificFields.find(tf => tf.field === normalMatch.field);
|
|
263
|
+
if (typeField) {
|
|
264
|
+
// Boost confidence based on type affinity
|
|
265
|
+
// High affinity (0.8+) gets 20% boost, medium affinity (0.5+) gets 10% boost
|
|
266
|
+
let boost = 0;
|
|
267
|
+
if (typeField.affinity >= 0.8) {
|
|
268
|
+
boost = 0.2;
|
|
269
|
+
}
|
|
270
|
+
else if (typeField.affinity >= 0.5) {
|
|
271
|
+
boost = 0.1;
|
|
272
|
+
}
|
|
273
|
+
const boostedConfidence = Math.min(1.0, normalMatch.confidence + boost);
|
|
274
|
+
return { field: normalMatch.field, confidence: boostedConfidence };
|
|
275
|
+
}
|
|
276
|
+
return normalMatch;
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Validate field-type compatibility
|
|
280
|
+
* Returns validation result with suggestions for invalid combinations
|
|
281
|
+
*/
|
|
282
|
+
async validateFieldForType(field, nounType) {
|
|
283
|
+
// Get fields that actually appear with this type
|
|
284
|
+
const typeFields = await this.brain.getFieldsForType(nounType);
|
|
285
|
+
// Check if this field appears with this type
|
|
286
|
+
const fieldInfo = typeFields.find(tf => tf.field === field);
|
|
287
|
+
if (fieldInfo) {
|
|
288
|
+
return {
|
|
289
|
+
isValid: true,
|
|
290
|
+
affinity: fieldInfo.affinity
|
|
291
|
+
};
|
|
292
|
+
}
|
|
293
|
+
// Field doesn't appear with this type - provide suggestions
|
|
294
|
+
const suggestions = typeFields
|
|
295
|
+
.filter(tf => tf.affinity > 0.1) // Only suggest common fields
|
|
296
|
+
.slice(0, 3) // Top 3 suggestions
|
|
297
|
+
.map(tf => tf.field);
|
|
298
|
+
return {
|
|
299
|
+
isValid: false,
|
|
300
|
+
affinity: 0,
|
|
301
|
+
suggestions,
|
|
302
|
+
reason: `Field '${field}' rarely appears with ${nounType} entities. Common fields: ${suggestions.join(', ')}`
|
|
303
|
+
};
|
|
304
|
+
}
|
|
305
|
+
/**
|
|
306
|
+
* Enhanced intelligent parse with validation
|
|
307
|
+
*/
|
|
308
|
+
async validateAndOptimizeQuery(tripleQuery, detectedNounType, fieldMatches) {
|
|
309
|
+
if (!detectedNounType || !tripleQuery.where) {
|
|
310
|
+
return tripleQuery;
|
|
311
|
+
}
|
|
312
|
+
const validationErrors = [];
|
|
313
|
+
const optimizedWhere = {};
|
|
314
|
+
// Validate each field in the where clause
|
|
315
|
+
for (const [field, value] of Object.entries(tripleQuery.where)) {
|
|
316
|
+
if (field === 'noun') {
|
|
317
|
+
optimizedWhere[field] = value; // Always valid
|
|
318
|
+
continue;
|
|
319
|
+
}
|
|
320
|
+
const validation = await this.validateFieldForType(field, detectedNounType);
|
|
321
|
+
if (validation.isValid || validation.affinity > 0.05) {
|
|
322
|
+
// Valid or has some affinity - include in query
|
|
323
|
+
optimizedWhere[field] = value;
|
|
324
|
+
}
|
|
325
|
+
else {
|
|
326
|
+
// Invalid field for this type
|
|
327
|
+
validationErrors.push(validation.reason || `Invalid field: ${field}`);
|
|
328
|
+
// Try to find a better field match from suggestions
|
|
329
|
+
if (validation.suggestions && validation.suggestions.length > 0) {
|
|
330
|
+
const bestSuggestion = validation.suggestions[0];
|
|
331
|
+
optimizedWhere[bestSuggestion] = value;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
// Log validation errors for debugging (in production, could throw or return errors)
|
|
336
|
+
if (validationErrors.length > 0) {
|
|
337
|
+
console.warn('Field validation warnings:', validationErrors);
|
|
338
|
+
}
|
|
339
|
+
return {
|
|
340
|
+
...tripleQuery,
|
|
341
|
+
where: optimizedWhere
|
|
342
|
+
};
|
|
343
|
+
}
|
|
344
|
+
/**
|
|
345
|
+
* Calculate cosine similarity between two vectors
|
|
346
|
+
*/
|
|
347
|
+
cosineSimilarity(a, b) {
|
|
348
|
+
if (a.length !== b.length)
|
|
349
|
+
return 0;
|
|
350
|
+
let dotProduct = 0;
|
|
351
|
+
let normA = 0;
|
|
352
|
+
let normB = 0;
|
|
353
|
+
for (let i = 0; i < a.length; i++) {
|
|
354
|
+
dotProduct += a[i] * b[i];
|
|
355
|
+
normA += a[i] * a[i];
|
|
356
|
+
normB += b[i] * b[i];
|
|
357
|
+
}
|
|
358
|
+
if (normA === 0 || normB === 0)
|
|
359
|
+
return 0;
|
|
360
|
+
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
|
361
|
+
}
|
|
362
|
+
/**
|
|
363
|
+
* Public initialization method for external callers
|
|
364
|
+
*/
|
|
365
|
+
async init() {
|
|
366
|
+
await this.ensureInitialized();
|
|
367
|
+
}
|
|
28
368
|
/**
|
|
29
369
|
* 🎯 MAIN METHOD: Convert natural language to Triple Intelligence query
|
|
30
370
|
*/
|
|
31
371
|
async processNaturalQuery(naturalQuery) {
|
|
32
372
|
await this.ensureInitialized();
|
|
33
|
-
// Step 1:
|
|
34
|
-
const queryEmbedding = await this.
|
|
373
|
+
// Step 1: Get the query embedding (cached; computed via brain.embed)
|
|
374
|
+
const queryEmbedding = await this.getEmbedding(naturalQuery);
|
|
35
375
|
// Step 2: Find best matching patterns from our library
|
|
36
376
|
const matches = await this.patternLibrary.findBestPatterns(queryEmbedding, 3);
|
|
37
377
|
// Step 3: Try each pattern until we get a good match
|
|
@@ -54,63 +394,198 @@ export class NaturalLanguageProcessor {
|
|
|
54
394
|
return query;
|
|
55
395
|
}
|
|
56
396
|
}
|
|
57
|
-
// Step 4:
|
|
58
|
-
return this.
|
|
397
|
+
// Step 4: Use intelligent field-aware parsing instead of fallback
|
|
398
|
+
return this.intelligentParse(naturalQuery, queryEmbedding);
|
|
59
399
|
}
|
|
60
400
|
/**
|
|
61
|
-
*
|
|
401
|
+
* Intelligent parse using type-aware field discovery and semantic matching
|
|
402
|
+
* NO FALLBACKS - uses actual indexed fields, entities, and type context
|
|
62
403
|
*/
|
|
63
|
-
async
|
|
64
|
-
// Analyze intent
|
|
404
|
+
async intelligentParse(query, queryEmbedding) {
|
|
405
|
+
// Step 1: Analyze intent and extract structure
|
|
65
406
|
const intent = await this.analyzeIntent(query);
|
|
66
|
-
//
|
|
67
|
-
|
|
68
|
-
//
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
//
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
}
|
|
407
|
+
// Step 2: Extract query terms
|
|
408
|
+
const queryTerms = query.split(/\s+/).filter(term => term.length > 2);
|
|
409
|
+
// Step 3: Detect NounType first for context
|
|
410
|
+
let detectedNounType = null;
|
|
411
|
+
let typeConfidence = 0;
|
|
412
|
+
for (const term of queryTerms) {
|
|
413
|
+
const nounTypeMatch = await this.findBestNounType(term);
|
|
414
|
+
if (nounTypeMatch.type && nounTypeMatch.confidence > typeConfidence) {
|
|
415
|
+
detectedNounType = nounTypeMatch.type;
|
|
416
|
+
typeConfidence = nounTypeMatch.confidence;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
// Step 4: Get type-specific fields if we detected a type
|
|
420
|
+
let typeSpecificFields = [];
|
|
421
|
+
if (detectedNounType && typeConfidence > 0.75) {
|
|
422
|
+
const fieldsForType = await this.brain.getFieldsForType(detectedNounType);
|
|
423
|
+
typeSpecificFields = fieldsForType.map(f => ({ field: f.field, affinity: f.affinity }));
|
|
424
|
+
}
|
|
425
|
+
// Step 5: Find matching fields and entities with type context
|
|
426
|
+
const entityMatches = await this.findEntityMatchesWithTypeContext(queryTerms, typeSpecificFields);
|
|
427
|
+
// Step 6: Build structured query from matches
|
|
428
|
+
const tripleQuery = {};
|
|
429
|
+
// Separate fields from entities
|
|
430
|
+
const fieldMatches = entityMatches.filter(m => m.type === 'field');
|
|
431
|
+
const entityRefs = entityMatches.filter(m => m.type === 'entity');
|
|
432
|
+
// Add detected type constraint if confident
|
|
433
|
+
if (detectedNounType && typeConfidence > 0.75) {
|
|
434
|
+
tripleQuery.where = { ...tripleQuery.where, noun: detectedNounType };
|
|
435
|
+
}
|
|
436
|
+
// Build metadata filters from field matches
|
|
437
|
+
if (fieldMatches.length > 0) {
|
|
438
|
+
// Use field cardinality to optimize query order
|
|
439
|
+
fieldMatches.sort((a, b) => (a.cardinality || 0) - (b.cardinality || 0));
|
|
440
|
+
tripleQuery.where = {};
|
|
441
|
+
for (const match of fieldMatches) {
|
|
442
|
+
// Extract value for this field from query
|
|
443
|
+
const valuePattern = new RegExp(`${match.term}\\s*(?:is|=|:)?\\s*(\\S+)`, 'i');
|
|
444
|
+
const valueMatch = query.match(valuePattern);
|
|
445
|
+
if (valueMatch) {
|
|
446
|
+
tripleQuery.where[match.field] = valueMatch[1];
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
// Build graph connections from entity references
|
|
451
|
+
if (entityRefs.length > 0) {
|
|
452
|
+
tripleQuery.connected = {
|
|
453
|
+
to: entityRefs[0].id
|
|
454
|
+
};
|
|
455
|
+
}
|
|
456
|
+
// Use remaining terms for vector search
|
|
457
|
+
const usedTerms = new Set([...fieldMatches.map(m => m.term), ...entityRefs.map(m => m.term)]);
|
|
458
|
+
const searchTerms = queryTerms.filter(term => !usedTerms.has(term));
|
|
459
|
+
if (searchTerms.length > 0) {
|
|
460
|
+
tripleQuery.like = searchTerms.join(' ');
|
|
461
|
+
}
|
|
462
|
+
else if (!tripleQuery.where || Object.keys(tripleQuery.where).length === 0) {
|
|
463
|
+
// If no specific filters, use the full query for vector search
|
|
464
|
+
tripleQuery.like = query;
|
|
465
|
+
}
|
|
466
|
+
// Validate and optimize query based on type context
|
|
467
|
+
const validatedQuery = await this.validateAndOptimizeQuery(tripleQuery, detectedNounType, fieldMatches);
|
|
468
|
+
// Add query optimization hints based on field statistics
|
|
469
|
+
if (fieldMatches.length > 0 && validatedQuery.where) {
|
|
470
|
+
const queryPlan = await this.brain.getOptimalQueryPlan(validatedQuery.where);
|
|
471
|
+
// Attach optimization hints as a separate property
|
|
472
|
+
const hints = validatedQuery;
|
|
473
|
+
hints.optimizationHints = {
|
|
474
|
+
detectedType: detectedNounType,
|
|
475
|
+
typeConfidence,
|
|
476
|
+
fieldOrder: queryPlan.fieldOrder,
|
|
477
|
+
strategy: queryPlan.strategy,
|
|
478
|
+
estimatedCost: queryPlan.estimatedCost,
|
|
479
|
+
typeSpecificFieldCount: typeSpecificFields.length
|
|
480
|
+
};
|
|
481
|
+
}
|
|
482
|
+
return validatedQuery;
|
|
84
483
|
}
|
|
85
484
|
/**
|
|
86
|
-
* Analyze intent using keywords and structure
|
|
485
|
+
* Analyze intent using keywords and structure with enhanced classification
|
|
87
486
|
*/
|
|
88
487
|
async analyzeIntent(query) {
|
|
89
|
-
// Use Brainy's embedding function to get semantic representation
|
|
90
|
-
const queryEmbedding = await this.brain.embed(query);
|
|
91
|
-
// Search for similar queries in history (if available)
|
|
92
|
-
let confidence = 0.7; // Base confidence
|
|
93
|
-
let type = 'vector'; // Default
|
|
94
488
|
// Analyze query structure patterns
|
|
95
489
|
const lowerQuery = query.toLowerCase();
|
|
490
|
+
// Determine primary intent
|
|
491
|
+
let primaryIntent = 'search';
|
|
492
|
+
let confidence = 0.7; // Base confidence
|
|
493
|
+
let type = 'vector'; // Default
|
|
494
|
+
// Intent detection patterns
|
|
495
|
+
if (lowerQuery.match(/\b(filter|where|with|having)\b/)) {
|
|
496
|
+
primaryIntent = 'filter';
|
|
497
|
+
confidence += 0.15;
|
|
498
|
+
}
|
|
499
|
+
else if (lowerQuery.match(/\b(count|sum|average|total|group by)\b/)) {
|
|
500
|
+
primaryIntent = 'aggregate';
|
|
501
|
+
confidence += 0.2;
|
|
502
|
+
}
|
|
503
|
+
else if (lowerQuery.match(/\b(compare|versus|vs|difference|between)\b/)) {
|
|
504
|
+
primaryIntent = 'compare';
|
|
505
|
+
confidence += 0.15;
|
|
506
|
+
}
|
|
507
|
+
else if (lowerQuery.match(/\b(explain|why|how|what causes)\b/)) {
|
|
508
|
+
primaryIntent = 'explain';
|
|
509
|
+
confidence += 0.1;
|
|
510
|
+
}
|
|
511
|
+
else if (lowerQuery.match(/\b(connected|related|linked|from.*to)\b/)) {
|
|
512
|
+
primaryIntent = 'navigate';
|
|
513
|
+
type = 'graph';
|
|
514
|
+
confidence += 0.15;
|
|
515
|
+
}
|
|
96
516
|
// Detect field queries
|
|
97
517
|
if (this.hasFieldPatterns(lowerQuery)) {
|
|
98
|
-
type = 'field';
|
|
99
|
-
confidence += 0.
|
|
518
|
+
type = type === 'graph' ? 'combined' : 'field';
|
|
519
|
+
confidence += 0.1;
|
|
100
520
|
}
|
|
101
521
|
// Detect connection queries
|
|
102
522
|
if (this.hasConnectionPatterns(lowerQuery)) {
|
|
103
523
|
type = type === 'field' ? 'combined' : 'graph';
|
|
104
524
|
confidence += 0.1;
|
|
105
525
|
}
|
|
106
|
-
// Extract
|
|
526
|
+
// Extract context
|
|
527
|
+
const context = {
|
|
528
|
+
domain: this.detectDomain(query),
|
|
529
|
+
temporalScope: this.detectTemporalScope(query),
|
|
530
|
+
complexity: this.assessComplexity(query)
|
|
531
|
+
};
|
|
532
|
+
// Extract basic terms with enhanced modifiers
|
|
107
533
|
const extractedTerms = this.extractTerms(query);
|
|
108
534
|
return {
|
|
109
535
|
type,
|
|
110
|
-
|
|
111
|
-
|
|
536
|
+
primaryIntent,
|
|
537
|
+
confidence,
|
|
538
|
+
extractedTerms,
|
|
539
|
+
context
|
|
112
540
|
};
|
|
113
541
|
}
|
|
542
|
+
/**
|
|
543
|
+
* Detect the domain of the query
|
|
544
|
+
*/
|
|
545
|
+
detectDomain(query) {
|
|
546
|
+
const lowerQuery = query.toLowerCase();
|
|
547
|
+
if (lowerQuery.match(/\b(code|function|api|bug|error|debug)\b/)) {
|
|
548
|
+
return 'technical';
|
|
549
|
+
}
|
|
550
|
+
else if (lowerQuery.match(/\b(revenue|sales|profit|customer|market)\b/)) {
|
|
551
|
+
return 'business';
|
|
552
|
+
}
|
|
553
|
+
else if (lowerQuery.match(/\b(research|study|paper|theory|hypothesis)\b/)) {
|
|
554
|
+
return 'academic';
|
|
555
|
+
}
|
|
556
|
+
return 'general';
|
|
557
|
+
}
|
|
558
|
+
/**
|
|
559
|
+
* Detect temporal scope in query
|
|
560
|
+
*/
|
|
561
|
+
detectTemporalScope(query) {
|
|
562
|
+
const lowerQuery = query.toLowerCase();
|
|
563
|
+
if (lowerQuery.match(/\b(was|were|did|had|yesterday|last|previous|ago)\b/)) {
|
|
564
|
+
return 'past';
|
|
565
|
+
}
|
|
566
|
+
else if (lowerQuery.match(/\b(will|going to|tomorrow|next|future|upcoming)\b/)) {
|
|
567
|
+
return 'future';
|
|
568
|
+
}
|
|
569
|
+
else if (lowerQuery.match(/\b(is|are|currently|now|today|present)\b/)) {
|
|
570
|
+
return 'present';
|
|
571
|
+
}
|
|
572
|
+
return 'all';
|
|
573
|
+
}
|
|
574
|
+
/**
|
|
575
|
+
* Assess query complexity
|
|
576
|
+
*/
|
|
577
|
+
assessComplexity(query) {
|
|
578
|
+
const words = query.split(/\s+/).length;
|
|
579
|
+
const hasMultipleClauses = query.match(/\b(and|or|but|with|where)\b/g)?.length || 0;
|
|
580
|
+
const hasNesting = query.includes('(') || query.includes('[');
|
|
581
|
+
if (words < 5 && hasMultipleClauses === 0) {
|
|
582
|
+
return 'simple';
|
|
583
|
+
}
|
|
584
|
+
else if (words > 15 || hasMultipleClauses > 2 || hasNesting) {
|
|
585
|
+
return 'complex';
|
|
586
|
+
}
|
|
587
|
+
return 'moderate';
|
|
588
|
+
}
|
|
114
589
|
/**
|
|
115
590
|
* Step 2: Use neural analysis to decompose complex queries
|
|
116
591
|
*/
|
|
@@ -182,8 +657,15 @@ export class NaturalLanguageProcessor {
|
|
|
182
657
|
const mods = intent.extractedTerms.modifiers;
|
|
183
658
|
if (mods.limit)
|
|
184
659
|
query.limit = mods.limit;
|
|
185
|
-
|
|
186
|
-
|
|
660
|
+
// Convert string boost to proper boost object
|
|
661
|
+
if (mods.boost) {
|
|
662
|
+
if (mods.boost === 'recent') {
|
|
663
|
+
query.boost = { field: 2.0, vector: 1.0, graph: 1.0 };
|
|
664
|
+
}
|
|
665
|
+
else if (mods.boost === 'popular') {
|
|
666
|
+
query.boost = { graph: 2.0, vector: 1.0, field: 1.0 };
|
|
667
|
+
}
|
|
668
|
+
}
|
|
187
669
|
}
|
|
188
670
|
return query;
|
|
189
671
|
}
|
|
@@ -200,7 +682,7 @@ export class NaturalLanguageProcessor {
|
|
|
200
682
|
// "Show me recent posts by John"
|
|
201
683
|
patterns.set(/show\\s+me\\s+recent\\s+(.+?)\\s+by\\s+(.+)/i, (match) => ({
|
|
202
684
|
like: match[1],
|
|
203
|
-
boost:
|
|
685
|
+
boost: { field: 2.0, vector: 1.0, graph: 1.0 },
|
|
204
686
|
connected: { from: match[2] }
|
|
205
687
|
}));
|
|
206
688
|
// "Papers with more than 100 citations"
|
|
@@ -255,14 +737,48 @@ export class NaturalLanguageProcessor {
|
|
|
255
737
|
return extracted;
|
|
256
738
|
}
|
|
257
739
|
/**
|
|
258
|
-
* Find entity matches using
|
|
740
|
+
* Find entity matches using type context and semantic similarity
|
|
259
741
|
*/
|
|
260
742
|
async findEntityMatches(terms) {
|
|
743
|
+
return this.findEntityMatchesWithTypeContext(terms, []);
|
|
744
|
+
}
|
|
745
|
+
/**
|
|
746
|
+
* Find entity matches with type context for better field prioritization
|
|
747
|
+
*/
|
|
748
|
+
async findEntityMatchesWithTypeContext(terms, typeSpecificFields) {
|
|
261
749
|
const matches = [];
|
|
750
|
+
// Get field statistics for optimization hints
|
|
751
|
+
const fieldStats = await this.brain.getFieldStatistics();
|
|
752
|
+
// Create field priority map based on type affinity
|
|
753
|
+
const fieldPriorityMap = new Map();
|
|
754
|
+
for (const { field, affinity } of typeSpecificFields) {
|
|
755
|
+
fieldPriorityMap.set(field, affinity);
|
|
756
|
+
}
|
|
262
757
|
for (const term of terms) {
|
|
263
758
|
try {
|
|
759
|
+
// First, check if term matches a field using semantic similarity
|
|
760
|
+
const fieldMatch = await this.findBestMatchingFieldWithTypeContext(term, typeSpecificFields);
|
|
761
|
+
if (fieldMatch.field && fieldMatch.confidence > 0.7) {
|
|
762
|
+
const stats = fieldStats.get(fieldMatch.field);
|
|
763
|
+
const typeAffinity = fieldPriorityMap.get(fieldMatch.field) || 0;
|
|
764
|
+
matches.push({
|
|
765
|
+
term,
|
|
766
|
+
type: 'field',
|
|
767
|
+
field: fieldMatch.field,
|
|
768
|
+
confidence: fieldMatch.confidence,
|
|
769
|
+
typeAffinity, // NEW: How likely this field appears with this type
|
|
770
|
+
cardinality: stats?.cardinality.uniqueValues,
|
|
771
|
+
distribution: stats?.cardinality.distribution,
|
|
772
|
+
indexType: stats?.indexType
|
|
773
|
+
});
|
|
774
|
+
// Skip entity search if we found a field match
|
|
775
|
+
continue;
|
|
776
|
+
}
|
|
264
777
|
// Search for similar entities in the knowledge base
|
|
265
|
-
const results = await this.brain.
|
|
778
|
+
const results = await this.brain.find({
|
|
779
|
+
query: term,
|
|
780
|
+
limit: 5
|
|
781
|
+
});
|
|
266
782
|
for (const result of results) {
|
|
267
783
|
if (result.score > 0.8) { // High similarity threshold
|
|
268
784
|
matches.push({
|
|
@@ -270,19 +786,10 @@ export class NaturalLanguageProcessor {
|
|
|
270
786
|
id: result.id,
|
|
271
787
|
type: 'entity',
|
|
272
788
|
confidence: result.score,
|
|
273
|
-
metadata: result.metadata
|
|
789
|
+
metadata: result.entity?.metadata
|
|
274
790
|
});
|
|
275
791
|
}
|
|
276
792
|
}
|
|
277
|
-
// Check if term matches known field names
|
|
278
|
-
if (this.isKnownField(term)) {
|
|
279
|
-
matches.push({
|
|
280
|
-
term,
|
|
281
|
-
type: 'field',
|
|
282
|
-
field: this.mapToFieldName(term),
|
|
283
|
-
confidence: 0.9
|
|
284
|
-
});
|
|
285
|
-
}
|
|
286
793
|
}
|
|
287
794
|
catch (error) {
|
|
288
795
|
// If search fails, continue with other terms
|
|
@@ -291,27 +798,395 @@ export class NaturalLanguageProcessor {
|
|
|
291
798
|
}
|
|
292
799
|
return matches;
|
|
293
800
|
}
|
|
801
|
+
// REMOVED: isKnownField and mapToFieldName - now using semantic field matching
|
|
802
|
+
// The findBestMatchingField method with embeddings replaces these hardcoded approaches
|
|
294
803
|
/**
|
|
295
|
-
*
|
|
804
|
+
* Find similar successful queries from history
|
|
805
|
+
* Uses Brainy's vector search to find semantically similar previous queries
|
|
296
806
|
*/
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
807
|
+
async findSimilarQueries(queryEmbedding) {
|
|
808
|
+
try {
|
|
809
|
+
// Search for similar queries in a hypothetical query history
|
|
810
|
+
// For now, return empty array since we don't have query history storage yet
|
|
811
|
+
// This would integrate with Brainy's search to find similar query patterns
|
|
812
|
+
// Future implementation could search a query_history noun type:
|
|
813
|
+
// const similarQueries = await this.brainy.search(queryEmbedding, {
|
|
814
|
+
// limit: 5,
|
|
815
|
+
// metadata: { type: 'successful_query' },
|
|
816
|
+
// nounTypes: ['query_history']
|
|
817
|
+
// })
|
|
818
|
+
return [];
|
|
819
|
+
}
|
|
820
|
+
catch (error) {
|
|
821
|
+
console.debug('Failed to find similar queries:', error);
|
|
822
|
+
return [];
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
/**
|
|
826
|
+
* Extract entities from query using Brainy's semantic search
|
|
827
|
+
* Identifies known entities, concepts, and relationships in the query text
|
|
828
|
+
*/
|
|
829
|
+
async extractEntities(query) {
|
|
830
|
+
try {
|
|
831
|
+
// Split query into potential entity terms
|
|
832
|
+
const terms = query.toLowerCase()
|
|
833
|
+
.split(/[\s,\.;!?]+/)
|
|
834
|
+
.filter(term => term.length > 2);
|
|
835
|
+
const entities = [];
|
|
836
|
+
// Search for each term in Brainy to see if it matches known entities
|
|
837
|
+
for (const term of terms) {
|
|
838
|
+
try {
|
|
839
|
+
const results = await this.brain.find(term);
|
|
840
|
+
if (results && results.length > 0) {
|
|
841
|
+
// Found matching entities
|
|
842
|
+
entities.push({
|
|
843
|
+
term,
|
|
844
|
+
matches: results,
|
|
845
|
+
confidence: results[0].score || 0.7
|
|
846
|
+
});
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
catch (searchError) {
|
|
850
|
+
// Continue if individual term search fails
|
|
851
|
+
console.debug(`Entity search failed for term: ${term}`, searchError);
|
|
852
|
+
}
|
|
853
|
+
}
|
|
854
|
+
return entities;
|
|
855
|
+
}
|
|
856
|
+
catch (error) {
|
|
857
|
+
console.debug('Failed to extract entities:', error);
|
|
858
|
+
return [];
|
|
859
|
+
}
|
|
303
860
|
}
|
|
304
861
|
/**
|
|
305
|
-
*
|
|
862
|
+
* Build final TripleQuery based on intent, entities, and query analysis
|
|
863
|
+
* Constructs optimized query combining vector, graph, and field searches
|
|
306
864
|
*/
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
865
|
+
async buildQuery(query, intent, entities) {
|
|
866
|
+
try {
|
|
867
|
+
const tripleQuery = {
|
|
868
|
+
like: query, // Default to semantic search
|
|
869
|
+
limit: 10
|
|
870
|
+
};
|
|
871
|
+
// Add field filters based on intent
|
|
872
|
+
if (intent.hasFieldPatterns) {
|
|
873
|
+
// Extract field-based constraints from the query
|
|
874
|
+
const whereClause = {};
|
|
875
|
+
// Look for date/year patterns
|
|
876
|
+
const yearMatch = query.match(/(\d{4})/g);
|
|
877
|
+
if (yearMatch) {
|
|
878
|
+
whereClause.year = parseInt(yearMatch[0]);
|
|
879
|
+
}
|
|
880
|
+
// Look for numeric constraints
|
|
881
|
+
const moreThanMatch = query.match(/more than (\d+)/i);
|
|
882
|
+
if (moreThanMatch) {
|
|
883
|
+
whereClause.count = { greaterThan: parseInt(moreThanMatch[1]) };
|
|
884
|
+
}
|
|
885
|
+
if (Object.keys(whereClause).length > 0) {
|
|
886
|
+
tripleQuery.where = whereClause;
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
// Add connection-based searches
|
|
890
|
+
if (intent.hasConnectionPatterns) {
|
|
891
|
+
// Look for relationship patterns in the query
|
|
892
|
+
const connectedMatch = query.match(/connected to (.+?)$/i) ||
|
|
893
|
+
query.match(/related to (.+?)$/i);
|
|
894
|
+
if (connectedMatch) {
|
|
895
|
+
tripleQuery.connected = {
|
|
896
|
+
to: connectedMatch[1].trim()
|
|
897
|
+
};
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
// Add entity-specific filters
|
|
901
|
+
if (entities && entities.length > 0) {
|
|
902
|
+
const highConfidenceEntities = entities.filter(e => e.confidence > 0.8);
|
|
903
|
+
if (highConfidenceEntities.length > 0) {
|
|
904
|
+
// Use the highest confidence entity to refine search
|
|
905
|
+
const topEntity = highConfidenceEntities[0];
|
|
906
|
+
if (topEntity.matches && topEntity.matches.length > 0) {
|
|
907
|
+
// Add entity-specific metadata or connection
|
|
908
|
+
const entityData = topEntity.matches[0].metadata;
|
|
909
|
+
if (entityData && entityData.category) {
|
|
910
|
+
tripleQuery.where = {
|
|
911
|
+
...tripleQuery.where,
|
|
912
|
+
category: entityData.category
|
|
913
|
+
};
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
}
|
|
918
|
+
return tripleQuery;
|
|
919
|
+
}
|
|
920
|
+
catch (error) {
|
|
921
|
+
console.debug('Failed to build query:', error);
|
|
922
|
+
// Return simple query as fallback
|
|
923
|
+
return {
|
|
924
|
+
like: query,
|
|
925
|
+
limit: 10
|
|
926
|
+
};
|
|
927
|
+
}
|
|
928
|
+
}
|
|
929
|
+
/**
|
|
930
|
+
* Extract entities from text using NEURAL matching to strict NounTypes
|
|
931
|
+
* ALWAYS uses neural matching, NEVER falls back to patterns
|
|
932
|
+
*/
|
|
933
|
+
async extract(text, options) {
|
|
934
|
+
await this.ensureInitialized();
|
|
935
|
+
// ALWAYS use NeuralEntityExtractor for proper type matching
|
|
936
|
+
const { NeuralEntityExtractor } = await import('./entityExtractor.js');
|
|
937
|
+
const extractor = new NeuralEntityExtractor(this.brain);
|
|
938
|
+
// Convert string types to NounTypes if provided
|
|
939
|
+
const nounTypes = options?.types ?
|
|
940
|
+
options.types.map(t => t) :
|
|
941
|
+
undefined;
|
|
942
|
+
// Extract using neural matching
|
|
943
|
+
const entities = await extractor.extract(text, {
|
|
944
|
+
types: nounTypes,
|
|
945
|
+
confidence: options?.confidence || 0.0, // Accept ALL matches
|
|
946
|
+
includeVectors: false,
|
|
947
|
+
neuralMatching: true // ALWAYS use neural matching
|
|
948
|
+
});
|
|
949
|
+
// Convert to expected format
|
|
950
|
+
return entities.map(entity => ({
|
|
951
|
+
text: entity.text,
|
|
952
|
+
type: entity.type,
|
|
953
|
+
position: entity.position,
|
|
954
|
+
confidence: entity.confidence,
|
|
955
|
+
metadata: options?.includeMetadata ? {
|
|
956
|
+
...entity.metadata,
|
|
957
|
+
neuralMatch: true,
|
|
958
|
+
extractedAt: Date.now()
|
|
959
|
+
} : undefined
|
|
960
|
+
}));
|
|
961
|
+
}
|
|
962
|
+
/**
|
|
963
|
+
* DEPRECATED - Old pattern-based extraction
|
|
964
|
+
* This should NEVER be used - kept only for reference
|
|
965
|
+
*/
|
|
966
|
+
async extractWithPatterns_DEPRECATED(text, options) {
|
|
967
|
+
const extracted = [];
|
|
968
|
+
// Common entity patterns
|
|
969
|
+
const patterns = {
|
|
970
|
+
// People (names with capitals)
|
|
971
|
+
person: /\b([A-Z][a-z]+ [A-Z][a-z]+)\b/g,
|
|
972
|
+
// Organizations (capitals, Inc, LLC, etc)
|
|
973
|
+
organization: /\b([A-Z][a-zA-Z&]+(?: [A-Z][a-zA-Z&]+)*(?:,? (?:Inc|LLC|Corp|Ltd|Co|Group|Foundation|Institute|University|College|School|Hospital|Bank|Agency)\.?))\b/g,
|
|
974
|
+
// Locations (capitals, common place words)
|
|
975
|
+
location: /\b([A-Z][a-z]+(?: [A-Z][a-z]+)*(?:,? (?:[A-Z][a-z]+))?)(?= (?:City|County|State|Country|Street|Road|Avenue|Boulevard|Drive|Park|Square|Place|Island|Mountain|River|Lake|Ocean|Sea))\b/g,
|
|
976
|
+
// Dates
|
|
977
|
+
date: /\b(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}|\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2}|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}|\d{1,2} (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{4})\b/gi,
|
|
978
|
+
// Times
|
|
979
|
+
time: /\b(\d{1,2}:\d{2}(?::\d{2})?(?:\s?[AP]M)?)\b/gi,
|
|
980
|
+
// Emails
|
|
981
|
+
email: /\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b/g,
|
|
982
|
+
// URLs
|
|
983
|
+
url: /\b(https?:\/\/[^\s]+)\b/g,
|
|
984
|
+
// Phone numbers
|
|
985
|
+
phone: /\b(\+?\d{1,3}?[- .]?\(?\d{1,4}\)?[- .]?\d{1,4}[- .]?\d{1,4})\b/g,
|
|
986
|
+
// Money
|
|
987
|
+
money: /\b(\$[\d,]+(?:\.\d{2})?|[\d,]+(?:\.\d{2})?\s*(?:USD|EUR|GBP|JPY|CNY))\b/gi,
|
|
988
|
+
// Percentages
|
|
989
|
+
percentage: /\b(\d+(?:\.\d+)?%)\b/g,
|
|
990
|
+
// Products/versions
|
|
991
|
+
product: /\b([A-Z][a-zA-Z0-9]*(?: [A-Z][a-zA-Z0-9]*)*\s+v?\d+(?:\.\d+)*)\b/g,
|
|
992
|
+
// Hashtags
|
|
993
|
+
hashtag: /#[a-zA-Z0-9_]+/g,
|
|
994
|
+
// Mentions
|
|
995
|
+
mention: /@[a-zA-Z0-9_]+/g
|
|
996
|
+
};
|
|
997
|
+
const minConfidence = options?.confidence || 0.5;
|
|
998
|
+
const targetTypes = options?.types || Object.keys(patterns);
|
|
999
|
+
// Apply each pattern
|
|
1000
|
+
for (const [type, pattern] of Object.entries(patterns)) {
|
|
1001
|
+
if (!targetTypes.includes(type))
|
|
1002
|
+
continue;
|
|
1003
|
+
let match;
|
|
1004
|
+
while ((match = pattern.exec(text)) !== null) {
|
|
1005
|
+
const extractedText = match[1] || match[0];
|
|
1006
|
+
const confidence = this.calculateConfidence(extractedText, type);
|
|
1007
|
+
if (confidence >= minConfidence) {
|
|
1008
|
+
const entity = {
|
|
1009
|
+
text: extractedText,
|
|
1010
|
+
type,
|
|
1011
|
+
position: {
|
|
1012
|
+
start: match.index,
|
|
1013
|
+
end: match.index + match[0].length
|
|
1014
|
+
},
|
|
1015
|
+
confidence
|
|
1016
|
+
};
|
|
1017
|
+
if (options?.includeMetadata) {
|
|
1018
|
+
;
|
|
1019
|
+
entity.metadata = {
|
|
1020
|
+
pattern: pattern.source,
|
|
1021
|
+
contextBefore: text.substring(Math.max(0, match.index - 20), match.index),
|
|
1022
|
+
contextAfter: text.substring(match.index + match[0].length, Math.min(text.length, match.index + match[0].length + 20))
|
|
1023
|
+
};
|
|
1024
|
+
}
|
|
1025
|
+
extracted.push(entity);
|
|
1026
|
+
}
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
1029
|
+
// Sort by position
|
|
1030
|
+
extracted.sort((a, b) => a.position.start - b.position.start);
|
|
1031
|
+
// Remove overlapping entities (keep higher confidence)
|
|
1032
|
+
const filtered = [];
|
|
1033
|
+
for (const entity of extracted) {
|
|
1034
|
+
const overlapping = filtered.find(e => (entity.position.start >= e.position.start && entity.position.start < e.position.end) ||
|
|
1035
|
+
(entity.position.end > e.position.start && entity.position.end <= e.position.end));
|
|
1036
|
+
if (!overlapping) {
|
|
1037
|
+
filtered.push(entity);
|
|
1038
|
+
}
|
|
1039
|
+
else if (entity.confidence > overlapping.confidence) {
|
|
1040
|
+
const index = filtered.indexOf(overlapping);
|
|
1041
|
+
filtered[index] = entity;
|
|
1042
|
+
}
|
|
1043
|
+
}
|
|
1044
|
+
return filtered;
|
|
1045
|
+
}
|
|
1046
|
+
/**
|
|
1047
|
+
* Analyze sentiment of text
|
|
1048
|
+
*/
|
|
1049
|
+
async sentiment(text, options) {
|
|
1050
|
+
// Sentiment words with scores
|
|
1051
|
+
const positiveWords = new Set(['good', 'great', 'excellent', 'amazing', 'wonderful', 'fantastic', 'love', 'like', 'best', 'happy', 'joy', 'brilliant', 'outstanding', 'perfect', 'beautiful', 'awesome', 'super', 'nice', 'fun', 'exciting', 'impressive', 'incredible', 'remarkable', 'delightful', 'pleased', 'satisfied', 'successful', 'effective', 'helpful']);
|
|
1052
|
+
const negativeWords = new Set(['bad', 'terrible', 'awful', 'horrible', 'hate', 'dislike', 'worst', 'sad', 'angry', 'poor', 'disappointing', 'failed', 'broken', 'useless', 'waste', 'sucks', 'disgusting', 'ugly', 'boring', 'annoying', 'frustrating', 'difficult', 'complicated', 'confusing', 'slow', 'expensive', 'unfair', 'wrong', 'mistake', 'problem', 'issue']);
|
|
1053
|
+
const intensifiers = new Set(['very', 'extremely', 'really', 'absolutely', 'completely', 'totally', 'quite', 'rather', 'so']);
|
|
1054
|
+
const negations = new Set(['not', 'no', 'never', 'neither', 'none', 'nobody', 'nothing', 'nowhere', 'hardly', 'barely', 'scarcely']);
|
|
1055
|
+
const normalizedText = text.toLowerCase();
|
|
1056
|
+
const words = normalizedText.split(/\s+/);
|
|
1057
|
+
// Calculate overall sentiment
|
|
1058
|
+
let positiveCount = 0;
|
|
1059
|
+
let negativeCount = 0;
|
|
1060
|
+
let intensifierBoost = 1;
|
|
1061
|
+
for (let i = 0; i < words.length; i++) {
|
|
1062
|
+
const word = words[i].replace(/[^a-z]/g, '');
|
|
1063
|
+
const prevWord = i > 0 ? words[i - 1].replace(/[^a-z]/g, '') : '';
|
|
1064
|
+
// Check for intensifiers
|
|
1065
|
+
if (intensifiers.has(prevWord)) {
|
|
1066
|
+
intensifierBoost = 1.5;
|
|
1067
|
+
}
|
|
1068
|
+
else {
|
|
1069
|
+
intensifierBoost = 1;
|
|
1070
|
+
}
|
|
1071
|
+
// Check for negation
|
|
1072
|
+
const isNegated = negations.has(prevWord);
|
|
1073
|
+
if (positiveWords.has(word)) {
|
|
1074
|
+
if (isNegated) {
|
|
1075
|
+
negativeCount += intensifierBoost;
|
|
1076
|
+
}
|
|
1077
|
+
else {
|
|
1078
|
+
positiveCount += intensifierBoost;
|
|
1079
|
+
}
|
|
1080
|
+
}
|
|
1081
|
+
else if (negativeWords.has(word)) {
|
|
1082
|
+
if (isNegated) {
|
|
1083
|
+
positiveCount += intensifierBoost;
|
|
1084
|
+
}
|
|
1085
|
+
else {
|
|
1086
|
+
negativeCount += intensifierBoost;
|
|
1087
|
+
}
|
|
1088
|
+
}
|
|
1089
|
+
}
|
|
1090
|
+
const total = positiveCount + negativeCount;
|
|
1091
|
+
const score = total > 0 ? (positiveCount - negativeCount) / total : 0;
|
|
1092
|
+
const magnitude = Math.min(1, total / words.length);
|
|
1093
|
+
let label;
|
|
1094
|
+
if (score > 0.2)
|
|
1095
|
+
label = 'positive';
|
|
1096
|
+
else if (score < -0.2)
|
|
1097
|
+
label = 'negative';
|
|
1098
|
+
else if (magnitude > 0.3)
|
|
1099
|
+
label = 'mixed';
|
|
1100
|
+
else
|
|
1101
|
+
label = 'neutral';
|
|
1102
|
+
const result = {
|
|
1103
|
+
overall: {
|
|
1104
|
+
score,
|
|
1105
|
+
magnitude,
|
|
1106
|
+
label
|
|
1107
|
+
}
|
|
313
1108
|
};
|
|
314
|
-
|
|
1109
|
+
// Sentence-level analysis
|
|
1110
|
+
if (options?.granularity === 'sentence' || options?.granularity === 'aspect') {
|
|
1111
|
+
const sentences = text.match(/[^.!?]+[.!?]+/g) || [text];
|
|
1112
|
+
result.sentences = [];
|
|
1113
|
+
for (const sentence of sentences) {
|
|
1114
|
+
const sentenceResult = await this.sentiment(sentence);
|
|
1115
|
+
result.sentences.push({
|
|
1116
|
+
text: sentence.trim(),
|
|
1117
|
+
score: sentenceResult.overall.score,
|
|
1118
|
+
magnitude: sentenceResult.overall.magnitude,
|
|
1119
|
+
label: sentenceResult.overall.label
|
|
1120
|
+
});
|
|
1121
|
+
}
|
|
1122
|
+
}
|
|
1123
|
+
// Aspect-based analysis
|
|
1124
|
+
if (options?.granularity === 'aspect' && options?.aspects) {
|
|
1125
|
+
result.aspects = {};
|
|
1126
|
+
for (const aspect of options.aspects) {
|
|
1127
|
+
const aspectRegex = new RegExp(`[^.!?]*\\b${aspect}\\b[^.!?]*[.!?]?`, 'gi');
|
|
1128
|
+
const aspectSentences = text.match(aspectRegex) || [];
|
|
1129
|
+
if (aspectSentences.length > 0) {
|
|
1130
|
+
let aspectScore = 0;
|
|
1131
|
+
let aspectMagnitude = 0;
|
|
1132
|
+
for (const sentence of aspectSentences) {
|
|
1133
|
+
const sentimentResult = await this.sentiment(sentence);
|
|
1134
|
+
aspectScore += sentimentResult.overall.score;
|
|
1135
|
+
aspectMagnitude += sentimentResult.overall.magnitude;
|
|
1136
|
+
}
|
|
1137
|
+
result.aspects[aspect] = {
|
|
1138
|
+
score: aspectScore / aspectSentences.length,
|
|
1139
|
+
magnitude: aspectMagnitude / aspectSentences.length,
|
|
1140
|
+
mentions: aspectSentences.length
|
|
1141
|
+
};
|
|
1142
|
+
}
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
return result;
|
|
1146
|
+
}
|
|
1147
|
+
/**
|
|
1148
|
+
* Calculate confidence for entity extraction
|
|
1149
|
+
*/
|
|
1150
|
+
calculateConfidence(text, type) {
|
|
1151
|
+
let confidence = 0.5; // Base confidence
|
|
1152
|
+
// Adjust based on type-specific rules
|
|
1153
|
+
switch (type) {
|
|
1154
|
+
case 'person':
|
|
1155
|
+
// Names with 2-3 capitalized words are more confident
|
|
1156
|
+
const nameWords = text.split(' ');
|
|
1157
|
+
if (nameWords.length >= 2 && nameWords.length <= 3) {
|
|
1158
|
+
confidence += 0.3;
|
|
1159
|
+
}
|
|
1160
|
+
if (nameWords.every(w => /^[A-Z]/.test(w))) {
|
|
1161
|
+
confidence += 0.2;
|
|
1162
|
+
}
|
|
1163
|
+
break;
|
|
1164
|
+
case 'organization':
|
|
1165
|
+
// Presence of corporate suffixes increases confidence
|
|
1166
|
+
if (/\b(Inc|LLC|Corp|Ltd|Co|Group)\.?$/.test(text)) {
|
|
1167
|
+
confidence += 0.4;
|
|
1168
|
+
}
|
|
1169
|
+
break;
|
|
1170
|
+
case 'email':
|
|
1171
|
+
case 'url':
|
|
1172
|
+
// These patterns are very specific, high confidence
|
|
1173
|
+
confidence = 0.95;
|
|
1174
|
+
break;
|
|
1175
|
+
case 'date':
|
|
1176
|
+
case 'time':
|
|
1177
|
+
case 'money':
|
|
1178
|
+
case 'percentage':
|
|
1179
|
+
// Numeric patterns are reliable
|
|
1180
|
+
confidence = 0.9;
|
|
1181
|
+
break;
|
|
1182
|
+
case 'location':
|
|
1183
|
+
// Geographic terms increase confidence
|
|
1184
|
+
if (/\b(City|State|Country|Street|Road|Avenue)$/.test(text)) {
|
|
1185
|
+
confidence += 0.3;
|
|
1186
|
+
}
|
|
1187
|
+
break;
|
|
1188
|
+
}
|
|
1189
|
+
return Math.min(1, confidence);
|
|
315
1190
|
}
|
|
316
1191
|
}
|
|
317
1192
|
//# sourceMappingURL=naturalLanguageProcessor.js.map
|