@soulcraft/brainy 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +53 -3
- package/README.md +427 -111
- package/bin/brainy.js +340 -62
- package/dist/api/ConfigAPI.d.ts +67 -0
- package/dist/api/ConfigAPI.js +166 -0
- package/dist/api/DataAPI.d.ts +123 -0
- package/dist/api/DataAPI.js +391 -0
- package/dist/api/SecurityAPI.d.ts +50 -0
- package/dist/api/SecurityAPI.js +139 -0
- package/dist/api/UniversalImportAPI.d.ts +134 -0
- package/dist/api/UniversalImportAPI.js +615 -0
- package/dist/augmentationManager.js +12 -7
- package/dist/augmentationPipeline.d.ts +0 -61
- package/dist/augmentationPipeline.js +0 -87
- package/dist/augmentationRegistry.d.ts +1 -1
- package/dist/augmentationRegistry.js +1 -1
- package/dist/augmentations/apiServerAugmentation.d.ts +27 -1
- package/dist/augmentations/apiServerAugmentation.js +290 -9
- package/dist/augmentations/auditLogAugmentation.d.ts +109 -0
- package/dist/augmentations/auditLogAugmentation.js +358 -0
- package/dist/augmentations/batchProcessingAugmentation.d.ts +3 -2
- package/dist/augmentations/batchProcessingAugmentation.js +123 -22
- package/dist/augmentations/brainyAugmentation.d.ts +142 -8
- package/dist/augmentations/brainyAugmentation.js +179 -2
- package/dist/augmentations/cacheAugmentation.d.ts +8 -5
- package/dist/augmentations/cacheAugmentation.js +116 -17
- package/dist/augmentations/conduitAugmentations.d.ts +2 -2
- package/dist/augmentations/conduitAugmentations.js +2 -2
- package/dist/augmentations/configResolver.d.ts +122 -0
- package/dist/augmentations/configResolver.js +440 -0
- package/dist/augmentations/connectionPoolAugmentation.d.ts +3 -1
- package/dist/augmentations/connectionPoolAugmentation.js +37 -12
- package/dist/augmentations/defaultAugmentations.d.ts +14 -10
- package/dist/augmentations/defaultAugmentations.js +16 -11
- package/dist/augmentations/discovery/catalogDiscovery.d.ts +142 -0
- package/dist/augmentations/discovery/catalogDiscovery.js +249 -0
- package/dist/augmentations/discovery/localDiscovery.d.ts +84 -0
- package/dist/augmentations/discovery/localDiscovery.js +246 -0
- package/dist/augmentations/discovery/runtimeLoader.d.ts +97 -0
- package/dist/augmentations/discovery/runtimeLoader.js +337 -0
- package/dist/augmentations/discovery.d.ts +152 -0
- package/dist/augmentations/discovery.js +441 -0
- package/dist/augmentations/display/cache.d.ts +130 -0
- package/dist/augmentations/display/cache.js +319 -0
- package/dist/augmentations/display/fieldPatterns.d.ts +52 -0
- package/dist/augmentations/display/fieldPatterns.js +393 -0
- package/dist/augmentations/display/iconMappings.d.ts +57 -0
- package/dist/augmentations/display/iconMappings.js +68 -0
- package/dist/augmentations/display/intelligentComputation.d.ts +109 -0
- package/dist/augmentations/display/intelligentComputation.js +462 -0
- package/dist/augmentations/display/types.d.ts +203 -0
- package/dist/augmentations/display/types.js +7 -0
- package/dist/augmentations/entityRegistryAugmentation.d.ts +3 -1
- package/dist/augmentations/entityRegistryAugmentation.js +5 -1
- package/dist/augmentations/indexAugmentation.d.ts +5 -3
- package/dist/augmentations/indexAugmentation.js +5 -2
- package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +24 -7
- package/dist/augmentations/intelligentVerbScoringAugmentation.js +111 -27
- package/dist/augmentations/manifest.d.ts +176 -0
- package/dist/augmentations/manifest.js +8 -0
- package/dist/augmentations/marketplace/AugmentationMarketplace.d.ts +168 -0
- package/dist/augmentations/marketplace/AugmentationMarketplace.js +329 -0
- package/dist/augmentations/marketplace/cli.d.ts +47 -0
- package/dist/augmentations/marketplace/cli.js +265 -0
- package/dist/augmentations/metricsAugmentation.d.ts +3 -3
- package/dist/augmentations/metricsAugmentation.js +2 -2
- package/dist/augmentations/monitoringAugmentation.d.ts +3 -3
- package/dist/augmentations/monitoringAugmentation.js +2 -2
- package/dist/augmentations/neuralImport.d.ts +1 -1
- package/dist/augmentations/neuralImport.js +4 -4
- package/dist/augmentations/rateLimitAugmentation.d.ts +82 -0
- package/dist/augmentations/rateLimitAugmentation.js +321 -0
- package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +2 -2
- package/dist/augmentations/requestDeduplicatorAugmentation.js +1 -1
- package/dist/augmentations/storageAugmentation.d.ts +1 -1
- package/dist/augmentations/storageAugmentation.js +2 -2
- package/dist/augmentations/storageAugmentations.d.ts +37 -8
- package/dist/augmentations/storageAugmentations.js +204 -15
- package/dist/augmentations/synapseAugmentation.d.ts +1 -1
- package/dist/augmentations/synapseAugmentation.js +35 -16
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +83 -0
- package/dist/augmentations/typeMatching/brainyTypes.js +425 -0
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +39 -59
- package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +103 -389
- package/dist/augmentations/universalDisplayAugmentation.d.ts +191 -0
- package/dist/augmentations/universalDisplayAugmentation.js +371 -0
- package/dist/brainy-unified.d.ts +106 -0
- package/dist/brainy-unified.js +327 -0
- package/dist/brainy.d.ts +277 -0
- package/dist/brainy.js +1241 -0
- package/dist/brainyData.d.ts +56 -111
- package/dist/brainyData.js +912 -756
- package/dist/brainyDataV3.d.ts +186 -0
- package/dist/brainyDataV3.js +337 -0
- package/dist/config/distributedPresets-new.d.ts +118 -0
- package/dist/config/distributedPresets-new.js +318 -0
- package/dist/config/distributedPresets.d.ts +118 -0
- package/dist/config/distributedPresets.js +318 -0
- package/dist/config/extensibleConfig.d.ts +99 -0
- package/dist/config/extensibleConfig.js +268 -0
- package/dist/config/index.d.ts +17 -0
- package/dist/config/index.js +35 -0
- package/dist/config/modelAutoConfig.d.ts +32 -0
- package/dist/config/modelAutoConfig.js +139 -0
- package/dist/config/modelPrecisionManager.d.ts +42 -0
- package/dist/config/modelPrecisionManager.js +98 -0
- package/dist/config/sharedConfigManager.d.ts +67 -0
- package/dist/config/sharedConfigManager.js +215 -0
- package/dist/config/storageAutoConfig.d.ts +41 -0
- package/dist/config/storageAutoConfig.js +328 -0
- package/dist/config/zeroConfig.d.ts +68 -0
- package/dist/config/zeroConfig.js +301 -0
- package/dist/cortex/backupRestore.d.ts +2 -2
- package/dist/cortex/backupRestore.js +85 -27
- package/dist/cortex/healthCheck.d.ts +2 -2
- package/dist/cortex/neuralImport.d.ts +2 -2
- package/dist/cortex/neuralImport.js +18 -13
- package/dist/cortex/performanceMonitor.d.ts +2 -2
- package/dist/critical/model-guardian.d.ts +4 -0
- package/dist/critical/model-guardian.js +31 -11
- package/dist/demo.d.ts +4 -4
- package/dist/demo.js +7 -7
- package/dist/distributed/cacheSync.d.ts +112 -0
- package/dist/distributed/cacheSync.js +265 -0
- package/dist/distributed/coordinator.d.ts +193 -0
- package/dist/distributed/coordinator.js +548 -0
- package/dist/distributed/httpTransport.d.ts +120 -0
- package/dist/distributed/httpTransport.js +446 -0
- package/dist/distributed/index.d.ts +8 -0
- package/dist/distributed/index.js +5 -0
- package/dist/distributed/networkTransport.d.ts +132 -0
- package/dist/distributed/networkTransport.js +633 -0
- package/dist/distributed/queryPlanner.d.ts +104 -0
- package/dist/distributed/queryPlanner.js +327 -0
- package/dist/distributed/readWriteSeparation.d.ts +134 -0
- package/dist/distributed/readWriteSeparation.js +350 -0
- package/dist/distributed/shardManager.d.ts +114 -0
- package/dist/distributed/shardManager.js +357 -0
- package/dist/distributed/shardMigration.d.ts +110 -0
- package/dist/distributed/shardMigration.js +289 -0
- package/dist/distributed/storageDiscovery.d.ts +160 -0
- package/dist/distributed/storageDiscovery.js +551 -0
- package/dist/embeddings/CachedEmbeddings.d.ts +40 -0
- package/dist/embeddings/CachedEmbeddings.js +146 -0
- package/dist/embeddings/EmbeddingManager.d.ts +102 -0
- package/dist/embeddings/EmbeddingManager.js +291 -0
- package/dist/embeddings/SingletonModelManager.d.ts +95 -0
- package/dist/embeddings/SingletonModelManager.js +220 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/lightweight-embedder.d.ts +0 -1
- package/dist/embeddings/lightweight-embedder.js +4 -12
- package/dist/embeddings/model-manager.d.ts +11 -0
- package/dist/embeddings/model-manager.js +43 -7
- package/dist/embeddings/universal-memory-manager.d.ts +1 -1
- package/dist/embeddings/universal-memory-manager.js +27 -67
- package/dist/embeddings/worker-embedding.js +4 -8
- package/dist/errors/brainyError.d.ts +5 -1
- package/dist/errors/brainyError.js +12 -0
- package/dist/examples/basicUsage.js +7 -4
- package/dist/graph/graphAdjacencyIndex.d.ts +96 -0
- package/dist/graph/graphAdjacencyIndex.js +288 -0
- package/dist/graph/pathfinding.js +4 -2
- package/dist/hnsw/scaledHNSWSystem.js +11 -2
- package/dist/importManager.js +8 -5
- package/dist/index.d.ts +17 -22
- package/dist/index.js +37 -23
- package/dist/mcp/brainyMCPAdapter.d.ts +4 -4
- package/dist/mcp/brainyMCPAdapter.js +5 -5
- package/dist/mcp/brainyMCPService.d.ts +3 -3
- package/dist/mcp/brainyMCPService.js +3 -11
- package/dist/mcp/mcpAugmentationToolset.js +20 -30
- package/dist/neural/embeddedPatterns.d.ts +1 -1
- package/dist/neural/embeddedPatterns.js +2 -2
- package/dist/neural/entityExtractor.d.ts +65 -0
- package/dist/neural/entityExtractor.js +316 -0
- package/dist/neural/improvedNeuralAPI.d.ts +357 -0
- package/dist/neural/improvedNeuralAPI.js +2628 -0
- package/dist/neural/naturalLanguageProcessor.d.ts +155 -10
- package/dist/neural/naturalLanguageProcessor.js +941 -66
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +2 -2
- package/dist/neural/naturalLanguageProcessorStatic.js +3 -3
- package/dist/neural/neuralAPI.js +8 -2
- package/dist/neural/patternLibrary.d.ts +57 -3
- package/dist/neural/patternLibrary.js +348 -13
- package/dist/neural/staticPatternMatcher.d.ts +2 -2
- package/dist/neural/staticPatternMatcher.js +2 -2
- package/dist/neural/types.d.ts +287 -0
- package/dist/neural/types.js +24 -0
- package/dist/shared/default-augmentations.d.ts +3 -3
- package/dist/shared/default-augmentations.js +5 -5
- package/dist/storage/adapters/baseStorageAdapter.d.ts +42 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +26 -2
- package/dist/storage/adapters/fileSystemStorage.js +218 -15
- package/dist/storage/adapters/memoryStorage.d.ts +4 -4
- package/dist/storage/adapters/memoryStorage.js +17 -12
- package/dist/storage/adapters/opfsStorage.d.ts +2 -2
- package/dist/storage/adapters/opfsStorage.js +2 -2
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -2
- package/dist/storage/adapters/s3CompatibleStorage.js +2 -2
- package/dist/storage/backwardCompatibility.d.ts +10 -78
- package/dist/storage/backwardCompatibility.js +17 -132
- package/dist/storage/baseStorage.d.ts +18 -2
- package/dist/storage/baseStorage.js +74 -3
- package/dist/storage/cacheManager.js +2 -2
- package/dist/storage/readOnlyOptimizations.js +8 -3
- package/dist/streaming/pipeline.d.ts +154 -0
- package/dist/streaming/pipeline.js +551 -0
- package/dist/triple/TripleIntelligence.d.ts +25 -110
- package/dist/triple/TripleIntelligence.js +4 -574
- package/dist/triple/TripleIntelligenceSystem.d.ts +159 -0
- package/dist/triple/TripleIntelligenceSystem.js +519 -0
- package/dist/types/apiTypes.d.ts +278 -0
- package/dist/types/apiTypes.js +33 -0
- package/dist/types/brainy.types.d.ts +308 -0
- package/dist/types/brainy.types.js +8 -0
- package/dist/types/brainyDataInterface.d.ts +5 -8
- package/dist/types/brainyDataInterface.js +2 -2
- package/dist/types/graphTypes.js +2 -2
- package/dist/universal/crypto.d.ts +11 -1
- package/dist/universal/crypto.js +24 -93
- package/dist/universal/events.d.ts +3 -2
- package/dist/universal/events.js +6 -75
- package/dist/universal/fs.d.ts +2 -3
- package/dist/universal/fs.js +5 -211
- package/dist/universal/path.d.ts +3 -2
- package/dist/universal/path.js +22 -78
- package/dist/universal/uuid.d.ts +1 -1
- package/dist/universal/uuid.js +1 -1
- package/dist/utils/brainyTypes.d.ts +217 -0
- package/dist/utils/brainyTypes.js +261 -0
- package/dist/utils/cacheAutoConfig.d.ts +3 -3
- package/dist/utils/embedding.d.ts +9 -4
- package/dist/utils/embedding.js +89 -26
- package/dist/utils/enhancedLogger.d.ts +104 -0
- package/dist/utils/enhancedLogger.js +232 -0
- package/dist/utils/hybridModelManager.d.ts +19 -28
- package/dist/utils/hybridModelManager.js +36 -200
- package/dist/utils/index.d.ts +1 -1
- package/dist/utils/index.js +1 -1
- package/dist/utils/intelligentTypeMapper.d.ts +60 -0
- package/dist/utils/intelligentTypeMapper.js +349 -0
- package/dist/utils/metadataIndex.d.ts +118 -1
- package/dist/utils/metadataIndex.js +539 -16
- package/dist/utils/nodeVersionCheck.d.ts +24 -0
- package/dist/utils/nodeVersionCheck.js +65 -0
- package/dist/utils/paramValidation.d.ts +39 -0
- package/dist/utils/paramValidation.js +192 -0
- package/dist/utils/rateLimiter.d.ts +160 -0
- package/dist/utils/rateLimiter.js +271 -0
- package/dist/utils/statistics.d.ts +4 -4
- package/dist/utils/statistics.js +3 -3
- package/dist/utils/structuredLogger.d.ts +146 -0
- package/dist/utils/structuredLogger.js +394 -0
- package/dist/utils/textEncoding.js +2 -1
- package/dist/utils/typeValidation.d.ts +59 -0
- package/dist/utils/typeValidation.js +374 -0
- package/dist/utils/version.js +19 -3
- package/package.json +15 -17
- package/scripts/download-models.cjs +94 -20
- package/dist/augmentations/walAugmentation.d.ts +0 -109
- package/dist/augmentations/walAugmentation.js +0 -516
- package/dist/browserFramework.d.ts +0 -15
- package/dist/browserFramework.js +0 -31
- package/dist/browserFramework.minimal.d.ts +0 -14
- package/dist/browserFramework.minimal.js +0 -31
- package/dist/chat/BrainyChat.d.ts +0 -121
- package/dist/chat/BrainyChat.js +0 -396
- package/dist/chat/ChatCLI.d.ts +0 -61
- package/dist/chat/ChatCLI.js +0 -351
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Neural Entity Extractor using Brainy's NounTypes
|
|
3
|
+
* Uses embeddings and similarity matching for accurate type detection
|
|
4
|
+
*/
|
|
5
|
+
import { NounType } from '../types/graphTypes.js';
|
|
6
|
+
import { Vector } from '../coreTypes.js';
|
|
7
|
+
import type { Brainy } from '../brainy.js';
|
|
8
|
+
/**
 * One entity returned by NeuralEntityExtractor.extract().
 */
export interface ExtractedEntity {
|
|
9
|
+
/** Extracted text: the pattern's first capture group when present, otherwise the whole match. */
text: string;
|
|
10
|
+
/** NounType selected for this entity (best-scoring type, or the rule-based result). */
type: NounType;
|
|
11
|
+
/**
 * Character offsets of the whole pattern match within the input text.
 * For quoted-string matches the span includes the quote characters even
 * though `text` does not.
 */
position: {
|
|
12
|
+
/** Offset of the first matched character. */
start: number;
|
|
13
|
+
/** Exclusive end offset (start + length of the whole match). */
end: number;
|
|
14
|
+
};
|
|
15
|
+
/** Score that was compared against the `confidence` threshold passed to extract(). */
confidence: number;
|
|
16
|
+
/** Embedding of `text`; only set when extract() is called with `includeVectors`. */
vector?: Vector;
|
|
17
|
+
/** NOTE(review): extract() in entityExtractor.js does not appear to populate this field - confirm intended use. */
metadata?: any;
|
|
18
|
+
}
|
|
19
|
+
/**
 * Extracts entities from free text: regex patterns propose candidate spans,
 * each candidate is assigned a NounType (by embedding similarity, or by
 * rules when neural matching is disabled), and overlapping results are
 * removed before returning.
 */
export declare class NeuralEntityExtractor {
|
|
20
|
+
/** Instance passed to the constructor; its embed() function is used for embeddings when present. */
private brain;
|
|
21
|
+
/** Cached embeddings, one per NounType value. */
private typeEmbeddings;
|
|
22
|
+
/** Set to true once the type embeddings have been built. */
private initialized;
|
|
23
|
+
/** Stores `brain`; type embeddings are built lazily on the first extract() call. */
constructor(brain: Brainy | Brainy<any>);
|
|
24
|
+
/**
|
|
25
|
+
* Build and cache one embedding per NounType from example phrases (no-op after the first run)
|
|
26
|
+
*/
|
|
27
|
+
private initializeTypeEmbeddings;
|
|
28
|
+
/**
|
|
29
|
+
* Extract entities from text; resolves to the de-duplicated entities whose score meets the threshold
|
|
30
|
+
*/
|
|
31
|
+
extract(text: string, options?: {
|
|
32
|
+
/** Candidate NounTypes to consider during neural matching; defaults to every NounType value. */
types?: NounType[];
|
|
33
|
+
/** Minimum score an entity needs to be returned; defaults to 0.6. */
confidence?: number;
|
|
34
|
+
/** When true, each returned entity carries the embedding of its text in `vector`. */
includeVectors?: boolean;
|
|
35
|
+
/** Defaults to true; pass false to classify with the rule-based fallback instead of embeddings. */
neuralMatching?: boolean;
|
|
36
|
+
}): Promise<ExtractedEntity[]>;
|
|
37
|
+
/**
|
|
38
|
+
* Extract candidate entities (text, position, surrounding context) using regex patterns
|
|
39
|
+
*/
|
|
40
|
+
private extractCandidates;
|
|
41
|
+
/**
|
|
42
|
+
* Get context-based confidence boost for type matching (0.1 per clue found, capped at 0.3)
|
|
43
|
+
*/
|
|
44
|
+
private getContextBoost;
|
|
45
|
+
/**
|
|
46
|
+
* Rule-based classification, used when neuralMatching is false
|
|
47
|
+
*/
|
|
48
|
+
private classifyByRules;
|
|
49
|
+
/**
|
|
50
|
+
* Get embedding for text via brain.embed() when available, else a 384-element character-code fallback vector
|
|
51
|
+
*/
|
|
52
|
+
private getEmbedding;
|
|
53
|
+
/**
|
|
54
|
+
* Calculate cosine similarity between vectors (0 when either vector has zero norm)
|
|
55
|
+
*/
|
|
56
|
+
private cosineSimilarity;
|
|
57
|
+
/**
|
|
58
|
+
* Simple 32-bit string hash (not called by the other methods visible in entityExtractor.js)
|
|
59
|
+
*/
|
|
60
|
+
private simpleHash;
|
|
61
|
+
/**
|
|
62
|
+
* Remove overlapping entities: the earliest-starting one is kept, higher confidence wins when starts tie
|
|
63
|
+
*/
|
|
64
|
+
private deduplicateEntities;
|
|
65
|
+
}
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Neural Entity Extractor using Brainy's NounTypes
|
|
3
|
+
* Uses embeddings and similarity matching for accurate type detection
|
|
4
|
+
*/
|
|
5
|
+
import { NounType } from '../types/graphTypes.js';
|
|
6
|
+
export class NeuralEntityExtractor {
    /**
     * @param {object} brain - Host instance. When it exposes an `embed(text)`
     *   function, that function supplies embeddings; otherwise a local
     *   character-code fallback vector is used (see getEmbedding).
     */
    constructor(brain) {
        // Type embeddings for similarity matching, keyed by NounType value
        this.typeEmbeddings = new Map();
        this.initialized = false;
        this.brain = brain;
    }
    /**
     * Initialize type embeddings for neural matching.
     *
     * Embeds a short list of example phrases for each NounType and caches the
     * result in `typeEmbeddings`. Does nothing once `initialized` is set.
     *
     * @returns {Promise<void>}
     */
    async initializeTypeEmbeddings() {
        if (this.initialized)
            return;
        // Create representative embeddings for each NounType
        const typeExamples = {
            [NounType.Person]: ['John Smith', 'Jane Doe', 'person', 'individual', 'human'],
            [NounType.Organization]: ['Microsoft Corporation', 'company', 'organization', 'business', 'enterprise'],
            [NounType.Location]: ['New York City', 'location', 'place', 'address', 'geography'],
            [NounType.Document]: ['document', 'file', 'report', 'paper', 'text'],
            [NounType.Event]: ['conference', 'meeting', 'event', 'occurrence', 'happening'],
            [NounType.Product]: ['iPhone', 'product', 'item', 'merchandise', 'goods'],
            [NounType.Service]: ['consulting', 'service', 'offering', 'provision'],
            [NounType.Concept]: ['idea', 'concept', 'theory', 'principle', 'notion'],
            [NounType.Media]: ['image', 'video', 'audio', 'media', 'content'],
            [NounType.Message]: ['email', 'message', 'communication', 'note'],
            [NounType.Task]: ['task', 'todo', 'assignment', 'job', 'work'],
            [NounType.Project]: ['project', 'initiative', 'program', 'endeavor'],
            [NounType.Process]: ['workflow', 'process', 'procedure', 'method'],
            [NounType.User]: ['user', 'account', 'profile', 'member'],
            [NounType.Role]: ['manager', 'role', 'position', 'title', 'responsibility'],
            [NounType.Topic]: ['subject', 'topic', 'theme', 'matter'],
            [NounType.Language]: ['English', 'language', 'tongue', 'dialect'],
            [NounType.Currency]: ['dollar', 'currency', 'money', 'USD', 'EUR'],
            [NounType.Measurement]: ['meter', 'measurement', 'unit', 'quantity'],
            [NounType.Contract]: ['agreement', 'contract', 'deal', 'treaty'],
            [NounType.Regulation]: ['law', 'regulation', 'rule', 'policy'],
            [NounType.Resource]: ['resource', 'asset', 'material', 'supply'],
            [NounType.Dataset]: ['database', 'dataset', 'data', 'records'],
            [NounType.Interface]: ['API', 'interface', 'endpoint', 'connection'],
            [NounType.Thing]: ['thing', 'object', 'item', 'entity'],
            [NounType.Content]: ['content', 'material', 'information'],
            [NounType.Collection]: ['collection', 'group', 'set', 'list'],
            [NounType.File]: ['file', 'document', 'archive'],
            [NounType.State]: ['state', 'status', 'condition'],
            [NounType.Hypothesis]: ['hypothesis', 'theory', 'assumption'],
            [NounType.Experiment]: ['experiment', 'test', 'trial', 'study']
        };
        // Generate embeddings for each type (one at a time, in declaration order)
        for (const [type, examples] of Object.entries(typeExamples)) {
            const combinedText = examples.join(' ');
            const embedding = await this.getEmbedding(combinedText);
            this.typeEmbeddings.set(type, embedding);
        }
        this.initialized = true;
    }
    /**
     * Extract entities from text using neural matching.
     *
     * @param {string} text - Input text to scan.
     * @param {object} [options]
     * @param {string[]} [options.types] - NounTypes to consider during neural
     *   matching. Defaults to every NounType value.
     * @param {number} [options.confidence=0.6] - Minimum score an entity needs
     *   to be returned. An explicit 0 is honoured.
     * @param {boolean} [options.includeVectors] - Attach the candidate's
     *   embedding to each returned entity as `vector`.
     * @param {boolean} [options.neuralMatching=true] - Pass `false` to
     *   classify with classifyByRules instead of embeddings.
     * @returns {Promise<object[]>} Non-overlapping entities
     *   `{ text, type, position, confidence, vector? }` ordered by start offset.
     */
    async extract(text, options) {
        await this.initializeTypeEmbeddings();
        const entities = [];
        // `??` rather than `||` so that a caller-supplied threshold of 0 is kept
        const minConfidence = options?.confidence ?? 0.6;
        const targetTypes = options?.types || Object.values(NounType);
        const useNeuralMatching = options?.neuralMatching !== false; // Default true
        // Step 1: Extract potential entities using patterns
        const candidates = await this.extractCandidates(text);
        // Step 2: Classify each candidate
        for (const candidate of candidates) {
            let bestType = NounType.Thing;
            let bestConfidence = 0;
            let candidateVector;
            if (useNeuralMatching) {
                candidateVector = await this.getEmbedding(candidate.text);
                // Find best matching NounType
                for (const type of targetTypes) {
                    const typeVector = this.typeEmbeddings.get(type);
                    if (!typeVector)
                        continue;
                    const similarity = this.cosineSimilarity(candidateVector, typeVector);
                    // Apply context-based boosting
                    const contextBoost = this.getContextBoost(candidate.text, candidate.context, type);
                    const adjustedConfidence = similarity * (1 + contextBoost);
                    if (adjustedConfidence > bestConfidence) {
                        bestConfidence = adjustedConfidence;
                        bestType = type;
                    }
                }
            }
            else {
                // Fallback to rule-based classification
                const classification = this.classifyByRules(candidate);
                bestType = classification.type;
                bestConfidence = classification.confidence;
            }
            if (bestConfidence < minConfidence)
                continue;
            const entity = {
                text: candidate.text,
                type: bestType,
                position: candidate.position,
                // The context boost can push the raw score above 1; report at most 1
                confidence: Math.min(bestConfidence, 1)
            };
            if (options?.includeVectors) {
                // Reuse the embedding computed for matching instead of embedding twice
                entity.vector = candidateVector ?? (await this.getEmbedding(candidate.text));
            }
            entities.push(entity);
        }
        // Remove duplicates and overlaps
        return this.deduplicateEntities(entities);
    }
    /**
     * Extract candidate entities using patterns.
     *
     * Each candidate is `{ text, position: { start, end }, context }` where
     * `position` spans the whole regex match (for quoted strings this includes
     * the quote characters while `text` does not) and `context` is the match
     * plus up to 30 characters on either side.
     *
     * @param {string} text - Input text to scan.
     * @returns {Promise<object[]>} Candidates in pattern order, then match order.
     */
    async extractCandidates(text) {
        const candidates = [];
        // Patterns are created per call, so their global-flag state is never shared
        const patterns = [
            // Capitalized words (potential names, places, organizations)
            /\b([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)\b/g,
            // Email addresses
            /\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b/g,
            // URLs
            /\b(https?:\/\/[^\s]+|www\.[^\s]+)\b/g,
            // Phone numbers. A lookbehind replaces the leading \b: \b can never
            // sit between whitespace and '+', so the prefix used to be dropped.
            /(?<!\w)(\+?\d{1,3}?[- .]?\(?\d{1,4}\)?[- .]?\d{1,4}[- .]?\d{1,4})\b/g,
            // Dates
            /\b(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}|\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2})\b/g,
            // Money amounts. No \b before '$': there is no word boundary between
            // a space and '$', which made "$100" unmatchable in normal prose.
            /(\$\d(?:[\d,]*\d)?(?:\.\d{2})?(?!\w)|\b\d(?:[\d,]*\d)?(?:\.\d{2})?\s*(?:USD|EUR|GBP|JPY|CNY)\b)/gi,
            // Percentages. No \b after '%': it only matched when a word
            // character followed directly, so "50% of" was missed.
            /\b(\d+(?:\.\d+)?%)/g,
            // Hashtags and mentions (not preceded by a word character, so the
            // "@domain" part of an e-mail address is not reported as a mention)
            /(?<!\w)([#@][a-zA-Z0-9_]+)/g,
            // Product versions
            /\b([A-Z][a-zA-Z0-9]+\s+v?\d+(?:\.\d+)*)\b/g,
            // Quoted strings (potential names, titles)
            /"([^"]+)"/g,
            /'([^']+)'/g
        ];
        for (const pattern of patterns) {
            for (const match of text.matchAll(pattern)) {
                const extractedText = match[1] || match[0];
                // Skip too short or too long
                if (extractedText.length < 2 || extractedText.length > 100)
                    continue;
                const matchEnd = match.index + match[0].length;
                // Get context (surrounding text)
                const contextStart = Math.max(0, match.index - 30);
                const contextEnd = Math.min(text.length, matchEnd + 30);
                candidates.push({
                    text: extractedText,
                    position: {
                        start: match.index,
                        end: matchEnd
                    },
                    context: text.substring(contextStart, contextEnd)
                });
            }
        }
        return candidates;
    }
    /**
     * Get context-based confidence boost for type matching.
     *
     * Alphanumeric clues must appear as whole words in the context, so short
     * clues such as "in" or "at" no longer fire inside "meeting" or "data".
     * Symbol clues such as "$" are matched as substrings.
     *
     * @param {string} text - Candidate text (currently unused).
     * @param {string} context - Text surrounding the candidate.
     * @param {string} type - NounType being scored.
     * @returns {number} 0.1 per clue found, capped at 0.3.
     */
    getContextBoost(text, context, type) {
        const contextLower = context.toLowerCase();
        const contextWords = new Set(contextLower.match(/[\p{L}\p{N}]+/gu) ?? []);
        // Context clues for each type
        const contextClues = {
            [NounType.Person]: ['mr', 'ms', 'mrs', 'dr', 'prof', 'said', 'told', 'wrote'],
            [NounType.Organization]: ['inc', 'corp', 'llc', 'ltd', 'company', 'announced'],
            [NounType.Location]: ['in', 'at', 'from', 'to', 'near', 'located', 'city', 'country'],
            [NounType.Document]: ['file', 'document', 'report', 'paper', 'pdf', 'doc'],
            [NounType.Event]: ['event', 'conference', 'meeting', 'summit', 'on', 'at'],
            [NounType.Product]: ['product', 'version', 'release', 'model', 'buy', 'sell'],
            [NounType.Currency]: ['$', '€', '£', '¥', 'usd', 'eur', 'price', 'cost'],
            [NounType.Message]: ['email', 'message', 'sent', 'received', 'wrote', 'reply'],
            // Add more context clues as needed
        };
        const clues = contextClues[type] || [];
        let boost = 0;
        for (const clue of clues) {
            const isWordClue = /^[a-z0-9]+$/.test(clue);
            const found = isWordClue ? contextWords.has(clue) : contextLower.includes(clue);
            if (found) {
                boost += 0.1;
            }
        }
        return Math.min(boost, 0.3); // Cap boost at 0.3
    }
    /**
     * Rule-based classification fallback.
     *
     * @param {{text: string}} candidate - Candidate from extractCandidates.
     * @returns {{type: string, confidence: number}} First matching rule, or
     *   Thing with confidence 0.3 when no rule applies.
     */
    classifyByRules(candidate) {
        const text = candidate.text;
        // Mention: must be tested before the e-mail rule, because "@user" also
        // contains '@' and would otherwise always be classified as a Message.
        if (text.startsWith('@')) {
            return { type: NounType.User, confidence: 0.8 };
        }
        // Email
        if (text.includes('@')) {
            return { type: NounType.Message, confidence: 0.9 };
        }
        // URL
        if (text.startsWith('http') || text.startsWith('www.')) {
            return { type: NounType.Resource, confidence: 0.9 };
        }
        // Money
        if (text.startsWith('$') || /\d+\.\d{2}/.test(text)) {
            return { type: NounType.Currency, confidence: 0.85 };
        }
        // Percentage
        if (text.endsWith('%')) {
            return { type: NounType.Measurement, confidence: 0.85 };
        }
        // Date pattern
        if (/\d{1,2}[\/\-]\d{1,2}/.test(text)) {
            return { type: NounType.Event, confidence: 0.7 };
        }
        // Hashtag
        if (text.startsWith('#')) {
            return { type: NounType.Topic, confidence: 0.8 };
        }
        // Capitalized words (likely proper nouns)
        if (/^[A-Z]/.test(text)) {
            // Multiple words - likely organization or person
            const words = text.split(/\s+/);
            if (words.length > 1) {
                // Check for organization suffixes
                if (/\b(Inc|Corp|LLC|Ltd|Co|Group|Foundation|University)\b/i.test(text)) {
                    return { type: NounType.Organization, confidence: 0.75 };
                }
                // Likely a person's name
                return { type: NounType.Person, confidence: 0.65 };
            }
            // Single capitalized word - could be location
            return { type: NounType.Location, confidence: 0.5 };
        }
        // Default to Thing with low confidence
        return { type: NounType.Thing, confidence: 0.3 };
    }
    /**
     * Get embedding for text.
     *
     * Delegates to `brain.embed(text)` when the brain provides it. Otherwise
     * builds a 384-element vector by accumulating scaled character codes and
     * dividing by the text length. Empty text yields the all-zero vector
     * instead of dividing by zero.
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} Embedding vector.
     */
    async getEmbedding(text) {
        // Optional chaining also tolerates a null/undefined brain
        if (typeof this.brain?.embed === 'function') {
            return await this.brain.embed(text);
        }
        // Fallback - create simple hash-based vector
        const dimensions = 384;
        const vector = new Array(dimensions).fill(0);
        if (text.length === 0) {
            return vector;
        }
        for (let i = 0; i < text.length; i++) {
            vector[i % dimensions] += text.charCodeAt(i) / 255;
        }
        return vector.map((v) => v / text.length);
    }
    /**
     * Calculate cosine similarity between vectors.
     *
     * @param {ArrayLike<number>} a
     * @param {ArrayLike<number>} b
     * @returns {number} Cosine similarity, or 0 when a vector is missing, the
     *   lengths differ, or either vector has zero norm.
     */
    cosineSimilarity(a, b) {
        // Vectors of different size are not comparable; avoid a NaN result
        if (!a || !b || a.length !== b.length) {
            return 0;
        }
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        normA = Math.sqrt(normA);
        normB = Math.sqrt(normB);
        if (normA === 0 || normB === 0)
            return 0;
        return dotProduct / (normA * normB);
    }
    /**
     * Simple hash function for fallback.
     *
     * @param {string} text
     * @returns {number} Non-negative hash derived from the UTF-16 code units.
     */
    simpleHash(text) {
        let hash = 0;
        for (let i = 0; i < text.length; i++) {
            const char = text.charCodeAt(i);
            hash = ((hash << 5) - hash) + char;
            hash |= 0; // Convert to 32-bit integer
        }
        return Math.abs(hash);
    }
    /**
     * Remove duplicate and overlapping entities.
     *
     * Entities are ordered by start offset (higher confidence first when the
     * starts tie) and kept greedily: an entity is dropped when it begins
     * before the end of an entity that was already kept. The input array is
     * left untouched.
     *
     * @param {object[]} entities - Entities with `position` and `confidence`.
     * @returns {object[]} New array of non-overlapping entities.
     */
    deduplicateEntities(entities) {
        const ordered = [...entities].sort((a, b) => {
            if (a.position.start !== b.position.start) {
                return a.position.start - b.position.start;
            }
            return b.confidence - a.confidence; // Higher confidence first
        });
        const result = [];
        // Kept entities are sorted and disjoint, so only the furthest end
        // seen so far has to be compared against the next start.
        let furthestEnd = -Infinity;
        for (const entity of ordered) {
            if (entity.position.start >= furthestEnd) {
                result.push(entity);
                furthestEnd = Math.max(furthestEnd, entity.position.end);
            }
        }
        return result;
    }
}
|
|
316
|
+
//# sourceMappingURL=entityExtractor.js.map
|