@soulcraft/brainy 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. package/CHANGELOG.md +53 -3
  2. package/README.md +427 -111
  3. package/bin/brainy.js +340 -62
  4. package/dist/api/ConfigAPI.d.ts +67 -0
  5. package/dist/api/ConfigAPI.js +166 -0
  6. package/dist/api/DataAPI.d.ts +123 -0
  7. package/dist/api/DataAPI.js +391 -0
  8. package/dist/api/SecurityAPI.d.ts +50 -0
  9. package/dist/api/SecurityAPI.js +139 -0
  10. package/dist/api/UniversalImportAPI.d.ts +134 -0
  11. package/dist/api/UniversalImportAPI.js +615 -0
  12. package/dist/augmentationManager.js +12 -7
  13. package/dist/augmentationPipeline.d.ts +0 -61
  14. package/dist/augmentationPipeline.js +0 -87
  15. package/dist/augmentationRegistry.d.ts +1 -1
  16. package/dist/augmentationRegistry.js +1 -1
  17. package/dist/augmentations/apiServerAugmentation.d.ts +27 -1
  18. package/dist/augmentations/apiServerAugmentation.js +290 -9
  19. package/dist/augmentations/auditLogAugmentation.d.ts +109 -0
  20. package/dist/augmentations/auditLogAugmentation.js +358 -0
  21. package/dist/augmentations/batchProcessingAugmentation.d.ts +3 -2
  22. package/dist/augmentations/batchProcessingAugmentation.js +123 -22
  23. package/dist/augmentations/brainyAugmentation.d.ts +142 -8
  24. package/dist/augmentations/brainyAugmentation.js +179 -2
  25. package/dist/augmentations/cacheAugmentation.d.ts +8 -5
  26. package/dist/augmentations/cacheAugmentation.js +116 -17
  27. package/dist/augmentations/conduitAugmentations.d.ts +2 -2
  28. package/dist/augmentations/conduitAugmentations.js +2 -2
  29. package/dist/augmentations/configResolver.d.ts +122 -0
  30. package/dist/augmentations/configResolver.js +440 -0
  31. package/dist/augmentations/connectionPoolAugmentation.d.ts +3 -1
  32. package/dist/augmentations/connectionPoolAugmentation.js +37 -12
  33. package/dist/augmentations/defaultAugmentations.d.ts +14 -10
  34. package/dist/augmentations/defaultAugmentations.js +16 -11
  35. package/dist/augmentations/discovery/catalogDiscovery.d.ts +142 -0
  36. package/dist/augmentations/discovery/catalogDiscovery.js +249 -0
  37. package/dist/augmentations/discovery/localDiscovery.d.ts +84 -0
  38. package/dist/augmentations/discovery/localDiscovery.js +246 -0
  39. package/dist/augmentations/discovery/runtimeLoader.d.ts +97 -0
  40. package/dist/augmentations/discovery/runtimeLoader.js +337 -0
  41. package/dist/augmentations/discovery.d.ts +152 -0
  42. package/dist/augmentations/discovery.js +441 -0
  43. package/dist/augmentations/display/cache.d.ts +130 -0
  44. package/dist/augmentations/display/cache.js +319 -0
  45. package/dist/augmentations/display/fieldPatterns.d.ts +52 -0
  46. package/dist/augmentations/display/fieldPatterns.js +393 -0
  47. package/dist/augmentations/display/iconMappings.d.ts +57 -0
  48. package/dist/augmentations/display/iconMappings.js +68 -0
  49. package/dist/augmentations/display/intelligentComputation.d.ts +109 -0
  50. package/dist/augmentations/display/intelligentComputation.js +462 -0
  51. package/dist/augmentations/display/types.d.ts +203 -0
  52. package/dist/augmentations/display/types.js +7 -0
  53. package/dist/augmentations/entityRegistryAugmentation.d.ts +3 -1
  54. package/dist/augmentations/entityRegistryAugmentation.js +5 -1
  55. package/dist/augmentations/indexAugmentation.d.ts +5 -3
  56. package/dist/augmentations/indexAugmentation.js +5 -2
  57. package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +24 -7
  58. package/dist/augmentations/intelligentVerbScoringAugmentation.js +111 -27
  59. package/dist/augmentations/manifest.d.ts +176 -0
  60. package/dist/augmentations/manifest.js +8 -0
  61. package/dist/augmentations/marketplace/AugmentationMarketplace.d.ts +168 -0
  62. package/dist/augmentations/marketplace/AugmentationMarketplace.js +329 -0
  63. package/dist/augmentations/marketplace/cli.d.ts +47 -0
  64. package/dist/augmentations/marketplace/cli.js +265 -0
  65. package/dist/augmentations/metricsAugmentation.d.ts +3 -3
  66. package/dist/augmentations/metricsAugmentation.js +2 -2
  67. package/dist/augmentations/monitoringAugmentation.d.ts +3 -3
  68. package/dist/augmentations/monitoringAugmentation.js +2 -2
  69. package/dist/augmentations/neuralImport.d.ts +1 -1
  70. package/dist/augmentations/neuralImport.js +4 -4
  71. package/dist/augmentations/rateLimitAugmentation.d.ts +82 -0
  72. package/dist/augmentations/rateLimitAugmentation.js +321 -0
  73. package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +2 -2
  74. package/dist/augmentations/requestDeduplicatorAugmentation.js +1 -1
  75. package/dist/augmentations/storageAugmentation.d.ts +1 -1
  76. package/dist/augmentations/storageAugmentation.js +2 -2
  77. package/dist/augmentations/storageAugmentations.d.ts +37 -8
  78. package/dist/augmentations/storageAugmentations.js +204 -15
  79. package/dist/augmentations/synapseAugmentation.d.ts +1 -1
  80. package/dist/augmentations/synapseAugmentation.js +35 -16
  81. package/dist/augmentations/typeMatching/brainyTypes.d.ts +83 -0
  82. package/dist/augmentations/typeMatching/brainyTypes.js +425 -0
  83. package/dist/augmentations/typeMatching/intelligentTypeMatcher.d.ts +39 -59
  84. package/dist/augmentations/typeMatching/intelligentTypeMatcher.js +103 -389
  85. package/dist/augmentations/universalDisplayAugmentation.d.ts +191 -0
  86. package/dist/augmentations/universalDisplayAugmentation.js +371 -0
  87. package/dist/brainy-unified.d.ts +106 -0
  88. package/dist/brainy-unified.js +327 -0
  89. package/dist/brainy.d.ts +277 -0
  90. package/dist/brainy.js +1241 -0
  91. package/dist/brainyData.d.ts +56 -111
  92. package/dist/brainyData.js +912 -756
  93. package/dist/brainyDataV3.d.ts +186 -0
  94. package/dist/brainyDataV3.js +337 -0
  95. package/dist/config/distributedPresets-new.d.ts +118 -0
  96. package/dist/config/distributedPresets-new.js +318 -0
  97. package/dist/config/distributedPresets.d.ts +118 -0
  98. package/dist/config/distributedPresets.js +318 -0
  99. package/dist/config/extensibleConfig.d.ts +99 -0
  100. package/dist/config/extensibleConfig.js +268 -0
  101. package/dist/config/index.d.ts +17 -0
  102. package/dist/config/index.js +35 -0
  103. package/dist/config/modelAutoConfig.d.ts +32 -0
  104. package/dist/config/modelAutoConfig.js +139 -0
  105. package/dist/config/modelPrecisionManager.d.ts +42 -0
  106. package/dist/config/modelPrecisionManager.js +98 -0
  107. package/dist/config/sharedConfigManager.d.ts +67 -0
  108. package/dist/config/sharedConfigManager.js +215 -0
  109. package/dist/config/storageAutoConfig.d.ts +41 -0
  110. package/dist/config/storageAutoConfig.js +328 -0
  111. package/dist/config/zeroConfig.d.ts +68 -0
  112. package/dist/config/zeroConfig.js +301 -0
  113. package/dist/cortex/backupRestore.d.ts +2 -2
  114. package/dist/cortex/backupRestore.js +85 -27
  115. package/dist/cortex/healthCheck.d.ts +2 -2
  116. package/dist/cortex/neuralImport.d.ts +2 -2
  117. package/dist/cortex/neuralImport.js +18 -13
  118. package/dist/cortex/performanceMonitor.d.ts +2 -2
  119. package/dist/critical/model-guardian.d.ts +4 -0
  120. package/dist/critical/model-guardian.js +31 -11
  121. package/dist/demo.d.ts +4 -4
  122. package/dist/demo.js +7 -7
  123. package/dist/distributed/cacheSync.d.ts +112 -0
  124. package/dist/distributed/cacheSync.js +265 -0
  125. package/dist/distributed/coordinator.d.ts +193 -0
  126. package/dist/distributed/coordinator.js +548 -0
  127. package/dist/distributed/httpTransport.d.ts +120 -0
  128. package/dist/distributed/httpTransport.js +446 -0
  129. package/dist/distributed/index.d.ts +8 -0
  130. package/dist/distributed/index.js +5 -0
  131. package/dist/distributed/networkTransport.d.ts +132 -0
  132. package/dist/distributed/networkTransport.js +633 -0
  133. package/dist/distributed/queryPlanner.d.ts +104 -0
  134. package/dist/distributed/queryPlanner.js +327 -0
  135. package/dist/distributed/readWriteSeparation.d.ts +134 -0
  136. package/dist/distributed/readWriteSeparation.js +350 -0
  137. package/dist/distributed/shardManager.d.ts +114 -0
  138. package/dist/distributed/shardManager.js +357 -0
  139. package/dist/distributed/shardMigration.d.ts +110 -0
  140. package/dist/distributed/shardMigration.js +289 -0
  141. package/dist/distributed/storageDiscovery.d.ts +160 -0
  142. package/dist/distributed/storageDiscovery.js +551 -0
  143. package/dist/embeddings/CachedEmbeddings.d.ts +40 -0
  144. package/dist/embeddings/CachedEmbeddings.js +146 -0
  145. package/dist/embeddings/EmbeddingManager.d.ts +102 -0
  146. package/dist/embeddings/EmbeddingManager.js +291 -0
  147. package/dist/embeddings/SingletonModelManager.d.ts +95 -0
  148. package/dist/embeddings/SingletonModelManager.js +220 -0
  149. package/dist/embeddings/index.d.ts +12 -0
  150. package/dist/embeddings/index.js +16 -0
  151. package/dist/embeddings/lightweight-embedder.d.ts +0 -1
  152. package/dist/embeddings/lightweight-embedder.js +4 -12
  153. package/dist/embeddings/model-manager.d.ts +11 -0
  154. package/dist/embeddings/model-manager.js +43 -7
  155. package/dist/embeddings/universal-memory-manager.d.ts +1 -1
  156. package/dist/embeddings/universal-memory-manager.js +27 -67
  157. package/dist/embeddings/worker-embedding.js +4 -8
  158. package/dist/errors/brainyError.d.ts +5 -1
  159. package/dist/errors/brainyError.js +12 -0
  160. package/dist/examples/basicUsage.js +7 -4
  161. package/dist/graph/graphAdjacencyIndex.d.ts +96 -0
  162. package/dist/graph/graphAdjacencyIndex.js +288 -0
  163. package/dist/graph/pathfinding.js +4 -2
  164. package/dist/hnsw/scaledHNSWSystem.js +11 -2
  165. package/dist/importManager.js +8 -5
  166. package/dist/index.d.ts +17 -22
  167. package/dist/index.js +37 -23
  168. package/dist/mcp/brainyMCPAdapter.d.ts +4 -4
  169. package/dist/mcp/brainyMCPAdapter.js +5 -5
  170. package/dist/mcp/brainyMCPService.d.ts +3 -3
  171. package/dist/mcp/brainyMCPService.js +3 -11
  172. package/dist/mcp/mcpAugmentationToolset.js +20 -30
  173. package/dist/neural/embeddedPatterns.d.ts +1 -1
  174. package/dist/neural/embeddedPatterns.js +2 -2
  175. package/dist/neural/entityExtractor.d.ts +65 -0
  176. package/dist/neural/entityExtractor.js +316 -0
  177. package/dist/neural/improvedNeuralAPI.d.ts +357 -0
  178. package/dist/neural/improvedNeuralAPI.js +2628 -0
  179. package/dist/neural/naturalLanguageProcessor.d.ts +155 -10
  180. package/dist/neural/naturalLanguageProcessor.js +941 -66
  181. package/dist/neural/naturalLanguageProcessorStatic.d.ts +2 -2
  182. package/dist/neural/naturalLanguageProcessorStatic.js +3 -3
  183. package/dist/neural/neuralAPI.js +8 -2
  184. package/dist/neural/patternLibrary.d.ts +57 -3
  185. package/dist/neural/patternLibrary.js +348 -13
  186. package/dist/neural/staticPatternMatcher.d.ts +2 -2
  187. package/dist/neural/staticPatternMatcher.js +2 -2
  188. package/dist/neural/types.d.ts +287 -0
  189. package/dist/neural/types.js +24 -0
  190. package/dist/shared/default-augmentations.d.ts +3 -3
  191. package/dist/shared/default-augmentations.js +5 -5
  192. package/dist/storage/adapters/baseStorageAdapter.d.ts +42 -0
  193. package/dist/storage/adapters/fileSystemStorage.d.ts +26 -2
  194. package/dist/storage/adapters/fileSystemStorage.js +218 -15
  195. package/dist/storage/adapters/memoryStorage.d.ts +4 -4
  196. package/dist/storage/adapters/memoryStorage.js +17 -12
  197. package/dist/storage/adapters/opfsStorage.d.ts +2 -2
  198. package/dist/storage/adapters/opfsStorage.js +2 -2
  199. package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -2
  200. package/dist/storage/adapters/s3CompatibleStorage.js +2 -2
  201. package/dist/storage/backwardCompatibility.d.ts +10 -78
  202. package/dist/storage/backwardCompatibility.js +17 -132
  203. package/dist/storage/baseStorage.d.ts +18 -2
  204. package/dist/storage/baseStorage.js +74 -3
  205. package/dist/storage/cacheManager.js +2 -2
  206. package/dist/storage/readOnlyOptimizations.js +8 -3
  207. package/dist/streaming/pipeline.d.ts +154 -0
  208. package/dist/streaming/pipeline.js +551 -0
  209. package/dist/triple/TripleIntelligence.d.ts +25 -110
  210. package/dist/triple/TripleIntelligence.js +4 -574
  211. package/dist/triple/TripleIntelligenceSystem.d.ts +159 -0
  212. package/dist/triple/TripleIntelligenceSystem.js +519 -0
  213. package/dist/types/apiTypes.d.ts +278 -0
  214. package/dist/types/apiTypes.js +33 -0
  215. package/dist/types/brainy.types.d.ts +308 -0
  216. package/dist/types/brainy.types.js +8 -0
  217. package/dist/types/brainyDataInterface.d.ts +5 -8
  218. package/dist/types/brainyDataInterface.js +2 -2
  219. package/dist/types/graphTypes.js +2 -2
  220. package/dist/universal/crypto.d.ts +11 -1
  221. package/dist/universal/crypto.js +24 -93
  222. package/dist/universal/events.d.ts +3 -2
  223. package/dist/universal/events.js +6 -75
  224. package/dist/universal/fs.d.ts +2 -3
  225. package/dist/universal/fs.js +5 -211
  226. package/dist/universal/path.d.ts +3 -2
  227. package/dist/universal/path.js +22 -78
  228. package/dist/universal/uuid.d.ts +1 -1
  229. package/dist/universal/uuid.js +1 -1
  230. package/dist/utils/brainyTypes.d.ts +217 -0
  231. package/dist/utils/brainyTypes.js +261 -0
  232. package/dist/utils/cacheAutoConfig.d.ts +3 -3
  233. package/dist/utils/embedding.d.ts +9 -4
  234. package/dist/utils/embedding.js +89 -26
  235. package/dist/utils/enhancedLogger.d.ts +104 -0
  236. package/dist/utils/enhancedLogger.js +232 -0
  237. package/dist/utils/hybridModelManager.d.ts +19 -28
  238. package/dist/utils/hybridModelManager.js +36 -200
  239. package/dist/utils/index.d.ts +1 -1
  240. package/dist/utils/index.js +1 -1
  241. package/dist/utils/intelligentTypeMapper.d.ts +60 -0
  242. package/dist/utils/intelligentTypeMapper.js +349 -0
  243. package/dist/utils/metadataIndex.d.ts +118 -1
  244. package/dist/utils/metadataIndex.js +539 -16
  245. package/dist/utils/nodeVersionCheck.d.ts +24 -0
  246. package/dist/utils/nodeVersionCheck.js +65 -0
  247. package/dist/utils/paramValidation.d.ts +39 -0
  248. package/dist/utils/paramValidation.js +192 -0
  249. package/dist/utils/rateLimiter.d.ts +160 -0
  250. package/dist/utils/rateLimiter.js +271 -0
  251. package/dist/utils/statistics.d.ts +4 -4
  252. package/dist/utils/statistics.js +3 -3
  253. package/dist/utils/structuredLogger.d.ts +146 -0
  254. package/dist/utils/structuredLogger.js +394 -0
  255. package/dist/utils/textEncoding.js +2 -1
  256. package/dist/utils/typeValidation.d.ts +59 -0
  257. package/dist/utils/typeValidation.js +374 -0
  258. package/dist/utils/version.js +19 -3
  259. package/package.json +15 -17
  260. package/scripts/download-models.cjs +94 -20
  261. package/dist/augmentations/walAugmentation.d.ts +0 -109
  262. package/dist/augmentations/walAugmentation.js +0 -516
  263. package/dist/browserFramework.d.ts +0 -15
  264. package/dist/browserFramework.js +0 -31
  265. package/dist/browserFramework.minimal.d.ts +0 -14
  266. package/dist/browserFramework.minimal.js +0 -31
  267. package/dist/chat/BrainyChat.d.ts +0 -121
  268. package/dist/chat/BrainyChat.js +0 -396
  269. package/dist/chat/ChatCLI.d.ts +0 -61
  270. package/dist/chat/ChatCLI.js +0 -351
@@ -9,29 +9,369 @@
9
9
  * - Progressive learning from usage
10
10
  */
11
11
  import { PatternLibrary } from './patternLibrary.js';
12
+ import { NounType, VerbType } from '../types/graphTypes.js';
12
13
  export class NaturalLanguageProcessor {
13
14
  constructor(brain) {
14
15
  this.initialized = false;
16
+ this.embeddingCache = new Map();
17
+ // Field discovery with semantic matching
18
+ this.fieldEmbeddings = new Map();
19
+ this.fieldNames = [];
20
+ this.lastFieldRefresh = 0;
21
+ this.FIELD_REFRESH_INTERVAL = 60000; // Refresh every minute
22
+ // Type embeddings for NounType and VerbType matching
23
+ this.nounTypeEmbeddings = new Map();
24
+ this.verbTypeEmbeddings = new Map();
25
+ this.typeEmbeddingsInitialized = false;
15
26
  this.brain = brain;
16
27
  this.patternLibrary = new PatternLibrary(brain);
17
28
  this.queryHistory = [];
18
29
  }
30
+ /**
31
+ * Get embedding directly using brain's embed method
32
+ */
33
+ async getEmbedding(text) {
34
+ // Check cache first
35
+ if (this.embeddingCache.has(text)) {
36
+ return this.embeddingCache.get(text);
37
+ }
38
+ // Use brain's embed method directly to avoid recursion
39
+ const embedding = await this.brain.embed(text);
40
+ // Cache the embedding
41
+ this.embeddingCache.set(text, embedding);
42
+ return embedding;
43
+ }
19
44
  /**
20
45
  * Initialize the pattern library (lazy loading)
21
46
  */
22
47
  async ensureInitialized() {
23
48
  if (!this.initialized) {
24
49
  await this.patternLibrary.init();
50
+ await this.initializeTypeEmbeddings(); // Embed all noun/verb types
51
+ await this.refreshFieldEmbeddings(); // Load field embeddings
25
52
  this.initialized = true;
26
53
  }
27
54
  }
55
+ /**
56
+ * Initialize embeddings for all NounTypes and VerbTypes
57
+ * These are fixed types that never change - perfect for caching
58
+ */
59
+ async initializeTypeEmbeddings() {
60
+ if (this.typeEmbeddingsInitialized)
61
+ return;
62
+ // Embed all NounTypes (30+ types)
63
+ for (const [key, value] of Object.entries(NounType)) {
64
+ if (typeof value === 'string') {
65
+ // Embed both the key (Person) and value (person)
66
+ const keyEmbedding = await this.getEmbedding(key);
67
+ const valueEmbedding = await this.getEmbedding(value);
68
+ this.nounTypeEmbeddings.set(key, keyEmbedding);
69
+ this.nounTypeEmbeddings.set(value, valueEmbedding);
70
+ // Also embed common variations
71
+ const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
72
+ if (spaceSeparated !== value) {
73
+ const variantEmbedding = await this.getEmbedding(spaceSeparated);
74
+ this.nounTypeEmbeddings.set(spaceSeparated, variantEmbedding);
75
+ }
76
+ }
77
+ }
78
+ // Embed all VerbTypes (40+ types)
79
+ for (const [key, value] of Object.entries(VerbType)) {
80
+ if (typeof value === 'string') {
81
+ const keyEmbedding = await this.getEmbedding(key);
82
+ const valueEmbedding = await this.getEmbedding(value);
83
+ this.verbTypeEmbeddings.set(key, keyEmbedding);
84
+ this.verbTypeEmbeddings.set(value, valueEmbedding);
85
+ // Common variations for verbs
86
+ const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
87
+ if (spaceSeparated !== value) {
88
+ const variantEmbedding = await this.getEmbedding(spaceSeparated);
89
+ this.verbTypeEmbeddings.set(spaceSeparated, variantEmbedding);
90
+ }
91
+ }
92
+ }
93
+ this.typeEmbeddingsInitialized = true;
94
+ }
95
+ /**
96
+ * Find best matching NounType using semantic similarity
97
+ */
98
+ async findBestNounType(term) {
99
+ const termEmbedding = await this.getEmbedding(term);
100
+ let bestMatch = null;
101
+ let bestScore = 0;
102
+ for (const [typeName, typeEmbedding] of this.nounTypeEmbeddings) {
103
+ const similarity = this.cosineSimilarity(termEmbedding, typeEmbedding);
104
+ if (similarity > bestScore && similarity > 0.75) { // Higher threshold for types
105
+ bestScore = similarity;
106
+ bestMatch = typeName;
107
+ }
108
+ }
109
+ // Map back to the actual NounType value
110
+ if (bestMatch) {
111
+ for (const [key, value] of Object.entries(NounType)) {
112
+ if (key === bestMatch || value === bestMatch ||
113
+ key.toLowerCase() === bestMatch.toLowerCase()) {
114
+ return { type: value, confidence: bestScore };
115
+ }
116
+ }
117
+ }
118
+ return { type: null, confidence: 0 };
119
+ }
120
+ /**
121
+ * Find best matching VerbType using semantic similarity
122
+ */
123
+ async findBestVerbType(term) {
124
+ const termEmbedding = await this.getEmbedding(term);
125
+ let bestMatch = null;
126
+ let bestScore = 0;
127
+ for (const [typeName, typeEmbedding] of this.verbTypeEmbeddings) {
128
+ const similarity = this.cosineSimilarity(termEmbedding, typeEmbedding);
129
+ if (similarity > bestScore && similarity > 0.75) {
130
+ bestScore = similarity;
131
+ bestMatch = typeName;
132
+ }
133
+ }
134
+ // Map back to the actual VerbType value
135
+ if (bestMatch) {
136
+ for (const [key, value] of Object.entries(VerbType)) {
137
+ if (key === bestMatch || value === bestMatch ||
138
+ key.toLowerCase() === bestMatch.toLowerCase()) {
139
+ return { type: value, confidence: bestScore };
140
+ }
141
+ }
142
+ }
143
+ return { type: null, confidence: 0 };
144
+ }
145
+ /**
146
+ * Refresh field embeddings from metadata index
147
+ * Creates embeddings for all indexed fields for semantic matching
148
+ */
149
+ async refreshFieldEmbeddings() {
150
+ const now = Date.now();
151
+ if (now - this.lastFieldRefresh < this.FIELD_REFRESH_INTERVAL) {
152
+ return; // Skip if recently refreshed
153
+ }
154
+ try {
155
+ // Get actual indexed fields from metadata
156
+ this.fieldNames = await this.brain.getAvailableFields();
157
+ // Create embeddings for each field name for semantic matching
158
+ for (const field of this.fieldNames) {
159
+ if (!this.fieldEmbeddings.has(field)) {
160
+ // Embed the field name itself
161
+ const fieldEmbedding = await this.getEmbedding(field);
162
+ this.fieldEmbeddings.set(field, fieldEmbedding);
163
+ // Also embed common variations
164
+ const variations = this.getFieldVariations(field);
165
+ for (const variant of variations) {
166
+ const variantEmbedding = await this.getEmbedding(variant);
167
+ this.fieldEmbeddings.set(variant, variantEmbedding);
168
+ }
169
+ }
170
+ }
171
+ this.lastFieldRefresh = now;
172
+ }
173
+ catch (error) {
174
+ console.warn('Failed to refresh field embeddings:', error);
175
+ }
176
+ }
177
+ /**
178
+ * Generate linguistic variations of field names - NO HARDCODED TERMS
179
+ * Uses algorithmic patterns to create natural variations
180
+ */
181
+ getFieldVariations(field) {
182
+ const variations = [];
183
+ // camelCase to space separated: publishDate -> publish date
184
+ const spaceSeparated = field.replace(/([A-Z])/g, ' $1').toLowerCase().trim();
185
+ if (spaceSeparated !== field.toLowerCase()) {
186
+ variations.push(spaceSeparated);
187
+ }
188
+ // snake_case to space separated: created_at -> created at
189
+ const underscoreRemoved = field.replace(/_/g, ' ').toLowerCase();
190
+ if (underscoreRemoved !== field.toLowerCase()) {
191
+ variations.push(underscoreRemoved);
192
+ }
193
+ // kebab-case to space separated: publish-date -> publish date
194
+ const dashRemoved = field.replace(/-/g, ' ').toLowerCase();
195
+ if (dashRemoved !== field.toLowerCase()) {
196
+ variations.push(dashRemoved);
197
+ }
198
+ // Generate suffix variations (remove common suffixes)
199
+ const suffixes = ['At', 'Date', 'Time', 'Id', 'Ref', 'Name', 'Value', 'Count', 'Number'];
200
+ for (const suffix of suffixes) {
201
+ if (field.endsWith(suffix) && field.length > suffix.length) {
202
+ const withoutSuffix = field.slice(0, -suffix.length).toLowerCase();
203
+ variations.push(withoutSuffix);
204
+ }
205
+ }
206
+ // Generate prefix variations (remove common prefixes)
207
+ const prefixes = ['is', 'has', 'can', 'get', 'set'];
208
+ for (const prefix of prefixes) {
209
+ if (field.toLowerCase().startsWith(prefix) && field.length > prefix.length) {
210
+ const withoutPrefix = field.slice(prefix.length).toLowerCase();
211
+ variations.push(withoutPrefix);
212
+ }
213
+ }
214
+ return [...new Set(variations)]; // Remove duplicates
215
+ }
216
+ /**
217
+ * Find best matching field using semantic similarity
218
+ * Returns field name and confidence score
219
+ */
220
+ async findBestMatchingField(term) {
221
+ // Ensure fields are loaded
222
+ await this.refreshFieldEmbeddings();
223
+ if (this.fieldNames.length === 0) {
224
+ return { field: null, confidence: 0 };
225
+ }
226
+ // Get embedding for the search term
227
+ const termEmbedding = await this.getEmbedding(term);
228
+ // Find most similar field using cosine similarity
229
+ let bestMatch = null;
230
+ let bestScore = 0;
231
+ for (const [fieldName, fieldEmbedding] of this.fieldEmbeddings) {
232
+ const similarity = this.cosineSimilarity(termEmbedding, fieldEmbedding);
233
+ if (similarity > bestScore && similarity > 0.7) { // 0.7 threshold for semantic match
234
+ bestScore = similarity;
235
+ bestMatch = fieldName;
236
+ }
237
+ }
238
+ // Map back to actual field name if it was a variation
239
+ if (bestMatch && !this.fieldNames.includes(bestMatch)) {
240
+ // Find the original field this variation belongs to
241
+ for (const field of this.fieldNames) {
242
+ const variations = this.getFieldVariations(field);
243
+ if (variations.includes(bestMatch)) {
244
+ bestMatch = field;
245
+ break;
246
+ }
247
+ }
248
+ }
249
+ return { field: bestMatch, confidence: bestScore };
250
+ }
251
+ /**
252
+ * Find best matching field with type context prioritization
253
+ * Fields with high type affinity get boosted scores
254
+ */
255
+ async findBestMatchingFieldWithTypeContext(term, typeSpecificFields) {
256
+ // First do normal field matching
257
+ const normalMatch = await this.findBestMatchingField(term);
258
+ if (!normalMatch.field || typeSpecificFields.length === 0) {
259
+ return normalMatch;
260
+ }
261
+ // Check if the matched field has type affinity
262
+ const typeField = typeSpecificFields.find(tf => tf.field === normalMatch.field);
263
+ if (typeField) {
264
+ // Boost confidence based on type affinity
265
+ // High affinity (0.8+) gets 20% boost, medium affinity (0.5+) gets 10% boost
266
+ let boost = 0;
267
+ if (typeField.affinity >= 0.8) {
268
+ boost = 0.2;
269
+ }
270
+ else if (typeField.affinity >= 0.5) {
271
+ boost = 0.1;
272
+ }
273
+ const boostedConfidence = Math.min(1.0, normalMatch.confidence + boost);
274
+ return { field: normalMatch.field, confidence: boostedConfidence };
275
+ }
276
+ return normalMatch;
277
+ }
278
+ /**
279
+ * Validate field-type compatibility
280
+ * Returns validation result with suggestions for invalid combinations
281
+ */
282
+ async validateFieldForType(field, nounType) {
283
+ // Get fields that actually appear with this type
284
+ const typeFields = await this.brain.getFieldsForType(nounType);
285
+ // Check if this field appears with this type
286
+ const fieldInfo = typeFields.find(tf => tf.field === field);
287
+ if (fieldInfo) {
288
+ return {
289
+ isValid: true,
290
+ affinity: fieldInfo.affinity
291
+ };
292
+ }
293
+ // Field doesn't appear with this type - provide suggestions
294
+ const suggestions = typeFields
295
+ .filter(tf => tf.affinity > 0.1) // Only suggest common fields
296
+ .slice(0, 3) // Top 3 suggestions
297
+ .map(tf => tf.field);
298
+ return {
299
+ isValid: false,
300
+ affinity: 0,
301
+ suggestions,
302
+ reason: `Field '${field}' rarely appears with ${nounType} entities. Common fields: ${suggestions.join(', ')}`
303
+ };
304
+ }
305
+ /**
306
+ * Enhanced intelligent parse with validation
307
+ */
308
+ async validateAndOptimizeQuery(tripleQuery, detectedNounType, fieldMatches) {
309
+ if (!detectedNounType || !tripleQuery.where) {
310
+ return tripleQuery;
311
+ }
312
+ const validationErrors = [];
313
+ const optimizedWhere = {};
314
+ // Validate each field in the where clause
315
+ for (const [field, value] of Object.entries(tripleQuery.where)) {
316
+ if (field === 'noun') {
317
+ optimizedWhere[field] = value; // Always valid
318
+ continue;
319
+ }
320
+ const validation = await this.validateFieldForType(field, detectedNounType);
321
+ if (validation.isValid || validation.affinity > 0.05) {
322
+ // Valid or has some affinity - include in query
323
+ optimizedWhere[field] = value;
324
+ }
325
+ else {
326
+ // Invalid field for this type
327
+ validationErrors.push(validation.reason || `Invalid field: ${field}`);
328
+ // Try to find a better field match from suggestions
329
+ if (validation.suggestions && validation.suggestions.length > 0) {
330
+ const bestSuggestion = validation.suggestions[0];
331
+ optimizedWhere[bestSuggestion] = value;
332
+ }
333
+ }
334
+ }
335
+ // Log validation errors for debugging (in production, could throw or return errors)
336
+ if (validationErrors.length > 0) {
337
+ console.warn('Field validation warnings:', validationErrors);
338
+ }
339
+ return {
340
+ ...tripleQuery,
341
+ where: optimizedWhere
342
+ };
343
+ }
344
+ /**
345
+ * Calculate cosine similarity between two vectors
346
+ */
347
+ cosineSimilarity(a, b) {
348
+ if (a.length !== b.length)
349
+ return 0;
350
+ let dotProduct = 0;
351
+ let normA = 0;
352
+ let normB = 0;
353
+ for (let i = 0; i < a.length; i++) {
354
+ dotProduct += a[i] * b[i];
355
+ normA += a[i] * a[i];
356
+ normB += b[i] * b[i];
357
+ }
358
+ if (normA === 0 || normB === 0)
359
+ return 0;
360
+ return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
361
+ }
362
+ /**
363
+ * Public initialization method for external callers
364
+ */
365
+ async init() {
366
+ await this.ensureInitialized();
367
+ }
28
368
  /**
29
369
  * 🎯 MAIN METHOD: Convert natural language to Triple Intelligence query
30
370
  */
31
371
  async processNaturalQuery(naturalQuery) {
32
372
  await this.ensureInitialized();
33
- // Step 1: Embed the query for semantic matching
34
- const queryEmbedding = await this.brain.embed(naturalQuery);
373
+ // Step 1: Get embedding via add/get/delete pattern
374
+ const queryEmbedding = await this.getEmbedding(naturalQuery);
35
375
  // Step 2: Find best matching patterns from our library
36
376
  const matches = await this.patternLibrary.findBestPatterns(queryEmbedding, 3);
37
377
  // Step 3: Try each pattern until we get a good match
@@ -54,63 +394,198 @@ export class NaturalLanguageProcessor {
54
394
  return query;
55
395
  }
56
396
  }
57
- // Step 4: Fall back to hybrid approach if no pattern matches well
58
- return this.hybridParse(naturalQuery, queryEmbedding);
397
+ // Step 4: Use intelligent field-aware parsing instead of fallback
398
+ return this.intelligentParse(naturalQuery, queryEmbedding);
59
399
  }
60
400
  /**
61
- * Hybrid parse when pattern matching fails
401
+ * Intelligent parse using type-aware field discovery and semantic matching
402
+ * NO FALLBACKS - uses actual indexed fields, entities, and type context
62
403
  */
63
- async hybridParse(query, queryEmbedding) {
64
- // Analyze intent using embeddings and keywords
404
+ async intelligentParse(query, queryEmbedding) {
405
+ // Step 1: Analyze intent and extract structure
65
406
  const intent = await this.analyzeIntent(query);
66
- // Find similar successful queries from history
67
- // TODO: Implement findSimilarQueries method
68
- // const similar = await this.findSimilarQueries(queryEmbedding)
69
- // if (similar.length > 0 && similar[0].similarity > 0.9) {
70
- // // Adapt a very similar previous query
71
- // return this.adaptQuery(query, similar[0].result)
72
- // }
73
- // Extract entities using Brainy's search
74
- // TODO: Implement extractEntities method
75
- // const entities = await this.extractEntities(query)
76
- // Build query based on intent and entities
77
- // TODO: Implement buildQuery method
78
- // return this.buildQuery(query, intent, entities)
79
- // Return a basic query for now
80
- return {
81
- like: query,
82
- limit: 10
83
- };
407
+ // Step 2: Extract query terms
408
+ const queryTerms = query.split(/\s+/).filter(term => term.length > 2);
409
+ // Step 3: Detect NounType first for context
410
+ let detectedNounType = null;
411
+ let typeConfidence = 0;
412
+ for (const term of queryTerms) {
413
+ const nounTypeMatch = await this.findBestNounType(term);
414
+ if (nounTypeMatch.type && nounTypeMatch.confidence > typeConfidence) {
415
+ detectedNounType = nounTypeMatch.type;
416
+ typeConfidence = nounTypeMatch.confidence;
417
+ }
418
+ }
419
+ // Step 4: Get type-specific fields if we detected a type
420
+ let typeSpecificFields = [];
421
+ if (detectedNounType && typeConfidence > 0.75) {
422
+ const fieldsForType = await this.brain.getFieldsForType(detectedNounType);
423
+ typeSpecificFields = fieldsForType.map(f => ({ field: f.field, affinity: f.affinity }));
424
+ }
425
+ // Step 5: Find matching fields and entities with type context
426
+ const entityMatches = await this.findEntityMatchesWithTypeContext(queryTerms, typeSpecificFields);
427
+ // Step 6: Build structured query from matches
428
+ const tripleQuery = {};
429
+ // Separate fields from entities
430
+ const fieldMatches = entityMatches.filter(m => m.type === 'field');
431
+ const entityRefs = entityMatches.filter(m => m.type === 'entity');
432
+ // Add detected type constraint if confident
433
+ if (detectedNounType && typeConfidence > 0.75) {
434
+ tripleQuery.where = { ...tripleQuery.where, noun: detectedNounType };
435
+ }
436
+ // Build metadata filters from field matches
437
+ if (fieldMatches.length > 0) {
438
+ // Use field cardinality to optimize query order
439
+ fieldMatches.sort((a, b) => (a.cardinality || 0) - (b.cardinality || 0));
440
+ tripleQuery.where = {};
441
+ for (const match of fieldMatches) {
442
+ // Extract value for this field from query
443
+ const valuePattern = new RegExp(`${match.term}\\s*(?:is|=|:)?\\s*(\\S+)`, 'i');
444
+ const valueMatch = query.match(valuePattern);
445
+ if (valueMatch) {
446
+ tripleQuery.where[match.field] = valueMatch[1];
447
+ }
448
+ }
449
+ }
450
+ // Build graph connections from entity references
451
+ if (entityRefs.length > 0) {
452
+ tripleQuery.connected = {
453
+ to: entityRefs[0].id
454
+ };
455
+ }
456
+ // Use remaining terms for vector search
457
+ const usedTerms = new Set([...fieldMatches.map(m => m.term), ...entityRefs.map(m => m.term)]);
458
+ const searchTerms = queryTerms.filter(term => !usedTerms.has(term));
459
+ if (searchTerms.length > 0) {
460
+ tripleQuery.like = searchTerms.join(' ');
461
+ }
462
+ else if (!tripleQuery.where || Object.keys(tripleQuery.where).length === 0) {
463
+ // If no specific filters, use the full query for vector search
464
+ tripleQuery.like = query;
465
+ }
466
+ // Validate and optimize query based on type context
467
+ const validatedQuery = await this.validateAndOptimizeQuery(tripleQuery, detectedNounType, fieldMatches);
468
+ // Add query optimization hints based on field statistics
469
+ if (fieldMatches.length > 0 && validatedQuery.where) {
470
+ const queryPlan = await this.brain.getOptimalQueryPlan(validatedQuery.where);
471
+ // Attach optimization hints as a separate property
472
+ const hints = validatedQuery;
473
+ hints.optimizationHints = {
474
+ detectedType: detectedNounType,
475
+ typeConfidence,
476
+ fieldOrder: queryPlan.fieldOrder,
477
+ strategy: queryPlan.strategy,
478
+ estimatedCost: queryPlan.estimatedCost,
479
+ typeSpecificFieldCount: typeSpecificFields.length
480
+ };
481
+ }
482
+ return validatedQuery;
84
483
  }
85
484
  /**
86
- * Analyze intent using keywords and structure
485
+ * Analyze intent using keywords and structure with enhanced classification
87
486
  */
88
487
  async analyzeIntent(query) {
89
- // Use Brainy's embedding function to get semantic representation
90
- const queryEmbedding = await this.brain.embed(query);
91
- // Search for similar queries in history (if available)
92
- let confidence = 0.7; // Base confidence
93
- let type = 'vector'; // Default
94
488
  // Analyze query structure patterns
95
489
  const lowerQuery = query.toLowerCase();
490
+ // Determine primary intent
491
+ let primaryIntent = 'search';
492
+ let confidence = 0.7; // Base confidence
493
+ let type = 'vector'; // Default
494
+ // Intent detection patterns
495
+ if (lowerQuery.match(/\b(filter|where|with|having)\b/)) {
496
+ primaryIntent = 'filter';
497
+ confidence += 0.15;
498
+ }
499
+ else if (lowerQuery.match(/\b(count|sum|average|total|group by)\b/)) {
500
+ primaryIntent = 'aggregate';
501
+ confidence += 0.2;
502
+ }
503
+ else if (lowerQuery.match(/\b(compare|versus|vs|difference|between)\b/)) {
504
+ primaryIntent = 'compare';
505
+ confidence += 0.15;
506
+ }
507
+ else if (lowerQuery.match(/\b(explain|why|how|what causes)\b/)) {
508
+ primaryIntent = 'explain';
509
+ confidence += 0.1;
510
+ }
511
+ else if (lowerQuery.match(/\b(connected|related|linked|from.*to)\b/)) {
512
+ primaryIntent = 'navigate';
513
+ type = 'graph';
514
+ confidence += 0.15;
515
+ }
96
516
  // Detect field queries
97
517
  if (this.hasFieldPatterns(lowerQuery)) {
98
- type = 'field';
99
- confidence += 0.2;
518
+ type = type === 'graph' ? 'combined' : 'field';
519
+ confidence += 0.1;
100
520
  }
101
521
  // Detect connection queries
102
522
  if (this.hasConnectionPatterns(lowerQuery)) {
103
523
  type = type === 'field' ? 'combined' : 'graph';
104
524
  confidence += 0.1;
105
525
  }
106
- // Extract basic terms
526
+ // Extract context
527
+ const context = {
528
+ domain: this.detectDomain(query),
529
+ temporalScope: this.detectTemporalScope(query),
530
+ complexity: this.assessComplexity(query)
531
+ };
532
+ // Extract basic terms with enhanced modifiers
107
533
  const extractedTerms = this.extractTerms(query);
108
534
  return {
109
535
  type,
110
- confidence: Math.min(confidence, 1.0),
111
- extractedTerms
536
+ primaryIntent,
537
+ confidence,
538
+ extractedTerms,
539
+ context
112
540
  };
113
541
  }
542
+ /**
543
+ * Detect the domain of the query
544
+ */
545
+ detectDomain(query) {
546
+ const lowerQuery = query.toLowerCase();
547
+ if (lowerQuery.match(/\b(code|function|api|bug|error|debug)\b/)) {
548
+ return 'technical';
549
+ }
550
+ else if (lowerQuery.match(/\b(revenue|sales|profit|customer|market)\b/)) {
551
+ return 'business';
552
+ }
553
+ else if (lowerQuery.match(/\b(research|study|paper|theory|hypothesis)\b/)) {
554
+ return 'academic';
555
+ }
556
+ return 'general';
557
+ }
558
+ /**
559
+ * Detect temporal scope in query
560
+ */
561
+ detectTemporalScope(query) {
562
+ const lowerQuery = query.toLowerCase();
563
+ if (lowerQuery.match(/\b(was|were|did|had|yesterday|last|previous|ago)\b/)) {
564
+ return 'past';
565
+ }
566
+ else if (lowerQuery.match(/\b(will|going to|tomorrow|next|future|upcoming)\b/)) {
567
+ return 'future';
568
+ }
569
+ else if (lowerQuery.match(/\b(is|are|currently|now|today|present)\b/)) {
570
+ return 'present';
571
+ }
572
+ return 'all';
573
+ }
574
+ /**
575
+ * Assess query complexity
576
+ */
577
+ assessComplexity(query) {
578
+ const words = query.split(/\s+/).length;
579
+ const hasMultipleClauses = query.match(/\b(and|or|but|with|where)\b/g)?.length || 0;
580
+ const hasNesting = query.includes('(') || query.includes('[');
581
+ if (words < 5 && hasMultipleClauses === 0) {
582
+ return 'simple';
583
+ }
584
+ else if (words > 15 || hasMultipleClauses > 2 || hasNesting) {
585
+ return 'complex';
586
+ }
587
+ return 'moderate';
588
+ }
114
589
  /**
115
590
  * Step 2: Use neural analysis to decompose complex queries
116
591
  */
@@ -182,8 +657,15 @@ export class NaturalLanguageProcessor {
182
657
  const mods = intent.extractedTerms.modifiers;
183
658
  if (mods.limit)
184
659
  query.limit = mods.limit;
185
- if (mods.boost)
186
- query.boost = mods.boost;
660
+ // Convert string boost to proper boost object
661
+ if (mods.boost) {
662
+ if (mods.boost === 'recent') {
663
+ query.boost = { field: 2.0, vector: 1.0, graph: 1.0 };
664
+ }
665
+ else if (mods.boost === 'popular') {
666
+ query.boost = { graph: 2.0, vector: 1.0, field: 1.0 };
667
+ }
668
+ }
187
669
  }
188
670
  return query;
189
671
  }
@@ -200,7 +682,7 @@ export class NaturalLanguageProcessor {
200
682
  // "Show me recent posts by John"
201
683
  patterns.set(/show\\s+me\\s+recent\\s+(.+?)\\s+by\\s+(.+)/i, (match) => ({
202
684
  like: match[1],
203
- boost: 'recent',
685
+ boost: { field: 2.0, vector: 1.0, graph: 1.0 },
204
686
  connected: { from: match[2] }
205
687
  }));
206
688
  // "Papers with more than 100 citations"
@@ -255,14 +737,48 @@ export class NaturalLanguageProcessor {
255
737
  return extracted;
256
738
  }
257
739
  /**
258
- * Find entity matches using Brainy's search capabilities
740
+ * Find entity matches using type context and semantic similarity
259
741
  */
260
742
  async findEntityMatches(terms) {
743
+ return this.findEntityMatchesWithTypeContext(terms, []);
744
+ }
745
+ /**
746
+ * Find entity matches with type context for better field prioritization
747
+ */
748
+ async findEntityMatchesWithTypeContext(terms, typeSpecificFields) {
261
749
  const matches = [];
750
+ // Get field statistics for optimization hints
751
+ const fieldStats = await this.brain.getFieldStatistics();
752
+ // Create field priority map based on type affinity
753
+ const fieldPriorityMap = new Map();
754
+ for (const { field, affinity } of typeSpecificFields) {
755
+ fieldPriorityMap.set(field, affinity);
756
+ }
262
757
  for (const term of terms) {
263
758
  try {
759
+ // First, check if term matches a field using semantic similarity
760
+ const fieldMatch = await this.findBestMatchingFieldWithTypeContext(term, typeSpecificFields);
761
+ if (fieldMatch.field && fieldMatch.confidence > 0.7) {
762
+ const stats = fieldStats.get(fieldMatch.field);
763
+ const typeAffinity = fieldPriorityMap.get(fieldMatch.field) || 0;
764
+ matches.push({
765
+ term,
766
+ type: 'field',
767
+ field: fieldMatch.field,
768
+ confidence: fieldMatch.confidence,
769
+ typeAffinity, // NEW: How likely this field appears with this type
770
+ cardinality: stats?.cardinality.uniqueValues,
771
+ distribution: stats?.cardinality.distribution,
772
+ indexType: stats?.indexType
773
+ });
774
+ // Skip entity search if we found a field match
775
+ continue;
776
+ }
264
777
  // Search for similar entities in the knowledge base
265
- const results = await this.brain.search(term, { limit: 5 });
778
+ const results = await this.brain.find({
779
+ query: term,
780
+ limit: 5
781
+ });
266
782
  for (const result of results) {
267
783
  if (result.score > 0.8) { // High similarity threshold
268
784
  matches.push({
@@ -270,19 +786,10 @@ export class NaturalLanguageProcessor {
270
786
  id: result.id,
271
787
  type: 'entity',
272
788
  confidence: result.score,
273
- metadata: result.metadata
789
+ metadata: result.entity?.metadata
274
790
  });
275
791
  }
276
792
  }
277
- // Check if term matches known field names
278
- if (this.isKnownField(term)) {
279
- matches.push({
280
- term,
281
- type: 'field',
282
- field: this.mapToFieldName(term),
283
- confidence: 0.9
284
- });
285
- }
286
793
  }
287
794
  catch (error) {
288
795
  // If search fails, continue with other terms
@@ -291,27 +798,395 @@ export class NaturalLanguageProcessor {
291
798
  }
292
799
  return matches;
293
800
  }
801
+ // REMOVED: isKnownField and mapToFieldName - now using semantic field matching
802
+ // The findBestMatchingField method with embeddings replaces these hardcoded approaches
294
803
  /**
295
- * Check if term is a known field name
804
+ * Find similar successful queries from history
805
+ * Uses Brainy's vector search to find semantically similar previous queries
296
806
  */
297
- isKnownField(term) {
298
- const knownFields = [
299
- 'year', 'date', 'created', 'published', 'author', 'title',
300
- 'citations', 'views', 'score', 'rating', 'category', 'type'
301
- ];
302
- return knownFields.includes(term.toLowerCase());
807
+ async findSimilarQueries(queryEmbedding) {
808
+ try {
809
+ // Search for similar queries in a hypothetical query history
810
+ // For now, return empty array since we don't have query history storage yet
811
+ // This would integrate with Brainy's search to find similar query patterns
812
+ // Future implementation could search a query_history noun type:
813
+ // const similarQueries = await this.brainy.search(queryEmbedding, {
814
+ // limit: 5,
815
+ // metadata: { type: 'successful_query' },
816
+ // nounTypes: ['query_history']
817
+ // })
818
+ return [];
819
+ }
820
+ catch (error) {
821
+ console.debug('Failed to find similar queries:', error);
822
+ return [];
823
+ }
824
+ }
825
+ /**
826
+ * Extract entities from query using Brainy's semantic search
827
+ * Identifies known entities, concepts, and relationships in the query text
828
+ */
829
+ async extractEntities(query) {
830
+ try {
831
+ // Split query into potential entity terms
832
+ const terms = query.toLowerCase()
833
+ .split(/[\s,\.;!?]+/)
834
+ .filter(term => term.length > 2);
835
+ const entities = [];
836
+ // Search for each term in Brainy to see if it matches known entities
837
+ for (const term of terms) {
838
+ try {
839
+ const results = await this.brain.find(term);
840
+ if (results && results.length > 0) {
841
+ // Found matching entities
842
+ entities.push({
843
+ term,
844
+ matches: results,
845
+ confidence: results[0].score || 0.7
846
+ });
847
+ }
848
+ }
849
+ catch (searchError) {
850
+ // Continue if individual term search fails
851
+ console.debug(`Entity search failed for term: ${term}`, searchError);
852
+ }
853
+ }
854
+ return entities;
855
+ }
856
+ catch (error) {
857
+ console.debug('Failed to extract entities:', error);
858
+ return [];
859
+ }
303
860
  }
304
861
  /**
305
- * Map colloquial terms to actual field names
862
+ * Build final TripleQuery based on intent, entities, and query analysis
863
+ * Constructs optimized query combining vector, graph, and field searches
306
864
  */
307
- mapToFieldName(term) {
308
- const fieldMappings = {
309
- 'published': 'publishDate',
310
- 'created': 'createdAt',
311
- 'author': 'authorId',
312
- 'citations': 'citationCount'
865
+ async buildQuery(query, intent, entities) {
866
+ try {
867
+ const tripleQuery = {
868
+ like: query, // Default to semantic search
869
+ limit: 10
870
+ };
871
+ // Add field filters based on intent
872
+ if (intent.hasFieldPatterns) {
873
+ // Extract field-based constraints from the query
874
+ const whereClause = {};
875
+ // Look for date/year patterns
876
+ const yearMatch = query.match(/(\d{4})/g);
877
+ if (yearMatch) {
878
+ whereClause.year = parseInt(yearMatch[0]);
879
+ }
880
+ // Look for numeric constraints
881
+ const moreThanMatch = query.match(/more than (\d+)/i);
882
+ if (moreThanMatch) {
883
+ whereClause.count = { greaterThan: parseInt(moreThanMatch[1]) };
884
+ }
885
+ if (Object.keys(whereClause).length > 0) {
886
+ tripleQuery.where = whereClause;
887
+ }
888
+ }
889
+ // Add connection-based searches
890
+ if (intent.hasConnectionPatterns) {
891
+ // Look for relationship patterns in the query
892
+ const connectedMatch = query.match(/connected to (.+?)$/i) ||
893
+ query.match(/related to (.+?)$/i);
894
+ if (connectedMatch) {
895
+ tripleQuery.connected = {
896
+ to: connectedMatch[1].trim()
897
+ };
898
+ }
899
+ }
900
+ // Add entity-specific filters
901
+ if (entities && entities.length > 0) {
902
+ const highConfidenceEntities = entities.filter(e => e.confidence > 0.8);
903
+ if (highConfidenceEntities.length > 0) {
904
+ // Use the highest confidence entity to refine search
905
+ const topEntity = highConfidenceEntities[0];
906
+ if (topEntity.matches && topEntity.matches.length > 0) {
907
+ // Add entity-specific metadata or connection
908
+ const entityData = topEntity.matches[0].metadata;
909
+ if (entityData && entityData.category) {
910
+ tripleQuery.where = {
911
+ ...tripleQuery.where,
912
+ category: entityData.category
913
+ };
914
+ }
915
+ }
916
+ }
917
+ }
918
+ return tripleQuery;
919
+ }
920
+ catch (error) {
921
+ console.debug('Failed to build query:', error);
922
+ // Return simple query as fallback
923
+ return {
924
+ like: query,
925
+ limit: 10
926
+ };
927
+ }
928
+ }
929
+ /**
930
+ * Extract entities from text using NEURAL matching to strict NounTypes
931
+ * ALWAYS uses neural matching, NEVER falls back to patterns
932
+ */
933
+ async extract(text, options) {
934
+ await this.ensureInitialized();
935
+ // ALWAYS use NeuralEntityExtractor for proper type matching
936
+ const { NeuralEntityExtractor } = await import('./entityExtractor.js');
937
+ const extractor = new NeuralEntityExtractor(this.brain);
938
+ // Convert string types to NounTypes if provided
939
+ const nounTypes = options?.types ?
940
+ options.types.map(t => t) :
941
+ undefined;
942
+ // Extract using neural matching
943
+ const entities = await extractor.extract(text, {
944
+ types: nounTypes,
945
+ confidence: options?.confidence || 0.0, // Accept ALL matches
946
+ includeVectors: false,
947
+ neuralMatching: true // ALWAYS use neural matching
948
+ });
949
+ // Convert to expected format
950
+ return entities.map(entity => ({
951
+ text: entity.text,
952
+ type: entity.type,
953
+ position: entity.position,
954
+ confidence: entity.confidence,
955
+ metadata: options?.includeMetadata ? {
956
+ ...entity.metadata,
957
+ neuralMatch: true,
958
+ extractedAt: Date.now()
959
+ } : undefined
960
+ }));
961
+ }
962
+ /**
963
+ * DEPRECATED - Old pattern-based extraction
964
+ * This should NEVER be used - kept only for reference
965
+ */
966
+ async extractWithPatterns_DEPRECATED(text, options) {
967
+ const extracted = [];
968
+ // Common entity patterns
969
+ const patterns = {
970
+ // People (names with capitals)
971
+ person: /\b([A-Z][a-z]+ [A-Z][a-z]+)\b/g,
972
+ // Organizations (capitals, Inc, LLC, etc)
973
+ organization: /\b([A-Z][a-zA-Z&]+(?: [A-Z][a-zA-Z&]+)*(?:,? (?:Inc|LLC|Corp|Ltd|Co|Group|Foundation|Institute|University|College|School|Hospital|Bank|Agency)\.?))\b/g,
974
+ // Locations (capitals, common place words)
975
+ location: /\b([A-Z][a-z]+(?: [A-Z][a-z]+)*(?:,? (?:[A-Z][a-z]+))?)(?= (?:City|County|State|Country|Street|Road|Avenue|Boulevard|Drive|Park|Square|Place|Island|Mountain|River|Lake|Ocean|Sea))\b/g,
976
+ // Dates
977
+ date: /\b(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}|\d{4}[\/\-]\d{1,2}[\/\-]\d{1,2}|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}|\d{1,2} (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{4})\b/gi,
978
+ // Times
979
+ time: /\b(\d{1,2}:\d{2}(?::\d{2})?(?:\s?[AP]M)?)\b/gi,
980
+ // Emails
981
+ email: /\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b/g,
982
+ // URLs
983
+ url: /\b(https?:\/\/[^\s]+)\b/g,
984
+ // Phone numbers
985
+ phone: /\b(\+?\d{1,3}?[- .]?\(?\d{1,4}\)?[- .]?\d{1,4}[- .]?\d{1,4})\b/g,
986
+ // Money
987
+ money: /\b(\$[\d,]+(?:\.\d{2})?|[\d,]+(?:\.\d{2})?\s*(?:USD|EUR|GBP|JPY|CNY))\b/gi,
988
+ // Percentages
989
+ percentage: /\b(\d+(?:\.\d+)?%)\b/g,
990
+ // Products/versions
991
+ product: /\b([A-Z][a-zA-Z0-9]*(?: [A-Z][a-zA-Z0-9]*)*\s+v?\d+(?:\.\d+)*)\b/g,
992
+ // Hashtags
993
+ hashtag: /#[a-zA-Z0-9_]+/g,
994
+ // Mentions
995
+ mention: /@[a-zA-Z0-9_]+/g
996
+ };
997
+ const minConfidence = options?.confidence || 0.5;
998
+ const targetTypes = options?.types || Object.keys(patterns);
999
+ // Apply each pattern
1000
+ for (const [type, pattern] of Object.entries(patterns)) {
1001
+ if (!targetTypes.includes(type))
1002
+ continue;
1003
+ let match;
1004
+ while ((match = pattern.exec(text)) !== null) {
1005
+ const extractedText = match[1] || match[0];
1006
+ const confidence = this.calculateConfidence(extractedText, type);
1007
+ if (confidence >= minConfidence) {
1008
+ const entity = {
1009
+ text: extractedText,
1010
+ type,
1011
+ position: {
1012
+ start: match.index,
1013
+ end: match.index + match[0].length
1014
+ },
1015
+ confidence
1016
+ };
1017
+ if (options?.includeMetadata) {
1018
+ ;
1019
+ entity.metadata = {
1020
+ pattern: pattern.source,
1021
+ contextBefore: text.substring(Math.max(0, match.index - 20), match.index),
1022
+ contextAfter: text.substring(match.index + match[0].length, Math.min(text.length, match.index + match[0].length + 20))
1023
+ };
1024
+ }
1025
+ extracted.push(entity);
1026
+ }
1027
+ }
1028
+ }
1029
+ // Sort by position
1030
+ extracted.sort((a, b) => a.position.start - b.position.start);
1031
+ // Remove overlapping entities (keep higher confidence)
1032
+ const filtered = [];
1033
+ for (const entity of extracted) {
1034
+ const overlapping = filtered.find(e => (entity.position.start >= e.position.start && entity.position.start < e.position.end) ||
1035
+ (entity.position.end > e.position.start && entity.position.end <= e.position.end));
1036
+ if (!overlapping) {
1037
+ filtered.push(entity);
1038
+ }
1039
+ else if (entity.confidence > overlapping.confidence) {
1040
+ const index = filtered.indexOf(overlapping);
1041
+ filtered[index] = entity;
1042
+ }
1043
+ }
1044
+ return filtered;
1045
+ }
1046
+ /**
1047
+ * Analyze sentiment of text
1048
+ */
1049
+ async sentiment(text, options) {
1050
+ // Sentiment words with scores
1051
+ const positiveWords = new Set(['good', 'great', 'excellent', 'amazing', 'wonderful', 'fantastic', 'love', 'like', 'best', 'happy', 'joy', 'brilliant', 'outstanding', 'perfect', 'beautiful', 'awesome', 'super', 'nice', 'fun', 'exciting', 'impressive', 'incredible', 'remarkable', 'delightful', 'pleased', 'satisfied', 'successful', 'effective', 'helpful']);
1052
+ const negativeWords = new Set(['bad', 'terrible', 'awful', 'horrible', 'hate', 'dislike', 'worst', 'sad', 'angry', 'poor', 'disappointing', 'failed', 'broken', 'useless', 'waste', 'sucks', 'disgusting', 'ugly', 'boring', 'annoying', 'frustrating', 'difficult', 'complicated', 'confusing', 'slow', 'expensive', 'unfair', 'wrong', 'mistake', 'problem', 'issue']);
1053
+ const intensifiers = new Set(['very', 'extremely', 'really', 'absolutely', 'completely', 'totally', 'quite', 'rather', 'so']);
1054
+ const negations = new Set(['not', 'no', 'never', 'neither', 'none', 'nobody', 'nothing', 'nowhere', 'hardly', 'barely', 'scarcely']);
1055
+ const normalizedText = text.toLowerCase();
1056
+ const words = normalizedText.split(/\s+/);
1057
+ // Calculate overall sentiment
1058
+ let positiveCount = 0;
1059
+ let negativeCount = 0;
1060
+ let intensifierBoost = 1;
1061
+ for (let i = 0; i < words.length; i++) {
1062
+ const word = words[i].replace(/[^a-z]/g, '');
1063
+ const prevWord = i > 0 ? words[i - 1].replace(/[^a-z]/g, '') : '';
1064
+ // Check for intensifiers
1065
+ if (intensifiers.has(prevWord)) {
1066
+ intensifierBoost = 1.5;
1067
+ }
1068
+ else {
1069
+ intensifierBoost = 1;
1070
+ }
1071
+ // Check for negation
1072
+ const isNegated = negations.has(prevWord);
1073
+ if (positiveWords.has(word)) {
1074
+ if (isNegated) {
1075
+ negativeCount += intensifierBoost;
1076
+ }
1077
+ else {
1078
+ positiveCount += intensifierBoost;
1079
+ }
1080
+ }
1081
+ else if (negativeWords.has(word)) {
1082
+ if (isNegated) {
1083
+ positiveCount += intensifierBoost;
1084
+ }
1085
+ else {
1086
+ negativeCount += intensifierBoost;
1087
+ }
1088
+ }
1089
+ }
1090
+ const total = positiveCount + negativeCount;
1091
+ const score = total > 0 ? (positiveCount - negativeCount) / total : 0;
1092
+ const magnitude = Math.min(1, total / words.length);
1093
+ let label;
1094
+ if (score > 0.2)
1095
+ label = 'positive';
1096
+ else if (score < -0.2)
1097
+ label = 'negative';
1098
+ else if (magnitude > 0.3)
1099
+ label = 'mixed';
1100
+ else
1101
+ label = 'neutral';
1102
+ const result = {
1103
+ overall: {
1104
+ score,
1105
+ magnitude,
1106
+ label
1107
+ }
313
1108
  };
314
- return fieldMappings[term.toLowerCase()] || term.toLowerCase();
1109
+ // Sentence-level analysis
1110
+ if (options?.granularity === 'sentence' || options?.granularity === 'aspect') {
1111
+ const sentences = text.match(/[^.!?]+[.!?]+/g) || [text];
1112
+ result.sentences = [];
1113
+ for (const sentence of sentences) {
1114
+ const sentenceResult = await this.sentiment(sentence);
1115
+ result.sentences.push({
1116
+ text: sentence.trim(),
1117
+ score: sentenceResult.overall.score,
1118
+ magnitude: sentenceResult.overall.magnitude,
1119
+ label: sentenceResult.overall.label
1120
+ });
1121
+ }
1122
+ }
1123
+ // Aspect-based analysis
1124
+ if (options?.granularity === 'aspect' && options?.aspects) {
1125
+ result.aspects = {};
1126
+ for (const aspect of options.aspects) {
1127
+ const aspectRegex = new RegExp(`[^.!?]*\\b${aspect}\\b[^.!?]*[.!?]?`, 'gi');
1128
+ const aspectSentences = text.match(aspectRegex) || [];
1129
+ if (aspectSentences.length > 0) {
1130
+ let aspectScore = 0;
1131
+ let aspectMagnitude = 0;
1132
+ for (const sentence of aspectSentences) {
1133
+ const sentimentResult = await this.sentiment(sentence);
1134
+ aspectScore += sentimentResult.overall.score;
1135
+ aspectMagnitude += sentimentResult.overall.magnitude;
1136
+ }
1137
+ result.aspects[aspect] = {
1138
+ score: aspectScore / aspectSentences.length,
1139
+ magnitude: aspectMagnitude / aspectSentences.length,
1140
+ mentions: aspectSentences.length
1141
+ };
1142
+ }
1143
+ }
1144
+ }
1145
+ return result;
1146
+ }
1147
+ /**
1148
+ * Calculate confidence for entity extraction
1149
+ */
1150
+ calculateConfidence(text, type) {
1151
+ let confidence = 0.5; // Base confidence
1152
+ // Adjust based on type-specific rules
1153
+ switch (type) {
1154
+ case 'person':
1155
+ // Names with 2-3 capitalized words are more confident
1156
+ const nameWords = text.split(' ');
1157
+ if (nameWords.length >= 2 && nameWords.length <= 3) {
1158
+ confidence += 0.3;
1159
+ }
1160
+ if (nameWords.every(w => /^[A-Z]/.test(w))) {
1161
+ confidence += 0.2;
1162
+ }
1163
+ break;
1164
+ case 'organization':
1165
+ // Presence of corporate suffixes increases confidence
1166
+ if (/\b(Inc|LLC|Corp|Ltd|Co|Group)\.?$/.test(text)) {
1167
+ confidence += 0.4;
1168
+ }
1169
+ break;
1170
+ case 'email':
1171
+ case 'url':
1172
+ // These patterns are very specific, high confidence
1173
+ confidence = 0.95;
1174
+ break;
1175
+ case 'date':
1176
+ case 'time':
1177
+ case 'money':
1178
+ case 'percentage':
1179
+ // Numeric patterns are reliable
1180
+ confidence = 0.9;
1181
+ break;
1182
+ case 'location':
1183
+ // Geographic terms increase confidence
1184
+ if (/\b(City|State|Country|Street|Road|Avenue)$/.test(text)) {
1185
+ confidence += 0.3;
1186
+ }
1187
+ break;
1188
+ }
1189
+ return Math.min(1, confidence);
315
1190
  }
316
1191
  }
317
1192
  //# sourceMappingURL=naturalLanguageProcessor.js.map