@soulcraft/brainy 3.38.0 → 3.39.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -62,6 +62,10 @@ export interface ImportProgress {
62
62
  total?: number;
63
63
  entities?: number;
64
64
  relationships?: number;
65
+ /** Rows per second (v3.38.0) */
66
+ throughput?: number;
67
+ /** Estimated time remaining in ms (v3.38.0) */
68
+ eta?: number;
65
69
  }
66
70
  export interface ImportResult {
67
71
  /** Import ID for history tracking */
@@ -237,13 +237,20 @@ export class ImportCoordinator {
237
237
  enableConceptExtraction: options.enableConceptExtraction !== false,
238
238
  confidenceThreshold: options.confidenceThreshold || 0.6,
239
239
  onProgress: (stats) => {
240
+ // Enhanced progress reporting (v3.38.0) with throughput and ETA
241
+ const message = stats.throughput
242
+ ? `Extracting entities from ${format} (${stats.throughput} rows/sec, ETA: ${Math.round(stats.eta / 1000)}s)...`
243
+ : `Extracting entities from ${format}...`;
240
244
  options.onProgress?.({
241
245
  stage: 'extracting',
242
- message: `Extracting entities from ${format}...`,
246
+ message,
243
247
  processed: stats.processed,
244
248
  total: stats.total,
245
249
  entities: stats.entities,
246
- relationships: stats.relationships
250
+ relationships: stats.relationships,
251
+ // Pass through enhanced metrics if available
252
+ throughput: stats.throughput,
253
+ eta: stats.eta
247
254
  });
248
255
  }
249
256
  };
@@ -25,12 +25,18 @@ export interface SmartExcelOptions extends FormatHandlerOptions {
25
25
  definitionColumn?: string;
26
26
  typeColumn?: string;
27
27
  relatedColumn?: string;
28
- /** Progress callback */
28
+ /** Progress callback (v3.38.0: Enhanced with performance metrics) */
29
29
  onProgress?: (stats: {
30
30
  processed: number;
31
31
  total: number;
32
32
  entities: number;
33
33
  relationships: number;
34
+ /** Rows per second (v3.38.0) */
35
+ throughput?: number;
36
+ /** Estimated time remaining in ms (v3.38.0) */
37
+ eta?: number;
38
+ /** Current phase (v3.38.0) */
39
+ phase?: string;
34
40
  }) => void;
35
41
  }
36
42
  export interface ExtractedRow {
@@ -66,114 +66,141 @@ export class SmartExcelImporter {
66
66
  }
67
67
  // Detect column names
68
68
  const columns = this.detectColumns(rows[0], opts);
69
- // Process each row
69
+ // Process each row with BATCHED PARALLEL PROCESSING (v3.38.0)
70
70
  const extractedRows = [];
71
71
  const entityMap = new Map();
72
72
  const stats = {
73
73
  byType: {},
74
74
  byConfidence: { high: 0, medium: 0, low: 0 }
75
75
  };
76
- for (let i = 0; i < rows.length; i++) {
77
- const row = rows[i];
78
- // Extract data from row
79
- const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
80
- const definition = this.getColumnValue(row, columns.definition) || '';
81
- const type = this.getColumnValue(row, columns.type);
82
- const relatedTerms = this.getColumnValue(row, columns.related);
83
- // Extract entities from definition
84
- let relatedEntities = [];
85
- if (opts.enableNeuralExtraction && definition) {
86
- relatedEntities = await this.extractor.extract(definition, {
87
- confidence: opts.confidenceThreshold * 0.8, // Lower threshold for related entities
88
- neuralMatching: true,
89
- cache: { enabled: true }
90
- });
91
- // Filter out the main term from related entities
92
- relatedEntities = relatedEntities.filter(e => e.text.toLowerCase() !== term.toLowerCase());
93
- }
94
- // Determine main entity type
95
- const mainEntityType = type ?
96
- this.mapTypeString(type) :
97
- (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
98
- // Generate entity ID
99
- const entityId = this.generateEntityId(term);
100
- entityMap.set(term.toLowerCase(), entityId);
101
- // Extract concepts
102
- let concepts = [];
103
- if (opts.enableConceptExtraction && definition) {
104
- try {
105
- concepts = await this.brain.extractConcepts(definition, { limit: 10 });
106
- }
107
- catch (error) {
108
- // Concept extraction is optional
109
- concepts = [];
110
- }
111
- }
112
- // Create main entity
113
- const mainEntity = {
114
- id: entityId,
115
- name: term,
116
- type: mainEntityType,
117
- description: definition,
118
- confidence: 0.95, // Main entity from row has high confidence
119
- metadata: {
120
- source: 'excel',
121
- row: i + 1,
122
- originalData: row,
123
- concepts,
124
- extractedAt: Date.now()
125
- }
126
- };
127
- // Track statistics
128
- this.updateStats(stats, mainEntityType, mainEntity.confidence);
129
- // Infer relationships
130
- const relationships = [];
131
- if (opts.enableRelationshipInference) {
132
- // Extract relationships from definition text
133
- for (const relEntity of relatedEntities) {
134
- const verbType = await this.inferRelationship(term, relEntity.text, definition);
135
- relationships.push({
136
- from: entityId,
137
- to: relEntity.text, // Use entity name directly, will be resolved later
138
- type: verbType,
139
- confidence: relEntity.confidence,
140
- evidence: `Extracted from: "${definition.substring(0, 100)}..."`
141
- });
142
- }
143
- // Parse explicit "Related Terms" column
144
- if (relatedTerms) {
145
- const terms = relatedTerms.split(/[,;]/).map(t => t.trim()).filter(Boolean);
146
- for (const relTerm of terms) {
147
- // Ensure we don't create self-relationships
148
- if (relTerm.toLowerCase() !== term.toLowerCase()) {
149
- relationships.push({
150
- from: entityId,
151
- to: relTerm, // Use term name directly
152
- type: VerbType.RelatedTo,
153
- confidence: 0.9, // Explicit relationships have high confidence
154
- evidence: `Explicitly listed in "Related" column`
155
- });
76
+ // Batch processing configuration
77
+ const CHUNK_SIZE = 10; // Process 10 rows at a time for optimal performance
78
+ let totalProcessed = 0;
79
+ const performanceStartTime = Date.now();
80
+ // Process rows in chunks
81
+ for (let chunkStart = 0; chunkStart < rows.length; chunkStart += CHUNK_SIZE) {
82
+ const chunk = rows.slice(chunkStart, Math.min(chunkStart + CHUNK_SIZE, rows.length));
83
+ // Process chunk in parallel for massive speedup
84
+ const chunkResults = await Promise.all(chunk.map(async (row, chunkIndex) => {
85
+ const i = chunkStart + chunkIndex;
86
+ // Extract data from row
87
+ const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
88
+ const definition = this.getColumnValue(row, columns.definition) || '';
89
+ const type = this.getColumnValue(row, columns.type);
90
+ const relatedTerms = this.getColumnValue(row, columns.related);
91
+ // Parallel extraction: entities AND concepts at the same time
92
+ const [relatedEntities, concepts] = await Promise.all([
93
+ // Extract entities from definition
94
+ opts.enableNeuralExtraction && definition
95
+ ? this.extractor.extract(definition, {
96
+ confidence: opts.confidenceThreshold * 0.8,
97
+ neuralMatching: true,
98
+ cache: { enabled: true }
99
+ }).then(entities =>
100
+ // Filter out the main term from related entities
101
+ entities.filter(e => e.text.toLowerCase() !== term.toLowerCase()))
102
+ : Promise.resolve([]),
103
+ // Extract concepts (in parallel with entity extraction)
104
+ opts.enableConceptExtraction && definition
105
+ ? this.brain.extractConcepts(definition, { limit: 10 }).catch(() => [])
106
+ : Promise.resolve([])
107
+ ]);
108
+ // Determine main entity type
109
+ const mainEntityType = type ?
110
+ this.mapTypeString(type) :
111
+ (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
112
+ // Generate entity ID
113
+ const entityId = this.generateEntityId(term);
114
+ // Create main entity
115
+ const mainEntity = {
116
+ id: entityId,
117
+ name: term,
118
+ type: mainEntityType,
119
+ description: definition,
120
+ confidence: 0.95,
121
+ metadata: {
122
+ source: 'excel',
123
+ row: i + 1,
124
+ originalData: row,
125
+ concepts,
126
+ extractedAt: Date.now()
127
+ }
128
+ };
129
+ // Infer relationships
130
+ const relationships = [];
131
+ if (opts.enableRelationshipInference) {
132
+ // Extract relationships from definition text
133
+ for (const relEntity of relatedEntities) {
134
+ const verbType = await this.inferRelationship(term, relEntity.text, definition);
135
+ relationships.push({
136
+ from: entityId,
137
+ to: relEntity.text,
138
+ type: verbType,
139
+ confidence: relEntity.confidence,
140
+ evidence: `Extracted from: "${definition.substring(0, 100)}..."`
141
+ });
142
+ }
143
+ // Parse explicit "Related Terms" column
144
+ if (relatedTerms) {
145
+ const terms = relatedTerms.split(/[,;]/).map(t => t.trim()).filter(Boolean);
146
+ for (const relTerm of terms) {
147
+ if (relTerm.toLowerCase() !== term.toLowerCase()) {
148
+ relationships.push({
149
+ from: entityId,
150
+ to: relTerm,
151
+ type: VerbType.RelatedTo,
152
+ confidence: 0.9,
153
+ evidence: `Explicitly listed in "Related" column`
154
+ });
155
+ }
156
156
  }
157
157
  }
158
158
  }
159
+ return {
160
+ term,
161
+ entityId,
162
+ mainEntity,
163
+ mainEntityType,
164
+ relatedEntities,
165
+ relationships,
166
+ concepts
167
+ };
168
+ }));
169
+ // Process chunk results sequentially to maintain order
170
+ for (const result of chunkResults) {
171
+ // Store entity ID mapping
172
+ entityMap.set(result.term.toLowerCase(), result.entityId);
173
+ // Track statistics
174
+ this.updateStats(stats, result.mainEntityType, result.mainEntity.confidence);
175
+ // Add extracted row
176
+ extractedRows.push({
177
+ entity: result.mainEntity,
178
+ relatedEntities: result.relatedEntities.map(e => ({
179
+ name: e.text,
180
+ type: e.type,
181
+ confidence: e.confidence
182
+ })),
183
+ relationships: result.relationships,
184
+ concepts: result.concepts
185
+ });
159
186
  }
160
- // Add extracted row
161
- extractedRows.push({
162
- entity: mainEntity,
163
- relatedEntities: relatedEntities.map(e => ({
164
- name: e.text,
165
- type: e.type,
166
- confidence: e.confidence
167
- })),
168
- relationships,
169
- concepts
170
- });
171
- // Report progress
187
+ // Update progress tracking
188
+ totalProcessed += chunk.length;
189
+ // Calculate performance metrics
190
+ const elapsed = Date.now() - performanceStartTime;
191
+ const rowsPerSecond = totalProcessed / (elapsed / 1000);
192
+ const remainingRows = rows.length - totalProcessed;
193
+ const estimatedTimeRemaining = remainingRows / rowsPerSecond;
194
+ // Report progress with enhanced metrics
172
195
  opts.onProgress({
173
- processed: i + 1,
196
+ processed: totalProcessed,
174
197
  total: rows.length,
175
- entities: extractedRows.length + relatedEntities.length,
176
- relationships: relationships.length
198
+ entities: extractedRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
199
+ relationships: extractedRows.reduce((sum, row) => sum + row.relationships.length, 0),
200
+ // Additional performance metrics (v3.38.0)
201
+ throughput: Math.round(rowsPerSecond * 10) / 10,
202
+ eta: Math.round(estimatedTimeRemaining),
203
+ phase: 'extracting'
177
204
  });
178
205
  }
179
206
  return {
@@ -24,6 +24,8 @@ export declare class NeuralEntityExtractor {
24
24
  private typeEmbeddings;
25
25
  private initialized;
26
26
  private cache;
27
+ private embeddingCache;
28
+ private embeddingCacheStats;
27
29
  constructor(brain: Brainy | Brainy<any>, cacheOptions?: EntityCacheOptions);
28
30
  /**
29
31
  * Initialize type embeddings for neural matching
@@ -61,7 +63,10 @@ export declare class NeuralEntityExtractor {
61
63
  */
62
64
  private classifyByRules;
63
65
  /**
64
- * Get embedding for text
66
+ * Get embedding for text with caching (v3.38.0)
67
+ *
68
+ * PERFORMANCE OPTIMIZATION: Caches embeddings during extraction session
69
+ * to avoid redundant model calls for repeated text (common in large imports)
65
70
  */
66
71
  private getEmbedding;
67
72
  /**
@@ -96,4 +101,27 @@ export declare class NeuralEntityExtractor {
96
101
  * Cleanup expired cache entries
97
102
  */
98
103
  cleanupCache(): number;
104
+ /**
105
+ * Clear embedding cache (v3.38.0)
106
+ *
107
+ * Clears the runtime embedding cache. Useful for:
108
+ * - Freeing memory after large imports
109
+ * - Testing with fresh cache state
110
+ */
111
+ clearEmbeddingCache(): void;
112
+ /**
113
+ * Get embedding cache statistics (v3.38.0)
114
+ *
115
+ * Returns performance metrics for the embedding cache:
116
+ * - hits: Number of cache hits (avoided model calls)
117
+ * - misses: Number of cache misses (required model calls)
118
+ * - size: Current cache size
119
+ * - hitRate: Percentage of requests served from cache
120
+ */
121
+ getEmbeddingCacheStats(): {
122
+ hitRate: number;
123
+ hits: number;
124
+ misses: number;
125
+ size: number;
126
+ };
99
127
  }
@@ -12,6 +12,14 @@ export class NeuralEntityExtractor {
12
12
  // Type embeddings for similarity matching
13
13
  this.typeEmbeddings = new Map();
14
14
  this.initialized = false;
15
+ // Runtime embedding cache for performance (v3.38.0)
16
+ // Caches candidate embeddings during an extraction session to avoid redundant model calls
17
+ this.embeddingCache = new Map();
18
+ this.embeddingCacheStats = {
19
+ hits: 0,
20
+ misses: 0,
21
+ size: 0
22
+ };
15
23
  this.brain = brain;
16
24
  this.cache = new EntityExtractionCache(cacheOptions);
17
25
  }
@@ -253,20 +261,46 @@ export class NeuralEntityExtractor {
253
261
  return { type: NounType.Thing, confidence: 0.3 };
254
262
  }
255
263
  /**
256
- * Get embedding for text
264
+ * Get embedding for text with caching (v3.38.0)
265
+ *
266
+ * PERFORMANCE OPTIMIZATION: Caches embeddings during extraction session
267
+ * to avoid redundant model calls for repeated text (common in large imports)
257
268
  */
258
269
  async getEmbedding(text) {
270
+ // Normalize text for cache key
271
+ const normalizedText = text.trim().toLowerCase();
272
+ // Check cache first
273
+ const cached = this.embeddingCache.get(normalizedText);
274
+ if (cached) {
275
+ this.embeddingCacheStats.hits++;
276
+ return cached;
277
+ }
278
+ // Cache miss - generate embedding
279
+ this.embeddingCacheStats.misses++;
280
+ let vector;
259
281
  if ('embed' in this.brain && typeof this.brain.embed === 'function') {
260
- return await this.brain.embed(text);
282
+ vector = await this.brain.embed(text);
261
283
  }
262
284
  else {
263
285
  // Fallback - create simple hash-based vector
264
- const vector = new Array(384).fill(0);
286
+ vector = new Array(384).fill(0);
265
287
  for (let i = 0; i < text.length; i++) {
266
288
  vector[i % 384] += text.charCodeAt(i) / 255;
267
289
  }
268
- return vector.map(v => v / text.length);
290
+ vector = vector.map(v => v / text.length);
291
+ }
292
+ // Store in cache
293
+ this.embeddingCache.set(normalizedText, vector);
294
+ this.embeddingCacheStats.size = this.embeddingCache.size;
295
+ // Memory management: Clear cache if it grows too large (>10000 entries)
296
+ if (this.embeddingCache.size > 10000) {
297
+ // Keep most recent 5000 entries (simple LRU approximation)
298
+ const entries = Array.from(this.embeddingCache.entries());
299
+ this.embeddingCache.clear();
300
+ entries.slice(-5000).forEach(([k, v]) => this.embeddingCache.set(k, v));
301
+ this.embeddingCacheStats.size = this.embeddingCache.size;
269
302
  }
303
+ return vector;
270
304
  }
271
305
  /**
272
306
  * Calculate cosine similarity between vectors
@@ -355,5 +389,36 @@ export class NeuralEntityExtractor {
355
389
  cleanupCache() {
356
390
  return this.cache.cleanup();
357
391
  }
392
+ /**
393
+ * Clear embedding cache (v3.38.0)
394
+ *
395
+ * Clears the runtime embedding cache. Useful for:
396
+ * - Freeing memory after large imports
397
+ * - Testing with fresh cache state
398
+ */
399
+ clearEmbeddingCache() {
400
+ this.embeddingCache.clear();
401
+ this.embeddingCacheStats = {
402
+ hits: 0,
403
+ misses: 0,
404
+ size: 0
405
+ };
406
+ }
407
+ /**
408
+ * Get embedding cache statistics (v3.38.0)
409
+ *
410
+ * Returns performance metrics for the embedding cache:
411
+ * - hits: Number of cache hits (avoided model calls)
412
+ * - misses: Number of cache misses (required model calls)
413
+ * - size: Current cache size
414
+ * - hitRate: Percentage of requests served from cache
415
+ */
416
+ getEmbeddingCacheStats() {
417
+ const total = this.embeddingCacheStats.hits + this.embeddingCacheStats.misses;
418
+ return {
419
+ ...this.embeddingCacheStats,
420
+ hitRate: total > 0 ? this.embeddingCacheStats.hits / total : 0
421
+ };
422
+ }
358
423
  }
359
424
  //# sourceMappingURL=entityExtractor.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "3.38.0",
3
+ "version": "3.39.0",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",