@soulcraft/brainy 3.39.0 → 3.40.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,20 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [3.40.1](https://github.com/soulcraftlabs/brainy/compare/v3.40.0...v3.40.1) (2025-10-13)
6
+
7
+
8
+ ### 🐛 Bug Fixes
9
+
10
+ * correct cache eviction formula to prioritize high-value items ([8e7b52b](https://github.com/soulcraftlabs/brainy/commit/8e7b52bda98e637164e2fb321251c254d03cdf70))
11
+
12
+ ## [3.40.0](https://github.com/soulcraftlabs/brainy/compare/v3.39.0...v3.40.0) (2025-10-13)
13
+
14
+
15
+ ### ✨ Features
16
+
17
+ * extend batch processing and enhanced progress to CSV and PDF imports ([bb46da2](https://github.com/soulcraftlabs/brainy/commit/bb46da2ee7fc3cd0b5becc7e42afff7d7034ecfe))
18
+
5
19
  ### [3.37.3](https://github.com/soulcraftlabs/brainy/compare/v3.37.2...v3.37.3) (2025-10-10)
6
20
 
7
21
  - fix: populate totalNodes/totalEdges in ALL storage adapters for HNSW rebuild (a21a845)
@@ -30,12 +30,18 @@ export interface SmartCSVOptions extends FormatHandlerOptions {
30
30
  /** CSV-specific options */
31
31
  csvDelimiter?: string;
32
32
  csvHeaders?: boolean;
33
- /** Progress callback */
33
+ /** Progress callback (v3.39.0: Enhanced with performance metrics) */
34
34
  onProgress?: (stats: {
35
35
  processed: number;
36
36
  total: number;
37
37
  entities: number;
38
38
  relationships: number;
39
+ /** Rows per second (v3.39.0) */
40
+ throughput?: number;
41
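+ /** Estimated time remaining (v3.39.0). NOTE(review): documented as ms, but the importer computes remainingRows / rowsPerSecond, which yields seconds — confirm intended unit */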
42
+ eta?: number;
43
+ /** Current phase (v3.39.0) */
44
+ phase?: string;
39
45
  }) => void;
40
46
  }
41
47
  export interface ExtractedRow {
@@ -62,113 +62,141 @@ export class SmartCSVImporter {
62
62
  }
63
63
  // Detect column names
64
64
  const columns = this.detectColumns(rows[0], opts);
65
- // Process each row
65
+ // Process each row with BATCHED PARALLEL PROCESSING (v3.39.0)
66
66
  const extractedRows = [];
67
67
  const entityMap = new Map();
68
68
  const stats = {
69
69
  byType: {},
70
70
  byConfidence: { high: 0, medium: 0, low: 0 }
71
71
  };
72
- for (let i = 0; i < rows.length; i++) {
73
- const row = rows[i];
74
- // Extract data from row
75
- const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
76
- const definition = this.getColumnValue(row, columns.definition) || '';
77
- const type = this.getColumnValue(row, columns.type);
78
- const relatedTerms = this.getColumnValue(row, columns.related);
79
- // Extract entities from definition
80
- let relatedEntities = [];
81
- if (opts.enableNeuralExtraction && definition) {
82
- relatedEntities = await this.extractor.extract(definition, {
83
- confidence: opts.confidenceThreshold * 0.8, // Lower threshold for related entities
84
- neuralMatching: true,
85
- cache: { enabled: true }
86
- });
87
- // Filter out the main term from related entities
88
- relatedEntities = relatedEntities.filter(e => e.text.toLowerCase() !== term.toLowerCase());
89
- }
90
- // Determine main entity type
91
- const mainEntityType = type ?
92
- this.mapTypeString(type) :
93
- (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
94
- // Generate entity ID
95
- const entityId = this.generateEntityId(term);
96
- entityMap.set(term.toLowerCase(), entityId);
97
- // Extract concepts
98
- let concepts = [];
99
- if (opts.enableConceptExtraction && definition) {
100
- try {
101
- concepts = await this.brain.extractConcepts(definition, { limit: 10 });
102
- }
103
- catch (error) {
104
- concepts = [];
105
- }
106
- }
107
- // Create main entity
108
- const mainEntity = {
109
- id: entityId,
110
- name: term,
111
- type: mainEntityType,
112
- description: definition,
113
- confidence: 0.95, // Main entity from row has high confidence
114
- metadata: {
115
- source: 'csv',
116
- row: i + 1,
117
- originalData: row,
118
- concepts,
119
- extractedAt: Date.now()
120
- }
121
- };
122
- // Track statistics
123
- this.updateStats(stats, mainEntityType, mainEntity.confidence);
124
- // Infer relationships
125
- const relationships = [];
126
- if (opts.enableRelationshipInference) {
127
- // Extract relationships from definition text
128
- for (const relEntity of relatedEntities) {
129
- const verbType = await this.inferRelationship(term, relEntity.text, definition);
130
- relationships.push({
131
- from: entityId,
132
- to: relEntity.text,
133
- type: verbType,
134
- confidence: relEntity.confidence,
135
- evidence: `Extracted from: "${definition.substring(0, 100)}..."`
136
- });
137
- }
138
- // Parse explicit "Related" column
139
- if (relatedTerms) {
140
- const terms = relatedTerms.split(/[,;|]/).map(t => t.trim()).filter(Boolean);
141
- for (const relTerm of terms) {
142
- // Ensure we don't create self-relationships
143
- if (relTerm.toLowerCase() !== term.toLowerCase()) {
144
- relationships.push({
145
- from: entityId,
146
- to: relTerm,
147
- type: VerbType.RelatedTo,
148
- confidence: 0.9,
149
- evidence: `Explicitly listed in "Related" column`
150
- });
72
+ // Batch processing configuration
73
+ const CHUNK_SIZE = 10; // Process 10 rows at a time for optimal performance
74
+ let totalProcessed = 0;
75
+ const performanceStartTime = Date.now();
76
+ // Process rows in chunks
77
+ for (let chunkStart = 0; chunkStart < rows.length; chunkStart += CHUNK_SIZE) {
78
+ const chunk = rows.slice(chunkStart, Math.min(chunkStart + CHUNK_SIZE, rows.length));
79
+ // Process chunk in parallel for massive speedup
80
+ const chunkResults = await Promise.all(chunk.map(async (row, chunkIndex) => {
81
+ const i = chunkStart + chunkIndex;
82
+ // Extract data from row
83
+ const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
84
+ const definition = this.getColumnValue(row, columns.definition) || '';
85
+ const type = this.getColumnValue(row, columns.type);
86
+ const relatedTerms = this.getColumnValue(row, columns.related);
87
+ // Parallel extraction: entities AND concepts at the same time
88
+ const [relatedEntities, concepts] = await Promise.all([
89
+ // Extract entities from definition
90
+ opts.enableNeuralExtraction && definition
91
+ ? this.extractor.extract(definition, {
92
+ confidence: opts.confidenceThreshold * 0.8,
93
+ neuralMatching: true,
94
+ cache: { enabled: true }
95
+ }).then(entities =>
96
+ // Filter out the main term from related entities
97
+ entities.filter(e => e.text.toLowerCase() !== term.toLowerCase()))
98
+ : Promise.resolve([]),
99
+ // Extract concepts (in parallel with entity extraction)
100
+ opts.enableConceptExtraction && definition
101
+ ? this.brain.extractConcepts(definition, { limit: 10 }).catch(() => [])
102
+ : Promise.resolve([])
103
+ ]);
104
+ // Determine main entity type
105
+ const mainEntityType = type ?
106
+ this.mapTypeString(type) :
107
+ (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
108
+ // Generate entity ID
109
+ const entityId = this.generateEntityId(term);
110
+ // Create main entity
111
+ const mainEntity = {
112
+ id: entityId,
113
+ name: term,
114
+ type: mainEntityType,
115
+ description: definition,
116
+ confidence: 0.95,
117
+ metadata: {
118
+ source: 'csv',
119
+ row: i + 1,
120
+ originalData: row,
121
+ concepts,
122
+ extractedAt: Date.now()
123
+ }
124
+ };
125
+ // Infer relationships
126
+ const relationships = [];
127
+ if (opts.enableRelationshipInference) {
128
+ // Extract relationships from definition text
129
+ for (const relEntity of relatedEntities) {
130
+ const verbType = await this.inferRelationship(term, relEntity.text, definition);
131
+ relationships.push({
132
+ from: entityId,
133
+ to: relEntity.text,
134
+ type: verbType,
135
+ confidence: relEntity.confidence,
136
+ evidence: `Extracted from: "${definition.substring(0, 100)}..."`
137
+ });
138
+ }
139
+ // Parse explicit "Related" column
140
+ if (relatedTerms) {
141
+ const terms = relatedTerms.split(/[,;|]/).map(t => t.trim()).filter(Boolean);
142
+ for (const relTerm of terms) {
143
+ if (relTerm.toLowerCase() !== term.toLowerCase()) {
144
+ relationships.push({
145
+ from: entityId,
146
+ to: relTerm,
147
+ type: VerbType.RelatedTo,
148
+ confidence: 0.9,
149
+ evidence: `Explicitly listed in "Related" column`
150
+ });
151
+ }
151
152
  }
152
153
  }
153
154
  }
155
+ return {
156
+ term,
157
+ entityId,
158
+ mainEntity,
159
+ mainEntityType,
160
+ relatedEntities,
161
+ relationships,
162
+ concepts
163
+ };
164
+ }));
165
+ // Process chunk results sequentially to maintain order
166
+ for (const result of chunkResults) {
167
+ // Store entity ID mapping
168
+ entityMap.set(result.term.toLowerCase(), result.entityId);
169
+ // Track statistics
170
+ this.updateStats(stats, result.mainEntityType, result.mainEntity.confidence);
171
+ // Add extracted row
172
+ extractedRows.push({
173
+ entity: result.mainEntity,
174
+ relatedEntities: result.relatedEntities.map(e => ({
175
+ name: e.text,
176
+ type: e.type,
177
+ confidence: e.confidence
178
+ })),
179
+ relationships: result.relationships,
180
+ concepts: result.concepts
181
+ });
154
182
  }
155
- // Add extracted row
156
- extractedRows.push({
157
- entity: mainEntity,
158
- relatedEntities: relatedEntities.map(e => ({
159
- name: e.text,
160
- type: e.type,
161
- confidence: e.confidence
162
- })),
163
- relationships,
164
- concepts
165
- });
166
- // Report progress
183
+ // Update progress tracking
184
+ totalProcessed += chunk.length;
185
+ // Calculate performance metrics
186
+ const elapsed = Date.now() - performanceStartTime;
187
+ const rowsPerSecond = totalProcessed / (elapsed / 1000);
188
+ const remainingRows = rows.length - totalProcessed;
189
+ const estimatedTimeRemaining = remainingRows / rowsPerSecond;
190
+ // Report progress with enhanced metrics
167
191
  opts.onProgress({
168
- processed: i + 1,
192
+ processed: totalProcessed,
169
193
  total: rows.length,
170
- entities: extractedRows.length + relatedEntities.length,
171
- relationships: relationships.length
194
+ entities: extractedRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
195
+ relationships: extractedRows.reduce((sum, row) => sum + row.relationships.length, 0),
196
+ // Additional performance metrics (v3.39.0)
197
+ throughput: Math.round(rowsPerSecond * 10) / 10,
198
+ eta: Math.round(estimatedTimeRemaining),
199
+ phase: 'extracting'
172
200
  });
173
201
  }
174
202
  return {
@@ -27,12 +27,18 @@ export interface SmartPDFOptions extends FormatHandlerOptions {
27
27
  extractFromTables?: boolean;
28
28
  /** Group by page or full document */
29
29
  groupBy?: 'page' | 'document';
30
- /** Progress callback */
30
+ /** Progress callback (v3.39.0: Enhanced with performance metrics) */
31
31
  onProgress?: (stats: {
32
32
  processed: number;
33
33
  total: number;
34
34
  entities: number;
35
35
  relationships: number;
36
+ /** Sections per second (v3.39.0) */
37
+ throughput?: number;
38
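+ /** Estimated time remaining (v3.39.0). NOTE(review): documented as ms, but the importer computes remainingSections / sectionsPerSecond, which yields seconds — confirm intended unit */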
39
+ eta?: number;
40
+ /** Current phase (v3.39.0) */
41
+ phase?: string;
36
42
  }) => void;
37
43
  }
38
44
  export interface ExtractedSection {
@@ -55,7 +55,7 @@ export class SmartPDFImporter {
55
55
  }
56
56
  // Group data by page or combine into single document
57
57
  const grouped = this.groupData(data, opts);
58
- // Process each group
58
+ // Process each group with BATCHED PARALLEL PROCESSING (v3.39.0)
59
59
  const sections = [];
60
60
  const entityMap = new Map();
61
61
  const stats = {
@@ -63,17 +63,35 @@ export class SmartPDFImporter {
63
63
  byConfidence: { high: 0, medium: 0, low: 0 },
64
64
  bySource: { paragraphs: 0, tables: 0 }
65
65
  };
66
- let processedCount = 0;
66
+ // Batch processing configuration
67
+ const CHUNK_SIZE = 5; // Process 5 sections at a time (smaller than rows due to section size)
68
+ let totalProcessed = 0;
69
+ const performanceStartTime = Date.now();
67
70
  const totalGroups = grouped.length;
68
- for (const group of grouped) {
69
- const sectionResult = await this.processSection(group, opts, stats, entityMap);
70
- sections.push(sectionResult);
71
- processedCount++;
71
+ // Process sections in chunks
72
+ for (let chunkStart = 0; chunkStart < grouped.length; chunkStart += CHUNK_SIZE) {
73
+ const chunk = grouped.slice(chunkStart, Math.min(chunkStart + CHUNK_SIZE, grouped.length));
74
+ // Process chunk in parallel for better performance
75
+ const chunkResults = await Promise.all(chunk.map(group => this.processSection(group, opts, stats, entityMap)));
76
+ // Add results sequentially to maintain order
77
+ sections.push(...chunkResults);
78
+ // Update progress tracking
79
+ totalProcessed += chunk.length;
80
+ // Calculate performance metrics
81
+ const elapsed = Date.now() - performanceStartTime;
82
+ const sectionsPerSecond = totalProcessed / (elapsed / 1000);
83
+ const remainingSections = grouped.length - totalProcessed;
84
+ const estimatedTimeRemaining = remainingSections / sectionsPerSecond;
85
+ // Report progress with enhanced metrics
72
86
  opts.onProgress({
73
- processed: processedCount,
87
+ processed: totalProcessed,
74
88
  total: totalGroups,
75
89
  entities: sections.reduce((sum, s) => sum + s.entities.length, 0),
76
- relationships: sections.reduce((sum, s) => sum + s.relationships.length, 0)
90
+ relationships: sections.reduce((sum, s) => sum + s.relationships.length, 0),
91
+ // Additional performance metrics (v3.39.0)
92
+ throughput: Math.round(sectionsPerSecond * 10) / 10,
93
+ eta: Math.round(estimatedTimeRemaining),
94
+ phase: 'extracting'
77
95
  });
78
96
  }
79
97
  const pagesProcessed = new Set(data.map(d => d._page)).size;
@@ -150,25 +168,21 @@ export class SmartPDFImporter {
150
168
  }
151
169
  }
152
170
  const combinedText = texts.join('\n\n');
153
- // Extract entities if enabled
154
- let extractedEntities = [];
155
- if (options.enableNeuralExtraction && combinedText.length > 0) {
156
- extractedEntities = await this.extractor.extract(combinedText, {
157
- confidence: options.confidenceThreshold || 0.6,
158
- neuralMatching: true,
159
- cache: { enabled: true }
160
- });
161
- }
162
- // Extract concepts if enabled
163
- let concepts = [];
164
- if (options.enableConceptExtraction && combinedText.length > 0) {
165
- try {
166
- concepts = await this.brain.extractConcepts(combinedText, { limit: 15 });
167
- }
168
- catch (error) {
169
- concepts = [];
170
- }
171
- }
171
+ // Parallel extraction: entities AND concepts at the same time (v3.39.0)
172
+ const [extractedEntities, concepts] = await Promise.all([
173
+ // Extract entities if enabled
174
+ options.enableNeuralExtraction && combinedText.length > 0
175
+ ? this.extractor.extract(combinedText, {
176
+ confidence: options.confidenceThreshold || 0.6,
177
+ neuralMatching: true,
178
+ cache: { enabled: true }
179
+ })
180
+ : Promise.resolve([]),
181
+ // Extract concepts (in parallel with entity extraction)
182
+ options.enableConceptExtraction && combinedText.length > 0
183
+ ? this.brain.extractConcepts(combinedText, { limit: 15 }).catch(() => [])
184
+ : Promise.resolve([])
185
+ ]);
172
186
  // Create entity objects
173
187
  const entities = extractedEntities.map(e => {
174
188
  const entityId = this.generateEntityId(e.text, group.id);
@@ -157,9 +157,9 @@ export class UnifiedCache {
157
157
  let victim = null;
158
158
  let lowestScore = Infinity;
159
159
  for (const [key, item] of this.cache) {
160
- // Calculate value score: access frequency / rebuild cost
160
+ // Calculate value score: access frequency * rebuild cost (higher is better)
161
161
  const accessScore = (this.access.get(key) || 1);
162
- const score = accessScore / Math.max(item.rebuildCost, 1);
162
+ const score = accessScore * item.rebuildCost;
163
163
  if (score < lowestScore) {
164
164
  lowestScore = score;
165
165
  victim = key;
@@ -180,7 +180,7 @@ export class UnifiedCache {
180
180
  evictForSize(bytesNeeded) {
181
181
  const candidates = [];
182
182
  for (const [key, item] of this.cache) {
183
- const score = (this.access.get(key) || 1) / item.rebuildCost;
183
+ const score = (this.access.get(key) || 1) * item.rebuildCost;
184
184
  candidates.push([key, score, item]);
185
185
  }
186
186
  // Sort by score (lower is worse)
@@ -250,7 +250,7 @@ export class UnifiedCache {
250
250
  const candidates = [];
251
251
  for (const [key, item] of this.cache) {
252
252
  if (item.type === type) {
253
- const score = (this.access.get(key) || 1) / item.rebuildCost;
253
+ const score = (this.access.get(key) || 1) * item.rebuildCost;
254
254
  candidates.push([key, score, item]);
255
255
  }
256
256
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "3.39.0",
3
+ "version": "3.40.1",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",