npm - @soulcraft/brainy - Versions diffs - 3.39.0 → 3.40.1 - Mend

@soulcraft/brainy 3.39.0 → 3.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/CHANGELOG.md +14 -0
package/dist/importers/SmartCSVImporter.d.ts +7 -1
package/dist/importers/SmartCSVImporter.js +123 -95
package/dist/importers/SmartPDFImporter.d.ts +7 -1
package/dist/importers/SmartPDFImporter.js +41 -27
package/dist/utils/unifiedCache.js +4 -4
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,20 @@
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
+### [3.40.1](https://github.com/soulcraftlabs/brainy/compare/v3.40.0...v3.40.1) (2025-10-13)
+### 🐛 Bug Fixes
+* correct cache eviction formula to prioritize high-value items ([8e7b52b](https://github.com/soulcraftlabs/brainy/commit/8e7b52bda98e637164e2fb321251c254d03cdf70))
+## [3.40.0](https://github.com/soulcraftlabs/brainy/compare/v3.39.0...v3.40.0) (2025-10-13)
+### ✨ Features
+* extend batch processing and enhanced progress to CSV and PDF imports ([bb46da2](https://github.com/soulcraftlabs/brainy/commit/bb46da2ee7fc3cd0b5becc7e42afff7d7034ecfe))
 ### [3.37.3](https://github.com/soulcraftlabs/brainy/compare/v3.37.2...v3.37.3) (2025-10-10)
 - fix: populate totalNodes/totalEdges in ALL storage adapters for HNSW rebuild (a21a845)

package/dist/importers/SmartCSVImporter.d.ts CHANGED Viewed

@@ -30,12 +30,18 @@ export interface SmartCSVOptions extends FormatHandlerOptions {
     /** CSV-specific options */
     csvDelimiter?: string;
     csvHeaders?: boolean;
-    /** Progress callback */
+    /** Progress callback (v3.39.0: Enhanced with performance metrics) */
     onProgress?: (stats: {
         processed: number;
         total: number;
         entities: number;
         relationships: number;
+        /** Rows per second (v3.39.0) */
+        throughput?: number;
+        /** Estimated time remaining in ms (v3.39.0) */
+        eta?: number;
+        /** Current phase (v3.39.0) */
+        phase?: string;
     }) => void;
 }
 export interface ExtractedRow {

package/dist/importers/SmartCSVImporter.js CHANGED Viewed

@@ -62,113 +62,141 @@ export class SmartCSVImporter {
         }
         // Detect column names
         const columns = this.detectColumns(rows[0], opts);
-        // Process each row
+        // Process each row with BATCHED PARALLEL PROCESSING (v3.39.0)
         const extractedRows = [];
         const entityMap = new Map();
         const stats = {
             byType: {},
             byConfidence: { high: 0, medium: 0, low: 0 }
         };
-        for (let i = 0; i < rows.length; i++) {
-            const row = rows[i];
-            // Extract data from row
-            const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
-            const definition = this.getColumnValue(row, columns.definition) || '';
-            const type = this.getColumnValue(row, columns.type);
-            const relatedTerms = this.getColumnValue(row, columns.related);
-            // Extract entities from definition
-            let relatedEntities = [];
-            if (opts.enableNeuralExtraction && definition) {
-                relatedEntities = await this.extractor.extract(definition, {
-                    confidence: opts.confidenceThreshold * 0.8, // Lower threshold for related entities
-                    neuralMatching: true,
-                    cache: { enabled: true }
-                });
-                // Filter out the main term from related entities
-                relatedEntities = relatedEntities.filter(e => e.text.toLowerCase() !== term.toLowerCase());
-            }
-            // Determine main entity type
-            const mainEntityType = type ?
-                this.mapTypeString(type) :
-                (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
-            // Generate entity ID
-            const entityId = this.generateEntityId(term);
-            entityMap.set(term.toLowerCase(), entityId);
-            // Extract concepts
-            let concepts = [];
-            if (opts.enableConceptExtraction && definition) {
-                try {
-                    concepts = await this.brain.extractConcepts(definition, { limit: 10 });
-                }
-                catch (error) {
-                    concepts = [];
-                }
-            }
-            // Create main entity
-            const mainEntity = {
-                id: entityId,
-                name: term,
-                type: mainEntityType,
-                description: definition,
-                confidence: 0.95, // Main entity from row has high confidence
-                metadata: {
-                    source: 'csv',
-                    row: i + 1,
-                    originalData: row,
-                    concepts,
-                    extractedAt: Date.now()
-                }
-            };
-            // Track statistics
-            this.updateStats(stats, mainEntityType, mainEntity.confidence);
-            // Infer relationships
-            const relationships = [];
-            if (opts.enableRelationshipInference) {
-                // Extract relationships from definition text
-                for (const relEntity of relatedEntities) {
-                    const verbType = await this.inferRelationship(term, relEntity.text, definition);
-                    relationships.push({
-                        from: entityId,
-                        to: relEntity.text,
-                        type: verbType,
-                        confidence: relEntity.confidence,
-                        evidence: `Extracted from: "${definition.substring(0, 100)}..."`
-                    });
-                }
-                // Parse explicit "Related" column
-                if (relatedTerms) {
-                    const terms = relatedTerms.split(/[,;|]/).map(t => t.trim()).filter(Boolean);
-                    for (const relTerm of terms) {
-                        // Ensure we don't create self-relationships
-                        if (relTerm.toLowerCase() !== term.toLowerCase()) {
-                            relationships.push({
-                                from: entityId,
-                                to: relTerm,
-                                type: VerbType.RelatedTo,
-                                confidence: 0.9,
-                                evidence: `Explicitly listed in "Related" column`
-                            });
+        // Batch processing configuration
+        const CHUNK_SIZE = 10; // Process 10 rows at a time for optimal performance
+        let totalProcessed = 0;
+        const performanceStartTime = Date.now();
+        // Process rows in chunks
+        for (let chunkStart = 0; chunkStart < rows.length; chunkStart += CHUNK_SIZE) {
+            const chunk = rows.slice(chunkStart, Math.min(chunkStart + CHUNK_SIZE, rows.length));
+            // Process chunk in parallel for massive speedup
+            const chunkResults = await Promise.all(chunk.map(async (row, chunkIndex) => {
+                const i = chunkStart + chunkIndex;
+                // Extract data from row
+                const term = this.getColumnValue(row, columns.term) || `Entity_${i}`;
+                const definition = this.getColumnValue(row, columns.definition) || '';
+                const type = this.getColumnValue(row, columns.type);
+                const relatedTerms = this.getColumnValue(row, columns.related);
+                // Parallel extraction: entities AND concepts at the same time
+                const [relatedEntities, concepts] = await Promise.all([
+                    // Extract entities from definition
+                    opts.enableNeuralExtraction && definition
+                        ? this.extractor.extract(definition, {
+                            confidence: opts.confidenceThreshold * 0.8,
+                            neuralMatching: true,
+                            cache: { enabled: true }
+                        }).then(entities =>
+                        // Filter out the main term from related entities
+                        entities.filter(e => e.text.toLowerCase() !== term.toLowerCase()))
+                        : Promise.resolve([]),
+                    // Extract concepts (in parallel with entity extraction)
+                    opts.enableConceptExtraction && definition
+                        ? this.brain.extractConcepts(definition, { limit: 10 }).catch(() => [])
+                        : Promise.resolve([])
+                ]);
+                // Determine main entity type
+                const mainEntityType = type ?
+                    this.mapTypeString(type) :
+                    (relatedEntities.length > 0 ? relatedEntities[0].type : NounType.Thing);
+                // Generate entity ID
+                const entityId = this.generateEntityId(term);
+                // Create main entity
+                const mainEntity = {
+                    id: entityId,
+                    name: term,
+                    type: mainEntityType,
+                    description: definition,
+                    confidence: 0.95,
+                    metadata: {
+                        source: 'csv',
+                        row: i + 1,
+                        originalData: row,
+                        concepts,
+                        extractedAt: Date.now()
+                    }
+                };
+                // Infer relationships
+                const relationships = [];
+                if (opts.enableRelationshipInference) {
+                    // Extract relationships from definition text
+                    for (const relEntity of relatedEntities) {
+                        const verbType = await this.inferRelationship(term, relEntity.text, definition);
+                        relationships.push({
+                            from: entityId,
+                            to: relEntity.text,
+                            type: verbType,
+                            confidence: relEntity.confidence,
+                            evidence: `Extracted from: "${definition.substring(0, 100)}..."`
+                        });
+                    }
+                    // Parse explicit "Related" column
+                    if (relatedTerms) {
+                        const terms = relatedTerms.split(/[,;|]/).map(t => t.trim()).filter(Boolean);
+                        for (const relTerm of terms) {
+                            if (relTerm.toLowerCase() !== term.toLowerCase()) {
+                                relationships.push({
+                                    from: entityId,
+                                    to: relTerm,
+                                    type: VerbType.RelatedTo,
+                                    confidence: 0.9,
+                                    evidence: `Explicitly listed in "Related" column`
+                                });
+                            }
                         }
                     }
                 }
+                return {
+                    term,
+                    entityId,
+                    mainEntity,
+                    mainEntityType,
+                    relatedEntities,
+                    relationships,
+                    concepts
+                };
+            }));
+            // Process chunk results sequentially to maintain order
+            for (const result of chunkResults) {
+                // Store entity ID mapping
+                entityMap.set(result.term.toLowerCase(), result.entityId);
+                // Track statistics
+                this.updateStats(stats, result.mainEntityType, result.mainEntity.confidence);
+                // Add extracted row
+                extractedRows.push({
+                    entity: result.mainEntity,
+                    relatedEntities: result.relatedEntities.map(e => ({
+                        name: e.text,
+                        type: e.type,
+                        confidence: e.confidence
+                    })),
+                    relationships: result.relationships,
+                    concepts: result.concepts
+                });
             }
-            // Add extracted row
-            extractedRows.push({
-                entity: mainEntity,
-                relatedEntities: relatedEntities.map(e => ({
-                    name: e.text,
-                    type: e.type,
-                    confidence: e.confidence
-                })),
-                relationships,
-                concepts
-            });
-            // Report progress
+            // Update progress tracking
+            totalProcessed += chunk.length;
+            // Calculate performance metrics
+            const elapsed = Date.now() - performanceStartTime;
+            const rowsPerSecond = totalProcessed / (elapsed / 1000);
+            const remainingRows = rows.length - totalProcessed;
+            const estimatedTimeRemaining = remainingRows / rowsPerSecond;
+            // Report progress with enhanced metrics
             opts.onProgress({
-                processed: i + 1,
+                processed: totalProcessed,
                 total: rows.length,
-                entities: extractedRows.length + relatedEntities.length,
-                relationships: relationships.length
+                entities: extractedRows.reduce((sum, row) => sum + 1 + row.relatedEntities.length, 0),
+                relationships: extractedRows.reduce((sum, row) => sum + row.relationships.length, 0),
+                // Additional performance metrics (v3.39.0)
+                throughput: Math.round(rowsPerSecond * 10) / 10,
+                eta: Math.round(estimatedTimeRemaining),
+                phase: 'extracting'
             });
         }
         return {

package/dist/importers/SmartPDFImporter.d.ts CHANGED Viewed

@@ -27,12 +27,18 @@ export interface SmartPDFOptions extends FormatHandlerOptions {
     extractFromTables?: boolean;
     /** Group by page or full document */
     groupBy?: 'page' | 'document';
-    /** Progress callback */
+    /** Progress callback (v3.39.0: Enhanced with performance metrics) */
     onProgress?: (stats: {
         processed: number;
         total: number;
         entities: number;
         relationships: number;
+        /** Sections per second (v3.39.0) */
+        throughput?: number;
+        /** Estimated time remaining in ms (v3.39.0) */
+        eta?: number;
+        /** Current phase (v3.39.0) */
+        phase?: string;
     }) => void;
 }
 export interface ExtractedSection {

package/dist/importers/SmartPDFImporter.js CHANGED Viewed

@@ -55,7 +55,7 @@ export class SmartPDFImporter {
         }
         // Group data by page or combine into single document
         const grouped = this.groupData(data, opts);
-        // Process each group
+        // Process each group with BATCHED PARALLEL PROCESSING (v3.39.0)
         const sections = [];
         const entityMap = new Map();
         const stats = {
@@ -63,17 +63,35 @@ export class SmartPDFImporter {
             byConfidence: { high: 0, medium: 0, low: 0 },
             bySource: { paragraphs: 0, tables: 0 }
         };
-        let processedCount = 0;
+        // Batch processing configuration
+        const CHUNK_SIZE = 5; // Process 5 sections at a time (smaller than rows due to section size)
+        let totalProcessed = 0;
+        const performanceStartTime = Date.now();
         const totalGroups = grouped.length;
-        for (const group of grouped) {
-            const sectionResult = await this.processSection(group, opts, stats, entityMap);
-            sections.push(sectionResult);
-            processedCount++;
+        // Process sections in chunks
+        for (let chunkStart = 0; chunkStart < grouped.length; chunkStart += CHUNK_SIZE) {
+            const chunk = grouped.slice(chunkStart, Math.min(chunkStart + CHUNK_SIZE, grouped.length));
+            // Process chunk in parallel for better performance
+            const chunkResults = await Promise.all(chunk.map(group => this.processSection(group, opts, stats, entityMap)));
+            // Add results sequentially to maintain order
+            sections.push(...chunkResults);
+            // Update progress tracking
+            totalProcessed += chunk.length;
+            // Calculate performance metrics
+            const elapsed = Date.now() - performanceStartTime;
+            const sectionsPerSecond = totalProcessed / (elapsed / 1000);
+            const remainingSections = grouped.length - totalProcessed;
+            const estimatedTimeRemaining = remainingSections / sectionsPerSecond;
+            // Report progress with enhanced metrics
             opts.onProgress({
-                processed: processedCount,
+                processed: totalProcessed,
                 total: totalGroups,
                 entities: sections.reduce((sum, s) => sum + s.entities.length, 0),
-                relationships: sections.reduce((sum, s) => sum + s.relationships.length, 0)
+                relationships: sections.reduce((sum, s) => sum + s.relationships.length, 0),
+                // Additional performance metrics (v3.39.0)
+                throughput: Math.round(sectionsPerSecond * 10) / 10,
+                eta: Math.round(estimatedTimeRemaining),
+                phase: 'extracting'
             });
         }
         const pagesProcessed = new Set(data.map(d => d._page)).size;
@@ -150,25 +168,21 @@ export class SmartPDFImporter {
             }
         }
         const combinedText = texts.join('\n\n');
-        // Extract entities if enabled
-        let extractedEntities = [];
-        if (options.enableNeuralExtraction && combinedText.length > 0) {
-            extractedEntities = await this.extractor.extract(combinedText, {
-                confidence: options.confidenceThreshold || 0.6,
-                neuralMatching: true,
-                cache: { enabled: true }
-            });
-        }
-        // Extract concepts if enabled
-        let concepts = [];
-        if (options.enableConceptExtraction && combinedText.length > 0) {
-            try {
-                concepts = await this.brain.extractConcepts(combinedText, { limit: 15 });
-            }
-            catch (error) {
-                concepts = [];
-            }
-        }
+        // Parallel extraction: entities AND concepts at the same time (v3.39.0)
+        const [extractedEntities, concepts] = await Promise.all([
+            // Extract entities if enabled
+            options.enableNeuralExtraction && combinedText.length > 0
+                ? this.extractor.extract(combinedText, {
+                    confidence: options.confidenceThreshold || 0.6,
+                    neuralMatching: true,
+                    cache: { enabled: true }
+                })
+                : Promise.resolve([]),
+            // Extract concepts (in parallel with entity extraction)
+            options.enableConceptExtraction && combinedText.length > 0
+                ? this.brain.extractConcepts(combinedText, { limit: 15 }).catch(() => [])
+                : Promise.resolve([])
+        ]);
         // Create entity objects
         const entities = extractedEntities.map(e => {
             const entityId = this.generateEntityId(e.text, group.id);

package/dist/utils/unifiedCache.js CHANGED Viewed

@@ -157,9 +157,9 @@ export class UnifiedCache {
         let victim = null;
         let lowestScore = Infinity;
         for (const [key, item] of this.cache) {
-            // Calculate value score: access frequency / rebuild cost
+            // Calculate value score: access frequency * rebuild cost (higher is better)
             const accessScore = (this.access.get(key) || 1);
-            const score = accessScore / Math.max(item.rebuildCost, 1);
+            const score = accessScore * item.rebuildCost;
             if (score < lowestScore) {
                 lowestScore = score;
                 victim = key;
@@ -180,7 +180,7 @@ export class UnifiedCache {
     evictForSize(bytesNeeded) {
         const candidates = [];
         for (const [key, item] of this.cache) {
-            const score = (this.access.get(key) || 1) / item.rebuildCost;
+            const score = (this.access.get(key) || 1) * item.rebuildCost;
             candidates.push([key, score, item]);
         }
         // Sort by score (lower is worse)
@@ -250,7 +250,7 @@ export class UnifiedCache {
         const candidates = [];
         for (const [key, item] of this.cache) {
             if (item.type === type) {
-                const score = (this.access.get(key) || 1) / item.rebuildCost;
+                const score = (this.access.get(key) || 1) * item.rebuildCost;
                 candidates.push([key, score, item]);
             }
         }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "3.39.0",
+  "version": "3.40.1",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",