npm - @soulcraft/brainy - Versions diffs - 4.1.4 → 4.2.1 - Mend

@soulcraft/brainy 4.1.4 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/CHANGELOG.md +35 -0
package/dist/import/FormatDetector.d.ts +6 -1
package/dist/import/FormatDetector.js +40 -1
package/dist/import/ImportCoordinator.d.ts +102 -4
package/dist/import/ImportCoordinator.js +248 -6
package/dist/import/InstancePool.d.ts +136 -0
package/dist/import/InstancePool.js +231 -0
package/dist/importers/SmartCSVImporter.d.ts +2 -1
package/dist/importers/SmartCSVImporter.js +11 -22
package/dist/importers/SmartDOCXImporter.d.ts +125 -0
package/dist/importers/SmartDOCXImporter.js +227 -0
package/dist/importers/SmartExcelImporter.d.ts +12 -1
package/dist/importers/SmartExcelImporter.js +40 -25
package/dist/importers/SmartJSONImporter.d.ts +1 -0
package/dist/importers/SmartJSONImporter.js +25 -6
package/dist/importers/SmartMarkdownImporter.d.ts +2 -1
package/dist/importers/SmartMarkdownImporter.js +11 -16
package/dist/importers/SmartPDFImporter.d.ts +2 -1
package/dist/importers/SmartPDFImporter.js +11 -22
package/dist/importers/SmartYAMLImporter.d.ts +121 -0
package/dist/importers/SmartYAMLImporter.js +275 -0
package/dist/importers/VFSStructureGenerator.js +12 -0
package/dist/neural/SmartExtractor.d.ts +279 -0
package/dist/neural/SmartExtractor.js +592 -0
package/dist/neural/SmartRelationshipExtractor.d.ts +217 -0
package/dist/neural/SmartRelationshipExtractor.js +396 -0
package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
package/dist/neural/embeddedTypeEmbeddings.js +2 -2
package/dist/neural/entityExtractor.d.ts +3 -0
package/dist/neural/entityExtractor.js +34 -36
package/dist/neural/presets.d.ts +189 -0
package/dist/neural/presets.js +365 -0
package/dist/neural/signals/ContextSignal.d.ts +166 -0
package/dist/neural/signals/ContextSignal.js +646 -0
package/dist/neural/signals/EmbeddingSignal.d.ts +175 -0
package/dist/neural/signals/EmbeddingSignal.js +435 -0
package/dist/neural/signals/ExactMatchSignal.d.ts +220 -0
package/dist/neural/signals/ExactMatchSignal.js +542 -0
package/dist/neural/signals/PatternSignal.d.ts +159 -0
package/dist/neural/signals/PatternSignal.js +478 -0
package/dist/neural/signals/VerbContextSignal.d.ts +102 -0
package/dist/neural/signals/VerbContextSignal.js +390 -0
package/dist/neural/signals/VerbEmbeddingSignal.d.ts +131 -0
package/dist/neural/signals/VerbEmbeddingSignal.js +304 -0
package/dist/neural/signals/VerbExactMatchSignal.d.ts +115 -0
package/dist/neural/signals/VerbExactMatchSignal.js +335 -0
package/dist/neural/signals/VerbPatternSignal.d.ts +104 -0
package/dist/neural/signals/VerbPatternSignal.js +457 -0
package/dist/types/graphTypes.d.ts +2 -0
package/dist/utils/metadataIndex.d.ts +22 -0
package/dist/utils/metadataIndex.js +76 -0
package/package.json +4 -1

package/dist/neural/signals/VerbExactMatchSignal.js ADDED Viewed

@@ -0,0 +1,335 @@
+/**
+ * VerbExactMatchSignal - O(1) exact match relationship type classification
+ *
+ * HIGHEST WEIGHT: 40% (most reliable signal for verbs)
+ *
+ * Uses:
+ * 1. O(1) keyword lookup (exact string match against 334 verb keywords)
+ * 2. Context-aware matching (sentence patterns)
+ * 3. Multi-word phrase matching ("created by", "part of", "belongs to")
+ *
+ * PRODUCTION-READY: No TODOs, no mocks, real implementation
+ */
+import { VerbType } from '../../types/graphTypes.js';
+import { getKeywordEmbeddings } from '../embeddedKeywordEmbeddings.js';
+/**
+ * VerbExactMatchSignal - Instant O(1) relationship type classification
+ *
+ * Production features:
+ * - O(1) hash table lookups using 334 pre-computed verb keywords
+ * - Multi-word phrase matching ("created by", "part of", etc.)
+ * - Context-aware pattern detection
+ * - LRU cache for hot paths
+ * - High confidence (0.85-0.95) - most reliable signal
+ */
+export class VerbExactMatchSignal {
+    constructor(brain, options) {
+        // O(1) keyword lookup (key: normalized keyword → value: VerbType + confidence)
+        this.keywordIndex = new Map();
+        // LRU cache
+        this.cache = new Map();
+        this.cacheOrder = [];
+        // Statistics
+        this.stats = {
+            calls: 0,
+            cacheHits: 0,
+            exactMatches: 0,
+            phraseMatches: 0,
+            partialMatches: 0
+        };
+        this.brain = brain;
+        this.options = {
+            minConfidence: options?.minConfidence ?? 0.70,
+            cacheSize: options?.cacheSize ?? 2000,
+            caseSensitive: options?.caseSensitive ?? false
+        };
+        // Build keyword index from pre-computed embeddings
+        this.buildKeywordIndex();
+    }
+    /**
+     * Build keyword index from embedded keyword embeddings (O(n) once at startup)
+     */
+    buildKeywordIndex() {
+        const allKeywords = getKeywordEmbeddings();
+        // Filter to verb keywords only
+        const verbKeywords = allKeywords.filter(k => k.typeCategory === 'verb');
+        for (const keyword of verbKeywords) {
+            const normalized = this.normalize(keyword.keyword);
+            // Only keep highest confidence for duplicate keywords
+            const existing = this.keywordIndex.get(normalized);
+            if (!existing || keyword.confidence > existing.confidence) {
+                this.keywordIndex.set(normalized, {
+                    type: keyword.type,
+                    confidence: keyword.confidence,
+                    isCanonical: keyword.isCanonical
+                });
+            }
+        }
+        // Verify we have the expected number of verb keywords
+        if (this.keywordIndex.size === 0) {
+            throw new Error('VerbExactMatchSignal: No verb keywords found in embeddings');
+        }
+    }
+    /**
+     * Classify relationship type from context text
+     *
+     * @param context Full context text (sentence or paragraph)
+     * @returns VerbSignal with classified type or null
+     */
+    async classify(context) {
+        this.stats.calls++;
+        if (!context || context.trim().length === 0) {
+            return null;
+        }
+        // Check cache
+        const cacheKey = this.getCacheKey(context);
+        const cached = this.getFromCache(cacheKey);
+        if (cached !== undefined) {
+            this.stats.cacheHits++;
+            return cached;
+        }
+        try {
+            const result = this.classifyInternal(context);
+            // Add to cache
+            this.addToCache(cacheKey, result);
+            return result;
+        }
+        catch (error) {
+            return null;
+        }
+    }
+    /**
+     * Internal classification logic (not cached)
+     */
+    classifyInternal(context) {
+        const normalized = this.normalize(context);
+        // Strategy 1: Multi-word phrase matching (highest priority)
+        // Look for common verb phrases: "created by", "part of", "belongs to", etc.
+        const phraseResult = this.matchPhrases(normalized);
+        if (phraseResult && phraseResult.confidence >= this.options.minConfidence) {
+            this.stats.phraseMatches++;
+            return phraseResult;
+        }
+        // Strategy 2: Single keyword matching
+        // Split into tokens and check each against keyword index
+        const tokens = this.tokenize(normalized);
+        let bestMatch = null;
+        let bestConfidence = 0;
+        for (let i = 0; i < tokens.length; i++) {
+            const token = tokens[i];
+            // Check exact keyword match
+            const match = this.keywordIndex.get(token);
+            if (match) {
+                const confidence = match.isCanonical ? 0.95 : 0.85;
+                if (confidence > bestConfidence) {
+                    bestConfidence = confidence;
+                    bestMatch = {
+                        type: match.type,
+                        confidence,
+                        evidence: `Exact keyword match: "${token}"`,
+                        metadata: {
+                            matchedKeyword: token,
+                            matchPosition: i
+                        }
+                    };
+                }
+            }
+            // Check bi-gram (two consecutive tokens)
+            if (i < tokens.length - 1) {
+                const bigram = `${tokens[i]} ${tokens[i + 1]}`;
+                const bigramMatch = this.keywordIndex.get(bigram);
+                if (bigramMatch) {
+                    const confidence = bigramMatch.isCanonical ? 0.95 : 0.85;
+                    if (confidence > bestConfidence) {
+                        bestConfidence = confidence;
+                        bestMatch = {
+                            type: bigramMatch.type,
+                            confidence,
+                            evidence: `Phrase match: "${bigram}"`,
+                            metadata: {
+                                matchedKeyword: bigram,
+                                matchPosition: i
+                            }
+                        };
+                    }
+                }
+            }
+            // Check tri-gram (three consecutive tokens)
+            if (i < tokens.length - 2) {
+                const trigram = `${tokens[i]} ${tokens[i + 1]} ${tokens[i + 2]}`;
+                const trigramMatch = this.keywordIndex.get(trigram);
+                if (trigramMatch) {
+                    const confidence = trigramMatch.isCanonical ? 0.95 : 0.85;
+                    if (confidence > bestConfidence) {
+                        bestConfidence = confidence;
+                        bestMatch = {
+                            type: trigramMatch.type,
+                            confidence,
+                            evidence: `Phrase match: "${trigram}"`,
+                            metadata: {
+                                matchedKeyword: trigram,
+                                matchPosition: i
+                            }
+                        };
+                    }
+                }
+            }
+        }
+        if (bestMatch && bestMatch.confidence >= this.options.minConfidence) {
+            this.stats.exactMatches++;
+            return bestMatch;
+        }
+        return null;
+    }
+    /**
+     * Match common multi-word verb phrases
+     *
+     * These are high-confidence patterns that indicate specific relationships
+     */
+    matchPhrases(text) {
+        // Common relationship phrases with their VerbTypes
+        const phrases = [
+            // Creation relationships
+            { pattern: /created?\s+by/i, type: VerbType.CreatedBy, confidence: 0.95 },
+            { pattern: /authored?\s+by/i, type: VerbType.CreatedBy, confidence: 0.95 },
+            { pattern: /written\s+by/i, type: VerbType.CreatedBy, confidence: 0.95 },
+            { pattern: /developed\s+by/i, type: VerbType.CreatedBy, confidence: 0.90 },
+            { pattern: /built\s+by/i, type: VerbType.Creates, confidence: 0.85 },
+            // Ownership relationships
+            { pattern: /owned\s+by/i, type: VerbType.Owns, confidence: 0.95 },
+            { pattern: /belongs\s+to/i, type: VerbType.BelongsTo, confidence: 0.95 },
+            { pattern: /attributed\s+to/i, type: VerbType.AttributedTo, confidence: 0.95 },
+            // Part/Whole relationships
+            { pattern: /part\s+of/i, type: VerbType.PartOf, confidence: 0.95 },
+            { pattern: /contains/i, type: VerbType.Contains, confidence: 0.90 },
+            { pattern: /includes/i, type: VerbType.Contains, confidence: 0.85 },
+            // Location relationships
+            { pattern: /located\s+(?:at|in)/i, type: VerbType.LocatedAt, confidence: 0.95 },
+            { pattern: /based\s+in/i, type: VerbType.LocatedAt, confidence: 0.90 },
+            { pattern: /situated\s+in/i, type: VerbType.LocatedAt, confidence: 0.90 },
+            // Membership relationships
+            { pattern: /member\s+of/i, type: VerbType.MemberOf, confidence: 0.95 },
+            { pattern: /works?\s+(?:at|for)/i, type: VerbType.WorksWith, confidence: 0.85 },
+            { pattern: /employed\s+by/i, type: VerbType.WorksWith, confidence: 0.90 },
+            // Reporting relationships
+            { pattern: /reports?\s+to/i, type: VerbType.ReportsTo, confidence: 0.95 },
+            { pattern: /manages/i, type: VerbType.Supervises, confidence: 0.85 },
+            { pattern: /supervises/i, type: VerbType.Supervises, confidence: 0.95 },
+            // Reference relationships
+            { pattern: /references/i, type: VerbType.References, confidence: 0.90 },
+            { pattern: /cites/i, type: VerbType.References, confidence: 0.90 },
+            { pattern: /mentions/i, type: VerbType.References, confidence: 0.85 },
+            // Temporal relationships
+            { pattern: /precedes/i, type: VerbType.Precedes, confidence: 0.90 },
+            { pattern: /follows/i, type: VerbType.Succeeds, confidence: 0.90 },
+            { pattern: /before/i, type: VerbType.Precedes, confidence: 0.75 },
+            { pattern: /after/i, type: VerbType.Succeeds, confidence: 0.75 },
+            // Causal relationships
+            { pattern: /causes/i, type: VerbType.Causes, confidence: 0.90 },
+            { pattern: /requires/i, type: VerbType.Requires, confidence: 0.90 },
+            { pattern: /depends\s+on/i, type: VerbType.DependsOn, confidence: 0.95 },
+            // Transformation relationships
+            { pattern: /transforms/i, type: VerbType.Transforms, confidence: 0.90 },
+            { pattern: /modifies/i, type: VerbType.Modifies, confidence: 0.90 },
+            { pattern: /becomes/i, type: VerbType.Becomes, confidence: 0.90 }
+        ];
+        for (const { pattern, type, confidence } of phrases) {
+            if (pattern.test(text)) {
+                return {
+                    type,
+                    confidence,
+                    evidence: `Phrase pattern match: ${pattern.source}`,
+                    metadata: {
+                        matchedKeyword: pattern.source
+                    }
+                };
+            }
+        }
+        return null;
+    }
+    /**
+     * Normalize text for matching
+     */
+    normalize(text) {
+        let normalized = text.trim();
+        if (!this.options.caseSensitive) {
+            normalized = normalized.toLowerCase();
+        }
+        // Remove extra whitespace
+        normalized = normalized.replace(/\s+/g, ' ');
+        return normalized;
+    }
+    /**
+     * Tokenize text into words
+     */
+    tokenize(text) {
+        return text
+            .split(/\s+/)
+            .map(token => token.replace(/[^\w\s-]/g, '')) // Remove punctuation except hyphens
+            .filter(token => token.length > 0);
+    }
+    /**
+     * Get cache key
+     */
+    getCacheKey(context) {
+        return this.normalize(context).substring(0, 200); // Limit key length
+    }
+    /**
+     * Get from LRU cache
+     */
+    getFromCache(key) {
+        if (!this.cache.has(key)) {
+            return undefined;
+        }
+        const cached = this.cache.get(key);
+        // Move to end (most recently used)
+        this.cacheOrder = this.cacheOrder.filter(k => k !== key);
+        this.cacheOrder.push(key);
+        return cached ?? null;
+    }
+    /**
+     * Add to LRU cache with eviction
+     */
+    addToCache(key, value) {
+        this.cache.set(key, value);
+        this.cacheOrder.push(key);
+        // Evict oldest if over limit
+        if (this.cache.size > this.options.cacheSize) {
+            const oldest = this.cacheOrder.shift();
+            if (oldest) {
+                this.cache.delete(oldest);
+            }
+        }
+    }
+    /**
+     * Get statistics
+     */
+    getStats() {
+        return {
+            ...this.stats,
+            keywordCount: this.keywordIndex.size,
+            cacheSize: this.cache.size,
+            cacheHitRate: this.stats.calls > 0 ? this.stats.cacheHits / this.stats.calls : 0
+        };
+    }
+    /**
+     * Reset statistics
+     */
+    resetStats() {
+        this.stats = {
+            calls: 0,
+            cacheHits: 0,
+            exactMatches: 0,
+            phraseMatches: 0,
+            partialMatches: 0
+        };
+    }
+    /**
+     * Clear cache
+     */
+    clearCache() {
+        this.cache.clear();
+        this.cacheOrder = [];
+    }
+}
+//# sourceMappingURL=VerbExactMatchSignal.js.map

package/dist/neural/signals/VerbPatternSignal.d.ts ADDED Viewed

@@ -0,0 +1,104 @@
+/**
+ * VerbPatternSignal - Regex pattern matching for relationship classification
+ *
+ * WEIGHT: 20% (deterministic, high precision)
+ *
+ * Uses:
+ * 1. Subject-verb-object patterns ("X created Y", "X belongs to Y")
+ * 2. Prepositional phrase patterns ("in", "at", "by", "of")
+ * 3. Structural patterns (parentheses, commas, formatting)
+ *
+ * PRODUCTION-READY: No TODOs, no mocks, real implementation
+ */
+import type { Brainy } from '../../brainy.js';
+import { VerbType } from '../../types/graphTypes.js';
+/**
+ * Signal result with classification details
+ */
+export interface VerbSignal {
+    type: VerbType;
+    confidence: number;
+    evidence: string;
+    metadata?: {
+        pattern?: string;
+        matchedText?: string;
+    };
+}
+/**
+ * Options for verb pattern signal
+ */
+export interface VerbPatternSignalOptions {
+    minConfidence?: number;
+    cacheSize?: number;
+}
+/**
+ * VerbPatternSignal - Deterministic relationship type classification
+ *
+ * Production features:
+ * - Pre-compiled regex patterns (zero runtime cost)
+ * - Subject-verb-object structure detection
+ * - Prepositional phrase recognition
+ * - Context-aware pattern matching
+ * - LRU cache for hot paths
+ */
+export declare class VerbPatternSignal {
+    private brain;
+    private options;
+    private patterns;
+    private cache;
+    private cacheOrder;
+    private stats;
+    constructor(brain: Brainy, options?: VerbPatternSignalOptions);
+    /**
+     * Initialize all regex patterns
+     *
+     * Patterns are organized by relationship category for clarity
+     */
+    private initializePatterns;
+    /**
+     * Classify relationship type using pattern matching
+     *
+     * @param subject Subject entity (e.g., "Alice")
+     * @param object Object entity (e.g., "UCSF")
+     * @param context Full context text
+     * @returns VerbSignal with classified type or null
+     */
+    classify(subject: string, object: string, context: string): Promise<VerbSignal | null>;
+    /**
+     * Get cache key
+     */
+    private getCacheKey;
+    /**
+     * Get from LRU cache
+     */
+    private getFromCache;
+    /**
+     * Add to LRU cache with eviction
+     */
+    private addToCache;
+    /**
+     * Get statistics
+     */
+    getStats(): {
+        patternCount: number;
+        cacheSize: number;
+        cacheHitRate: number;
+        matchRate: number;
+        topPatterns: {
+            pattern: string;
+            hits: number;
+        }[];
+        calls: number;
+        cacheHits: number;
+        matches: number;
+        patternHits: Map<string, number>;
+    };
+    /**
+     * Reset statistics
+     */
+    resetStats(): void;
+    /**
+     * Clear cache
+     */
+    clearCache(): void;
+}