cozo-memory 1.1.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adaptive-retrieval.js +10 -0
- package/dist/dynamic-fusion.js +15 -2
- package/dist/hybrid-search.js +51 -18
- package/dist/logger.js +56 -0
- package/dist/migrate-logging.js +113 -0
- package/dist/performance-monitor.js +108 -0
- package/dist/test-large-dataset.js +502 -0
- package/package.json +1 -1
|
@@ -322,6 +322,11 @@ class AdaptiveGraphRetrieval {
|
|
|
322
322
|
* Main adaptive retrieval method
|
|
323
323
|
*/
|
|
324
324
|
async retrieve(query, limit = 10) {
|
|
325
|
+
// Validate limit to prevent errors
|
|
326
|
+
if (limit <= 0) {
|
|
327
|
+
console.error('[AdaptiveRetrieval] Invalid limit value:', limit, '- must be positive. Defaulting to 10.');
|
|
328
|
+
limit = 10;
|
|
329
|
+
}
|
|
325
330
|
// 1. Classify query complexity
|
|
326
331
|
const complexity = this.classifyQueryComplexity(query);
|
|
327
332
|
console.error(`[AdaptiveRetrieval] Query complexity: ${complexity}`);
|
|
@@ -338,6 +343,11 @@ class AdaptiveGraphRetrieval {
|
|
|
338
343
|
}
|
|
339
344
|
// ==================== Strategy Implementations ====================
|
|
340
345
|
async vectorSearch(embedding, limit) {
|
|
346
|
+
// Validate limit
|
|
347
|
+
if (limit <= 0) {
|
|
348
|
+
console.error('[AdaptiveRetrieval] Invalid limit in vectorSearch:', limit);
|
|
349
|
+
return [];
|
|
350
|
+
}
|
|
341
351
|
const result = await this.db.run(`
|
|
342
352
|
?[id, name, type, score] :=
|
|
343
353
|
~entity:semantic{id | query: vec($embedding), k: $limit, ef: 100, bind_distance: dist},
|
package/dist/dynamic-fusion.js
CHANGED
|
@@ -74,10 +74,23 @@ class DynamicFusionSearch {
|
|
|
74
74
|
*/
|
|
75
75
|
async search(query, config = {}) {
|
|
76
76
|
const startTime = Date.now();
|
|
77
|
+
// Merge config with defaults first
|
|
78
|
+
const fullConfig = this.mergeConfig(config);
|
|
79
|
+
// Validate topK values to prevent errors
|
|
80
|
+
if (fullConfig.vector && fullConfig.vector.topK <= 0) {
|
|
81
|
+
console.error('[DynamicFusion] Invalid vector.topK:', fullConfig.vector.topK, '- must be positive. Defaulting to 20.');
|
|
82
|
+
fullConfig.vector.topK = 20;
|
|
83
|
+
}
|
|
84
|
+
if (fullConfig.sparse && fullConfig.sparse.topK <= 0) {
|
|
85
|
+
console.error('[DynamicFusion] Invalid sparse.topK:', fullConfig.sparse.topK, '- must be positive. Defaulting to 20.');
|
|
86
|
+
fullConfig.sparse.topK = 20;
|
|
87
|
+
}
|
|
88
|
+
if (fullConfig.fts && fullConfig.fts.topK <= 0) {
|
|
89
|
+
console.error('[DynamicFusion] Invalid fts.topK:', fullConfig.fts.topK, '- must be positive. Defaulting to 20.');
|
|
90
|
+
fullConfig.fts.topK = 20;
|
|
91
|
+
}
|
|
77
92
|
// Get adaptive weights based on query classification
|
|
78
93
|
const adaptiveWeights = await this.adaptiveQueryFusion.getAdaptiveWeights(query);
|
|
79
|
-
// Merge config with defaults first, then apply adaptive weights
|
|
80
|
-
const fullConfig = this.mergeConfig(config);
|
|
81
94
|
// Override weights with adaptive values
|
|
82
95
|
fullConfig.vector.weight = adaptiveWeights.vector;
|
|
83
96
|
fullConfig.sparse.weight = adaptiveWeights.sparse;
|
package/dist/hybrid-search.js
CHANGED
|
@@ -6,6 +6,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
6
6
|
exports.HybridSearch = void 0;
|
|
7
7
|
const crypto_1 = __importDefault(require("crypto"));
|
|
8
8
|
const reranker_service_1 = require("./reranker-service");
|
|
9
|
+
const logger_1 = require("./logger");
|
|
10
|
+
const performance_monitor_1 = require("./performance-monitor");
|
|
9
11
|
const SEMANTIC_CACHE_THRESHOLD = 0.95;
|
|
10
12
|
class HybridSearch {
|
|
11
13
|
db;
|
|
@@ -138,22 +140,30 @@ class HybridSearch {
|
|
|
138
140
|
}
|
|
139
141
|
}
|
|
140
142
|
async advancedSearch(options) {
|
|
141
|
-
|
|
143
|
+
logger_1.logger.debug('HybridSearch', 'Starting advancedSearch', { query: options.query, limit: options.limit });
|
|
142
144
|
const { query, limit = 10, filters, graphConstraints, vectorParams } = options;
|
|
145
|
+
// Validate limit to prevent infinite loops
|
|
146
|
+
if (limit <= 0) {
|
|
147
|
+
logger_1.logger.warn('HybridSearch', `Invalid limit value: ${limit} - must be positive. Defaulting to 10.`);
|
|
148
|
+
options.limit = 10;
|
|
149
|
+
}
|
|
150
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('advancedSearch');
|
|
143
151
|
let queryEmbedding;
|
|
144
152
|
try {
|
|
145
153
|
queryEmbedding = await this.embeddingService.embed(query);
|
|
146
154
|
}
|
|
147
155
|
catch (e) {
|
|
148
|
-
|
|
156
|
+
logger_1.logger.error('HybridSearch', 'Embedding failed', e);
|
|
157
|
+
endTimer();
|
|
149
158
|
throw e;
|
|
150
159
|
}
|
|
151
160
|
const cachedResults = await this.tryCacheLookup(options, queryEmbedding);
|
|
152
161
|
if (cachedResults !== null) {
|
|
153
|
-
|
|
162
|
+
logger_1.logger.debug('HybridSearch', 'Cache hit for advancedSearch');
|
|
163
|
+
endTimer();
|
|
154
164
|
return cachedResults;
|
|
155
165
|
}
|
|
156
|
-
|
|
166
|
+
logger_1.logger.trace('HybridSearch', 'Cache miss, executing Datalog query...');
|
|
157
167
|
let topk = limit * 2;
|
|
158
168
|
const hasFilters = (filters?.metadata && Object.keys(filters.metadata).length > 0) ||
|
|
159
169
|
(filters?.entityTypes && filters.entityTypes.length > 0);
|
|
@@ -204,7 +214,7 @@ class HybridSearch {
|
|
|
204
214
|
semanticCall += `, filter: ${hnswFilters.join(" && ")}`;
|
|
205
215
|
}
|
|
206
216
|
semanticCall += `}`;
|
|
207
|
-
let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at
|
|
217
|
+
let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at}`];
|
|
208
218
|
if (metaJoins.length > 0) {
|
|
209
219
|
bodyConstraints.push(...metaJoins);
|
|
210
220
|
}
|
|
@@ -229,13 +239,13 @@ class HybridSearch {
|
|
|
229
239
|
}
|
|
230
240
|
const helperRules = [
|
|
231
241
|
`rank_val[id, r] := *entity_rank{entity_id: id, pagerank: r}`,
|
|
232
|
-
`rank_val[id, r] := *entity{id
|
|
242
|
+
`rank_val[id, r] := *entity{id}, not *entity_rank{entity_id: id}, r = 0.0`
|
|
233
243
|
];
|
|
234
244
|
if (graphConstraints?.requiredRelations && graphConstraints.requiredRelations.length > 0) {
|
|
235
|
-
helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type
|
|
245
|
+
helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type}`, `rel_match[id, rel_type] := *relationship{to_id: id, relation_type: rel_type}`);
|
|
236
246
|
}
|
|
237
247
|
if (graphConstraints?.targetEntityIds && graphConstraints.targetEntityIds.length > 0) {
|
|
238
|
-
helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id
|
|
248
|
+
helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id}`, `target_match[id, target_id] := *relationship{to_id: id, from_id: target_id}`);
|
|
239
249
|
}
|
|
240
250
|
const datalogQuery = [
|
|
241
251
|
...helperRules,
|
|
@@ -278,11 +288,16 @@ class HybridSearch {
|
|
|
278
288
|
return rerankedResults;
|
|
279
289
|
}
|
|
280
290
|
await this.updateCache(options, queryEmbedding, finalResults);
|
|
291
|
+
endTimer();
|
|
281
292
|
return finalResults;
|
|
282
293
|
}
|
|
283
294
|
catch (e) {
|
|
284
|
-
|
|
285
|
-
|
|
295
|
+
logger_1.logger.error('HybridSearch', 'Error in advancedSearch', e.message);
|
|
296
|
+
performance_monitor_1.perfMonitor.recordMetric('advancedSearch', 0, true);
|
|
297
|
+
endTimer();
|
|
298
|
+
// Prevent infinite recursion by returning empty results instead of calling search()
|
|
299
|
+
logger_1.logger.warn('HybridSearch', 'Returning empty results to prevent infinite loop');
|
|
300
|
+
return [];
|
|
286
301
|
}
|
|
287
302
|
}
|
|
288
303
|
async search(options) {
|
|
@@ -308,8 +323,14 @@ class HybridSearch {
|
|
|
308
323
|
});
|
|
309
324
|
}
|
|
310
325
|
async graphRag(options) {
|
|
311
|
-
|
|
326
|
+
logger_1.logger.debug('HybridSearch', 'Starting graphRag', { query: options.query, limit: options.limit });
|
|
312
327
|
const { query, limit = 5, filters, graphConstraints } = options;
|
|
328
|
+
// Validate limit to prevent infinite loops
|
|
329
|
+
if (limit <= 0) {
|
|
330
|
+
logger_1.logger.warn('HybridSearch', `Invalid limit value: ${limit} - must be positive. Defaulting to 5.`);
|
|
331
|
+
options.limit = 5;
|
|
332
|
+
}
|
|
333
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('graphRag');
|
|
313
334
|
const maxDepth = graphConstraints?.maxDepth || 2;
|
|
314
335
|
const queryEmbedding = await this.embeddingService.embed(query);
|
|
315
336
|
const topk = limit * 2;
|
|
@@ -350,7 +371,7 @@ class HybridSearch {
|
|
|
350
371
|
// 4. Calculate a combined score based on vector distance, graph distance, and PageRank
|
|
351
372
|
const datalogQuery = `
|
|
352
373
|
rank_val[id, r] := *entity_rank{entity_id: id, pagerank: r}
|
|
353
|
-
rank_val[id, r] := *entity{id
|
|
374
|
+
rank_val[id, r] := *entity{id}, not *entity_rank{entity_id: id}, r = 0.0
|
|
354
375
|
|
|
355
376
|
seeds[id, score] := ${seedConstraints.join(", ")}, score = 1.0 - dist
|
|
356
377
|
|
|
@@ -360,7 +381,7 @@ class HybridSearch {
|
|
|
360
381
|
|
|
361
382
|
result_entities[id, final_score, depth] := path[seed_id, id, depth], seeds[seed_id, seed_score], rank_val[id, pr], final_score = seed_score * (1.0 - 0.2 * depth)
|
|
362
383
|
|
|
363
|
-
?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at
|
|
384
|
+
?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at}, source = 'graph_rag_entity', text = ''
|
|
364
385
|
|
|
365
386
|
:sort -score
|
|
366
387
|
:limit $limit
|
|
@@ -396,19 +417,31 @@ class HybridSearch {
|
|
|
396
417
|
}
|
|
397
418
|
const decayedResults = this.applyTimeDecay(searchResults);
|
|
398
419
|
if (options.rerank) {
|
|
399
|
-
|
|
420
|
+
const reranked = await this.applyReranking(options.query, decayedResults);
|
|
421
|
+
endTimer();
|
|
422
|
+
return reranked;
|
|
400
423
|
}
|
|
424
|
+
endTimer();
|
|
401
425
|
return decayedResults;
|
|
402
426
|
}
|
|
403
427
|
catch (e) {
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
428
|
+
logger_1.logger.error('HybridSearch', 'Error in graphRag', e.message);
|
|
429
|
+
performance_monitor_1.perfMonitor.recordMetric('graphRag', 0, true);
|
|
430
|
+
endTimer();
|
|
431
|
+
// Prevent infinite recursion by returning empty results
|
|
432
|
+
logger_1.logger.warn('HybridSearch', 'Returning empty results to prevent infinite loop');
|
|
433
|
+
return [];
|
|
407
434
|
}
|
|
408
435
|
}
|
|
409
436
|
async agenticRetrieve(options) {
|
|
410
|
-
|
|
437
|
+
logger_1.logger.debug('HybridSearch', 'Starting agenticRetrieve', { query: options.query });
|
|
411
438
|
const { query, routingModel = "demyagent-4b-i1:Q6_K" } = options;
|
|
439
|
+
// Validate limit to prevent infinite loops
|
|
440
|
+
if (options.limit !== undefined && options.limit <= 0) {
|
|
441
|
+
logger_1.logger.warn('HybridSearch', `Invalid limit value: ${options.limit} - must be positive. Defaulting to 10.`);
|
|
442
|
+
options.limit = 10;
|
|
443
|
+
}
|
|
444
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('agenticRetrieve');
|
|
412
445
|
const systemPrompt = `You are a Routing Agent for an advanced Memory/RAG system.
|
|
413
446
|
Your job is to analyze the user's query and decide which search strategy is the most appropriate.
|
|
414
447
|
Available strategies:
|
package/dist/logger.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Centralized Logging System for CozoDB Memory
|
|
4
|
+
*
|
|
5
|
+
* Supports different log levels and can be configured via environment variables
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.logger = exports.LogLevel = void 0;
|
|
9
|
+
var LogLevel;
|
|
10
|
+
(function (LogLevel) {
|
|
11
|
+
LogLevel[LogLevel["ERROR"] = 0] = "ERROR";
|
|
12
|
+
LogLevel[LogLevel["WARN"] = 1] = "WARN";
|
|
13
|
+
LogLevel[LogLevel["INFO"] = 2] = "INFO";
|
|
14
|
+
LogLevel[LogLevel["DEBUG"] = 3] = "DEBUG";
|
|
15
|
+
LogLevel[LogLevel["TRACE"] = 4] = "TRACE";
|
|
16
|
+
})(LogLevel || (exports.LogLevel = LogLevel = {}));
|
|
17
|
+
class Logger {
|
|
18
|
+
level;
|
|
19
|
+
prefix;
|
|
20
|
+
constructor(prefix = '[CozoDB]') {
|
|
21
|
+
this.prefix = prefix;
|
|
22
|
+
// Read from environment variable, default to INFO
|
|
23
|
+
const envLevel = process.env.LOG_LEVEL?.toUpperCase();
|
|
24
|
+
this.level = LogLevel[envLevel] ?? LogLevel.INFO;
|
|
25
|
+
}
|
|
26
|
+
setLevel(level) {
|
|
27
|
+
this.level = level;
|
|
28
|
+
}
|
|
29
|
+
error(component, message, ...args) {
|
|
30
|
+
if (this.level >= LogLevel.ERROR) {
|
|
31
|
+
console.error(`${this.prefix}[${component}] ERROR:`, message, ...args);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
warn(component, message, ...args) {
|
|
35
|
+
if (this.level >= LogLevel.WARN) {
|
|
36
|
+
console.warn(`${this.prefix}[${component}] WARN:`, message, ...args);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
info(component, message, ...args) {
|
|
40
|
+
if (this.level >= LogLevel.INFO) {
|
|
41
|
+
console.error(`${this.prefix}[${component}] INFO:`, message, ...args);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
debug(component, message, ...args) {
|
|
45
|
+
if (this.level >= LogLevel.DEBUG) {
|
|
46
|
+
console.error(`${this.prefix}[${component}] DEBUG:`, message, ...args);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
trace(component, message, ...args) {
|
|
50
|
+
if (this.level >= LogLevel.TRACE) {
|
|
51
|
+
console.error(`${this.prefix}[${component}] TRACE:`, message, ...args);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
// Singleton instance
|
|
56
|
+
exports.logger = new Logger();
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Migration Script: Replace console.error with logger
|
|
4
|
+
*
|
|
5
|
+
* This script helps migrate from console.error to the centralized logger
|
|
6
|
+
*/
|
|
7
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const filesToMigrate = [
|
|
43
|
+
'src/memory-service.ts',
|
|
44
|
+
'src/db-service.ts',
|
|
45
|
+
'src/embedding-service.ts',
|
|
46
|
+
'src/inference-engine.ts',
|
|
47
|
+
'src/dynamic-fusion.ts',
|
|
48
|
+
'src/adaptive-retrieval.ts',
|
|
49
|
+
'src/adaptive-query-fusion.ts',
|
|
50
|
+
'src/reranker-service.ts',
|
|
51
|
+
'src/export-import-service.ts',
|
|
52
|
+
'src/janitor-service.ts'
|
|
53
|
+
];
|
|
54
|
+
// Mapping of console.error patterns to logger calls
|
|
55
|
+
const migrations = [
|
|
56
|
+
{
|
|
57
|
+
pattern: /console\.error\(\s*\[([^\]]+)\]\s+([^,]+),/g,
|
|
58
|
+
replacement: "logger.error('$1', $2,"
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
pattern: /console\.error\(\s*\[([^\]]+)\]\s+([^)]+)\)/g,
|
|
62
|
+
replacement: "logger.error('$1', $2)"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
pattern: /console\.warn\(\s*\[([^\]]+)\]\s+([^)]+)\)/g,
|
|
66
|
+
replacement: "logger.warn('$1', $2)"
|
|
67
|
+
}
|
|
68
|
+
];
|
|
69
|
+
function migrateFile(filePath) {
|
|
70
|
+
if (!fs.existsSync(filePath)) {
|
|
71
|
+
console.log(`Skipping ${filePath} - file not found`);
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
let content = fs.readFileSync(filePath, 'utf-8');
|
|
75
|
+
let modified = false;
|
|
76
|
+
// Check if logger is already imported
|
|
77
|
+
if (!content.includes("import { logger }")) {
|
|
78
|
+
// Find the last import statement
|
|
79
|
+
const importRegex = /^import .+ from .+;$/gm;
|
|
80
|
+
const imports = content.match(importRegex);
|
|
81
|
+
if (imports && imports.length > 0) {
|
|
82
|
+
const lastImport = imports[imports.length - 1];
|
|
83
|
+
const lastImportIndex = content.lastIndexOf(lastImport);
|
|
84
|
+
content = content.slice(0, lastImportIndex + lastImport.length) +
|
|
85
|
+
"\nimport { logger } from './logger';" +
|
|
86
|
+
content.slice(lastImportIndex + lastImport.length);
|
|
87
|
+
modified = true;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
// Apply migrations
|
|
91
|
+
for (const migration of migrations) {
|
|
92
|
+
if (migration.pattern.test(content)) {
|
|
93
|
+
content = content.replace(migration.pattern, migration.replacement);
|
|
94
|
+
modified = true;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (modified) {
|
|
98
|
+
fs.writeFileSync(filePath, content, 'utf-8');
|
|
99
|
+
console.log(`✓ Migrated ${filePath}`);
|
|
100
|
+
}
|
|
101
|
+
else {
|
|
102
|
+
console.log(`- No changes needed for ${filePath}`);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
console.log('Starting logging migration...\n');
|
|
106
|
+
for (const file of filesToMigrate) {
|
|
107
|
+
migrateFile(file);
|
|
108
|
+
}
|
|
109
|
+
console.log('\nMigration complete!');
|
|
110
|
+
console.log('\nNext steps:');
|
|
111
|
+
console.log('1. Review the changes');
|
|
112
|
+
console.log('2. Run: npm run build');
|
|
113
|
+
console.log('3. Test with: LOG_LEVEL=DEBUG npm start');
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Performance Monitoring System
|
|
4
|
+
*
|
|
5
|
+
* Tracks operation latencies, throughput, and resource usage
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.perfMonitor = exports.PerformanceMonitor = void 0;
|
|
9
|
+
const logger_1 = require("./logger");
|
|
10
|
+
class PerformanceMonitor {
|
|
11
|
+
metrics;
|
|
12
|
+
maxSamples;
|
|
13
|
+
constructor(maxSamples = 1000) {
|
|
14
|
+
this.metrics = new Map();
|
|
15
|
+
this.maxSamples = maxSamples;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Start timing an operation
|
|
19
|
+
*/
|
|
20
|
+
startTimer(operation) {
|
|
21
|
+
const startTime = Date.now();
|
|
22
|
+
return () => {
|
|
23
|
+
const duration = Date.now() - startTime;
|
|
24
|
+
this.recordMetric(operation, duration);
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Record a metric manually
|
|
29
|
+
*/
|
|
30
|
+
recordMetric(operation, duration, isError = false) {
|
|
31
|
+
let metric = this.metrics.get(operation);
|
|
32
|
+
if (!metric) {
|
|
33
|
+
metric = {
|
|
34
|
+
times: [],
|
|
35
|
+
errors: 0,
|
|
36
|
+
lastExecuted: Date.now()
|
|
37
|
+
};
|
|
38
|
+
this.metrics.set(operation, metric);
|
|
39
|
+
}
|
|
40
|
+
metric.times.push(duration);
|
|
41
|
+
metric.lastExecuted = Date.now();
|
|
42
|
+
if (isError) {
|
|
43
|
+
metric.errors++;
|
|
44
|
+
}
|
|
45
|
+
// Keep only last N samples
|
|
46
|
+
if (metric.times.length > this.maxSamples) {
|
|
47
|
+
metric.times.shift();
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Get metrics for a specific operation
|
|
52
|
+
*/
|
|
53
|
+
getMetrics(operation) {
|
|
54
|
+
const metric = this.metrics.get(operation);
|
|
55
|
+
if (!metric || metric.times.length === 0) {
|
|
56
|
+
return null;
|
|
57
|
+
}
|
|
58
|
+
const sorted = [...metric.times].sort((a, b) => a - b);
|
|
59
|
+
const count = sorted.length;
|
|
60
|
+
const totalTime = sorted.reduce((sum, t) => sum + t, 0);
|
|
61
|
+
return {
|
|
62
|
+
operation,
|
|
63
|
+
count,
|
|
64
|
+
totalTime,
|
|
65
|
+
avgTime: totalTime / count,
|
|
66
|
+
minTime: sorted[0],
|
|
67
|
+
maxTime: sorted[count - 1],
|
|
68
|
+
p50: sorted[Math.floor(count * 0.5)],
|
|
69
|
+
p95: sorted[Math.floor(count * 0.95)],
|
|
70
|
+
p99: sorted[Math.floor(count * 0.99)],
|
|
71
|
+
errors: metric.errors,
|
|
72
|
+
lastExecuted: metric.lastExecuted
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Get all metrics
|
|
77
|
+
*/
|
|
78
|
+
getAllMetrics() {
|
|
79
|
+
const results = [];
|
|
80
|
+
for (const operation of this.metrics.keys()) {
|
|
81
|
+
const metric = this.getMetrics(operation);
|
|
82
|
+
if (metric) {
|
|
83
|
+
results.push(metric);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
return results.sort((a, b) => b.count - a.count);
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Log performance summary
|
|
90
|
+
*/
|
|
91
|
+
logSummary() {
|
|
92
|
+
const metrics = this.getAllMetrics();
|
|
93
|
+
logger_1.logger.info('PerformanceMonitor', '=== Performance Summary ===');
|
|
94
|
+
for (const m of metrics) {
|
|
95
|
+
logger_1.logger.info('PerformanceMonitor', `${m.operation}: ${m.count} calls, avg=${m.avgTime.toFixed(2)}ms, ` +
|
|
96
|
+
`p95=${m.p95.toFixed(2)}ms, errors=${m.errors}`);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Reset all metrics
|
|
101
|
+
*/
|
|
102
|
+
reset() {
|
|
103
|
+
this.metrics.clear();
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
exports.PerformanceMonitor = PerformanceMonitor;
|
|
107
|
+
// Singleton instance
|
|
108
|
+
exports.perfMonitor = new PerformanceMonitor();
|
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Large Dataset Performance Test
|
|
4
|
+
*
|
|
5
|
+
* Tests system performance with realistic data volumes
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
const embedding_service_1 = require("./embedding-service");
|
|
9
|
+
const hybrid_search_1 = require("./hybrid-search");
|
|
10
|
+
const performance_monitor_1 = require("./performance-monitor");
|
|
11
|
+
const logger_1 = require("./logger");
|
|
12
|
+
const uuid_1 = require("uuid");
|
|
13
|
+
// Set log level to INFO for cleaner output
|
|
14
|
+
logger_1.logger.setLevel(logger_1.LogLevel.INFO);
|
|
15
|
+
const CONFIGS = {
|
|
16
|
+
small: {
|
|
17
|
+
numEntities: 50, // Reduced to keep total observations under 100
|
|
18
|
+
numObservationsPerEntity: 1,
|
|
19
|
+
numRelationships: 75,
|
|
20
|
+
searchQueries: 20
|
|
21
|
+
},
|
|
22
|
+
medium: {
|
|
23
|
+
numEntities: 200,
|
|
24
|
+
numObservationsPerEntity: 2,
|
|
25
|
+
numRelationships: 400,
|
|
26
|
+
searchQueries: 50
|
|
27
|
+
},
|
|
28
|
+
large: {
|
|
29
|
+
numEntities: 500,
|
|
30
|
+
numObservationsPerEntity: 3,
|
|
31
|
+
numRelationships: 1000,
|
|
32
|
+
searchQueries: 100
|
|
33
|
+
}
|
|
34
|
+
};
|
|
35
|
+
// Sample data generators
|
|
36
|
+
const ENTITY_TYPES = ['Person', 'Project', 'Technology', 'Document', 'Task'];
|
|
37
|
+
const RELATION_TYPES = ['works_on', 'uses', 'depends_on', 'created_by', 'related_to'];
|
|
38
|
+
function generateEntityName(type, index) {
|
|
39
|
+
const names = {
|
|
40
|
+
Person: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve', 'Frank', 'Grace', 'Henry'],
|
|
41
|
+
Project: ['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta'],
|
|
42
|
+
Technology: ['TypeScript', 'React', 'Node.js', 'Python', 'Go', 'Rust', 'Java', 'C++'],
|
|
43
|
+
Document: ['Spec', 'Guide', 'Manual', 'Report', 'Analysis', 'Design', 'Plan', 'Review'],
|
|
44
|
+
Task: ['Implement', 'Test', 'Deploy', 'Review', 'Refactor', 'Document', 'Optimize', 'Debug']
|
|
45
|
+
};
|
|
46
|
+
const nameList = names[type] || ['Item'];
|
|
47
|
+
const baseName = nameList[index % nameList.length];
|
|
48
|
+
return `${baseName} ${Math.floor(index / nameList.length) + 1}`;
|
|
49
|
+
}
|
|
50
|
+
function generateObservation(entityName, type, index) {
|
|
51
|
+
const templates = [
|
|
52
|
+
`${entityName} is a ${type.toLowerCase()} that focuses on innovation and quality.`,
|
|
53
|
+
`Key characteristics of ${entityName} include reliability and performance.`,
|
|
54
|
+
`${entityName} has been actively developed and maintained since 2020.`,
|
|
55
|
+
`The primary goal of ${entityName} is to deliver exceptional results.`,
|
|
56
|
+
`${entityName} integrates seamlessly with modern development workflows.`
|
|
57
|
+
];
|
|
58
|
+
return templates[index % templates.length];
|
|
59
|
+
}
|
|
60
|
+
async function createTestData(db, embeddingService, config) {
|
|
61
|
+
const startTime = Date.now();
|
|
62
|
+
const entityIds = [];
|
|
63
|
+
logger_1.logger.info('TestLargeDataset', `Creating ${config.numEntities} entities...`);
|
|
64
|
+
// Pre-generate embeddings for entity types to speed up creation
|
|
65
|
+
const typeEmbeddings = new Map();
|
|
66
|
+
for (const type of ENTITY_TYPES) {
|
|
67
|
+
const contentEmbed = await embeddingService.embed(`${type} entity`);
|
|
68
|
+
const nameEmbed = await embeddingService.embed(type);
|
|
69
|
+
typeEmbeddings.set(type, { content: contentEmbed, name: nameEmbed });
|
|
70
|
+
}
|
|
71
|
+
// Create entities directly in CozoDB
|
|
72
|
+
for (let i = 0; i < config.numEntities; i++) {
|
|
73
|
+
const type = ENTITY_TYPES[i % ENTITY_TYPES.length];
|
|
74
|
+
const name = generateEntityName(type, i);
|
|
75
|
+
const id = (0, uuid_1.v4)();
|
|
76
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('create_entity');
|
|
77
|
+
try {
|
|
78
|
+
// Reuse type embeddings for speed
|
|
79
|
+
const embeddings = typeEmbeddings.get(type);
|
|
80
|
+
// Insert into CozoDB
|
|
81
|
+
await db.run(`
|
|
82
|
+
?[id, name, type, content_embedding, name_embedding, metadata, created_at] <- [
|
|
83
|
+
[$id, $name, $type, $content_embedding, $name_embedding, $metadata, $created_at]
|
|
84
|
+
]
|
|
85
|
+
:put entity { id => name, type, content_embedding, name_embedding, metadata, created_at }
|
|
86
|
+
`, {
|
|
87
|
+
id,
|
|
88
|
+
name,
|
|
89
|
+
type,
|
|
90
|
+
content_embedding: embeddings.content,
|
|
91
|
+
name_embedding: embeddings.name,
|
|
92
|
+
metadata: { index: i, category: type.toLowerCase(), created_at: Date.now() },
|
|
93
|
+
created_at: [Date.now() * 1000, true]
|
|
94
|
+
});
|
|
95
|
+
entityIds.push(id);
|
|
96
|
+
endTimer();
|
|
97
|
+
}
|
|
98
|
+
catch (error) {
|
|
99
|
+
performance_monitor_1.perfMonitor.recordMetric('create_entity', 0, true);
|
|
100
|
+
logger_1.logger.error('TestLargeDataset', `Failed to create entity ${name}:`, error);
|
|
101
|
+
}
|
|
102
|
+
if ((i + 1) % 50 === 0) {
|
|
103
|
+
logger_1.logger.info('TestLargeDataset', `Created ${i + 1}/${config.numEntities} entities`);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
logger_1.logger.info('TestLargeDataset', `Creating observations...`);
|
|
107
|
+
// Pre-generate a few observation embeddings to reuse (for speed)
|
|
108
|
+
const observationTemplates = [
|
|
109
|
+
'This entity focuses on innovation and quality.',
|
|
110
|
+
'Key characteristics include reliability and performance.',
|
|
111
|
+
'Has been actively developed since 2020.',
|
|
112
|
+
'Primary goal is to deliver exceptional results.',
|
|
113
|
+
'Integrates seamlessly with modern workflows.'
|
|
114
|
+
];
|
|
115
|
+
logger_1.logger.info('TestLargeDataset', `Pre-generating ${observationTemplates.length} observation embeddings...`);
|
|
116
|
+
const templateEmbeddings = await Promise.all(observationTemplates.map(t => embeddingService.embed(t)));
|
|
117
|
+
// Create observations in batches for better performance
|
|
118
|
+
let obsCount = 0;
|
|
119
|
+
const totalObservations = entityIds.length * config.numObservationsPerEntity;
|
|
120
|
+
let lastLogTime = Date.now();
|
|
121
|
+
const BATCH_SIZE = 50; // Insert 50 observations at once
|
|
122
|
+
const observationBatch = [];
|
|
123
|
+
for (const entityId of entityIds) {
|
|
124
|
+
for (let j = 0; j < config.numObservationsPerEntity; j++) {
|
|
125
|
+
// Reuse pre-generated embeddings for speed
|
|
126
|
+
const templateIdx = j % templateEmbeddings.length;
|
|
127
|
+
const text = observationTemplates[templateIdx];
|
|
128
|
+
const embedding = templateEmbeddings[templateIdx];
|
|
129
|
+
observationBatch.push({
|
|
130
|
+
id: (0, uuid_1.v4)(),
|
|
131
|
+
entity_id: entityId,
|
|
132
|
+
text,
|
|
133
|
+
embedding,
|
|
134
|
+
metadata: { confidence: 0.8 + Math.random() * 0.2 },
|
|
135
|
+
session_id: '',
|
|
136
|
+
task_id: '',
|
|
137
|
+
created_at: [Date.now() * 1000, true]
|
|
138
|
+
});
|
|
139
|
+
// Insert batch when it reaches BATCH_SIZE
|
|
140
|
+
if (observationBatch.length >= BATCH_SIZE) {
|
|
141
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('add_observation');
|
|
142
|
+
try {
|
|
143
|
+
// Build batch insert query - remove hyphens from UUIDs for variable names
|
|
144
|
+
const rows = observationBatch.map(obs => {
|
|
145
|
+
const cleanId = obs.id.replace(/-/g, '_');
|
|
146
|
+
return `[$id_${cleanId}, $entity_id_${cleanId}, $text_${cleanId}, $embedding_${cleanId}, $metadata_${cleanId}, $session_id_${cleanId}, $task_id_${cleanId}, $created_at_${cleanId}]`;
|
|
147
|
+
}).join(',\n ');
|
|
148
|
+
const params = {};
|
|
149
|
+
for (const obs of observationBatch) {
|
|
150
|
+
const cleanId = obs.id.replace(/-/g, '_');
|
|
151
|
+
params[`id_${cleanId}`] = obs.id;
|
|
152
|
+
params[`entity_id_${cleanId}`] = obs.entity_id;
|
|
153
|
+
params[`text_${cleanId}`] = obs.text;
|
|
154
|
+
params[`embedding_${cleanId}`] = obs.embedding;
|
|
155
|
+
params[`metadata_${cleanId}`] = obs.metadata;
|
|
156
|
+
params[`session_id_${cleanId}`] = obs.session_id;
|
|
157
|
+
params[`task_id_${cleanId}`] = obs.task_id;
|
|
158
|
+
params[`created_at_${cleanId}`] = obs.created_at;
|
|
159
|
+
}
|
|
160
|
+
await db.run(`
|
|
161
|
+
?[id, entity_id, text, embedding, metadata, session_id, task_id, created_at] <- [
|
|
162
|
+
${rows}
|
|
163
|
+
]
|
|
164
|
+
:put observation { id => entity_id, text, embedding, metadata, session_id, task_id, created_at }
|
|
165
|
+
`, params);
|
|
166
|
+
obsCount += observationBatch.length;
|
|
167
|
+
endTimer();
|
|
168
|
+
observationBatch.length = 0; // Clear batch
|
|
169
|
+
}
|
|
170
|
+
catch (error) {
|
|
171
|
+
performance_monitor_1.perfMonitor.recordMetric('add_observation', 0, true);
|
|
172
|
+
logger_1.logger.error('TestLargeDataset', `Failed to create observation batch at ${obsCount}:`, error);
|
|
173
|
+
observationBatch.length = 0; // Clear batch on error
|
|
174
|
+
}
|
|
175
|
+
// Log progress
|
|
176
|
+
const now = Date.now();
|
|
177
|
+
if (obsCount % 50 === 0 || (now - lastLogTime) > 10000) {
|
|
178
|
+
logger_1.logger.info('TestLargeDataset', `Created ${obsCount}/${totalObservations} observations (${((obsCount / totalObservations) * 100).toFixed(1)}%)`);
|
|
179
|
+
lastLogTime = now;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
// Insert remaining observations
|
|
185
|
+
if (observationBatch.length > 0) {
|
|
186
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('add_observation');
|
|
187
|
+
try {
|
|
188
|
+
const rows = observationBatch.map(obs => {
|
|
189
|
+
const cleanId = obs.id.replace(/-/g, '_');
|
|
190
|
+
return `[$id_${cleanId}, $entity_id_${cleanId}, $text_${cleanId}, $embedding_${cleanId}, $metadata_${cleanId}, $session_id_${cleanId}, $task_id_${cleanId}, $created_at_${cleanId}]`;
|
|
191
|
+
}).join(',\n ');
|
|
192
|
+
const params = {};
|
|
193
|
+
for (const obs of observationBatch) {
|
|
194
|
+
const cleanId = obs.id.replace(/-/g, '_');
|
|
195
|
+
params[`id_${cleanId}`] = obs.id;
|
|
196
|
+
params[`entity_id_${cleanId}`] = obs.entity_id;
|
|
197
|
+
params[`text_${cleanId}`] = obs.text;
|
|
198
|
+
params[`embedding_${cleanId}`] = obs.embedding;
|
|
199
|
+
params[`metadata_${cleanId}`] = obs.metadata;
|
|
200
|
+
params[`session_id_${cleanId}`] = obs.session_id;
|
|
201
|
+
params[`task_id_${cleanId}`] = obs.task_id;
|
|
202
|
+
params[`created_at_${cleanId}`] = obs.created_at;
|
|
203
|
+
}
|
|
204
|
+
await db.run(`
|
|
205
|
+
?[id, entity_id, text, embedding, metadata, session_id, task_id, created_at] <- [
|
|
206
|
+
${rows}
|
|
207
|
+
]
|
|
208
|
+
:put observation { id => entity_id, text, embedding, metadata, session_id, task_id, created_at }
|
|
209
|
+
`, params);
|
|
210
|
+
obsCount += observationBatch.length;
|
|
211
|
+
endTimer();
|
|
212
|
+
}
|
|
213
|
+
catch (error) {
|
|
214
|
+
performance_monitor_1.perfMonitor.recordMetric('add_observation', 0, true);
|
|
215
|
+
logger_1.logger.error('TestLargeDataset', `Failed to create final observation batch:`, error);
|
|
216
|
+
}
|
|
217
|
+
logger_1.logger.info('TestLargeDataset', `Created ${obsCount}/${totalObservations} observations (100.0%)`);
|
|
218
|
+
}
|
|
219
|
+
logger_1.logger.info('TestLargeDataset', `Creating ${config.numRelationships} relationships...`);
|
|
220
|
+
// Create relationships
|
|
221
|
+
for (let i = 0; i < config.numRelationships; i++) {
|
|
222
|
+
const fromId = entityIds[Math.floor(Math.random() * entityIds.length)];
|
|
223
|
+
let toId = entityIds[Math.floor(Math.random() * entityIds.length)];
|
|
224
|
+
// Avoid self-references
|
|
225
|
+
while (toId === fromId) {
|
|
226
|
+
toId = entityIds[Math.floor(Math.random() * entityIds.length)];
|
|
227
|
+
}
|
|
228
|
+
const relationType = RELATION_TYPES[i % RELATION_TYPES.length];
|
|
229
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('create_relation');
|
|
230
|
+
try {
|
|
231
|
+
await db.run(`
|
|
232
|
+
?[from_id, to_id, relation_type, strength, metadata, created_at] <- [
|
|
233
|
+
[$from_id, $to_id, $relation_type, $strength, $metadata, $created_at]
|
|
234
|
+
]
|
|
235
|
+
:put relationship { from_id, to_id, relation_type => strength, metadata, created_at }
|
|
236
|
+
`, {
|
|
237
|
+
from_id: fromId,
|
|
238
|
+
to_id: toId,
|
|
239
|
+
relation_type: relationType,
|
|
240
|
+
strength: 0.5 + Math.random() * 0.5,
|
|
241
|
+
metadata: {},
|
|
242
|
+
created_at: [Date.now() * 1000, true]
|
|
243
|
+
});
|
|
244
|
+
endTimer();
|
|
245
|
+
}
|
|
246
|
+
catch (error) {
|
|
247
|
+
performance_monitor_1.perfMonitor.recordMetric('create_relation', 0, true);
|
|
248
|
+
}
|
|
249
|
+
if ((i + 1) % 500 === 0) {
|
|
250
|
+
logger_1.logger.info('TestLargeDataset', `Created ${i + 1}/${config.numRelationships} relationships`);
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
const duration = Date.now() - startTime;
|
|
254
|
+
logger_1.logger.info('TestLargeDataset', `Data creation completed in ${(duration / 1000).toFixed(2)}s`);
|
|
255
|
+
return { entityIds, duration };
|
|
256
|
+
}
|
|
257
|
+
/**
 * Drives `config.searchQueries` hybrid-search calls against a fixed pool of
 * realistic queries, cycling through the pool round-robin. Each call is timed
 * via the performance monitor; failures are recorded as error metrics and
 * logged, and a progress line is emitted every 20 completed iterations.
 *
 * @param {object} hybridSearch - Search facade exposing `search({ query, limit })`.
 * @param {object} config - Dataset config; only `searchQueries` is read here.
 */
async function runSearchTests(hybridSearch, config) {
    logger_1.logger.info('TestLargeDataset', `Running ${config.searchQueries} search queries...`);
    // Fixed query pool; iteration N uses pool[N % pool.length].
    const queryPool = [
        'project management',
        'software development',
        'team collaboration',
        'technical documentation',
        'code review process',
        'deployment pipeline',
        'testing strategy',
        'performance optimization'
    ];
    for (let attempt = 1; attempt <= config.searchQueries; attempt++) {
        const currentQuery = queryPool[(attempt - 1) % queryPool.length];
        const stopTimer = performance_monitor_1.perfMonitor.startTimer('hybrid_search');
        try {
            await hybridSearch.search({ query: currentQuery, limit: 10 });
            stopTimer();
        }
        catch (error) {
            // On failure, record an error metric instead of stopping the timer.
            performance_monitor_1.perfMonitor.recordMetric('hybrid_search', 0, true);
            logger_1.logger.error('TestLargeDataset', `Search failed for query "${currentQuery}":`, error);
        }
        // Progress heartbeat every 20 searches.
        if (attempt % 20 === 0) {
            logger_1.logger.info('TestLargeDataset', `Completed ${attempt}/${config.searchQueries} searches`);
        }
    }
}
|
|
285
|
+
/**
 * Runs one complete large-dataset benchmark for the named configuration:
 * opens (or recreates) a SQLite-backed CozoDB, installs the full schema and
 * indexes, populates test data unless the database already has entities,
 * executes the search workload, and prints a performance summary.
 *
 * @param {string} configName - Key into CONFIGS (the CLI usage text suggests
 *   'small' | 'medium' | 'large' — TODO confirm against CONFIGS, which is
 *   defined elsewhere in this file).
 * @param {boolean} [cleanStart=false] - When true, delete any existing test
 *   database file before running so data is recreated from scratch.
 */
async function runTest(configName, cleanStart = false) {
    const config = CONFIGS[configName];
    if (!config) {
        logger_1.logger.error('TestLargeDataset', `Unknown config: ${configName}`);
        return;
    }
    logger_1.logger.info('TestLargeDataset', `\n=== Starting ${configName.toUpperCase()} dataset test ===`);
    logger_1.logger.info('TestLargeDataset', `Config: ${JSON.stringify(config, null, 2)}`);
    // Use the real CozoDB setup like in index.ts
    const { CozoDb } = await import('cozo-node');
    // One database file per config so different sizes don't clobber each other.
    const dbPath = `test_large_${configName}.cozo.db`;
    // Delete old database only if cleanStart flag is set
    const fs = await import('fs');
    if (cleanStart && fs.existsSync(dbPath)) {
        logger_1.logger.info('TestLargeDataset', `Removing old database: ${dbPath}`);
        fs.unlinkSync(dbPath);
    }
    else if (fs.existsSync(dbPath)) {
        logger_1.logger.info('TestLargeDataset', `Using existing database: ${dbPath}`);
    }
    const db = new CozoDb('sqlite', dbPath);
    const embeddingService = new embedding_service_1.EmbeddingService();
    const hybridSearch = new hybrid_search_1.HybridSearch(db, embeddingService);
    // Initialize schema like the real server does.
    // NOTE(review): all :create/::hnsw/::fts statements share one try block, so
    // the first "already exists" error skips the remaining statements — this
    // assumes schema creation is all-or-nothing from a prior run.
    try {
        // Create entity table
        await db.run(`
            :create entity {
                id: String,
                =>
                name: String,
                type: String,
                content_embedding: <F32; 1024>,
                name_embedding: <F32; 1024>,
                metadata: Json,
                created_at: Validity
            }
        `);
        // Create content HNSW index (cosine distance over 1024-dim embeddings)
        await db.run(`
            ::hnsw create entity:semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [content_embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create name HNSW index
        await db.run(`
            ::hnsw create entity:name_semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [name_embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create FTS index for entity names
        await db.run(`
            ::fts create entity:fts {
                extractor: name,
                tokenizer: Simple,
                filters: [Lowercase, Stemmer('english'), Stopwords('en')]
            }
        `);
        // Create observation table
        await db.run(`
            :create observation {
                id: String,
                =>
                entity_id: String,
                text: String,
                embedding: <F32; 1024>,
                metadata: Json,
                session_id: String,
                task_id: String,
                created_at: Validity
            }
        `);
        // Create observation HNSW index
        await db.run(`
            ::hnsw create observation:semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create FTS index for observation text
        await db.run(`
            ::fts create observation:fts {
                extractor: text,
                tokenizer: Simple,
                filters: [Lowercase, Stemmer('english'), Stopwords('en')]
            }
        `);
        // Create relationship table (edge keyed by endpoints + type)
        await db.run(`
            :create relationship {
                from_id: String,
                to_id: String,
                relation_type: String,
                =>
                strength: Float,
                metadata: Json,
                created_at: Validity
            }
        `);
        // Create search cache table
        await db.run(`
            :create search_cache {
                query_hash: String,
                =>
                query_text: String,
                results: Json,
                options: Json,
                embedding: <F32; 1024>,
                created_at: Int
            }
        `);
        // Create search cache HNSW index (smaller m than the data indexes)
        await db.run(`
            ::hnsw create search_cache:semantic {
                dim: 1024,
                m: 16,
                dtype: F32,
                ef_construction: 200,
                fields: [embedding],
                distance: Cosine
            }
        `);
        // Create entity_rank table (for PageRank scores)
        await db.run(`
            :create entity_rank {
                entity_id: String
                =>
                pagerank: Float
            }
        `);
        logger_1.logger.info('TestLargeDataset', 'Database schema initialized with all indexes');
    }
    catch (error) {
        // Only a pre-existing schema is tolerated; anything else is fatal.
        if (!error.message?.includes('already exists')) {
            logger_1.logger.error('TestLargeDataset', 'Schema initialization failed:', error);
            throw error;
        }
        logger_1.logger.info('TestLargeDataset', 'Schema already exists, continuing...');
    }
    try {
        // Check if database already has data - simple approach
        let existingEntityCount = 0;
        try {
            // Cheap existence probe first; only run the full count when non-empty.
            const statsQuery = await db.run(`?[id] := *entity{id} :limit 1`);
            existingEntityCount = statsQuery.rows.length > 0 ? 1 : 0;
            if (existingEntityCount > 0) {
                // Get actual count
                const countQuery = await db.run(`?[count(id)] := *entity{id}`);
                existingEntityCount = countQuery.rows[0]?.[0] || 0;
            }
        }
        catch (e) {
            // Table doesn't exist yet, that's fine
            existingEntityCount = 0;
        }
        if (existingEntityCount > 0 && !cleanStart) {
            logger_1.logger.info('TestLargeDataset', `Database already contains ${existingEntityCount} entities, skipping data creation`);
            logger_1.logger.info('TestLargeDataset', `Use --clean flag to recreate database from scratch`);
        }
        else {
            // Create test data
            const { entityIds, duration: createDuration } = await createTestData(db, embeddingService, config);
            // Total write operations = entities + observations + relationships.
            const totalOps = config.numEntities +
                (config.numEntities * config.numObservationsPerEntity) +
                config.numRelationships;
            const totalTime = createDuration / 1000;
            const throughput = totalOps / totalTime;
            logger_1.logger.info('TestLargeDataset', `\nData creation stats:`);
            logger_1.logger.info('TestLargeDataset', `Total operations: ${totalOps}`);
            logger_1.logger.info('TestLargeDataset', `Total time: ${totalTime.toFixed(2)}s`);
            logger_1.logger.info('TestLargeDataset', `Throughput: ${throughput.toFixed(2)} ops/sec`);
        }
        // Run search tests
        await runSearchTests(hybridSearch, config);
        // Print performance summary
        logger_1.logger.info('TestLargeDataset', '\n=== Performance Summary ===');
        performance_monitor_1.perfMonitor.logSummary();
    }
    catch (error) {
        logger_1.logger.error('TestLargeDataset', 'Test failed:', error);
    }
    finally {
        // Always release the database handle, even on failure.
        db.close();
    }
}
|
|
490
|
+
/**
 * CLI entry point: picks a dataset config from argv[2] (defaulting to
 * 'small'), honors a `--clean` flag to rebuild the database from scratch,
 * runs the test, then prints a completion banner and usage help.
 */
async function main() {
    const sizeArg = process.argv[2];
    const selectedConfig = sizeArg || 'small';
    const wantClean = process.argv.includes('--clean');
    if (wantClean) {
        logger_1.logger.info('TestLargeDataset', 'Clean start mode: will delete existing database');
    }
    await runTest(selectedConfig, wantClean);
    logger_1.logger.info('TestLargeDataset', '\n=== Test completed ===');
    logger_1.logger.info('TestLargeDataset', `\nUsage: npx ts-node src/test-large-dataset.ts [small|medium|large] [--clean]`);
    logger_1.logger.info('TestLargeDataset', ` --clean: Delete existing database before test`);
}
main().catch(console.error);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cozo-memory",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"mcpName": "io.github.tobs-code/cozo-memory",
|
|
5
5
|
"description": "Local-first persistent memory system for AI agents with hybrid search, graph reasoning, and MCP integration",
|
|
6
6
|
"main": "dist/index.js",
|