cozo-memory 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +533 -0
  3. package/dist/api_bridge.js +266 -0
  4. package/dist/benchmark-gpu-cpu.js +188 -0
  5. package/dist/benchmark-heavy.js +230 -0
  6. package/dist/benchmark.js +160 -0
  7. package/dist/clear-cache.js +29 -0
  8. package/dist/db-service.js +228 -0
  9. package/dist/download-model.js +48 -0
  10. package/dist/embedding-service.js +249 -0
  11. package/dist/full-system-test.js +45 -0
  12. package/dist/hybrid-search.js +337 -0
  13. package/dist/index.js +3106 -0
  14. package/dist/inference-engine.js +348 -0
  15. package/dist/memory-service.js +215 -0
  16. package/dist/test-advanced-filters.js +64 -0
  17. package/dist/test-advanced-search.js +82 -0
  18. package/dist/test-advanced-time.js +47 -0
  19. package/dist/test-embedding.js +22 -0
  20. package/dist/test-filter-expr.js +84 -0
  21. package/dist/test-fts.js +58 -0
  22. package/dist/test-functions.js +25 -0
  23. package/dist/test-gpu-check.js +16 -0
  24. package/dist/test-graph-algs-final.js +76 -0
  25. package/dist/test-graph-filters.js +88 -0
  26. package/dist/test-graph-rag.js +124 -0
  27. package/dist/test-graph-walking.js +138 -0
  28. package/dist/test-index.js +35 -0
  29. package/dist/test-int-filter.js +48 -0
  30. package/dist/test-integration.js +69 -0
  31. package/dist/test-lower.js +35 -0
  32. package/dist/test-lsh.js +67 -0
  33. package/dist/test-mcp-tool.js +40 -0
  34. package/dist/test-pagerank.js +31 -0
  35. package/dist/test-semantic-walk.js +145 -0
  36. package/dist/test-time-filter.js +66 -0
  37. package/dist/test-time-functions.js +38 -0
  38. package/dist/test-triggers.js +60 -0
  39. package/dist/test-ts-ort.js +48 -0
  40. package/dist/test-validity-access.js +35 -0
  41. package/dist/test-validity-body.js +42 -0
  42. package/dist/test-validity-decomp.js +37 -0
  43. package/dist/test-validity-extraction.js +45 -0
  44. package/dist/test-validity-json.js +35 -0
  45. package/dist/test-validity.js +38 -0
  46. package/dist/types.js +3 -0
  47. package/dist/verify-gpu.js +30 -0
  48. package/dist/verify_transaction_tool.js +46 -0
  49. package/package.json +75 -0
@@ -0,0 +1,249 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
// CommonJS module bootstrap (compiled from TypeScript) for the embedding service.
Object.defineProperty(exports, "__esModule", { value: true });
exports.EmbeddingService = void 0;
const transformers_1 = require("@xenova/transformers");
// onnxruntime-node is loaded directly (not through transformers.js) so this
// service can create and own the inference session itself.
const ort = require('onnxruntime-node');
const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
// Robust path to project root
// NOTE(review): assumes this file lives one directory below the project root
// (e.g. dist/) — confirm if the build layout changes.
const PROJECT_ROOT = path.resolve(__dirname, '..');
const CACHE_DIR = path.resolve(PROJECT_ROOT, '.cache');
// Point transformers.js at the local model cache and allow loading models
// from disk instead of fetching from the hub.
transformers_1.env.cacheDir = CACHE_DIR;
transformers_1.env.allowLocalModels = true;
// Simple LRU Cache Implementation
// Map-backed cache with per-entry TTL; the least-recently-used entry is
// evicted once the cache reaches `maxSize`. Relies on Map preserving
// insertion order to track recency.
class LRUCache {
    cache = new Map(); // key -> { value, timestamp }
    maxSize;
    ttl;
    /**
     * @param {number} maxSize - Maximum number of entries before eviction.
     * @param {number} ttlMs - Entry time-to-live in milliseconds.
     */
    constructor(maxSize = 1000, ttlMs = 3600000) {
        this.maxSize = maxSize;
        this.ttl = ttlMs;
    }
    /**
     * Returns the cached value for `key`, or undefined if absent or expired.
     * A hit refreshes the entry's recency (re-inserts it at the end of the Map).
     */
    get(key) {
        const entry = this.cache.get(key);
        if (!entry)
            return undefined;
        // Check TTL
        if (Date.now() - entry.timestamp > this.ttl) {
            this.cache.delete(key);
            return undefined;
        }
        // Move to end (most recently used)
        this.cache.delete(key);
        this.cache.set(key, entry);
        return entry.value;
    }
    /** Inserts or updates `key`, evicting the least-recently-used entry if full. */
    set(key, value) {
        // If key already exists, remove old position
        if (this.cache.has(key)) {
            this.cache.delete(key);
        }
        // If cache is full, remove oldest entry
        else if (this.cache.size >= this.maxSize) {
            // BUGFIX: use the iterator's `done` flag instead of truthiness of the
            // key — a falsy key (e.g. the empty string "") previously escaped
            // eviction, letting the cache grow past maxSize.
            const first = this.cache.keys().next();
            if (!first.done) {
                this.cache.delete(first.value);
            }
        }
        this.cache.set(key, { value, timestamp: Date.now() });
    }
    /** Removes all entries. */
    clear() {
        this.cache.clear();
    }
    /** Current entry count (expired entries count until their next get()). */
    size() {
        return this.cache.size;
    }
}
/**
 * EmbeddingService — produces L2-normalized sentence embeddings using the
 * Xenova/bge-m3 model: tokenization via @xenova/transformers, inference via
 * onnxruntime-node. Results are memoized in an in-process LRU cache and all
 * embedding work is serialized through a promise queue so only one inference
 * runs at a time.
 */
class EmbeddingService {
    cache;              // LRUCache: text -> normalized embedding (number[])
    session = null;     // ort.InferenceSession, created lazily in init()
    tokenizer = null;   // transformers.js tokenizer, created lazily in init()
    modelId = "Xenova/bge-m3";
    dimensions = 1024;  // embedding width; used for the zero-vector fallback on error
    queue = Promise.resolve(); // tail of the serialization queue
    constructor() {
        this.cache = new LRUCache(1000, 3600000); // 1000 entries, 1h TTL
    }
    // Serializes embedding execution to avoid event loop blocking
    // Each task starts only after the previous one settles; the caller still
    // observes this task's own result/rejection via `res`.
    async runSerialized(task) {
        // Chain the task to the queue
        const res = this.queue.then(() => task());
        // Update the queue to wait for this task (but catch errors so queue doesn't stall)
        this.queue = res.catch(() => { });
        return res;
    }
    /**
     * Lazily loads the tokenizer and creates the ONNX inference session.
     * Idempotent: returns immediately once both are initialized.
     * Throws (after logging) if the model file cannot be found or loaded.
     */
    async init() {
        if (this.session && this.tokenizer)
            return;
        try {
            // 1. Load Tokenizer
            if (!this.tokenizer) {
                this.tokenizer = await transformers_1.AutoTokenizer.from_pretrained(this.modelId);
            }
            // 2. Determine model path
            // NOTE(review): path is hard-coded to the Xenova/bge-m3 cache layout;
            // it does not follow `modelId` if that field ever changes.
            const baseDir = path.join(transformers_1.env.cacheDir, 'Xenova', 'bge-m3', 'onnx');
            // Priority: FP32 (model.onnx) > Quantized (model_quantized.onnx)
            let modelPath = path.join(baseDir, 'model.onnx');
            if (!fs.existsSync(modelPath)) {
                modelPath = path.join(baseDir, 'model_quantized.onnx');
            }
            if (!fs.existsSync(modelPath)) {
                throw new Error(`Model file not found at: ${modelPath}`);
            }
            // 3. Create Session
            if (!this.session) {
                const options = {
                    executionProviders: ['cpu'], // Use CPU backend to avoid native conflicts
                    graphOptimizationLevel: 'all'
                };
                this.session = await ort.InferenceSession.create(modelPath, options);
            }
        }
        catch (err) {
            console.error("[EmbeddingService] Critical initialization error:", err);
            throw err;
        }
    }
    /**
     * Embeds a single text. Returns a normalized number[] of length
     * `dimensions`. On any inference error the error is logged and a
     * zero-vector is returned instead of throwing (deliberate best-effort
     * behavior — callers always get a vector).
     */
    async embed(text) {
        return this.runSerialized(async () => {
            const textStr = String(text || "");
            // 1. Cache lookup
            const cached = this.cache.get(textStr);
            if (cached) {
                return cached;
            }
            try {
                await this.init();
                if (!this.session || !this.tokenizer)
                    throw new Error("Session/Tokenizer not initialized");
                // 2. Tokenization
                const model_inputs = await this.tokenizer(textStr, { padding: true, truncation: true });
                // 3. Tensor Creation — convert tokenizer tensors to int64 ORT tensors.
                const feeds = {};
                let attentionMaskData = null;
                for (const [key, value] of Object.entries(model_inputs)) {
                    if (key === 'input_ids' || key === 'attention_mask' || key === 'token_type_ids') {
                        // @ts-ignore
                        const data = BigInt64Array.from(value.data || value.cpuData);
                        // @ts-ignore
                        const dims = value.dims;
                        // Store attention mask for pooling
                        if (key === 'attention_mask') {
                            attentionMaskData = data;
                        }
                        feeds[key] = new ort.Tensor('int64', data, dims);
                    }
                }
                // 4. Inference
                const results = await this.session.run(feeds);
                // 5. Pooling & Normalization
                // Output name usually 'last_hidden_state' or 'logits'
                // For BGE-M3, the first output is usually the hidden states [batch, seq_len, hidden_size]
                const outputName = this.session.outputNames[0];
                const outputTensor = results[outputName];
                // Ensure we have data
                if (!outputTensor || !attentionMaskData) {
                    throw new Error("No output data or attention mask available");
                }
                const embedding = this.meanPooling(outputTensor.data, attentionMaskData, outputTensor.dims);
                // Normalize
                const normalized = this.normalize(embedding);
                this.cache.set(textStr, normalized);
                return normalized;
            }
            catch (error) {
                console.error(`[EmbeddingService] Error embedding "${textStr.substring(0, 20)}...":`, error?.message || error);
                return new Array(this.dimensions).fill(0);
            }
        });
    }
    // Batch-Embeddings
    /**
     * Embeds many texts. Sequential by design (each call goes through the
     * serialized queue); returns one vector per input, in order.
     */
    async embedBatch(texts) {
        // For now, process sequentially via serialized queue to avoid overloading
        // In future, true batching can be implemented by passing array to tokenizer
        const results = [];
        for (const text of texts) {
            results.push(await this.embed(text));
        }
        return results;
    }
    /**
     * Attention-mask-weighted mean pooling over token hidden states.
     * @param data - flat Float32Array-like of shape [batch, seq_len, hidden]
     * @param attentionMask - BigInt64Array where 1n marks a real (non-pad) token
     * @param dims - [batch_size, seq_len, hidden_size]
     * @returns number[] of length hidden_size
     */
    meanPooling(data, attentionMask, dims) {
        // dims: [batch_size, seq_len, hidden_size]
        // We assume batch_size = 1 for single embedding call
        const [batchSize, seqLen, hiddenSize] = dims;
        // Create accumulator
        const embedding = new Float32Array(hiddenSize).fill(0);
        let validTokens = 0;
        for (let i = 0; i < seqLen; i++) {
            // Check mask (1 = valid token, 0 = padding)
            if (attentionMask[i] === 1n) {
                validTokens++;
                for (let j = 0; j < hiddenSize; j++) {
                    // data is flat array: [batch * seq * hidden]
                    // index = i * hiddenSize + j
                    embedding[j] += data[i * hiddenSize + j];
                }
            }
        }
        // Divide by valid count
        if (validTokens > 0) {
            for (let j = 0; j < hiddenSize; j++) {
                embedding[j] /= validTokens;
            }
        }
        return Array.from(embedding);
    }
    /** L2-normalizes a vector; returns the input unchanged if its norm is 0. */
    normalize(vector) {
        const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
        if (norm === 0)
            return vector;
        return vector.map(v => v / norm);
    }
    // Cache Statistics
    /** Returns current cache occupancy plus model identity/width. */
    getCacheStats() {
        return {
            size: this.cache.size(),
            maxSize: 1000, // NOTE(review): duplicated from the constructor arg — keep in sync
            model: this.modelId,
            dimensions: this.dimensions
        };
    }
    // Clear Cache
    clearCache() {
        this.cache.clear();
    }
}
exports.EmbeddingService = EmbeddingService;
"use strict";
// Smoke-test script: boots a MemoryServer, creates one entity + observation,
// then runs the same search twice to compare cold-start vs. L1-cache timing.
// Runs on load (see the trailing call) — intended to be executed as a CLI script.
Object.defineProperty(exports, "__esModule", { value: true });
const index_1 = require("./index");
const perf_hooks_1 = require("perf_hooks");
async function runFullSystemTest() {
    // DB path is handled internally by MemoryServer
    console.log("🚀 Starting Full System Test (v0.8.5)...");
    const memory = new index_1.MemoryServer();
    console.log("\n--- 1. Setup & Schema ---");
    // Wait for the server's async schema initialization to finish.
    await memory.initPromise;
    console.log("✅ Schema initialized.");
    console.log("\n--- 2. Data Ingest & Memory Creation ---");
    const e1 = await memory.createEntity({
        name: "user_123",
        type: "User",
        metadata: { description: "A test user" }
    });
    // createEntity reports failure via an `error` field rather than throwing.
    if (e1.error) {
        console.error("Failed to create entity:", e1.error);
        return;
    }
    // NOTE(review): o1 is never inspected — the observation result is unused.
    const o1 = await memory.addObservation({
        entity_id: e1.id,
        text: "User prefers dark mode and likes coding in TypeScript.",
    });
    console.log("✅ Observation 1 added.");
    console.log("\n--- 3. Cache System (L1, L2, Semantic) ---");
    // Query 1: Cold start
    const t1 = perf_hooks_1.performance.now();
    await memory.advancedSearch({ query: "dark mode preference" });
    const d1 = perf_hooks_1.performance.now() - t1;
    console.log("Query 1 (Cold Start)...");
    console.log(`⏱️ Duration: ${d1.toFixed(2)}ms`);
    // Query 1: Repeat (L1 Cache)
    const t2 = perf_hooks_1.performance.now();
    await memory.advancedSearch({ query: "dark mode preference" });
    const d2 = perf_hooks_1.performance.now() - t2;
    console.log("\nQuery 1 (L1 Memory Cache)...");
    console.log(`⏱️ Duration: ${d2.toFixed(2)}ms`);
    // Timing-based check: a sub-5ms repeat is taken as evidence of an L1 hit.
    if (d2 < 5)
        console.log("✅ SUCCESS: L1 Cache Hit (< 5ms)");
    // Query 2: Semantic Cache (similar query)
    // ... (rest of the test if any)
}
runFullSystemTest().catch(console.error);
"use strict";
// Hybrid vector + graph search over a CozoDB store, with a two-tier
// (in-memory + DB table) result cache and exponential time-decay scoring.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.HybridSearch = void 0;
const crypto_1 = __importDefault(require("crypto"));
// NOTE(review): appears unused in this file — presumably intended for a
// semantic-similarity cache lookup that is not implemented here; confirm.
const SEMANTIC_CACHE_THRESHOLD = 0.95;
class HybridSearch {
    db;                      // CozoDB handle: db.run(query, params) -> { rows }
    embeddingService;        // EmbeddingService: embed(text) -> number[]
    searchCache = new Map(); // L1 cache: cacheKey -> { results, timestamp }
    CACHE_TTL = 300000; // 5 minutes cache
    constructor(db, embeddingService) {
        this.db = db;
        this.embeddingService = embeddingService;
    }
    /**
     * Builds a deterministic cache key: md5 over a canonical JSON projection
     * of the search options (query, limit, filters, graph/vector params).
     * md5 is used only as a cache key here, not for security.
     */
    getCacheKey(options) {
        const str = JSON.stringify({
            q: options.query,
            l: options.limit,
            t: options.entityTypes,
            io: options.includeObservations,
            ie: options.includeEntities,
            tr: options.timeRangeHours,
            f: options.filters,
            g: options.graphConstraints,
            v: options.vectorParams
        });
        return crypto_1.default.createHash('md5').update(str).digest('hex');
    }
    /**
     * Two-tier cache lookup: L1 in-memory Map first, then the `search_cache`
     * DB relation. Returns the cached results array, or null on miss.
     * NOTE(review): `queryEmbedding` is accepted but never used here —
     * presumably reserved for semantic (similar-query) cache matching.
     * DB errors (e.g. missing table) are logged and treated as a miss.
     */
    async tryCacheLookup(options, queryEmbedding) {
        const cacheKey = this.getCacheKey(options);
        const cached = this.searchCache.get(cacheKey);
        if (cached && (Date.now() - cached.timestamp < this.CACHE_TTL)) {
            console.error(`[HybridSearch] In-Memory cache hit for key: ${cacheKey}`);
            return cached.results;
        }
        try {
            // DB cache stores created_at in epoch seconds (see updateCache).
            const exactRes = await this.db.run('?[results] := *search_cache{query_hash: $hash, results, created_at}, created_at > $min_ts', { hash: cacheKey, min_ts: Math.floor((Date.now() - this.CACHE_TTL) / 1000) });
            if (exactRes.rows.length > 0) {
                console.error(`[HybridSearch] DB cache hit for key: ${cacheKey}`);
                const results = exactRes.rows[0][0];
                // Promote the DB hit into the L1 cache.
                this.searchCache.set(cacheKey, { results, timestamp: Date.now() });
                return results;
            }
        }
        catch (e) {
            console.error(`[HybridSearch] Cache lookup error or table missing: ${e.message}`);
        }
        return null;
    }
    /**
     * Writes results to both cache tiers. The DB write is best-effort:
     * failures are deliberately swallowed so caching never breaks a search.
     */
    async updateCache(options, queryEmbedding, results) {
        const cacheKey = this.getCacheKey(options);
        this.searchCache.set(cacheKey, { results, timestamp: Date.now() });
        try {
            await this.db.run('?[query_hash, results, options, created_at, embedding] <- [[$hash, $res, $opt, $now, vec($emb)]] :put search_cache{query_hash}', { hash: cacheKey, res: results, opt: options, now: Math.floor(Date.now() / 1000), emb: queryEmbedding });
        }
        catch (e) { }
    }
    /**
     * Applies exponential time decay to each result's score (half-life:
     * 90 days, expressed in hours as 24 * 90). Results without created_at
     * keep their (NaN-sanitized) score.
     * NOTE(review): `Number(createdAt) / 1000` divides only created_at before
     * subtracting from Date.now() (ms) — if created_at is epoch-ms or
     * epoch-seconds this mixes units either way; confirm the intended scale
     * against how *entity.created_at is stored.
     */
    applyTimeDecay(results) {
        return results.map(r => {
            let score = Number(r.score);
            if (isNaN(score))
                score = 0;
            if (r.created_at) {
                // Cozo Validity values arrive as [start, end]; take the start.
                const createdAt = Array.isArray(r.created_at) ? r.created_at[0] : r.created_at;
                const ageHours = (Date.now() - Number(createdAt) / 1000) / (1000 * 60 * 60);
                const decay = Math.pow(0.5, ageHours / (24 * 90)); // 90 days half-life
                let newScore = score * decay;
                if (isNaN(newScore))
                    newScore = 0;
                return { ...r, score: newScore };
            }
            return { ...r, score };
        });
    }
    /**
     * Main entry point: HNSW vector search over entities with optional
     * metadata/type/time/graph constraints, assembled into a single CozoDB
     * Datalog query. Falls back to search() on query errors. Results are
     * cached and time-decayed.
     */
    async advancedSearch(options) {
        console.error("[HybridSearch] Starting advancedSearch with options:", JSON.stringify(options, null, 2));
        const { query, limit = 10, filters, graphConstraints, vectorParams } = options;
        let queryEmbedding;
        try {
            queryEmbedding = await this.embeddingService.embed(query);
        }
        catch (e) {
            console.error("[HybridSearch] Embedding failed", e);
            throw e;
        }
        const cachedResults = await this.tryCacheLookup(options, queryEmbedding);
        if (cachedResults !== null) {
            console.error("[HybridSearch] Cache hit for advancedSearch");
            return cachedResults;
        }
        console.error("[HybridSearch] Cache miss, executing Datalog query...");
        // Over-fetch from the vector index so post-filtering still leaves
        // enough candidates to fill `limit`.
        let topk = limit * 2;
        const hasFilters = (filters?.metadata && Object.keys(filters.metadata).length > 0) ||
            (filters?.entityTypes && filters.entityTypes.length > 0);
        if (hasFilters) {
            // Significantly increase topk for post-filtering
            topk = Math.max(limit * 20, 200);
        }
        const params = {
            query_vector: queryEmbedding,
            limit: limit,
            topk: topk,
            ef_search: vectorParams?.efSearch || 100,
        };
        // NOTE(review): hnswFilters and metaRules are never populated in this
        // method — inline HNSW filtering appears to have been abandoned in
        // favor of the metaJoins / post-filtering path below.
        let hnswFilters = [];
        const metaRules = [];
        const metaJoins = [];
        if (filters?.metadata) {
            Object.entries(filters.metadata).forEach(([key, value], index) => {
                const paramName = `meta_val_${index}`;
                params[paramName] = value;
                // Use metadata->'key' syntax which is correct for CozoDB JSON access
                metaJoins.push(`metadata->'${key}' == $${paramName}`);
            });
        }
        if (filters?.entityTypes && filters.entityTypes.length > 0) {
            params.allowed_types = filters.entityTypes;
            // Post-filtering for types
            metaJoins.push(`is_in(type, $allowed_types)`);
        }
        // Use filtered indexes if possible (v1.7)
        let indexToUse = "entity:semantic";
        if (filters?.entityTypes && filters.entityTypes.length === 1) {
            const requestedType = filters.entityTypes[0].toLowerCase();
            const supportedFilteredIndexes = ['person', 'project', 'task', 'note'];
            if (supportedFilteredIndexes.includes(requestedType)) {
                indexToUse = `entity:semantic_${requestedType}`;
            }
        }
        // Multi-Vector Support: Use name_embedding if query is short (v1.7)
        let indexToSearch = indexToUse;
        const isShortQuery = query.split(' ').length <= 3;
        if (isShortQuery && !filters?.entityTypes) {
            // For short queries without type filter, the name_semantic index is often more precise
            indexToSearch = "entity:name_semantic";
        }
        // Assemble the HNSW index call (~index{...}) for the query body.
        let semanticCall = `~${indexToSearch}{id | query: vec($query_vector), k: $topk, ef: $ef_search, bind_distance: dist`;
        if (hnswFilters.length > 0) {
            semanticCall += `, filter: ${hnswFilters.join(" && ")}`;
        }
        semanticCall += `}`;
        let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at}`];
        if (metaJoins.length > 0) {
            bodyConstraints.push(...metaJoins);
        }
        if (options.timeRangeHours) {
            // min_ts is bound as a param; the actual time filtering happens in
            // the JS post-filter below, not in the Datalog body.
            const minTs = Date.now() - (options.timeRangeHours * 3600 * 1000);
            params.min_ts = minTs;
        }
        if (graphConstraints?.requiredRelations && graphConstraints.requiredRelations.length > 0) {
            graphConstraints.requiredRelations.forEach((relType, index) => {
                const relParam = `rel_type_${index}`;
                params[relParam] = relType;
                bodyConstraints.push(`rel_match[id, $${relParam}]`);
            });
        }
        if (graphConstraints?.targetEntityIds && graphConstraints.targetEntityIds.length > 0) {
            params.target_ids = graphConstraints.targetEntityIds;
            bodyConstraints.push(`target_match[id, t_id]`, `is_in(t_id, $target_ids)`);
        }
        if (filters?.minScore) {
            params.min_score = filters.minScore;
            bodyConstraints.push(`score >= $min_score`);
        }
        // Helper rules: pagerank lookup with a 0.0 default for unranked entities.
        const helperRules = [
            `rank_val[id, r] := *entity_rank{entity_id: id, pagerank: r}`,
            `rank_val[id, r] := *entity{id, @ "NOW"}, not *entity_rank{entity_id: id}, r = 0.0`
        ];
        if (graphConstraints?.requiredRelations && graphConstraints.requiredRelations.length > 0) {
            // Relations are matched in either direction (from or to this entity).
            helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type}`, `rel_match[id, rel_type] := *relationship{to_id: id, relation_type: rel_type}`);
        }
        if (graphConstraints?.targetEntityIds && graphConstraints.targetEntityIds.length > 0) {
            helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id}`, `target_match[id, target_id] := *relationship{to_id: id, from_id: target_id}`);
        }
        // score = 1 - HNSW distance; pagerank (pr) is bound but not folded
        // into the score here.
        const datalogQuery = [
            ...helperRules,
            `?[id, name, type, metadata, created_at, score, dist] := ${bodyConstraints.join(', ')}, rank_val[id, pr], score = (1.0 - dist)`,
            `:sort -score`,
            `:limit $limit`
        ].join('\n').trim();
        console.error('--- DEBUG: Cozo Datalog Query ---');
        console.error(datalogQuery);
        console.error('--- DEBUG: Params ---');
        console.dir(params, { depth: null });
        try {
            const results = await this.db.run(datalogQuery, params);
            let searchResults = results.rows.map((r) => ({
                id: r[0],
                entity_id: r[0],
                name: r[1],
                type: r[2],
                metadata: r[3],
                explanation: `DEBUG: raw_score=${r[5]}, dist=${r[6]}`,
                created_at: Array.isArray(r[4]) ? r[4][0] : r[4], // CozoDB returns [start, end] for Validity
                score: Number(r[5]) || 0,
                source: "advanced_hybrid",
            }));
            // Post-Filtering for Time Range
            if (options.timeRangeHours) {
                const minTs = Date.now() - (options.timeRangeHours * 3600 * 1000);
                searchResults = searchResults.filter(r => (r.created_at || 0) > minTs);
            }
            // Post-Filtering for Metadata (since CozoDB get() in Datalog often fails)
            if (filters?.metadata) {
                searchResults = searchResults.filter(r => {
                    if (!r.metadata || typeof r.metadata !== 'object')
                        return false;
                    return Object.entries(filters.metadata).every(([key, val]) => r.metadata[key] === val);
                });
            }
            const finalResults = this.applyTimeDecay(searchResults);
            await this.updateCache(options, queryEmbedding, finalResults);
            return finalResults;
        }
        catch (e) {
            console.error("[HybridSearch] Error in advancedSearch:", e.message);
            // Fall back to the simpler search() path on any query error.
            return this.search(options);
        }
    }
    /**
     * Simple search / fallback path. Currently only serves cached results;
     * on a cache miss it returns an empty array (stub implementation —
     * see the "Fallback Mock" comment).
     * NOTE(review): uses truthiness (`if (cachedResults)`) rather than the
     * `!== null` check advancedSearch uses — a cached empty array is treated
     * as a miss here.
     */
    async search(options) {
        const { query, limit = 10 } = options;
        const queryEmbedding = await this.embeddingService.embed(query);
        const cachedResults = await this.tryCacheLookup(options, queryEmbedding);
        if (cachedResults) {
            // Add debug info to cached results too
            return cachedResults.map(r => ({
                ...r,
                explanation: (typeof r.explanation === 'string' ? r.explanation : JSON.stringify(r.explanation)) + ` | CACHED`
            }));
        }
        // NOTE(review): the destructured values below are unused by the stub.
        const { limit: queryLimit = 10, filters, graphConstraints, vectorParams } = options;
        // @ts-ignore
        const { topk = 5, efSearch = 50 } = vectorParams || {};
        // Fallback Mock
        return [];
    }
    /**
     * Graph-RAG search: vector-seeded entities expanded through the
     * relationship graph up to maxDepth hops, with depth-penalized scoring.
     * Falls back to search() on query errors; results are time-decayed but
     * (unlike advancedSearch) not written to the cache.
     */
    async graphRag(options) {
        console.error("[HybridSearch] Starting graphRag with options:", JSON.stringify(options, null, 2));
        const { query, limit = 5, filters, graphConstraints } = options;
        const maxDepth = graphConstraints?.maxDepth || 2;
        const queryEmbedding = await this.embeddingService.embed(query);
        const topk = limit * 2;
        const params = {
            query_vector: queryEmbedding,
            topk: topk,
            ef_search: 100,
            max_depth: maxDepth,
            limit: limit
        };
        // Here entity-type filters ARE pushed into the HNSW call (contrast
        // with advancedSearch, which post-filters instead).
        let hnswFilters = [];
        const metaRules = [];
        const metaJoins = [];
        if (filters?.entityTypes && filters.entityTypes.length > 0) {
            params.allowed_types = filters.entityTypes;
            hnswFilters.push(`is_in(type, $allowed_types)`);
        }
        if (filters?.metadata) {
            // Metadata params are bound for the query but actually enforced by
            // the JS post-filter further down.
            Object.entries(filters.metadata).forEach(([key, value], index) => {
                const paramName = `meta_val_${index}`;
                params[paramName] = value;
            });
        }
        let seedSemanticCall = `~entity:semantic{id, type, metadata | query: vec($query_vector), k: $topk, ef: $ef_search, bind_distance: dist`;
        if (hnswFilters.length > 0) {
            seedSemanticCall += `, filter: ${hnswFilters.join(" && ")}`;
        }
        seedSemanticCall += `}`;
        let seedConstraints = [seedSemanticCall];
        if (options.timeRangeHours) {
            const minTs = Date.now() - (options.timeRangeHours * 3600 * 1000);
            params.min_ts = minTs;
        }
        // Datalog Query for Graph-RAG:
        // 1. Find seed entities via vector search (with inline filtering)
        // 2. Explore the graph starting from seeds up to maxDepth hops
        // 3. Collect all reached entities and observations
        // 4. Calculate a combined score based on vector distance, graph distance, and PageRank
        const datalogQuery = `
rank_val[id, r] := *entity_rank{entity_id: id, pagerank: r}
rank_val[id, r] := *entity{id, @ "NOW"}, not *entity_rank{entity_id: id}, r = 0.0

seeds[id, score] := ${seedConstraints.join(", ")}, score = 1.0 - dist

path[start_id, current_id, d] := seeds[start_id, _], current_id = start_id, d = 0
path[start_id, next_id, d_new] := path[start_id, current_id, d], *relationship{from_id: current_id, to_id: next_id}, d < $max_depth, d_new = d + 1
path[start_id, next_id, d_new] := path[start_id, current_id, d], *relationship{to_id: current_id, from_id: next_id}, d < $max_depth, d_new = d + 1

result_entities[id, final_score, depth] := path[seed_id, id, depth], seeds[seed_id, seed_score], rank_val[id, pr], final_score = seed_score * (1.0 - 0.2 * depth)

?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at}, source = 'graph_rag_entity', text = ''

:sort -score
:limit $limit
`.trim();
        console.error("[HybridSearch] Graph-RAG Datalog Query:\n", datalogQuery);
        try {
            const results = await this.db.run(datalogQuery, params);
            let searchResults = results.rows.map((r) => ({
                id: r[0],
                name: r[1],
                type: r[2],
                metadata: r[3],
                created_at: Array.isArray(r[4]) ? r[4][0] : r[4],
                score: Number(r[5]) || 0,
                source: r[6],
                text: r[7] || undefined,
                explanation: {
                    source_score: r[5],
                    details: `Found via graph expansion (Source: ${r[6]})`
                }
            }));
            // Post-filtering for time range
            if (options.timeRangeHours) {
                const minTs = Date.now() - (options.timeRangeHours * 3600 * 1000);
                searchResults = searchResults.filter(r => (r.created_at || 0) > minTs);
            }
            // Post-filtering for metadata
            if (filters?.metadata) {
                searchResults = searchResults.filter(r => {
                    if (!r.metadata || typeof r.metadata !== 'object')
                        return false;
                    return Object.entries(filters.metadata).every(([key, val]) => r.metadata[key] === val);
                });
            }
            return this.applyTimeDecay(searchResults);
        }
        catch (e) {
            console.error("[HybridSearch] Error in graphRag:", e.message);
            // Fallback to normal search on error
            return this.search(options);
        }
    }
}
exports.HybridSearch = HybridSearch;