cozo-memory 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +533 -0
- package/dist/api_bridge.js +266 -0
- package/dist/benchmark-gpu-cpu.js +188 -0
- package/dist/benchmark-heavy.js +230 -0
- package/dist/benchmark.js +160 -0
- package/dist/clear-cache.js +29 -0
- package/dist/db-service.js +228 -0
- package/dist/download-model.js +48 -0
- package/dist/embedding-service.js +249 -0
- package/dist/full-system-test.js +45 -0
- package/dist/hybrid-search.js +337 -0
- package/dist/index.js +3106 -0
- package/dist/inference-engine.js +348 -0
- package/dist/memory-service.js +215 -0
- package/dist/test-advanced-filters.js +64 -0
- package/dist/test-advanced-search.js +82 -0
- package/dist/test-advanced-time.js +47 -0
- package/dist/test-embedding.js +22 -0
- package/dist/test-filter-expr.js +84 -0
- package/dist/test-fts.js +58 -0
- package/dist/test-functions.js +25 -0
- package/dist/test-gpu-check.js +16 -0
- package/dist/test-graph-algs-final.js +76 -0
- package/dist/test-graph-filters.js +88 -0
- package/dist/test-graph-rag.js +124 -0
- package/dist/test-graph-walking.js +138 -0
- package/dist/test-index.js +35 -0
- package/dist/test-int-filter.js +48 -0
- package/dist/test-integration.js +69 -0
- package/dist/test-lower.js +35 -0
- package/dist/test-lsh.js +67 -0
- package/dist/test-mcp-tool.js +40 -0
- package/dist/test-pagerank.js +31 -0
- package/dist/test-semantic-walk.js +145 -0
- package/dist/test-time-filter.js +66 -0
- package/dist/test-time-functions.js +38 -0
- package/dist/test-triggers.js +60 -0
- package/dist/test-ts-ort.js +48 -0
- package/dist/test-validity-access.js +35 -0
- package/dist/test-validity-body.js +42 -0
- package/dist/test-validity-decomp.js +37 -0
- package/dist/test-validity-extraction.js +45 -0
- package/dist/test-validity-json.js +35 -0
- package/dist/test-validity.js +38 -0
- package/dist/types.js +3 -0
- package/dist/verify-gpu.js +30 -0
- package/dist/verify_transaction_tool.js +46 -0
- package/package.json +75 -0
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
exports.EmbeddingService = void 0;
const transformers_1 = require("@xenova/transformers");
// onnxruntime-node is a plain CJS module; no interop wrapper needed.
const ort = require('onnxruntime-node');
const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
// Robust path to project root
// NOTE(review): assumes this file lives one directory below the project root
// (e.g. dist/) — confirm if the build layout changes.
const PROJECT_ROOT = path.resolve(__dirname, '..');
const CACHE_DIR = path.resolve(PROJECT_ROOT, '.cache');
// Point transformers.js at the local model cache and allow loading models
// from disk instead of always fetching from the hub.
transformers_1.env.cacheDir = CACHE_DIR;
transformers_1.env.allowLocalModels = true;
|
|
46
|
+
/**
 * Minimal LRU cache with a per-entry TTL, backed by Map insertion order
 * (oldest entry is always the first key in iteration order).
 */
class LRUCache {
    cache = new Map();
    maxSize;
    ttl;
    /**
     * @param {number} maxSize - maximum number of entries kept (default 1000)
     * @param {number} ttlMs - per-entry time-to-live in ms (default 1 hour)
     */
    constructor(maxSize = 1000, ttlMs = 3600000) {
        this.maxSize = maxSize;
        this.ttl = ttlMs;
    }
    /**
     * Look up a key. Expired entries are dropped; live entries are bumped to
     * most-recently-used position.
     * @returns the cached value, or undefined on miss/expiry
     */
    get(key) {
        const hit = this.cache.get(key);
        if (!hit) {
            return undefined;
        }
        // Remove in both cases: an expired entry stays gone, a live one is
        // re-inserted below so Map order tracks recency.
        this.cache.delete(key);
        if (Date.now() - hit.timestamp > this.ttl) {
            return undefined;
        }
        this.cache.set(key, hit);
        return hit.value;
    }
    /**
     * Insert or refresh a key. When the cache is full and the key is new,
     * the least-recently-used entry is evicted first.
     */
    set(key, value) {
        const existed = this.cache.delete(key);
        if (!existed && this.cache.size >= this.maxSize) {
            const oldest = this.cache.keys().next().value;
            if (oldest) {
                this.cache.delete(oldest);
            }
        }
        this.cache.set(key, { value, timestamp: Date.now() });
    }
    /** Remove every entry. */
    clear() {
        this.cache.clear();
    }
    /** Number of stored entries (expired-but-unread entries included). */
    size() {
        return this.cache.size;
    }
}
|
|
90
|
+
/**
 * Produces L2-normalized sentence embeddings with the Xenova/bge-m3 ONNX
 * model, running tokenization via transformers.js and inference via
 * onnxruntime-node. Results are memoized per input string in an LRU cache,
 * and all inference is serialized through a promise queue.
 */
class EmbeddingService {
    // LRU cache of text -> normalized embedding array.
    cache;
    // ort.InferenceSession, created lazily in init().
    session = null;
    // transformers.js tokenizer, created lazily in init().
    tokenizer = null;
    modelId = "Xenova/bge-m3";
    // Output embedding length; also the size of the zero-vector fallback.
    dimensions = 1024;
    // Tail of the serialization chain; every embed() call is appended here.
    queue = Promise.resolve();
    constructor() {
        this.cache = new LRUCache(1000, 3600000); // 1000 entries, 1h TTL
    }
    // Serializes embedding execution to avoid event loop blocking
    // Returns the task's own promise (including its rejection, if any);
    // the internal queue swallows errors so one failure cannot stall it.
    async runSerialized(task) {
        // Chain the task to the queue
        const res = this.queue.then(() => task());
        // Update the queue to wait for this task (but catch errors so queue doesn't stall)
        this.queue = res.catch(() => { });
        return res;
    }
    /**
     * Idempotently loads the tokenizer and ONNX session. Throws (after
     * logging) when the model file cannot be found or session creation fails.
     */
    async init() {
        if (this.session && this.tokenizer)
            return;
        try {
            // 1. Load Tokenizer
            if (!this.tokenizer) {
                this.tokenizer = await transformers_1.AutoTokenizer.from_pretrained(this.modelId);
            }
            // 2. Determine model path
            // NOTE(review): hard-codes the transformers.js cache layout
            // (<cacheDir>/Xenova/bge-m3/onnx) — verify against the version of
            // @xenova/transformers in use.
            const baseDir = path.join(transformers_1.env.cacheDir, 'Xenova', 'bge-m3', 'onnx');
            // Priority: FP32 (model.onnx) > Quantized (model_quantized.onnx)
            let modelPath = path.join(baseDir, 'model.onnx');
            if (!fs.existsSync(modelPath)) {
                modelPath = path.join(baseDir, 'model_quantized.onnx');
            }
            if (!fs.existsSync(modelPath)) {
                throw new Error(`Model file not found at: ${modelPath}`);
            }
            // 3. Create Session
            if (!this.session) {
                const options = {
                    executionProviders: ['cpu'], // Use CPU backend to avoid native conflicts
                    graphOptimizationLevel: 'all'
                };
                this.session = await ort.InferenceSession.create(modelPath, options);
            }
        }
        catch (err) {
            console.error("[EmbeddingService] Critical initialization error:", err);
            throw err;
        }
    }
    /**
     * Embeds a single text. Cache hit returns immediately; otherwise runs
     * tokenize -> ONNX inference -> mean pooling -> L2 normalization.
     * On any error, logs and returns a zero vector of `dimensions` length
     * instead of throwing (deliberate best-effort behavior).
     */
    async embed(text) {
        return this.runSerialized(async () => {
            // Null/undefined input is treated as the empty string.
            const textStr = String(text || "");
            // 1. Cache lookup
            const cached = this.cache.get(textStr);
            if (cached) {
                return cached;
            }
            try {
                await this.init();
                if (!this.session || !this.tokenizer)
                    throw new Error("Session/Tokenizer not initialized");
                // 2. Tokenization
                const model_inputs = await this.tokenizer(textStr, { padding: true, truncation: true });
                // 3. Tensor Creation
                const feeds = {};
                let attentionMaskData = null;
                for (const [key, value] of Object.entries(model_inputs)) {
                    if (key === 'input_ids' || key === 'attention_mask' || key === 'token_type_ids') {
                        // @ts-ignore
                        // NOTE(review): relies on transformers.js tensor internals
                        // exposing either .data or .cpuData — confirm when upgrading.
                        const data = BigInt64Array.from(value.data || value.cpuData);
                        // @ts-ignore
                        const dims = value.dims;
                        // Store attention mask for pooling
                        if (key === 'attention_mask') {
                            attentionMaskData = data;
                        }
                        feeds[key] = new ort.Tensor('int64', data, dims);
                    }
                }
                // 4. Inference
                const results = await this.session.run(feeds);
                // 5. Pooling & Normalization
                // Output name usually 'last_hidden_state' or 'logits'
                // For BGE-M3, the first output is usually the hidden states [batch, seq_len, hidden_size]
                const outputName = this.session.outputNames[0];
                const outputTensor = results[outputName];
                // Ensure we have data
                if (!outputTensor || !attentionMaskData) {
                    throw new Error("No output data or attention mask available");
                }
                const embedding = this.meanPooling(outputTensor.data, attentionMaskData, outputTensor.dims);
                // Normalize
                const normalized = this.normalize(embedding);
                this.cache.set(textStr, normalized);
                return normalized;
            }
            catch (error) {
                console.error(`[EmbeddingService] Error embedding "${textStr.substring(0, 20)}...":`, error?.message || error);
                // Zero-vector fallback keeps callers alive on model errors.
                return new Array(this.dimensions).fill(0);
            }
        });
    }
    // Batch-Embeddings
    /**
     * Embeds each text sequentially through embed() (and thus the serialized
     * queue). Returns results in input order.
     */
    async embedBatch(texts) {
        // For now, process sequentially via serialized queue to avoid overloading
        // In future, true batching can be implemented by passing array to tokenizer
        const results = [];
        for (const text of texts) {
            results.push(await this.embed(text));
        }
        return results;
    }
    /**
     * Attention-mask-weighted mean over the sequence axis.
     * @param data - flat hidden-state buffer laid out [batch * seq * hidden]
     * @param attentionMask - BigInt64Array; 1n marks real tokens, 0n padding
     * @param dims - [batch_size, seq_len, hidden_size]
     * @returns plain number[] of length hidden_size
     */
    meanPooling(data, attentionMask, dims) {
        // dims: [batch_size, seq_len, hidden_size]
        // We assume batch_size = 1 for single embedding call
        const [batchSize, seqLen, hiddenSize] = dims;
        // Create accumulator
        const embedding = new Float32Array(hiddenSize).fill(0);
        let validTokens = 0;
        for (let i = 0; i < seqLen; i++) {
            // Check mask (1 = valid token, 0 = padding)
            if (attentionMask[i] === 1n) {
                validTokens++;
                for (let j = 0; j < hiddenSize; j++) {
                    // data is flat array: [batch * seq * hidden]
                    // index = i * hiddenSize + j
                    embedding[j] += data[i * hiddenSize + j];
                }
            }
        }
        // Divide by valid count
        if (validTokens > 0) {
            for (let j = 0; j < hiddenSize; j++) {
                embedding[j] /= validTokens;
            }
        }
        return Array.from(embedding);
    }
    /**
     * L2-normalizes a vector. A zero vector is returned unchanged to avoid
     * division by zero.
     */
    normalize(vector) {
        const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
        if (norm === 0)
            return vector;
        return vector.map(v => v / norm);
    }
    // Cache Statistics
    // Returns current cache occupancy plus static model info. maxSize here
    // mirrors the constant passed to the LRUCache constructor.
    getCacheStats() {
        return {
            size: this.cache.size(),
            maxSize: 1000,
            model: this.modelId,
            dimensions: this.dimensions
        };
    }
    // Clear Cache
    clearCache() {
        this.cache.clear();
    }
}
exports.EmbeddingService = EmbeddingService;
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const index_1 = require("./index");
|
|
4
|
+
const perf_hooks_1 = require("perf_hooks");
|
|
5
|
+
/**
 * End-to-end smoke test for the memory server: waits for schema init,
 * ingests one entity plus one observation, then times two identical
 * advancedSearch calls to demonstrate the L1 in-memory cache.
 * Exercises MemoryServer only through its public API; logs progress to
 * stdout and bails out early if entity creation reports an error.
 */
async function runFullSystemTest() {
    // DB path is handled internally by MemoryServer
    console.log("🚀 Starting Full System Test (v0.8.5)...");
    const memory = new index_1.MemoryServer();
    console.log("\n--- 1. Setup & Schema ---");
    await memory.initPromise;
    console.log("✅ Schema initialized.");
    console.log("\n--- 2. Data Ingest & Memory Creation ---");
    const e1 = await memory.createEntity({
        name: "user_123",
        type: "User",
        metadata: { description: "A test user" }
    });
    if (e1.error) {
        console.error("Failed to create entity:", e1.error);
        return;
    }
    // NOTE(review): unlike createEntity above, the addObservation result is
    // not checked for an error; the previously unused `o1` binding was removed.
    await memory.addObservation({
        entity_id: e1.id,
        text: "User prefers dark mode and likes coding in TypeScript.",
    });
    console.log("✅ Observation 1 added.");
    console.log("\n--- 3. Cache System (L1, L2, Semantic) ---");
    // Query 1: Cold start — no cache layer populated yet.
    const t1 = perf_hooks_1.performance.now();
    await memory.advancedSearch({ query: "dark mode preference" });
    const d1 = perf_hooks_1.performance.now() - t1;
    console.log("Query 1 (Cold Start)...");
    console.log(`⏱️ Duration: ${d1.toFixed(2)}ms`);
    // Query 1: Repeat (L1 Cache) — identical options should hit in-memory cache.
    const t2 = perf_hooks_1.performance.now();
    await memory.advancedSearch({ query: "dark mode preference" });
    const d2 = perf_hooks_1.performance.now() - t2;
    console.log("\nQuery 1 (L1 Memory Cache)...");
    console.log(`⏱️ Duration: ${d2.toFixed(2)}ms`);
    if (d2 < 5)
        console.log("✅ SUCCESS: L1 Cache Hit (< 5ms)");
    // Query 2: Semantic Cache (similar query)
    // ... (rest of the test if any)
}
runFullSystemTest().catch(console.error);
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.HybridSearch = void 0;
|
|
7
|
+
const crypto_1 = __importDefault(require("crypto"));
|
|
8
|
+
const SEMANTIC_CACHE_THRESHOLD = 0.95;
|
|
9
|
+
/**
 * Hybrid vector + graph search over a CozoDB store.
 *
 * Layers:
 *  - L1: in-process Map cache (searchCache, 5-minute TTL)
 *  - L2: persistent `search_cache` relation in CozoDB
 *  - HNSW vector search (`~entity:semantic*` indexes) with optional
 *    metadata/type/graph post-filtering and a time-decay score adjustment.
 *
 * Requires a CozoDB-like `db` (async run(query, params) -> { rows }) and an
 * embedding service exposing embed(text) -> number[].
 */
class HybridSearch {
    db;
    embeddingService;
    // L1 cache: md5(option-key) -> { results, timestamp }.
    searchCache = new Map();
    CACHE_TTL = 300000; // 5 minutes cache
    constructor(db, embeddingService) {
        this.db = db;
        this.embeddingService = embeddingService;
    }
    /**
     * Deterministic cache key: md5 of a JSON projection of the search options.
     * Note: options not listed here do not affect the key.
     */
    getCacheKey(options) {
        const str = JSON.stringify({
            q: options.query,
            l: options.limit,
            t: options.entityTypes,
            io: options.includeObservations,
            ie: options.includeEntities,
            tr: options.timeRangeHours,
            f: options.filters,
            g: options.graphConstraints,
            v: options.vectorParams
        });
        return crypto_1.default.createHash('md5').update(str).digest('hex');
    }
    /**
     * Two-level cache lookup: L1 Map first, then the persistent
     * `search_cache` relation. A DB hit repopulates L1. Returns the cached
     * result array, or null on miss (including when the table is absent).
     * `queryEmbedding` is accepted but not used by the current exact-match
     * lookup (reserved for semantic matching).
     */
    async tryCacheLookup(options, queryEmbedding) {
        const cacheKey = this.getCacheKey(options);
        const cached = this.searchCache.get(cacheKey);
        if (cached && (Date.now() - cached.timestamp < this.CACHE_TTL)) {
            console.error(`[HybridSearch] In-Memory cache hit for key: ${cacheKey}`);
            return cached.results;
        }
        try {
            // DB-side TTL: created_at is stored in seconds (see updateCache),
            // so the cutoff is converted from ms to seconds here.
            const exactRes = await this.db.run('?[results] := *search_cache{query_hash: $hash, results, created_at}, created_at > $min_ts', { hash: cacheKey, min_ts: Math.floor((Date.now() - this.CACHE_TTL) / 1000) });
            if (exactRes.rows.length > 0) {
                console.error(`[HybridSearch] DB cache hit for key: ${cacheKey}`);
                const results = exactRes.rows[0][0];
                this.searchCache.set(cacheKey, { results, timestamp: Date.now() });
                return results;
            }
        }
        catch (e) {
            // Best-effort: a missing search_cache table is expected on fresh DBs.
            console.error(`[HybridSearch] Cache lookup error or table missing: ${e.message}`);
        }
        return null;
    }
    /**
     * Writes results into both cache levels. The DB write is best-effort:
     * failures (e.g. missing table) are silently ignored by design.
     */
    async updateCache(options, queryEmbedding, results) {
        const cacheKey = this.getCacheKey(options);
        this.searchCache.set(cacheKey, { results, timestamp: Date.now() });
        try {
            await this.db.run('?[query_hash, results, options, created_at, embedding] <- [[$hash, $res, $opt, $now, vec($emb)]] :put search_cache{query_hash}', { hash: cacheKey, res: results, opt: options, now: Math.floor(Date.now() / 1000), emb: queryEmbedding });
        }
        catch (e) { }
    }
    /**
     * Applies exponential time decay (90-day half-life) to each result's
     * score; results without created_at keep their (NaN-sanitized) score.
     * NOTE(review): `Number(createdAt) / 1000` suggests created_at is in
     * microseconds (Cozo Validity), converted to ms before subtracting from
     * Date.now() — but timeRangeHours post-filters elsewhere compare
     * created_at directly against a ms cutoff. The units look inconsistent;
     * verify against the actual stored timestamp format.
     */
    applyTimeDecay(results) {
        return results.map(r => {
            let score = Number(r.score);
            if (isNaN(score))
                score = 0;
            if (r.created_at) {
                const createdAt = Array.isArray(r.created_at) ? r.created_at[0] : r.created_at;
                const ageHours = (Date.now() - Number(createdAt) / 1000) / (1000 * 60 * 60);
                const decay = Math.pow(0.5, ageHours / (24 * 90)); // 90 days half-life
                let newScore = score * decay;
                if (isNaN(newScore))
                    newScore = 0;
                return { ...r, score: newScore };
            }
            return { ...r, score };
        });
    }
    /**
     * Main entry point: cached hybrid search.
     * Flow: embed query -> cache lookup -> build Datalog (HNSW + joins) ->
     * run -> post-filter (time/metadata) -> time decay -> cache write.
     * Throws if embedding fails; any Datalog error falls back to search().
     */
    async advancedSearch(options) {
        console.error("[HybridSearch] Starting advancedSearch with options:", JSON.stringify(options, null, 2));
        const { query, limit = 10, filters, graphConstraints, vectorParams } = options;
        let queryEmbedding;
        try {
            queryEmbedding = await this.embeddingService.embed(query);
        }
        catch (e) {
            console.error("[HybridSearch] Embedding failed", e);
            throw e;
        }
        const cachedResults = await this.tryCacheLookup(options, queryEmbedding);
        if (cachedResults !== null) {
            console.error("[HybridSearch] Cache hit for advancedSearch");
            return cachedResults;
        }
        console.error("[HybridSearch] Cache miss, executing Datalog query...");
        // Over-fetch from the vector index so post-filtering still leaves
        // enough rows to satisfy `limit`.
        let topk = limit * 2;
        const hasFilters = (filters?.metadata && Object.keys(filters.metadata).length > 0) ||
            (filters?.entityTypes && filters.entityTypes.length > 0);
        if (hasFilters) {
            // Significantly increase topk for post-filtering
            topk = Math.max(limit * 20, 200);
        }
        const params = {
            query_vector: queryEmbedding,
            limit: limit,
            topk: topk,
            ef_search: vectorParams?.efSearch || 100,
        };
        // hnswFilters would be inlined into the HNSW call; currently never
        // populated in this method (filtering happens via metaJoins instead).
        let hnswFilters = [];
        const metaRules = [];
        const metaJoins = [];
        if (filters?.metadata) {
            Object.entries(filters.metadata).forEach(([key, value], index) => {
                const paramName = `meta_val_${index}`;
                params[paramName] = value;
                // Use metadata->'key' syntax which is correct for CozoDB JSON access
                // NOTE(review): `key` is interpolated into the query text, not
                // parameterized — safe only if metadata keys are trusted.
                metaJoins.push(`metadata->'${key}' == $${paramName}`);
            });
        }
        if (filters?.entityTypes && filters.entityTypes.length > 0) {
            params.allowed_types = filters.entityTypes;
            // Post-filtering for types
            metaJoins.push(`is_in(type, $allowed_types)`);
        }
        // Use filtered indexes if possible (v1.7)
        let indexToUse = "entity:semantic";
        if (filters?.entityTypes && filters.entityTypes.length === 1) {
            const requestedType = filters.entityTypes[0].toLowerCase();
            const supportedFilteredIndexes = ['person', 'project', 'task', 'note'];
            if (supportedFilteredIndexes.includes(requestedType)) {
                indexToUse = `entity:semantic_${requestedType}`;
            }
        }
        // Multi-Vector Support: Use name_embedding if query is short (v1.7)
        let indexToSearch = indexToUse;
        const isShortQuery = query.split(' ').length <= 3;
        if (isShortQuery && !filters?.entityTypes) {
            // For short queries without type filter, the name_semantic index is often more precise
            indexToSearch = "entity:name_semantic";
        }
        let semanticCall = `~${indexToSearch}{id | query: vec($query_vector), k: $topk, ef: $ef_search, bind_distance: dist`;
        if (hnswFilters.length > 0) {
            semanticCall += `, filter: ${hnswFilters.join(" && ")}`;
        }
        semanticCall += `}`;
        let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at}`];
        if (metaJoins.length > 0) {
            bodyConstraints.push(...metaJoins);
        }
        if (options.timeRangeHours) {
            // min_ts is set for the query but the actual time filtering is
            // applied post-hoc on the result rows below.
            const minTs = Date.now() - (options.timeRangeHours * 3600 * 1000);
            params.min_ts = minTs;
        }
        if (graphConstraints?.requiredRelations && graphConstraints.requiredRelations.length > 0) {
            graphConstraints.requiredRelations.forEach((relType, index) => {
                const relParam = `rel_type_${index}`;
                params[relParam] = relType;
                bodyConstraints.push(`rel_match[id, $${relParam}]`);
            });
        }
        if (graphConstraints?.targetEntityIds && graphConstraints.targetEntityIds.length > 0) {
            params.target_ids = graphConstraints.targetEntityIds;
            bodyConstraints.push(`target_match[id, t_id]`, `is_in(t_id, $target_ids)`);
        }
        if (filters?.minScore) {
            params.min_score = filters.minScore;
            bodyConstraints.push(`score >= $min_score`);
        }
        // rank_val: PageRank per entity, defaulting to 0.0 when absent.
        // NOTE(review): pr is joined into the main rule but never used in the
        // score formula below — confirm whether rank weighting was intended.
        const helperRules = [
            `rank_val[id, r] := *entity_rank{entity_id: id, pagerank: r}`,
            `rank_val[id, r] := *entity{id, @ "NOW"}, not *entity_rank{entity_id: id}, r = 0.0`
        ];
        if (graphConstraints?.requiredRelations && graphConstraints.requiredRelations.length > 0) {
            // An entity matches a relation type if it appears on either end.
            helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type}`, `rel_match[id, rel_type] := *relationship{to_id: id, relation_type: rel_type}`);
        }
        if (graphConstraints?.targetEntityIds && graphConstraints.targetEntityIds.length > 0) {
            helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id}`, `target_match[id, target_id] := *relationship{to_id: id, from_id: target_id}`);
        }
        const datalogQuery = [
            ...helperRules,
            `?[id, name, type, metadata, created_at, score, dist] := ${bodyConstraints.join(', ')}, rank_val[id, pr], score = (1.0 - dist)`,
            `:sort -score`,
            `:limit $limit`
        ].join('\n').trim();
        console.error('--- DEBUG: Cozo Datalog Query ---');
        console.error(datalogQuery);
        console.error('--- DEBUG: Params ---');
        console.dir(params, { depth: null });
        try {
            const results = await this.db.run(datalogQuery, params);
            let searchResults = results.rows.map((r) => ({
                id: r[0],
                entity_id: r[0],
                name: r[1],
                type: r[2],
                metadata: r[3],
                explanation: `DEBUG: raw_score=${r[5]}, dist=${r[6]}`,
                created_at: Array.isArray(r[4]) ? r[4][0] : r[4], // CozoDB returns [start, end] for Validity
                score: Number(r[5]) || 0,
                source: "advanced_hybrid",
            }));
            // Post-Filtering for Time Range
            // NOTE(review): compares created_at directly to a ms cutoff; see
            // the unit caveat on applyTimeDecay.
            if (options.timeRangeHours) {
                const minTs = Date.now() - (options.timeRangeHours * 3600 * 1000);
                searchResults = searchResults.filter(r => (r.created_at || 0) > minTs);
            }
            // Post-Filtering for Metadata (since CozoDB get() in Datalog often fails)
            if (filters?.metadata) {
                searchResults = searchResults.filter(r => {
                    if (!r.metadata || typeof r.metadata !== 'object')
                        return false;
                    return Object.entries(filters.metadata).every(([key, val]) => r.metadata[key] === val);
                });
            }
            const finalResults = this.applyTimeDecay(searchResults);
            await this.updateCache(options, queryEmbedding, finalResults);
            return finalResults;
        }
        catch (e) {
            console.error("[HybridSearch] Error in advancedSearch:", e.message);
            return this.search(options);
        }
    }
    /**
     * Fallback search used when advancedSearch/graphRag fail. Currently only
     * serves cached results (tagged "| CACHED"); a cache miss returns an
     * empty array (explicit mock — no real query is executed).
     */
    async search(options) {
        const { query, limit = 10 } = options;
        const queryEmbedding = await this.embeddingService.embed(query);
        const cachedResults = await this.tryCacheLookup(options, queryEmbedding);
        if (cachedResults) {
            // Add debug info to cached results too
            return cachedResults.map(r => ({
                ...r,
                explanation: (typeof r.explanation === 'string' ? r.explanation : JSON.stringify(r.explanation)) + ` | CACHED`
            }));
        }
        const { limit: queryLimit = 10, filters, graphConstraints, vectorParams } = options;
        // @ts-ignore
        const { topk = 5, efSearch = 50 } = vectorParams || {};
        // Fallback Mock
        return [];
    }
    /**
     * Graph-RAG: seeds from vector search, then expands the relationship
     * graph up to maxDepth hops in both directions, scoring each reached
     * entity as seed_score * (1 - 0.2 * depth). Falls back to search() on
     * any Datalog error. Not cached (unlike advancedSearch).
     */
    async graphRag(options) {
        console.error("[HybridSearch] Starting graphRag with options:", JSON.stringify(options, null, 2));
        const { query, limit = 5, filters, graphConstraints } = options;
        const maxDepth = graphConstraints?.maxDepth || 2;
        const queryEmbedding = await this.embeddingService.embed(query);
        const topk = limit * 2;
        const params = {
            query_vector: queryEmbedding,
            topk: topk,
            ef_search: 100,
            max_depth: maxDepth,
            limit: limit
        };
        let hnswFilters = [];
        const metaRules = [];
        const metaJoins = [];
        if (filters?.entityTypes && filters.entityTypes.length > 0) {
            params.allowed_types = filters.entityTypes;
            // Unlike advancedSearch, type filtering here is pushed into the
            // HNSW call itself.
            hnswFilters.push(`is_in(type, $allowed_types)`);
        }
        if (filters?.metadata) {
            // Params are registered for metadata values, but no corresponding
            // Datalog constraint is generated — metadata is filtered post-hoc.
            Object.entries(filters.metadata).forEach(([key, value], index) => {
                const paramName = `meta_val_${index}`;
                params[paramName] = value;
            });
        }
        let seedSemanticCall = `~entity:semantic{id, type, metadata | query: vec($query_vector), k: $topk, ef: $ef_search, bind_distance: dist`;
        if (hnswFilters.length > 0) {
            seedSemanticCall += `, filter: ${hnswFilters.join(" && ")}`;
        }
        seedSemanticCall += `}`;
        let seedConstraints = [seedSemanticCall];
        if (options.timeRangeHours) {
            // min_ts registered for the query; actual filtering happens below.
            const minTs = Date.now() - (options.timeRangeHours * 3600 * 1000);
            params.min_ts = minTs;
        }
        // Datalog Query for Graph-RAG:
        // 1. Find seed entities via vector search (with inline filtering)
        // 2. Explore the graph starting from seeds up to maxDepth hops
        // 3. Collect all reached entities and observations
        // 4. Calculate a combined score based on vector distance, graph distance, and PageRank
        const datalogQuery = `
            rank_val[id, r] := *entity_rank{entity_id: id, pagerank: r}
            rank_val[id, r] := *entity{id, @ "NOW"}, not *entity_rank{entity_id: id}, r = 0.0

            seeds[id, score] := ${seedConstraints.join(", ")}, score = 1.0 - dist

            path[start_id, current_id, d] := seeds[start_id, _], current_id = start_id, d = 0
            path[start_id, next_id, d_new] := path[start_id, current_id, d], *relationship{from_id: current_id, to_id: next_id}, d < $max_depth, d_new = d + 1
            path[start_id, next_id, d_new] := path[start_id, current_id, d], *relationship{to_id: current_id, from_id: next_id}, d < $max_depth, d_new = d + 1

            result_entities[id, final_score, depth] := path[seed_id, id, depth], seeds[seed_id, seed_score], rank_val[id, pr], final_score = seed_score * (1.0 - 0.2 * depth)

            ?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at}, source = 'graph_rag_entity', text = ''

            :sort -score
            :limit $limit
        `.trim();
        console.error("[HybridSearch] Graph-RAG Datalog Query:\n", datalogQuery);
        try {
            const results = await this.db.run(datalogQuery, params);
            let searchResults = results.rows.map((r) => ({
                id: r[0],
                name: r[1],
                type: r[2],
                metadata: r[3],
                created_at: Array.isArray(r[4]) ? r[4][0] : r[4],
                score: Number(r[5]) || 0,
                source: r[6],
                text: r[7] || undefined,
                explanation: {
                    source_score: r[5],
                    details: `Found via graph expansion (Source: ${r[6]})`
                }
            }));
            // Post-filtering for time range
            if (options.timeRangeHours) {
                const minTs = Date.now() - (options.timeRangeHours * 3600 * 1000);
                searchResults = searchResults.filter(r => (r.created_at || 0) > minTs);
            }
            // Post-filtering for metadata
            if (filters?.metadata) {
                searchResults = searchResults.filter(r => {
                    if (!r.metadata || typeof r.metadata !== 'object')
                        return false;
                    return Object.entries(filters.metadata).every(([key, val]) => r.metadata[key] === val);
                });
            }
            return this.applyTimeDecay(searchResults);
        }
        catch (e) {
            console.error("[HybridSearch] Error in graphRag:", e.message);
            // Fallback to normal search on error
            return this.search(options);
        }
    }
}
exports.HybridSearch = HybridSearch;
|