cozo-memory 1.2.6 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -36
- package/dist/benchmark.js +410 -132
- package/dist/db-service.test.js +313 -0
- package/dist/export-import-service.js +9 -5
- package/dist/index.js +825 -10
- package/dist/logger.test.js +75 -0
- package/dist/memory-service.test.js +222 -0
- package/dist/timestamp-utils.test.js +68 -0
- package/package.json +6 -3
package/dist/benchmark.js
CHANGED
|
@@ -8,153 +8,431 @@ const path_1 = __importDefault(require("path"));
|
|
|
8
8
|
const fs_1 = __importDefault(require("fs"));
|
|
9
9
|
const perf_hooks_1 = require("perf_hooks");
|
|
10
10
|
const BENCHMARK_DB_PATH = path_1.default.join(process.cwd(), "benchmark_db");
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
/**
 * Builds the benchmark options from environment variables and CLI flags.
 *
 * Environment defaults: BENCH_FORMAT, BENCH_RUNS, BENCH_WARMUP, BENCH_ENABLE_RERANK.
 * CLI flags (override the environment): --format, --runs, --warmup, --csv,
 * --enable-rerank, --no-rerank.
 *
 * @returns {{format: string, runs: number, warmupRuns: number, enableRerank: boolean, csvPath?: string}}
 *   `runs` is always >= 1 and `warmupRuns` always >= 0; `format` is one of
 *   "text", "json" or "markdown" (anything else falls back to "text").
 */
function parseArgs() {
    // parseInt returns NaN for non-numeric text and Math.max(min, NaN) is NaN,
    // so reject NaN explicitly before clamping.
    const toBoundedInt = (text, fallback, min) => {
        const parsed = parseInt(text, 10);
        return Number.isNaN(parsed) ? fallback : Math.max(min, parsed);
    };
    const args = process.argv.slice(2);
    const opts = {
        format: process.env.BENCH_FORMAT || "text",
        runs: toBoundedInt(process.env.BENCH_RUNS || "5", 5, 1),
        warmupRuns: toBoundedInt(process.env.BENCH_WARMUP || "2", 2, 0),
        enableRerank: (process.env.BENCH_ENABLE_RERANK || "false").toLowerCase() !== "false",
    };
    for (let i = 0; i < args.length; i++) {
        const a = args[i];
        if (a === "--format" && args[i + 1])
            opts.format = args[++i];
        else if (a === "--runs" && args[i + 1])
            // An unparsable value keeps whatever the environment/default provided.
            opts.runs = toBoundedInt(args[++i], opts.runs, 1);
        else if (a === "--warmup" && args[i + 1])
            opts.warmupRuns = toBoundedInt(args[++i], opts.warmupRuns, 0);
        else if (a === "--csv" && args[i + 1])
            opts.csvPath = args[++i];
        else if (a === "--enable-rerank")
            opts.enableRerank = true;
        else if (a === "--no-rerank")
            opts.enableRerank = false;
    }
    if (!["text", "json", "markdown"].includes(opts.format)) {
        opts.format = "text";
    }
    return opts;
}
|
|
39
|
+
/**
 * Linearly interpolated percentile of an already ascending-sorted array.
 *
 * @param {number[]} sorted - Values in ascending order.
 * @param {number} p - Fraction of the way through the data (0.5 = median).
 * @returns {number} The interpolated value, or 0 for an empty array.
 */
function percentile(sorted, p) {
    const count = sorted.length;
    if (count === 0) {
        return 0;
    }
    const rank = (count - 1) * p;
    const lower = Math.floor(rank);
    const fraction = rank - lower;
    const upperValue = sorted[lower + 1];
    // No upper neighbour: the rank sits on (or past) the last element.
    return upperValue !== undefined
        ? sorted[lower] + fraction * (upperValue - sorted[lower])
        : sorted[lower];
}
|
|
50
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values
 * @returns {number} The average, or 0 when the list is empty.
 */
function mean(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    values.forEach((value) => {
        total += value;
    });
    return total / count;
}
|
|
55
|
+
/**
 * Median of a list of numbers; the input array is not modified.
 *
 * @param {number[]} values
 * @returns {number} The 50th percentile of the values.
 */
function median(values) {
    // Sort a copy numerically so the caller's array keeps its order.
    const ascending = [...values];
    ascending.sort((left, right) => left - right);
    return percentile(ascending, 0.5);
}
|
|
59
|
+
/**
 * Sample standard deviation (divides by n - 1).
 *
 * @param {number[]} values
 * @returns {number} The standard deviation, or 0 when fewer than two values are given.
 */
function stddev(values) {
    const count = values.length;
    if (count < 2) {
        return 0;
    }
    const center = mean(values);
    const squaredError = values.reduce((acc, value) => acc + (value - center) ** 2, 0);
    const variance = squaredError / (count - 1);
    return Math.sqrt(variance);
}
|
|
66
|
+
/**
 * Formats a number with a fixed count of decimal places.
 *
 * @param {number} n - Value to format.
 * @param {number} [digits=2] - Digits after the decimal point.
 * @returns {string}
 */
function formatNum(n, digits = 2) {
    const text = n.toFixed(digits);
    return text;
}
|
|
69
|
+
/**
 * Awaits `fn` and measures how long it took with the high-resolution timer.
 *
 * @param {() => any} fn - Function to invoke; its result is awaited.
 * @returns {Promise<{result: any, ms: number}>} The awaited result and elapsed milliseconds.
 */
async function time(fn) {
    const startedAt = perf_hooks_1.performance.now();
    const result = await fn();
    const ms = perf_hooks_1.performance.now() - startedAt;
    return { result, ms };
}
|
|
75
|
+
/**
 * Runs one hybrid search and one Graph-RAG query `times` times before the
 * measured benchmarks start.
 *
 * @param {object} server - Object exposing `hybridSearch.search` and `hybridSearch.graphRag`.
 * @param {number} [times=2] - Number of warmup rounds.
 * @returns {Promise<number>} Mean duration of a round in milliseconds (0 when no round ran).
 */
async function warmupServer(server, times = 2) {
    const runRound = async () => {
        await server.hybridSearch.search({ query: "warmup benchmark", limit: 5, includeEntities: true, includeObservations: true });
        await server.hybridSearch.graphRag({ query: "warmup", limit: 5, graphConstraints: { maxDepth: 1 } });
    };
    const samples = [];
    for (let round = 0; round < times; round++) {
        const timed = await time(runRound);
        samples.push(timed.ms);
    }
    if (samples.length === 0) {
        return 0;
    }
    return mean(samples);
}
|
|
86
|
+
/**
 * Binary-relevance nDCG@k of a ranked result list against a set of expected names.
 *
 * Names are compared case-insensitively. Each expected name is credited only
 * at its first occurrence in the ranking: crediting repeated hits for the same
 * name would let DCG exceed the ideal DCG and push the score above 1.
 *
 * @param {Array<{name?: string}>} results - Ranked results, best first.
 * @param {string[]} expectedNames - Names considered relevant.
 * @param {number} k - Cut-off rank.
 * @returns {number} nDCG in [0, 1]; 0 when there are no expected names.
 */
function computeNDCG(results, expectedNames, k) {
    const expected = new Set(expectedNames.map((e) => e.toLowerCase()));
    const credited = new Set();
    const relevances = results.slice(0, k).map((r) => {
        const name = (r.name || "").toLowerCase();
        if (!expected.has(name) || credited.has(name)) {
            return 0;
        }
        credited.add(name);
        return 1;
    });
    // The ideal ranking places every distinct expected name (up to k) first.
    const idealCount = [...expected].slice(0, k).length;
    const discounted = (sum, rel, idx) => sum + rel / Math.log2(idx + 2);
    const dcg = relevances.reduce(discounted, 0);
    const idealDcg = new Array(idealCount).fill(1).reduce(discounted, 0);
    return idealDcg === 0 ? 0 : dcg / idealDcg;
}
|
|
97
|
+
/**
 * Recall@k: the share of expected names that appear (case-insensitively)
 * among the first `k` results.
 *
 * @param {Array<{name?: string}>} results - Ranked results, best first.
 * @param {string[]} expectedNames - Names that should be retrieved.
 * @param {number} k - Cut-off rank.
 * @returns {number} Fraction in [0, 1]; 0 when no names are expected.
 */
function computeRecall(results, expectedNames, k) {
    const candidates = results.slice(0, k);
    const hits = expectedNames.reduce((count, expected) => {
        const position = candidates.findIndex((item) => (item.name || "").toLowerCase() === expected.toLowerCase());
        return position === -1 ? count : count + 1;
    }, 0);
    if (!expectedNames.length) {
        return 0;
    }
    return hits / expectedNames.length;
}
|
|
102
|
+
/**
 * Reciprocal rank of the first result whose name matches (case-insensitively)
 * any expected name.
 *
 * @param {Array<{name?: string}>} results - Ranked results, best first.
 * @param {string[]} expectedNames - Names considered relevant.
 * @returns {number} 1 / rank of the first relevant result, or 0 when none matches.
 */
function computeMRR(results, expectedNames) {
    let rank = 0;
    while (rank < results.length) {
        const candidate = (results[rank].name || "").toLowerCase();
        rank += 1;
        const isRelevant = expectedNames.some((expected) => expected.toLowerCase() === candidate);
        if (isRelevant) {
            return 1 / rank;
        }
    }
    return 0;
}
|
|
111
|
+
/**
 * Seeds the server with the benchmark fixture: a small graph of organizations,
 * people, projects and technologies, dated observations about them, relations
 * between them, and 220 "Background note" distractor observations spread
 * round-robin over the created entities.
 *
 * The returned operation counts are tallied from the calls actually made, so
 * they stay correct when the fixture changes (they feed the ms/op and
 * throughput figures computed by the caller).
 *
 * @param {object} server - Object exposing async `createEntity`, `addObservation`
 *   and `createRelation`; `createEntity` must resolve to an object with an `id`.
 * @returns {Promise<{entities: object[], NUM_ENTITIES: number, NUM_OBSERVATIONS: number, NUM_RELATIONS: number}>}
 */
async function seedData(server) {
    const entities = [];
    let observationCount = 0;
    let relationCount = 0;
    const addEntity = async (name, type, metadata) => {
        const entity = await server.createEntity({ name, type, metadata });
        entities.push(entity);
        return entity;
    };
    const acme = await addEntity("Acme Corp", "Organization", {});
    const openai = await addEntity("OpenAI", "Organization", {});
    const google = await addEntity("Google", "Organization", {});
    const samOpenAI = await addEntity("Sam Altman", "Person", {});
    const samAcme = await addEntity("Sam Brown", "Person", {});
    const aliceGoogle = await addEntity("Alice Chen", "Person", {});
    const aliceAcme = await addEntity("Alice Walker", "Person", {});
    const bobEngineer = await addEntity("Bob Martinez", "Person", {});
    const projectX = await addEntity("Project X", "Project", {});
    const projectY = await addEntity("Project Y", "Project", {});
    const datalog = await addEntity("Datalog", "Technology", {});
    const python = await addEntity("Python", "Technology", {});
    const rust = await addEntity("Rust", "Technology", {});
    const oldInitiative = await addEntity("Legacy Initiative", "Project", {});
    const newInitiative = await addEntity("Cloud Initiative", "Project", {});
    const obs = async (entityId, text, metadata) => {
        const created = await server.addObservation({ entity_id: entityId, text, metadata });
        observationCount += 1;
        return created;
    };
    const rel = async (fromId, toId, relationType, strength = 0.9) => {
        const created = await server.createRelation({ from_id: fromId, to_id: toId, relation_type: relationType, strength });
        relationCount += 1;
        return created;
    };
    await obs(samOpenAI.id, "Sam Altman is the CEO of OpenAI since 2019.", { year: 2019 });
    await obs(samOpenAI.id, "Sam Altman briefly joined Acme Corp as advisor in 2023.", { year: 2023 });
    await obs(samOpenAI.id, "Sam Altman returned to OpenAI full-time in late 2023.", { year: 2023 });
    await obs(samAcme.id, "Sam Brown is the CFO of Acme Corp since 2021.", { year: 2021 });
    await obs(aliceGoogle.id, "Alice Chen works at Google on search ranking.", { year: 2022 });
    await obs(aliceGoogle.id, "Alice Chen moved to Acme Corp as VP Engineering in 2024.", { year: 2024 });
    await obs(aliceAcme.id, "Alice Walker is a product manager at Acme Corp.", { year: 2020 });
    await obs(bobEngineer.id, "Bob Martinez is a senior engineer on Project X.", { year: 2022 });
    await obs(bobEngineer.id, "Bob Martinez switched from Python to Rust in 2024.", { year: 2024 });
    await obs(projectX.id, "Project X is Acme Corp's internal search engine.", { year: 2021 });
    await obs(projectX.id, "Project X is being rewritten in Rust.", { year: 2024 });
    await obs(projectY.id, "Project Y is Acme Corp's data lake.", { year: 2022 });
    await obs(projectY.id, "Project Y was paused in 2024.", { year: 2024 });
    await obs(acme.id, "Acme Corp acquired a Datalog startup in 2022.", { year: 2022 });
    await obs(acme.id, "Acme Corp is headquartered in Berlin.", { year: 2020 });
    await obs(datalog.id, "Datalog is used inside Project X for policy rules.", { year: 2023 });
    await obs(datalog.id, "Datalog was replaced by SQL in Project Y.", { year: 2024 });
    await obs(oldInitiative.id, "Legacy Initiative was cancelled in 2023.", { year: 2023 });
    await obs(newInitiative.id, "Cloud Initiative started in 2024.", { year: 2024 });
    await rel(samOpenAI.id, openai.id, "works_at", 0.95);
    await rel(samOpenAI.id, acme.id, "advised", 0.7);
    await rel(samAcme.id, acme.id, "works_at", 0.95);
    await rel(aliceGoogle.id, google.id, "works_at", 0.9);
    await rel(aliceGoogle.id, acme.id, "works_at", 0.95);
    await rel(aliceAcme.id, acme.id, "works_at", 0.95);
    await rel(bobEngineer.id, projectX.id, "works_on", 0.95);
    await rel(bobEngineer.id, projectY.id, "works_on", 0.4);
    await rel(projectX.id, datalog.id, "uses_tech", 0.9);
    await rel(projectX.id, rust.id, "uses_tech", 0.85);
    await rel(projectY.id, python.id, "uses_tech", 0.8);
    await rel(acme.id, oldInitiative.id, "owns", 0.7);
    await rel(acme.id, newInitiative.id, "owns", 0.9);
    const distractors = [];
    for (let i = 0; i < 220; i++) {
        distractors.push(`Background note ${i}: noise about ${i % 5 === 0 ? 'Paris' : i % 5 === 1 ? 'Tokyo' : i % 5 === 2 ? 'finance' : i % 5 === 3 ? 'marketing' : 'logistics'} seed ${i}.`);
    }
    for (let i = 0; i < distractors.length; i++) {
        const target = entities[i % entities.length];
        // Distractors are written without a metadata field, unlike the dated facts above.
        await server.addObservation({ entity_id: target.id, text: distractors[i] });
        observationCount += 1;
    }
    return { entities, NUM_ENTITIES: entities.length, NUM_OBSERVATIONS: observationCount, NUM_RELATIONS: relationCount };
}
|
|
177
|
+
/**
 * Evaluates retrieval quality and latency for each search method over a fixed
 * set of labelled queries.
 *
 * Every method is executed `warmupRuns + runs` times; the cache is cleared
 * before each pass and only the passes after the warmup are recorded. Per
 * pass, each metric is averaged over the queries; the reported figures are
 * the mean of those per-pass averages, and the latency p50/p95 are taken over
 * the per-pass average latencies.
 *
 * @param {object} server - Object exposing `hybridSearch.search`, `hybridSearch.graphRag`,
 *   `hybridSearch.clearCache` and `graph_walking`.
 * @param {number} runs - Number of recorded passes per method.
 * @param {number} warmupRuns - Number of unrecorded passes per method.
 * @param {{enableRerank: boolean}} opts - When `enableRerank` is set, the reranked variants are evaluated too.
 * @returns {Promise<Array<{method: string, recallAt10: number, recallAt3: number, mrr: number, ndcgAt10: number, avgLatencyMs: number, p50LatencyMs: number, p95LatencyMs: number}>>}
 */
async function measureRecall(server, runs, warmupRuns, opts) {
    const tasks = [
        { query: "Who works at OpenAI?", expected: ["Sam Altman", "OpenAI"], type: "factual" },
        { query: "Current CEO of OpenAI", expected: ["Sam Altman"], type: "factual" },
        { query: "Alice engineering manager Acme", expected: ["Alice Walker", "Alice Chen"], type: "ambiguous" },
        { query: "Who is Bob's colleague on the search engine project?", expected: ["Project X"], type: "relational" },
        { query: "Project using Datalog and Rust", expected: ["Project X"], type: "multi-hop" },
        { query: "Technology switched by Bob in 2024", expected: ["Rust", "Python"], type: "temporal" },
        { query: "Current Acme active initiative 2024", expected: ["Cloud Initiative"], type: "temporal" },
        { query: "Acme acquisition technology 2022", expected: ["Datalog"], type: "multi-hop" },
        { query: "Sam Altman Acme advisor", expected: ["Sam Altman", "Acme Corp"], type: "relational" },
        { query: "Person VP Engineering Acme 2024", expected: ["Alice Chen", "Alice Walker"], type: "temporal" },
    ];
    const methods = [
        { name: "Hybrid Search", fn: (q) => server.hybridSearch.search({ query: q, limit: 10, includeEntities: true, includeObservations: true }) },
        { name: "Graph-RAG", fn: (q) => server.hybridSearch.graphRag({ query: q, limit: 10, graphConstraints: { maxDepth: 2 } }) },
        { name: "Graph-Walking", fn: (q) => server.graph_walking({ query: q, limit: 10, max_depth: 3 }) },
    ];
    if (opts.enableRerank) {
        methods.push({ name: "Reranked Search", fn: (q) => server.hybridSearch.search({ query: q, limit: 10, rerank: true, includeEntities: true, includeObservations: true }) }, { name: "Graph-RAG (Reranked)", fn: (q) => server.hybridSearch.graphRag({ query: q, limit: 10, graphConstraints: { maxDepth: 2 }, rerank: true }) });
    }
    const taskCount = tasks.length;
    const totalPasses = warmupRuns + runs;
    const report = [];
    for (const { name, fn } of methods) {
        const samples = { recall10: [], recall3: [], mrr: [], ndcg10: [], latency: [] };
        for (let pass = 0; pass < totalPasses; pass++) {
            await server.hybridSearch.clearCache();
            const totals = { recall10: 0, recall3: 0, mrr: 0, ndcg10: 0, latency: 0 };
            for (const { query, expected } of tasks) {
                const timed = await time(() => fn(query));
                totals.recall10 += computeRecall(timed.result, expected, 10);
                totals.recall3 += computeRecall(timed.result, expected, 3);
                totals.mrr += computeMRR(timed.result, expected);
                totals.ndcg10 += computeNDCG(timed.result, expected, 10);
                totals.latency += timed.ms;
            }
            // Warmup passes execute the same work but are left out of the statistics.
            if (pass < warmupRuns) {
                continue;
            }
            samples.recall10.push(totals.recall10 / taskCount);
            samples.recall3.push(totals.recall3 / taskCount);
            samples.mrr.push(totals.mrr / taskCount);
            samples.ndcg10.push(totals.ndcg10 / taskCount);
            samples.latency.push(totals.latency / taskCount);
        }
        const ascendingLatency = [...samples.latency].sort((left, right) => left - right);
        report.push({
            method: name,
            recallAt10: mean(samples.recall10),
            recallAt3: mean(samples.recall3),
            mrr: mean(samples.mrr),
            ndcgAt10: mean(samples.ndcg10),
            avgLatencyMs: mean(samples.latency),
            p50LatencyMs: median(samples.latency),
            p95LatencyMs: percentile(ascendingLatency, 0.95),
        });
    }
    return report;
}
|
|
239
|
+
/**
 * Runs the complete benchmark: initializes a fresh MemoryServer on the
 * benchmark database, seeds the fixture data, measures query latency for each
 * search method, evaluates recall/quality, prints the summary in the requested
 * format and optionally writes the recall table as CSV.
 *
 * The database handle is closed and the database file removed in a `finally`
 * block, so a failing benchmark step no longer leaves the handle open or the
 * file behind.
 *
 * @param {{format: string, runs: number, warmupRuns: number, enableRerank: boolean, csvPath?: string}} opts
 *   Options as produced by `parseArgs`.
 * @returns {Promise<void>}
 */
async function runBenchmark(opts) {
    console.log(`🚀 Starting Performance Benchmark (runs=${opts.runs}, warmup=${opts.warmupRuns}, format=${opts.format})`);
    const dbFile = BENCHMARK_DB_PATH + ".db";
    const removeDbFile = () => {
        if (fs_1.default.existsSync(dbFile)) {
            fs_1.default.unlinkSync(dbFile);
        }
    };
    const toMB = (bytes) => bytes / 1024 / 1024;
    // avg / p50 / p95 of a list of latency samples (percentile needs sorted input).
    const latencyStats = (samples) => ({
        avg: mean(samples),
        p50: median(samples),
        p95: percentile(samples.slice().sort((a, b) => a - b), 0.95),
    });
    // Start from a clean database so earlier runs cannot influence the numbers.
    removeDbFile();
    const envStart = perf_hooks_1.performance.now();
    console.log("• Initializing Server & Loading Embedding Model...");
    const server = new index_1.MemoryServer(BENCHMARK_DB_PATH);
    try {
        await server.initPromise;
        const embedWarmupStart = perf_hooks_1.performance.now();
        await server.embeddingService.embed("benchmark-warmup");
        const embedWarmupEnd = perf_hooks_1.performance.now();
        const initMs = embedWarmupEnd - envStart;
        const firstEmbeddingMs = embedWarmupEnd - embedWarmupStart;
        console.log(` -> Init + Warmup: ${formatNum(initMs)}ms`);
        console.log(` -> First embedding: ${formatNum(firstEmbeddingMs)}ms`);
        const memAfterInit = process.memoryUsage();
        console.log(`\n• Seeding Data...`);
        const seedStart = perf_hooks_1.performance.now();
        const { entities, NUM_ENTITIES, NUM_OBSERVATIONS, NUM_RELATIONS } = await seedData(server);
        const seedEnd = perf_hooks_1.performance.now();
        const totalOps = NUM_ENTITIES + NUM_OBSERVATIONS + NUM_RELATIONS;
        const ingestionTotalMs = seedEnd - seedStart;
        const ingestionAvgMs = ingestionTotalMs / totalOps;
        console.log(` -> Data Ingestion: ${formatNum(ingestionTotalMs)}ms (${formatNum(ingestionAvgMs)} ms/op)`);
        const memAfterData = process.memoryUsage();
        await warmupServer(server, opts.warmupRuns);
        console.log("\n• Running Query Benchmarks...");
        const queries = [
            "observation number 10",
            "alpha beta gamma",
            `Entity_${NUM_ENTITIES - 1}`,
            "Project related",
            "delta observation keywords",
            "colleague technology",
            "Bob Alice relation",
            "works on project",
            "Senior Engineer Berlin",
            "graph traversal seed",
        ];
        const runs = { hybrid: [], reranked: [], graphRag: [], graphWalking: [], rawVector: [] };
        for (let r = 0; r < opts.runs; r++) {
            await server.hybridSearch.clearCache();
            for (const q of queries) {
                const hybridMs = (await time(() => server.hybridSearch.search({
                    query: q,
                    limit: 10,
                    includeEntities: true,
                    includeObservations: true,
                }))).ms;
                runs.hybrid.push(hybridMs);
                if (opts.enableRerank) {
                    const rerankedMs = (await time(() => server.hybridSearch.search({
                        query: q,
                        limit: 10,
                        rerank: true,
                        includeEntities: true,
                        includeObservations: true,
                    }))).ms;
                    runs.reranked.push(rerankedMs);
                }
                const graphRagMs = (await time(() => server.hybridSearch.graphRag({
                    query: q,
                    limit: 10,
                    graphConstraints: { maxDepth: 2 },
                }))).ms;
                runs.graphRag.push(graphRagMs);
                // The walk's start entity rotates with the run index, not the query.
                const startEntityId = entities[r % entities.length].id;
                const walkMs = (await time(() => server.graph_walking({
                    query: q,
                    start_entity_id: startEntityId,
                    max_depth: 3,
                    limit: 10,
                }))).ms;
                runs.graphWalking.push(walkMs);
            }
            // The embedding is computed outside the timed section so only the
            // index lookup itself is measured.
            const qEmb = await server.embeddingService.embed("benchmark-vector-baseline");
            const vectorMs = (await time(() => server.db.run(`?[id, score] := ~entity:semantic { id | query: vec($qEmb), k: 10, ef: 20 }, score = 1.0`, { qEmb }))).ms;
            runs.rawVector.push(vectorMs);
        }
        console.log("\n• Running Recall Evaluation...");
        const recall = await measureRecall(server, opts.runs, opts.warmupRuns, opts);
        const memFinal = process.memoryUsage();
        console.log(`\n• Final Memory Stats:`);
        console.log(` -> RSS Init: ${formatNum(toMB(memAfterInit.rss))} MB`);
        console.log(` -> RSS After Data: ${formatNum(toMB(memAfterData.rss))} MB`);
        console.log(` -> RSS Final: ${formatNum(toMB(memFinal.rss))} MB`);
        console.log(` -> Heap Used Final: ${formatNum(toMB(memFinal.heapUsed))} MB`);
        const summary = {
            environment: {
                nodeVersion: process.version,
                platform: process.platform,
                timestamp: new Date().toISOString(),
                embeddingModel: process.env.EMBEDDING_MODEL || "Xenova/bge-m3",
                dbEngine: process.env.DB_ENGINE || "sqlite",
            },
            warmup: {
                initMs,
                firstEmbeddingMs,
            },
            ingestion: {
                totalMs: ingestionTotalMs,
                avgPerOpMs: ingestionAvgMs,
                throughputOpsPerSec: 1000 / ingestionAvgMs,
            },
            memory: {
                rssAfterInitMB: toMB(memAfterInit.rss),
                rssAfterDataMB: toMB(memAfterData.rss),
                rssFinalMB: toMB(memFinal.rss),
                heapUsedFinalMB: toMB(memFinal.heapUsed),
            },
            queries: {
                rawVectorMs: latencyStats(runs.rawVector),
                hybrid: latencyStats(runs.hybrid),
                reranked: latencyStats(runs.reranked),
                graphRag: latencyStats(runs.graphRag),
                graphWalking: latencyStats(runs.graphWalking),
            },
            recall,
        };
        // "markdown" and "text" share the same rendered report; only "json" differs.
        if (opts.format === "json") {
            console.log(JSON.stringify(summary, null, 2));
        }
        else {
            console.log(renderSummary(summary, opts));
        }
        if (opts.csvPath && recall.length) {
            const header = ["method", "recall_at_10", "recall_at_3", "mrr", "ndcg_at_10", "avg_latency_ms", "p50_latency_ms", "p95_latency_ms"];
            const rows = recall.map(r => [r.method, r.recallAt10.toFixed(4), r.recallAt3.toFixed(4), r.mrr.toFixed(4), r.ndcgAt10.toFixed(4), r.avgLatencyMs.toFixed(2), r.p50LatencyMs.toFixed(2), r.p95LatencyMs.toFixed(2)]);
            const csv = [header.join(","), ...rows.map(r => r.join(","))].join("\n");
            fs_1.default.writeFileSync(opts.csvPath, csv);
            console.log(`\n• CSV written to: ${opts.csvPath}`);
        }
    }
    finally {
        // server.db may be missing if initialization itself failed.
        if (server.db) {
            server.db.close();
        }
        removeDbFile();
    }
}
|
|
160
|
-
|
|
382
|
+
/**
 * Renders the benchmark summary as a plain-text / markdown-style report.
 *
 * @param {object} s - Summary object with `environment`, `warmup`, `ingestion`,
 *   `memory`, `queries` and `recall` sections.
 * @param {{runs: number, warmupRuns: number, enableRerank: boolean}} opts -
 *   Benchmark options; the reranked latency row is emitted only when
 *   `enableRerank` is set.
 * @returns {string} The report, lines joined with "\n".
 */
function renderSummary(s, opts) {
    const rule = "==================================================";
    const latencyCell = (value) => formatNum(value, 2).padEnd(8);
    const latencyRow = (label, stat) => `| ${label} | ${latencyCell(stat.avg)} | ${latencyCell(stat.p50)} | ${latencyCell(stat.p95)} |`;
    const latencyRows = [
        latencyRow("Raw Vector", s.queries.rawVectorMs),
        latencyRow("Hybrid Search", s.queries.hybrid),
    ];
    if (opts.enableRerank) {
        latencyRows.push(latencyRow("Reranked Search", s.queries.reranked));
    }
    latencyRows.push(latencyRow("Graph-RAG", s.queries.graphRag));
    latencyRows.push(latencyRow("Graph-Walking", s.queries.graphWalking));
    const recallRows = [];
    for (const r of s.recall) {
        const cells = [
            r.method.padEnd(21),
            r.recallAt10.toFixed(3).padStart(9),
            r.recallAt3.toFixed(3).padStart(8),
            r.mrr.toFixed(3).padStart(5),
            r.ndcgAt10.toFixed(3).padStart(7),
            formatNum(r.avgLatencyMs, 2).padStart(11),
            formatNum(r.p50LatencyMs, 2).padStart(11),
            formatNum(r.p95LatencyMs, 2).padStart(11),
        ];
        recallRows.push(`| ${cells.join(" | ")} |`);
    }
    const lines = [
        rule,
        "CozoDB Memory Benchmark Results",
        rule,
        `Environment: ${s.environment.nodeVersion} on ${s.environment.platform}`,
        `Timestamp: ${s.environment.timestamp}`,
        `Embedding: ${s.environment.embeddingModel}`,
        `DB Engine: ${s.environment.dbEngine}`,
        `Runs: ${opts.runs} Warmup: ${opts.warmupRuns}`,
        "",
        "## Warmup",
        `- Init + Warmup: ${formatNum(s.warmup.initMs)} ms`,
        `- First embedding: ${formatNum(s.warmup.firstEmbeddingMs)} ms`,
        "",
        "## Ingestion",
        `- Total ingestion: ${formatNum(s.ingestion.totalMs)} ms`,
        `- Avg per operation: ${formatNum(s.ingestion.avgPerOpMs)} ms`,
        `- Throughput: ${formatNum(s.ingestion.throughputOpsPerSec)} ops/sec`,
        "",
        "## Memory",
        `- RSS after init: ${formatNum(s.memory.rssAfterInitMB)} MB`,
        `- RSS after data load: ${formatNum(s.memory.rssAfterDataMB)} MB`,
        `- RSS final: ${formatNum(s.memory.rssFinalMB)} MB`,
        `- Heap used final: ${formatNum(s.memory.heapUsedFinalMB)} MB`,
        "",
        "## Query Latency (ms)",
        "| Method | Avg | P50 | P95 |",
        "|------------------|----------|----------|----------|",
        ...latencyRows,
        "",
        `## Recall & Quality (Mean across ${opts.runs} runs)`,
        "| Method | Recall@10 | Recall@3 | MRR | nDCG@10 | Avg Latency | P50 Latency | P95 Latency |",
        "|-----------------------|-----------|----------|-------|---------|-------------|-------------|-------------|",
        ...recallRows,
        "",
        "## Benchmarking external systems",
        "Recall/Quality numbers for Chroma, Qdrant, Mem0 are not generated by this script.",
        "Fill the comparison table in docs/BENCHMARKS.md only from published/public benchmarks.",
        "Do not insert internal estimates into the publication table.",
        "",
        rule,
    ];
    return lines.join("\n");
}
|
|
434
|
+
// Script entry point: parse the options, run the benchmark, and exit with a
// non-zero status if any step rejects.
const opts = parseArgs();
const reportFailure = (err) => {
    console.error("Benchmark failed:", err);
    process.exit(1);
};
runBenchmark(opts).catch(reportFailure);
|