cozo-memory 1.1.3 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,520 @@
1
+ "use strict";
2
+ /**
3
+ * GraphRAG-R1 Inspired Adaptive Retrieval System
4
+ *
5
+ * Based on: Yu et al., "GraphRAG-R1: Graph Retrieval-Augmented Generation
6
+ * with Process-Constrained Reinforcement Learning" (WWW 2026)
7
+ *
8
+ * Key Innovations:
9
+ * 1. Strategy Performance Tracking
10
+ * 2. Progressive Retrieval Attenuation (PRA)
11
+ * 3. Cost-Aware F1 (CAF) Scoring
12
+ * 4. Adaptive Strategy Selection
13
+ */
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.AdaptiveGraphRetrieval = exports.QueryComplexity = exports.RetrievalStrategy = void 0;
16
/**
 * Retrieval strategies the adaptive selector can choose between.
 * Keys are the enum member names; values are the string identifiers used
 * as map keys and persisted in the performance table.
 */
const RetrievalStrategy = {
    VECTOR_ONLY: "vector_only",
    GRAPH_WALK: "graph_walk",
    HYBRID_FUSION: "hybrid_fusion",
    COMMUNITY_EXPANSION: "community_expansion",
    SEMANTIC_WALK: "semantic_walk",
};
exports.RetrievalStrategy = RetrievalStrategy;
25
/**
 * Query complexity classes produced by the heuristic classifier and used
 * to bias strategy selection.
 */
const QueryComplexity = {
    SIMPLE: "simple",
    MODERATE: "moderate",
    COMPLEX: "complex",
    // Open-ended, broad search
    EXPLORATORY: "exploratory",
};
exports.QueryComplexity = QueryComplexity;
33
/**
 * Adaptive retrieval over a CozoDB-backed entity graph.
 *
 * For each query the class classifies its complexity with keyword
 * heuristics, picks a retrieval strategy (epsilon-greedy over recorded
 * per-strategy statistics), runs it, and scores the outcome with the
 * Progressive Retrieval Attenuation (PRA) reward and the Cost-Aware F1 (CAF)
 * score. Per-strategy statistics are kept in memory and persisted in the
 * `adaptive_retrieval_performance` stored relation.
 *
 * All diagnostics go to `console.error`.
 */
class AdaptiveGraphRetrieval {
    db;
    embeddingService;
    strategyPerformance;
    config;
    // Performance tracking table name
    PERF_TABLE = 'adaptive_retrieval_performance';
    // Promise that settles once the tracking table exists and stored
    // statistics have been loaded. It never rejects (initialisation logs
    // and swallows its own errors), so it is always safe to await.
    ready;
    /**
     * @param db CozoDB handle exposing `run(script, params)` that resolves to `{ rows }`.
     * @param embeddingService Object exposing `embed(text)` that resolves to a query vector.
     * @param config Optional overrides for the defaults listed below.
     */
    constructor(db, embeddingService, config) {
        this.db = db;
        this.embeddingService = embeddingService;
        this.strategyPerformance = new Map();
        this.config = {
            enablePRA: true,
            enableCAF: true,
            maxRetrievalCalls: 5,
            explorationRate: 0.1, // 10% exploration
            decayFactor: 0.8, // PRA decay
            costPenalty: 0.1, // CAF penalty per retrieval
            ...config
        };
        // Constructors cannot await; keep the promise so that later calls can
        // wait for the load instead of racing against it.
        this.ready = this.initializePerformanceTracking();
    }
    /**
     * Turn any thrown value into a printable message.
     * Rejections are not guaranteed to be `Error` instances.
     */
    describeError(error) {
        return error?.message ?? String(error);
    }
    /**
     * Map `[id, name, type, score]` result rows to entity result objects.
     */
    rowsToEntities(rows) {
        return rows.map(([id, name, type, score]) => ({ id, name, type, score }));
    }
    /**
     * Initialize performance tracking table in CozoDB.
     * Creates the relation when it is missing, then loads stored statistics.
     * Errors are logged, never thrown.
     */
    async initializePerformanceTracking() {
        try {
            // Check if table exists
            const relations = await this.db.run('::relations');
            const tableExists = relations.rows.some((r) => r[0] === this.PERF_TABLE);
            if (!tableExists) {
                await this.db.run(`
                    :create ${this.PERF_TABLE} {
                        strategy: String =>
                        success_count: Int,
                        total_count: Int,
                        avg_f1_score: Float,
                        avg_retrieval_cost: Float,
                        avg_latency: Float,
                        last_used: Int
                    }
                `);
                console.error('[AdaptiveRetrieval] Performance tracking table created');
            }
            // Load existing performance data
            await this.loadPerformanceData();
        }
        catch (error) {
            console.error('[AdaptiveRetrieval] Error initializing:', this.describeError(error));
        }
    }
    /**
     * Load performance data from database into the in-memory map.
     * Errors are logged, never thrown.
     */
    async loadPerformanceData() {
        try {
            const result = await this.db.run(`
                ?[strategy, success_count, total_count, avg_f1_score, avg_retrieval_cost, avg_latency, last_used] :=
                    *${this.PERF_TABLE}{
                        strategy,
                        success_count,
                        total_count,
                        avg_f1_score,
                        avg_retrieval_cost,
                        avg_latency,
                        last_used
                    }
            `);
            for (const row of result.rows) {
                const [strategy, successCount, totalCount, avgF1, avgCost, avgLatency, lastUsed] = row;
                this.strategyPerformance.set(strategy, {
                    strategy,
                    successCount: Number(successCount),
                    totalCount: Number(totalCount),
                    avgF1Score: Number(avgF1),
                    avgRetrievalCost: Number(avgCost),
                    avgLatency: Number(avgLatency),
                    lastUsed: Number(lastUsed)
                });
            }
            console.error(`[AdaptiveRetrieval] Loaded ${result.rows.length} strategy performance records`);
        }
        catch (error) {
            console.error('[AdaptiveRetrieval] Error loading performance data:', this.describeError(error));
        }
    }
    /**
     * Classify query complexity using heuristics.
     * In production, this could use an LLM classifier.
     *
     * @param query Free-text query; `null`/`undefined` is treated as empty.
     * @returns One of the `QueryComplexity` values.
     */
    classifyQueryComplexity(query) {
        const text = String(query ?? '').toLowerCase();
        // Strip punctuation from token edges so "what?" or "explore," still
        // match the keyword lists below.
        const words = text
            .split(/\s+/)
            .map((token) => token.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
            .filter((token) => token.length > 0);
        const questionWords = ['who', 'what', 'where', 'when', 'why', 'how'];
        const complexIndicators = ['relationship', 'connection', 'compare', 'analyze', 'explain'];
        const questionWordCount = questionWords.filter((w) => words.includes(w)).length;
        // Simple: Short, single question word
        if (words.length < 8 && questionWordCount === 1) {
            return QueryComplexity.SIMPLE;
        }
        // Complex: Contains complex indicators or multiple question words
        if (complexIndicators.some((ind) => text.includes(ind)) || questionWordCount > 1) {
            return QueryComplexity.COMPLEX;
        }
        // Exploratory: Broad, open-ended
        if (words.includes('all') || words.includes('everything') || words.includes('explore')) {
            return QueryComplexity.EXPLORATORY;
        }
        // Everything else, including multi-hop phrasing, is moderate.
        return QueryComplexity.MODERATE;
    }
    /**
     * Select best strategy based on query complexity and historical performance.
     * Implements epsilon-greedy exploration.
     *
     * @param complexity A `QueryComplexity` value.
     * @returns A `RetrievalStrategy` value.
     */
    selectStrategy(complexity) {
        const strategies = Object.values(RetrievalStrategy);
        // Exploration: Random strategy
        if (Math.random() < this.config.explorationRate) {
            return strategies[Math.floor(Math.random() * strategies.length)];
        }
        // Strategies that receive a 20% boost for the given complexity.
        const preferredByComplexity = {
            [QueryComplexity.SIMPLE]: [RetrievalStrategy.VECTOR_ONLY],
            [QueryComplexity.MODERATE]: [RetrievalStrategy.HYBRID_FUSION],
            [QueryComplexity.COMPLEX]: [RetrievalStrategy.GRAPH_WALK, RetrievalStrategy.SEMANTIC_WALK],
            [QueryComplexity.EXPLORATORY]: [RetrievalStrategy.COMMUNITY_EXPANSION]
        };
        const preferred = preferredByComplexity[complexity] ?? [];
        const now = Date.now();
        // Exploitation: Best strategy for complexity
        let bestStrategy = RetrievalStrategy.HYBRID_FUSION;
        let bestScore = 0;
        for (const strategy of strategies) {
            const perf = this.strategyPerformance.get(strategy);
            let score;
            if (!perf || perf.totalCount === 0) {
                // No data: Give moderate score to encourage exploration
                score = 0.5;
            }
            else {
                // Score from success rate, retrieval cost, and recency
                const successRate = perf.successCount / perf.totalCount;
                const costEfficiency = 1 / (1 + perf.avgRetrievalCost);
                const recencyBonus = (now - perf.lastUsed) < 3600000 ? 0.1 : 0; // 1 hour
                score = (successRate * 0.6) + (costEfficiency * 0.3) + recencyBonus;
            }
            // The complexity boost also applies to the no-data prior, so a cold
            // start still prefers the strategy suited to the query.
            if (preferred.includes(strategy)) {
                score *= 1.2;
            }
            // Strict comparison: on ties the earlier strategy wins.
            if (score > bestScore) {
                bestScore = score;
                bestStrategy = strategy;
            }
        }
        return bestStrategy;
    }
    /**
     * Execute retrieval with selected strategy.
     * Never throws: on failure the error is logged and an empty result set
     * is returned together with the calls counted so far.
     *
     * @param strategy A `RetrievalStrategy` value.
     * @param query Free-text query.
     * @param limit Maximum number of results (default 10).
     * @returns `{ results, strategy, retrievalCount, latency }`, latency in ms.
     */
    async executeStrategy(strategy, query, limit = 10) {
        const startTime = Date.now();
        let results = [];
        let retrievalCount = 0;
        try {
            const queryEmbedding = await this.embeddingService.embed(query);
            retrievalCount++;
            // `extraCalls` is the cost added on top of the embedding call and
            // is only counted once the strategy has completed.
            const handlers = {
                [RetrievalStrategy.VECTOR_ONLY]: {
                    extraCalls: 0,
                    run: () => this.vectorSearch(queryEmbedding, limit)
                },
                [RetrievalStrategy.GRAPH_WALK]: {
                    extraCalls: 2, // Additional graph traversal
                    run: () => this.graphWalkSearch(query, queryEmbedding, limit)
                },
                [RetrievalStrategy.HYBRID_FUSION]: {
                    extraCalls: 3, // Vector + FTS + Graph
                    run: () => this.hybridFusionSearch(query, queryEmbedding, limit)
                },
                [RetrievalStrategy.COMMUNITY_EXPANSION]: {
                    extraCalls: 2, // Vector + Community
                    run: () => this.communityExpansionSearch(queryEmbedding, limit)
                },
                [RetrievalStrategy.SEMANTIC_WALK]: {
                    extraCalls: 3, // Multi-hop traversal
                    run: () => this.semanticWalkSearch(query, queryEmbedding, limit)
                }
            };
            const handler = Object.hasOwn(handlers, strategy) ? handlers[strategy] : undefined;
            if (!handler) {
                // Surface misuse in the log; the caller still gets an empty result.
                throw new Error(`Unknown retrieval strategy: ${strategy}`);
            }
            results = await handler.run();
            retrievalCount += handler.extraCalls;
            return {
                results,
                strategy,
                retrievalCount,
                latency: Date.now() - startTime
            };
        }
        catch (error) {
            console.error(`[AdaptiveRetrieval] Strategy ${strategy} failed:`, this.describeError(error));
            return {
                results: [],
                strategy,
                retrievalCount,
                latency: Date.now() - startTime
            };
        }
    }
    /**
     * Progressive Retrieval Attenuation (PRA) Reward.
     * Encourages essential retrievals, penalizes excessive ones.
     *
     * @param retrievalCount Number of retrieval calls made.
     * @returns Reward in [0.1, 1]; always 1 when PRA is disabled.
     */
    calculatePRAReward(retrievalCount) {
        if (!this.config.enablePRA) {
            return 1.0;
        }
        // The first retrieval is free. Clamp the exponent so a count of 0
        // cannot push the reward above 1.
        const extraRetrievals = Math.max(0, retrievalCount - 1);
        // Reward decreases exponentially with retrieval count
        const reward = Math.pow(this.config.decayFactor, extraRetrievals);
        return Math.max(0.1, reward); // Minimum 0.1
    }
    /**
     * Cost-Aware F1 (CAF) Score.
     * Balances answer quality with computational cost.
     *
     * @param f1Score Quality score of the answer.
     * @param retrievalCount Number of retrieval calls made.
     * @returns `f1Score` scaled by `exp(-costPenalty * retrievalCount)`, or
     *          `f1Score` unchanged when CAF is disabled.
     */
    calculateCAFScore(f1Score, retrievalCount) {
        if (!this.config.enableCAF) {
            return f1Score;
        }
        // Exponentially decaying penalty for retrieval calls
        const costPenalty = Math.exp(-this.config.costPenalty * retrievalCount);
        return f1Score * costPenalty;
    }
    /**
     * Update strategy performance based on feedback and persist the record.
     * Persistence errors are logged, never thrown.
     *
     * @param strategy A `RetrievalStrategy` value.
     * @param f1Score Observed quality score.
     * @param retrievalCount Retrieval calls used.
     * @param latency Observed latency in ms.
     * @param success Whether the retrieval counts as a success.
     */
    async updateStrategyPerformance(strategy, f1Score, retrievalCount, latency, success) {
        // Wait for the initial load so it cannot overwrite this update.
        await this.ready;
        const previous = this.strategyPerformance.get(strategy) ?? {
            strategy,
            successCount: 0,
            totalCount: 0,
            avgF1Score: 0,
            avgRetrievalCost: 0,
            avgLatency: 0,
            lastUsed: Date.now()
        };
        // Update counts and running averages
        const n = previous.totalCount + 1;
        const perf = {
            strategy: previous.strategy,
            successCount: previous.successCount + (success ? 1 : 0),
            totalCount: n,
            avgF1Score: ((previous.avgF1Score * (n - 1)) + f1Score) / n,
            avgRetrievalCost: ((previous.avgRetrievalCost * (n - 1)) + retrievalCount) / n,
            avgLatency: ((previous.avgLatency * (n - 1)) + latency) / n,
            lastUsed: Date.now()
        };
        this.strategyPerformance.set(strategy, perf);
        // Persist to database
        try {
            await this.db.run(`
                ?[strategy, success_count, total_count, avg_f1_score, avg_retrieval_cost, avg_latency, last_used] <-
                    [[$strategy, $success_count, $total_count, $avg_f1_score, $avg_retrieval_cost, $avg_latency, $last_used]]
                :put ${this.PERF_TABLE} {
                    strategy => success_count, total_count, avg_f1_score, avg_retrieval_cost, avg_latency, last_used
                }
            `, {
                strategy,
                success_count: perf.successCount,
                total_count: perf.totalCount,
                avg_f1_score: perf.avgF1Score,
                avg_retrieval_cost: perf.avgRetrievalCost,
                avg_latency: perf.avgLatency,
                last_used: perf.lastUsed
            });
        }
        catch (error) {
            console.error('[AdaptiveRetrieval] Error persisting performance:', this.describeError(error));
        }
    }
    /**
     * Main adaptive retrieval method.
     *
     * @param query Free-text query.
     * @param limit Maximum number of results (default 10).
     * @returns The `executeStrategy` result extended with `cafScore` and `praReward`.
     */
    async retrieve(query, limit = 10) {
        // Make sure stored statistics are loaded before they drive selection.
        await this.ready;
        // 1. Classify query complexity
        const complexity = this.classifyQueryComplexity(query);
        console.error(`[AdaptiveRetrieval] Query complexity: ${complexity}`);
        // 2. Select best strategy
        const strategy = this.selectStrategy(complexity);
        console.error(`[AdaptiveRetrieval] Selected strategy: ${strategy}`);
        // 3. Execute strategy
        const result = await this.executeStrategy(strategy, query, limit);
        // 4. Calculate rewards
        const praReward = this.calculatePRAReward(result.retrievalCount);
        result.praReward = praReward;
        // Fall back to 0.5 only when no F1 is available; a real F1 of 0 must stay 0.
        result.cafScore = this.calculateCAFScore(result.f1Score ?? 0.5, result.retrievalCount);
        console.error(`[AdaptiveRetrieval] PRA Reward: ${praReward.toFixed(3)}, CAF Score: ${result.cafScore.toFixed(3)}`);
        return result;
    }
    // ==================== Strategy Implementations ====================
    /**
     * Nearest-neighbour search on the `entity:semantic` index.
     * Score is `1 - distance`, best first.
     */
    async vectorSearch(embedding, limit) {
        const result = await this.db.run(`
            ?[id, name, type, score] :=
                ~entity:semantic{id | query: vec($embedding), k: $limit, ef: 100, bind_distance: dist},
                *entity{id, name, type, @ "NOW"},
                score = 1.0 - dist
            :order -score
        `, { embedding, limit });
        return this.rowsToEntities(result.rows);
    }
    /**
     * Simplified graph walk: take the top 3 vector hits as seeds and return
     * their 1-hop neighbours (either edge direction) with a fixed score of 0.8.
     * `query` is accepted for signature parity and is not used.
     */
    async graphWalkSearch(query, embedding, limit) {
        const seeds = await this.vectorSearch(embedding, 3);
        if (seeds.length === 0) {
            return [];
        }
        const result = await this.db.run(`
            seeds[id] <- $seeds

            # 1-hop expansion
            neighbors[id] :=
                seeds[seed_id],
                *relationship{from_id: seed_id, to_id: id, @ "NOW"}

            neighbors[id] :=
                seeds[seed_id],
                *relationship{from_id: id, to_id: seed_id, @ "NOW"}

            # Get entity details
            ?[id, name, type, score] :=
                neighbors[id],
                *entity{id, name, type, @ "NOW"},
                score = 0.8

            :limit $limit
        `, { seeds: seeds.map((seed) => [seed.id]), limit });
        return this.rowsToEntities(result.rows);
    }
    /**
     * Simplified hybrid search: run vector and full-text search in parallel
     * and fuse them by summing reciprocal ranks (`1 / rank`) per entity id.
     */
    async hybridFusionSearch(query, embedding, limit) {
        const [vectorResults, ftsResults] = await Promise.all([
            this.vectorSearch(embedding, limit),
            this.ftsSearch(query, limit)
        ]);
        // Simple RRF fusion
        const scoreMap = new Map();
        for (const ranked of [vectorResults, ftsResults]) {
            ranked.forEach((r, idx) => {
                scoreMap.set(r.id, (scoreMap.get(r.id) ?? 0) + (1 / (idx + 1)));
            });
        }
        // Deduplicate by id; for an id present in both lists the FTS record wins.
        const uniqueResults = Array.from(new Map([...vectorResults, ...ftsResults].map((r) => [r.id, r])).values());
        return uniqueResults
            .map((r) => ({ ...r, score: scoreMap.get(r.id) ?? 0 }))
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }
    /**
     * Full-text search on the `entity:fts` index (tf-idf score, best first).
     * Best effort: on failure the error is logged and `[]` is returned, so
     * hybrid fusion can still use the vector results.
     */
    async ftsSearch(query, limit) {
        try {
            const result = await this.db.run(`
                ?[id, name, type, score] :=
                    ~entity:fts{id | query: $query, k: $limit, score_kind: 'tf_idf', bind_score: score},
                    *entity{id, name, type, @ "NOW"}
                :order -score
            `, { query, limit });
            return this.rowsToEntities(result.rows);
        }
        catch (error) {
            console.error('[AdaptiveRetrieval] FTS search failed:', this.describeError(error));
            return [];
        }
    }
    /**
     * Community expansion: take the top 2 vector hits as seeds and return
     * every entity sharing a community with a seed, with a fixed score of 0.7.
     */
    async communityExpansionSearch(embedding, limit) {
        const seeds = await this.vectorSearch(embedding, 2);
        if (seeds.length === 0) {
            return [];
        }
        const result = await this.db.run(`
            seeds[id] <- $seeds

            # Get communities of seeds
            communities[comm_id] :=
                seeds[seed_id],
                *entity_community{entity_id: seed_id, community_id: comm_id}

            # Get all entities in those communities
            ?[id, name, type, score] :=
                communities[comm_id],
                *entity_community{entity_id: id, community_id: comm_id},
                *entity{id, name, type, @ "NOW"},
                score = 0.7

            :limit $limit
        `, { seeds: seeds.map((seed) => [seed.id]), limit });
        return this.rowsToEntities(result.rows);
    }
    /**
     * Multi-hop semantic walk: take the top 2 vector hits as seeds and follow
     * outgoing relationships for up to 2 hops, keeping only nodes whose
     * similarity to the query exceeds 0.5. Each hop multiplies the path score
     * by `similarity * 0.8`; the best score per entity is returned.
     * `query` is accepted for signature parity and is not used.
     */
    async semanticWalkSearch(query, embedding, limit) {
        const seeds = await this.vectorSearch(embedding, 2);
        if (seeds.length === 0) {
            return [];
        }
        const result = await this.db.run(`
            seeds[id, score] <- $seeds

            # 2-hop walk with semantic filtering
            path[id, hop, path_score] :=
                seeds[id, score],
                hop = 0,
                path_score = score

            path[next_id, hop_new, path_score_new] :=
                path[current_id, hop, path_score],
                *relationship{from_id: current_id, to_id: next_id, @ "NOW"},
                hop < 2,
                hop_new = hop + 1,
                ~entity:semantic{id: next_id | query: vec($embedding), k: 100, ef: 100, bind_distance: dist},
                sim = 1.0 - dist,
                sim > 0.5,
                path_score_new = path_score * sim * 0.8

            ?[id, name, type, max_score] :=
                path[id, _, score],
                *entity{id, name, type, @ "NOW"},
                max_score = max(score)

            :order -max_score
            :limit $limit
        `, {
            seeds: seeds.map((seed) => [seed.id, seed.score]),
            embedding,
            limit
        });
        return this.rowsToEntities(result.rows);
    }
    /**
     * Get performance statistics.
     *
     * @returns A new Map of strategy to a copy of its record, so callers
     *          cannot mutate the tracker's internal state.
     */
    getPerformanceStats() {
        return new Map(Array.from(this.strategyPerformance, ([strategy, perf]) => [strategy, { ...perf }]));
    }
    /**
     * Reset performance tracking (for testing).
     * Clears the in-memory map and deletes every stored row.
     * Database errors are logged, never thrown.
     */
    async resetPerformance() {
        // Let a pending initial load finish first so it cannot repopulate the map.
        await this.ready;
        this.strategyPerformance.clear();
        try {
            // `:rm` removes the rows whose keys the entry rule returns, so
            // select every stored key first.
            await this.db.run(`
                ?[strategy] := *${this.PERF_TABLE}{strategy}
                :rm ${this.PERF_TABLE} {strategy}
            `);
            console.error('[AdaptiveRetrieval] Performance data reset');
        }
        catch (error) {
            console.error('[AdaptiveRetrieval] Error resetting performance:', this.describeError(error));
        }
    }
}
520
+ exports.AdaptiveGraphRetrieval = AdaptiveGraphRetrieval;