mark-improving-agent 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +335 -0
- package/VERSION +1 -0
- package/bin/cli.js +12 -0
- package/dist/agent/context.js +78 -0
- package/dist/agent/index.js +6 -0
- package/dist/agent/runtime.js +195 -0
- package/dist/agent/task-graph.js +209 -0
- package/dist/agent/types.js +1 -0
- package/dist/cli/index.js +206 -0
- package/dist/core/cognition/active-inference.js +296 -0
- package/dist/core/cognition/cognitive-architecture.js +263 -0
- package/dist/core/cognition/dual-process.js +102 -0
- package/dist/core/cognition/index.js +13 -0
- package/dist/core/cognition/learning-from-failure.js +184 -0
- package/dist/core/cognition/meta-agent.js +407 -0
- package/dist/core/cognition/metacognition.js +322 -0
- package/dist/core/cognition/react.js +177 -0
- package/dist/core/cognition/retrieval-anchor.js +99 -0
- package/dist/core/cognition/self-evolution.js +294 -0
- package/dist/core/cognition/self-verification.js +190 -0
- package/dist/core/cognition/thought-graph.js +495 -0
- package/dist/core/cognition/tool-augmented-llm.js +188 -0
- package/dist/core/cognition/tool-execution-verifier.js +204 -0
- package/dist/core/collaboration/agentic-loop.js +165 -0
- package/dist/core/collaboration/index.js +3 -0
- package/dist/core/collaboration/multi-agent-system.js +186 -0
- package/dist/core/collaboration/multi-agent.js +110 -0
- package/dist/core/consciousness/emotion-engine.js +101 -0
- package/dist/core/consciousness/flow-machine.js +121 -0
- package/dist/core/consciousness/index.js +4 -0
- package/dist/core/consciousness/personality.js +103 -0
- package/dist/core/consciousness/types.js +1 -0
- package/dist/core/emotional-protocol.js +54 -0
- package/dist/core/evolution/engine.js +194 -0
- package/dist/core/evolution/goal-engine.js +153 -0
- package/dist/core/evolution/index.js +6 -0
- package/dist/core/evolution/meta-learning.js +172 -0
- package/dist/core/evolution/reflection.js +158 -0
- package/dist/core/evolution/self-healer.js +139 -0
- package/dist/core/evolution/types.js +1 -0
- package/dist/core/healing-rl.js +266 -0
- package/dist/core/heartbeat.js +408 -0
- package/dist/core/identity/index.js +3 -0
- package/dist/core/identity/reflexion.js +165 -0
- package/dist/core/identity/self-model.js +274 -0
- package/dist/core/identity/self-verifier.js +158 -0
- package/dist/core/identity/types.js +12 -0
- package/dist/core/lesson-bank.js +301 -0
- package/dist/core/memory/adaptive-rag.js +440 -0
- package/dist/core/memory/archive-store.js +187 -0
- package/dist/core/memory/dream-consolidation.js +366 -0
- package/dist/core/memory/embedder.js +130 -0
- package/dist/core/memory/hopfield-network.js +128 -0
- package/dist/core/memory/index.js +9 -0
- package/dist/core/memory/knowledge-graph.js +151 -0
- package/dist/core/memory/spaced-repetition.js +113 -0
- package/dist/core/memory/store.js +404 -0
- package/dist/core/memory/types.js +1 -0
- package/dist/core/psychology/analysis.js +456 -0
- package/dist/core/psychology/index.js +1 -0
- package/dist/core/rollback-manager.js +191 -0
- package/dist/core/security/index.js +1 -0
- package/dist/core/security/privacy.js +132 -0
- package/dist/core/truth-teller.js +253 -0
- package/dist/core/truthfulness.js +99 -0
- package/dist/core/types.js +2 -0
- package/dist/event/bus.js +47 -0
- package/dist/index.js +8 -0
- package/dist/skills/dag.js +181 -0
- package/dist/skills/index.js +5 -0
- package/dist/skills/registry.js +40 -0
- package/dist/skills/types.js +1 -0
- package/dist/storage/archive.js +77 -0
- package/dist/storage/checkpoint.js +119 -0
- package/dist/storage/types.js +1 -0
- package/dist/utils/config.js +81 -0
- package/dist/utils/logger.js +49 -0
- package/dist/version.js +1 -0
- package/package.json +37 -0
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adaptive RAG Memory (ARM)
|
|
3
|
+
*
|
|
4
|
+
* Paper: "ARM: Adaptive RAG Memory System for LLM Agents"
|
|
5
|
+
*
|
|
6
|
+
* Key mechanisms:
|
|
7
|
+
* 1. Query Classification: Classify query type to select optimal retrieval strategy
|
|
8
|
+
* 2. Dynamic Strategy Selection: Choose between semantic, keyword, graph, or hybrid
|
|
9
|
+
* 3. Iterative Refinement: Refine retrieval based on results
|
|
10
|
+
* 4. Memory-Aware: Use historical performance to guide retrieval
|
|
11
|
+
*
|
|
12
|
+
* Query types:
|
|
13
|
+
* - Factual: Who/What/When/Where
|
|
14
|
+
* - Analytical: Why/How (causal reasoning)
|
|
15
|
+
* - Exploratory: Open-ended discovery
|
|
16
|
+
* - Navigational: Find specific information
|
|
17
|
+
* - Transactional: Action-oriented tasks
|
|
18
|
+
*/
|
|
19
|
+
import { randomUUID } from 'crypto';
|
|
20
|
+
/**
 * Default tuning values; caller-supplied config is spread on top of these
 * when a memory instance is created.
 *
 * Frozen so that a stray write cannot silently change the defaults used by
 * every instance created afterwards.
 */
const DEFAULT_CONFIG = Object.freeze({
    enableRefinement: true,
    maxRefinementIterations: 3,
    relevanceThreshold: 0.6,
    diversityWeight: 0.3,
    recencyWeight: 0.2,
});

/**
 * Query type -> trigger patterns and the weight added to that type's score
 * for each pattern that matches the query.
 *
 * Only the containers are frozen; the RegExp objects themselves are left
 * untouched so `.test()` keeps working exactly as before. None of the
 * patterns use the `g`/`y` flags, so they carry no state between calls.
 */
const QUERY_TYPE_PATTERNS = Object.freeze({
    factual: Object.freeze({
        patterns: Object.freeze([/谁|什么|哪个|多少|when|what|who|where|which/i]),
        weight: 0.9,
    }),
    analytical: Object.freeze({
        patterns: Object.freeze([/为什么|怎么|如何|why|how|原因|分析/i]),
        weight: 0.85,
    }),
    exploratory: Object.freeze({
        patterns: Object.freeze([/探索|发现|寻找|研究|discover|explore|find/i]),
        weight: 0.8,
    }),
    navigational: Object.freeze({
        patterns: Object.freeze([/查找|搜索|定位|navigate|search|find|locate/i]),
        weight: 0.75,
    }),
    transactional: Object.freeze({
        patterns: Object.freeze([/执行|完成|创建|删除|修改|do|execute|create|update|delete/i]),
        weight: 0.8,
    }),
});

/**
 * Query type -> retrieval strategies in order of preference. The first
 * element is the one used while there is too little feedback history.
 */
const STRATEGY_PREFERENCES = Object.freeze({
    factual: Object.freeze(['keyword', 'semantic', 'hybrid']),
    analytical: Object.freeze(['semantic', 'graph', 'hybrid']),
    exploratory: Object.freeze(['semantic', 'graph', 'hybrid']),
    navigational: Object.freeze(['keyword', 'semantic']),
    transactional: Object.freeze(['keyword', 'hybrid']),
});
|
|
56
|
+
/**
 * Create an in-memory Adaptive RAG Memory instance.
 *
 * All state (entries, feedback history, result cache, statistics) lives in
 * the closure of the returned object; nothing is persisted.
 *
 * @param {object} [config] Overrides merged on top of DEFAULT_CONFIG.
 * @returns {object} API: classifyQuery, retrieve, retrieveByType, addEntry,
 *   updateEntry, getEntry, learnFromFeedback, getConfig, updateConfig,
 *   getStats, getOptimalStrategy, refineResults.
 */
export function createAdaptiveRAGMemory(config) {
    const MAX_CACHED_QUERIES = 100;
    const MAX_HISTORY_LENGTH = 100;
    // Time constant of the exponential recency boost (one week).
    const RECENCY_DECAY_MS = 7 * 24 * 60 * 60 * 1000;

    const cfg = { ...DEFAULT_CONFIG, ...config };
    // id -> stored entry
    const memoryEntries = new Map();
    // Feedback records consulted by getOptimalStrategy()
    const queryHistory = [];
    // cache key -> previously returned result object
    const queryCache = new Map();
    const stats = {
        totalQueries: 0,
        queriesByType: { factual: 0, analytical: 0, exploratory: 0, navigational: 0, transactional: 0 },
        strategyUsage: { semantic: 0, keyword: 0, graph: 0, hybrid: 0, adaptive: 0 },
        avgRefinementIterations: 0,
        avgRetrievalTime: 0,
        cacheHits: 0,
        cacheMisses: 0,
    };
    // Strategy name -> search function (the declarations below are hoisted).
    const searchers = new Map([
        ['semantic', semanticSearch],
        ['keyword', keywordSearch],
        ['graph', graphSearch],
        ['hybrid', hybridSearch],
    ]);

    /** Lower-case `text` and return its whitespace-separated words longer than 2 characters. */
    function significantWords(text) {
        return text.toLowerCase().split(/\s+/).filter((word) => word.length > 2);
    }

    /** Sort scored entries best-first, keep `limit` of them and shape them as result items. */
    function toRankedItems(scored, limit, label) {
        return scored
            .sort((a, b) => b.score - a.score)
            .slice(0, limit)
            .map(({ entry, score }) => ({
                content: entry.content,
                score,
                source: entry.id,
                relevance: `${label}:${score.toFixed(2)}`,
            }));
    }

    /**
     * Classify a query by matching it against QUERY_TYPE_PATTERNS.
     *
     * @param {string} query
     * @returns {{type: string, confidence: number, keyEntities: string[], complexity: number}}
     *   `type` falls back to 'exploratory' when no pattern matches.
     */
    function classifyQuery(query) {
        const queryLower = query.toLowerCase();
        const scores = {
            factual: 0,
            analytical: 0,
            exploratory: 0,
            navigational: 0,
            transactional: 0,
        };
        for (const [type, { patterns, weight }] of Object.entries(QUERY_TYPE_PATTERNS)) {
            for (const pattern of patterns) {
                if (pattern.test(queryLower)) {
                    scores[type] += weight;
                }
            }
        }
        // Stable sort: ties keep the declaration order of `scores`.
        const ranked = Object.entries(scores).sort((a, b) => b[1] - a[1]);
        const [topType, topScore] = ranked[0];
        const secondScore = ranked[1][1];
        // Key entities keep the caller's original casing.
        const words = query.split(/\s+/).filter((word) => word.length > 2);
        return {
            type: topScore > 0 ? topType : 'exploratory',
            // The 0.1 keeps the ratio defined when nothing matched.
            confidence: topScore / (topScore + secondScore + 0.1),
            keyEntities: words.slice(0, 5),
            complexity: Math.min(1, (query.length / 200) + (words.length / 20)),
        };
    }

    /**
     * Retrieve entries for a query.
     *
     * @param {string} query
     * @param {{strategy?: string, limit?: number}} [options] `strategy`
     *   defaults to 'adaptive' (resolved through getOptimalStrategy);
     *   `limit` defaults to 10.
     * @returns {Promise<{items: object[], strategy: string, queryType: string,
     *   refinementIterations: number, timestamp: number}>}
     */
    async function retrieve(query, options) {
        const startTime = Date.now();
        stats.totalQueries++;
        const requestedStrategy = options?.strategy || 'adaptive';
        const limit = options?.limit || 10;
        // The limit is part of the key: a result computed for limit=1 must
        // not be handed back to a caller asking for limit=5.
        const cacheKey = `${query}:${requestedStrategy}:${limit}`;
        const cached = queryCache.get(cacheKey);
        if (cached !== undefined) {
            stats.cacheHits++;
            return cached;
        }
        stats.cacheMisses++;

        const classification = classifyQuery(query);
        const strategy = requestedStrategy === 'adaptive'
            ? getOptimalStrategy(classification.type)
            : requestedStrategy;
        stats.queriesByType[classification.type]++;
        // Unknown strategy names start from 0 rather than producing NaN.
        stats.strategyUsage[strategy] = (stats.strategyUsage[strategy] ?? 0) + 1;

        // Unrecognised strategies fall through to the adaptive search.
        const search = searchers.get(strategy);
        let items = search
            ? search(query, limit)
            : adaptiveSearch(query, limit, classification);

        let refinementIterations = 0;
        if (cfg.enableRefinement && items.length > 0) {
            let results = { items, strategy, queryType: classification.type, refinementIterations, timestamp: Date.now() };
            while (refinementIterations < cfg.maxRefinementIterations) {
                const avgScore = results.items.reduce((sum, item) => sum + item.score, 0) / results.items.length;
                if (avgScore >= cfg.relevanceThreshold) {
                    break;
                }
                results = refineResults(results, query);
                refinementIterations++;
            }
            items = results.items;
        }

        // Running means over the retrievals that were actually executed;
        // cache hits return early and contribute no sample.
        const samples = stats.cacheMisses;
        const retrievalTime = Date.now() - startTime;
        stats.avgRetrievalTime += (retrievalTime - stats.avgRetrievalTime) / samples;
        stats.avgRefinementIterations += (refinementIterations - stats.avgRefinementIterations) / samples;

        const result = {
            items,
            strategy,
            queryType: classification.type,
            refinementIterations,
            timestamp: Date.now(),
        };
        queryCache.set(cacheKey, result);
        if (queryCache.size > MAX_CACHED_QUERIES) {
            // Maps iterate in insertion order, so the first key is the oldest.
            const oldestKey = queryCache.keys().next().value;
            if (oldestKey !== undefined) {
                queryCache.delete(oldestKey);
            }
        }
        return result;
    }

    /**
     * Retrieve using the strategy currently preferred for an explicit query
     * type instead of the type inferred from the query text.
     *
     * @param {string} query
     * @param {string} type
     */
    async function retrieveByType(query, type) {
        return retrieve(query, { strategy: getOptimalStrategy(type) });
    }

    /**
     * Word-overlap search: every query word that contains, or is contained
     * in, a content word adds to the score (closer lengths add more). The
     * total is scaled by the entry's success rate and a recency boost.
     */
    function semanticSearch(query, limit) {
        const queryWords = significantWords(query);
        const now = Date.now();
        const scored = [];
        for (const entry of memoryEntries.values()) {
            // String(): content is not validated on insert.
            const contentWords = significantWords(String(entry.content));
            let score = 0;
            for (const qWord of queryWords) {
                for (const cWord of contentWords) {
                    if (cWord.includes(qWord) || qWord.includes(cWord)) {
                        score += 1 / (1 + Math.abs(cWord.length - qWord.length));
                    }
                }
            }
            score *= 0.5 + entry.successRate * 0.5;
            const recencyBoost = Math.exp(-(now - entry.lastAccessed) / RECENCY_DECAY_MS);
            score *= 1 + recencyBoost * cfg.recencyWeight;
            if (score > 0) {
                scored.push({ entry, score });
            }
        }
        return toRankedItems(scored, limit, 'semantic_match');
    }

    /**
     * Substring search: score is the fraction of query words found in the
     * entry content.
     */
    function keywordSearch(query, limit) {
        // Drop empty tokens: a padded query would otherwise contain '' and
        // ''.includes-style matching would make every entry a hit.
        const queryWords = query.toLowerCase().split(/\s+/).filter((word) => word.length > 0);
        if (queryWords.length === 0) {
            return [];
        }
        const scored = [];
        for (const entry of memoryEntries.values()) {
            const contentLower = String(entry.content).toLowerCase();
            const matchCount = queryWords.filter((word) => contentLower.includes(word)).length;
            if (matchCount > 0) {
                scored.push({ entry, score: matchCount / queryWords.length });
            }
        }
        return toRankedItems(scored, limit, 'keyword_match');
    }

    /**
     * Simplified "graph" search: 0.3 for entries tagged with the query's
     * type plus 0.7 x the entry's semantic score when it is among the top
     * `limit` semantic results.
     */
    function graphSearch(query, limit) {
        const { type } = classifyQuery(query);
        // Run the semantic search once, not once per entry.
        const semanticScores = new Map(semanticSearch(query, limit).map((item) => [item.source, item.score]));
        const scored = [];
        for (const entry of memoryEntries.values()) {
            // queryTypes is optional on stored entries.
            const typeMatch = Array.isArray(entry.queryTypes) && entry.queryTypes.includes(type) ? 0.3 : 0;
            const score = typeMatch + (semanticScores.get(entry.id) ?? 0) * 0.7;
            if (score > 0) {
                scored.push({ entry, score });
            }
        }
        return toRankedItems(scored, limit, 'graph_match');
    }

    /**
     * Union of semantic and keyword results, de-duplicated by source id.
     * Items found only by keyword search have their score scaled by 0.8.
     */
    function hybridSearch(query, limit) {
        const seen = new Set();
        const merged = [];
        for (const item of semanticSearch(query, limit)) {
            if (!seen.has(item.source)) {
                seen.add(item.source);
                merged.push(item);
            }
        }
        for (const item of keywordSearch(query, limit)) {
            if (!seen.has(item.source)) {
                seen.add(item.source);
                merged.push({ ...item, score: item.score * 0.8 });
            }
        }
        return merged.sort((a, b) => b.score - a.score).slice(0, limit);
    }

    /** Run whichever search getOptimalStrategy() picks for the classified type (hybrid as fallback). */
    function adaptiveSearch(query, limit, classification) {
        const search = searchers.get(getOptimalStrategy(classification.type)) ?? hybridSearch;
        return search(query, limit);
    }

    /**
     * Re-score items by how many significant query words their content
     * contains (up to x1.5) and re-sort them.
     *
     * @param {{items: object[], refinementIterations: number}} results
     * @param {string} query
     * @returns {object} Copy of `results` with new `items` and
     *   `refinementIterations` incremented by one.
     */
    function refineResults(results, query) {
        const queryWords = significantWords(query);
        const refinedItems = results.items.map((item) => {
            const contentLower = String(item.content).toLowerCase();
            const matched = queryWords.filter((word) => contentLower.includes(word)).length;
            // No significant words -> no boost (previously 0/0 = NaN).
            const coverage = queryWords.length > 0 ? matched / queryWords.length : 0;
            return { ...item, score: item.score * (1 + coverage * 0.5) };
        });
        refinedItems.sort((a, b) => b.score - a.score);
        return {
            ...results,
            items: refinedItems,
            refinementIterations: results.refinementIterations + 1,
        };
    }

    /**
     * Store a new entry under a freshly generated UUID.
     *
     * @param {object} entry Fields to store; id, accessCount, lastAccessed
     *   and successRate are always set by this function.
     * @returns {string} The generated id.
     */
    function addEntry(entry) {
        const id = randomUUID();
        memoryEntries.set(id, {
            ...entry,
            id,
            accessCount: 0,
            lastAccessed: Date.now(),
            successRate: 0.5,
        });
        // Cached results no longer reflect the stored entries.
        queryCache.clear();
        return id;
    }

    /** Merge `updates` into an existing entry; unknown ids are ignored. */
    function updateEntry(id, updates) {
        const entry = memoryEntries.get(id);
        if (!entry) {
            return;
        }
        // Re-apply the id last so it always matches the entry's map key.
        Object.assign(entry, updates, { id });
        queryCache.clear();
    }

    /** Return the stored entry for `id`, or undefined. */
    function getEntry(id) {
        return memoryEntries.get(id);
    }

    /**
     * Record whether a retrieval was helpful and adjust the success rate
     * (exponential moving average, 5% weight) of the retrieved entries.
     *
     * @param {string} query
     * @param {Iterable<string>} retrievedIds
     * @param {boolean} helpful
     */
    function learnFromFeedback(query, retrievedIds, helpful) {
        const { type } = classifyQuery(query);
        queryHistory.push({
            query,
            type,
            strategy: getOptimalStrategy(type),
            success: helpful,
        });
        const now = Date.now();
        for (const id of retrievedIds) {
            const entry = memoryEntries.get(id);
            if (entry) {
                entry.accessCount++;
                entry.lastAccessed = now;
                entry.successRate = entry.successRate * 0.95 + (helpful ? 0.05 : 0);
            }
        }
        if (queryHistory.length > MAX_HISTORY_LENGTH) {
            queryHistory.shift();
        }
        // Scores and strategy choice may have changed.
        queryCache.clear();
    }

    /**
     * Pick a strategy for a query type: the first static preference until
     * at least 3 feedback records exist for that type, afterwards the
     * strategy with the highest success ratio among the last 20 records.
     *
     * @param {string} queryType Unknown types use the 'exploratory'
     *   preferences (the type classifyQuery falls back to).
     * @returns {string}
     */
    function getOptimalStrategy(queryType) {
        const preferences = STRATEGY_PREFERENCES[queryType] ?? STRATEGY_PREFERENCES.exploratory;
        const recentByType = queryHistory.filter((record) => record.type === queryType).slice(-20);
        if (recentByType.length < 3) {
            return preferences[0];
        }
        const tallies = new Map();
        for (const record of recentByType) {
            const tally = tallies.get(record.strategy) || { total: 0, success: 0 };
            tally.total++;
            if (record.success) {
                tally.success++;
            }
            tallies.set(record.strategy, tally);
        }
        let bestStrategy = preferences[0];
        let bestSuccessRate = 0;
        for (const [strategy, tally] of tallies) {
            const rate = tally.success / tally.total;
            if (rate > bestSuccessRate) {
                bestSuccessRate = rate;
                bestStrategy = strategy;
            }
        }
        return bestStrategy;
    }

    /** Return a shallow copy of the active configuration. */
    function getConfig() {
        return { ...cfg };
    }

    /** Merge `newConfig` into the active configuration. */
    function updateConfig(newConfig) {
        Object.assign(cfg, newConfig);
        // Weights and refinement settings feed into cached scores.
        queryCache.clear();
    }

    /** Return a snapshot of the statistics; nested counters are copied too. */
    function getStats() {
        return {
            ...stats,
            queriesByType: { ...stats.queriesByType },
            strategyUsage: { ...stats.strategyUsage },
        };
    }

    return {
        classifyQuery,
        retrieve,
        retrieveByType,
        addEntry,
        updateEntry,
        getEntry,
        learnFromFeedback,
        getConfig,
        updateConfig,
        getStats,
        getOptimalStrategy,
        refineResults,
    };
}
|
|
426
|
+
/**
 * Render a retrieval result as a human-readable, multi-line summary.
 *
 * Each item is listed as `N. [score] preview`, where the preview is the
 * item content cut to 80 characters. The trailing "..." is only added when
 * the content was actually truncated.
 *
 * @param {{items: {content: *, score: number}[], strategy: string,
 *   queryType: string, refinementIterations: number, timestamp: number}} result
 * @returns {string}
 */
export function formatRetrievalResult(result) {
    const PREVIEW_LENGTH = 80;
    const itemsStr = result.items
        .map((item, idx) => {
            const text = String(item.content);
            const preview = text.length > PREVIEW_LENGTH
                ? `${text.slice(0, PREVIEW_LENGTH)}...`
                : text;
            return `${idx + 1}. [${item.score.toFixed(2)}] ${preview}`;
        })
        .join('\n');
    return `
Retrieval Result:
Strategy: ${result.strategy}
Query Type: ${result.queryType}
Items: ${result.items.length}
Refinements: ${result.refinementIterations}
Timestamp: ${new Date(result.timestamp).toISOString()}

Items:
${itemsStr}
`.trim();
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { randomUUID } from 'crypto';
|
|
2
|
+
import { atomicWriteJSON, ensureDir, safeReadDir, readJSON } from '../../storage/archive.js';
|
|
3
|
+
// Sub-directory of the data directory holding one JSON file per entry.
const BLOCKS_DIR = 'blocks';

// File in the data directory holding the index of archived entries.
const INDEX_FILE = 'index.json';
|
|
5
|
+
/**
 * Read the archive index stored under `dataDir`.
 *
 * @param {string} dataDir
 * @returns {Promise<object>} Whatever readJSON yields for the index file;
 *   an empty version-1 index is passed to readJSON as its second argument
 *   (presumably the fallback for a missing file; confirm against readJSON).
 */
async function loadIndex(dataDir) {
    const emptyIndex = { entries: {}, version: 1 };
    return readJSON(`${dataDir}/${INDEX_FILE}`, emptyIndex);
}
|
|
10
|
+
/**
 * Write the in-memory index to disk when it has unsaved changes, then
 * clear the dirty flag. Does nothing when the index is clean.
 *
 * @param {{dataDir: string, index: object, indexDirty: boolean}} impl
 * @returns {Promise<void>}
 */
async function saveIndex(impl) {
    if (impl.indexDirty) {
        await atomicWriteJSON(`${impl.dataDir}/${INDEX_FILE}`, impl.index);
        // Only reached when the write did not throw.
        impl.indexDirty = false;
    }
}
|
|
17
|
+
/**
 * Read the block file of a single archived entry.
 *
 * @param {string} dataDir
 * @param {string} id Entry id; used verbatim as the file name stem.
 * @returns {Promise<object|null>} Result of readJSON for the block file,
 *   with `null` passed as its second argument.
 */
async function loadEntry(dataDir, id) {
    return readJSON(`${dataDir}/${BLOCKS_DIR}/${id}.json`, null);
}
|
|
21
|
+
/**
 * Write one entry to its block file, named after `entry.id`.
 *
 * @param {string} dataDir
 * @param {{id: string}} entry
 * @returns {Promise<void>}
 */
async function saveEntry(dataDir, entry) {
    const { id } = entry;
    await atomicWriteJSON(`${dataDir}/${BLOCKS_DIR}/${id}.json`, entry);
}
|
|
25
|
+
/**
 * Create a file-backed archive store rooted at `dataDir`.
 *
 * Each entry is written to its own JSON block file; a small index
 * (id, tags, timestamp, importance per entry) is kept in memory and saved
 * next to the blocks. Call `boot()` first to load an existing index.
 *
 * @param {string} dataDir
 * @returns {object} API: archive, retrieve, search, getByTimeRange,
 *   getByTag, exportArchive, importArchive, getStats, boot.
 */
export function createArchiveStore(dataDir) {
    const impl = {
        dataDir,
        index: { entries: {}, version: 1 },
        indexDirty: false,
    };
    const blocksDir = `${dataDir}/${BLOCKS_DIR}`;
    const BLOCK_SUFFIX = '.json';

    /**
     * An id is usable when it cannot escape the blocks directory (no path
     * separators or NUL) and cannot clobber the index object's prototype.
     */
    const isSafeId = (id) => {
        const text = String(id);
        return text !== '' && text !== '__proto__' && !/[\\\/\0]/.test(text);
    };

    /** Throw when `id` must not be used as a block file name. */
    const assertSafeId = (id) => {
        if (!isSafeId(id)) {
            throw new Error(`Invalid archive entry id: ${String(id)}`);
        }
    };

    /** Build the index record for an entry, defaulting missing metadata. */
    const toIndexRecord = (entry) => ({
        id: entry.id,
        tags: Array.isArray(entry.tags) ? entry.tags : [],
        timestamp: entry.timestamp ?? Date.now(),
        // `??` keeps a legitimate importance of 0.
        importance: entry.importance ?? 0.5,
    });

    /** Tags of an index record, tolerating records written without any. */
    const tagsOf = (record) => (Array.isArray(record.tags) ? record.tags : []);

    /** Load the blocks for `ids` in order, skipping any that cannot be read. */
    const loadExisting = async (ids, max = Infinity) => {
        const loaded = [];
        for (const id of ids) {
            if (loaded.length >= max) {
                break;
            }
            const entry = await loadEntry(dataDir, id);
            if (entry) {
                loaded.push(entry);
            }
        }
        return loaded;
    };

    /**
     * Persist entries and add them to the index.
     *
     * @param {Iterable<object>} entries Entries without an `id` get a UUID.
     * @returns {Promise<string[]>} Ids of the archived entries, in order.
     * @throws {Error} When an entry id is not usable as a file name;
     *   nothing is written in that case.
     */
    const archive = async (entries) => {
        const prepared = Array.from(entries, (entry) => ({ ...entry, id: entry.id || randomUUID() }));
        // Validate everything up front so a bad id cannot leave a
        // half-written batch behind.
        prepared.forEach((entry) => assertSafeId(entry.id));
        await ensureDir(blocksDir);
        const ids = [];
        for (const entry of prepared) {
            await saveEntry(dataDir, entry);
            impl.index.entries[entry.id] = toIndexRecord(entry);
            impl.indexDirty = true;
            ids.push(entry.id);
        }
        await saveIndex(impl);
        return ids;
    };

    /**
     * Load a single entry by id.
     *
     * @returns {Promise<object|null>} `null` for ids that cannot name a block.
     */
    const retrieve = async (id) => (isSafeId(id) ? loadEntry(dataDir, id) : null);

    /**
     * Find entries whose id or any tag contains `query` (case-insensitive),
     * ordered by indexed importance, highest first.
     *
     * @param {string} query
     * @param {number} [limit=20] Maximum number of entries returned.
     * @returns {Promise<object[]>}
     */
    const search = async (query, limit = 20) => {
        const needle = query.toLowerCase();
        const matchingIds = Object.entries(impl.index.entries)
            .filter(([id, record]) => id.toLowerCase().includes(needle)
                || tagsOf(record).some((tag) => String(tag).toLowerCase().includes(needle)))
            .sort(([, a], [, b]) => (b.importance ?? 0) - (a.importance ?? 0))
            .map(([id]) => id);
        // Rank on the index first so only the needed blocks are read.
        return loadExisting(matchingIds, limit);
    };

    /**
     * Entries whose indexed timestamp lies in [start, end], newest first.
     *
     * @param {number} start
     * @param {number} end
     * @returns {Promise<object[]>}
     */
    const getByTimeRange = async (start, end) => {
        const matchingIds = Object.entries(impl.index.entries)
            .filter(([, record]) => record.timestamp >= start && record.timestamp <= end)
            .sort(([, a], [, b]) => b.timestamp - a.timestamp)
            .map(([id]) => id);
        return loadExisting(matchingIds);
    };

    /**
     * Entries carrying exactly `tag` (case-insensitive), ordered by indexed
     * importance, highest first.
     *
     * @param {string} tag
     * @returns {Promise<object[]>}
     */
    const getByTag = async (tag) => {
        const wanted = tag.toLowerCase();
        const matchingIds = Object.entries(impl.index.entries)
            .filter(([, record]) => tagsOf(record).some((t) => String(t).toLowerCase() === wanted))
            .sort(([, a], [, b]) => (b.importance ?? 0) - (a.importance ?? 0))
            .map(([id]) => id);
        return loadExisting(matchingIds);
    };

    /**
     * Serialise every indexed entry that can still be loaded.
     *
     * @returns {Promise<string>} Pretty-printed JSON with `version`,
     *   `exportedAt` and `entries`.
     */
    const exportArchive = async () => {
        const allEntries = await loadExisting(Object.keys(impl.index.entries));
        return JSON.stringify({
            version: 1,
            exportedAt: Date.now(),
            entries: allEntries,
        }, null, 2);
    };

    /**
     * Import entries from a JSON document produced by exportArchive().
     * Entries without an `id` are skipped.
     *
     * @param {string} json
     * @returns {Promise<number>} Number of entries imported.
     * @throws {SyntaxError} When `json` is not valid JSON.
     * @throws {Error} When the document has no `entries` array, or an entry
     *   id is not usable as a file name (nothing is written in that case).
     */
    const importArchive = async (json) => {
        const data = JSON.parse(json);
        if (!data || !Array.isArray(data.entries)) {
            throw new Error('Invalid archive format');
        }
        const importable = data.entries.filter((entry) => entry && entry.id);
        // The ids come from an external document: check them before any
        // of them is turned into a file path.
        importable.forEach((entry) => assertSafeId(entry.id));
        if (importable.length === 0) {
            return 0;
        }
        await ensureDir(blocksDir);
        for (const entry of importable) {
            await saveEntry(dataDir, entry);
            impl.index.entries[entry.id] = toIndexRecord(entry);
            impl.indexDirty = true;
        }
        await saveIndex(impl);
        return importable.length;
    };

    /**
     * Summarise the index.
     *
     * @returns {Promise<{totalEntries: number, totalSize: number,
     *   oldestEntry: number|null, newestEntry: number|null}>}
     *   `totalSize` is the summed length of the serialised index records,
     *   not of the block files.
     */
    const getStats = async () => {
        let totalSize = 0;
        let oldestEntry = null;
        let newestEntry = null;
        for (const record of Object.values(impl.index.entries)) {
            // Compare against null explicitly: a timestamp of 0 is valid.
            if (oldestEntry === null || record.timestamp < oldestEntry) {
                oldestEntry = record.timestamp;
            }
            if (newestEntry === null || record.timestamp > newestEntry) {
                newestEntry = record.timestamp;
            }
            totalSize += JSON.stringify(record).length;
        }
        return {
            totalEntries: Object.keys(impl.index.entries).length,
            totalSize,
            oldestEntry,
            newestEntry,
        };
    };

    /**
     * Load the index from disk and check it against the block files. When
     * the index is malformed or references a missing block, it is rebuilt
     * from the block files that do exist and saved.
     *
     * @returns {Promise<void>}
     */
    const boot = async () => {
        await ensureDir(blocksDir);
        const loaded = await loadIndex(dataDir);
        const hasEntries = Boolean(loaded) && typeof loaded.entries === 'object' && loaded.entries !== null;
        impl.index = { version: 1, ...loaded, entries: hasEntries ? loaded.entries : {} };

        const blockFiles = await safeReadDir(blocksDir);
        // Only *.json files are blocks; strip the suffix, not the first
        // occurrence of ".json" anywhere in the name.
        const blockIds = blockFiles
            .filter((file) => file.endsWith(BLOCK_SUFFIX))
            .map((file) => file.slice(0, -BLOCK_SUFFIX.length));
        const onDisk = new Set(blockIds);
        const missingId = Object.keys(impl.index.entries).find((id) => !onDisk.has(id));
        if (hasEntries && missingId === undefined) {
            return;
        }
        if (missingId !== undefined) {
            console.warn(`Archive index inconsistency: block file missing for ${missingId}, rebuilding index...`);
        }
        else {
            console.warn('Archive index malformed, rebuilding index...');
        }
        const rebuilt = {};
        for (const id of blockIds) {
            if (!isSafeId(id)) {
                continue;
            }
            const entry = await loadEntry(dataDir, id);
            if (entry) {
                // The file name is authoritative for the id.
                rebuilt[id] = toIndexRecord({ ...entry, id });
            }
        }
        impl.index.entries = rebuilt;
        impl.indexDirty = true;
        await saveIndex(impl);
    };

    return {
        archive,
        retrieve,
        search,
        getByTimeRange,
        getByTag,
        exportArchive,
        importArchive,
        getStats,
        boot,
    };
}
|