@claude-flow/cli 3.0.0-alpha.15 → 3.0.0-alpha.17
- package/dist/src/commands/analyze.d.ts +19 -0
- package/dist/src/commands/analyze.d.ts.map +1 -0
- package/dist/src/commands/analyze.js +1819 -0
- package/dist/src/commands/analyze.js.map +1 -0
- package/dist/src/commands/hooks.d.ts.map +1 -1
- package/dist/src/commands/hooks.js +325 -1
- package/dist/src/commands/hooks.js.map +1 -1
- package/dist/src/commands/index.d.ts +2 -0
- package/dist/src/commands/index.d.ts.map +1 -1
- package/dist/src/commands/index.js +12 -0
- package/dist/src/commands/index.js.map +1 -1
- package/dist/src/commands/mcp.js +3 -3
- package/dist/src/commands/mcp.js.map +1 -1
- package/dist/src/commands/route.d.ts +16 -0
- package/dist/src/commands/route.d.ts.map +1 -0
- package/dist/src/commands/route.js +597 -0
- package/dist/src/commands/route.js.map +1 -0
- package/dist/src/init/claudemd-generator.d.ts.map +1 -1
- package/dist/src/init/claudemd-generator.js +218 -362
- package/dist/src/init/claudemd-generator.js.map +1 -1
- package/dist/src/mcp-client.d.ts.map +1 -1
- package/dist/src/mcp-client.js +2 -0
- package/dist/src/mcp-client.js.map +1 -1
- package/dist/src/mcp-tools/analyze-tools.d.ts +38 -0
- package/dist/src/mcp-tools/analyze-tools.d.ts.map +1 -0
- package/dist/src/mcp-tools/analyze-tools.js +317 -0
- package/dist/src/mcp-tools/analyze-tools.js.map +1 -0
- package/dist/src/mcp-tools/index.d.ts +2 -0
- package/dist/src/mcp-tools/index.d.ts.map +1 -1
- package/dist/src/mcp-tools/index.js +2 -0
- package/dist/src/mcp-tools/index.js.map +1 -1
- package/dist/src/ruvector/ast-analyzer.d.ts +67 -0
- package/dist/src/ruvector/ast-analyzer.d.ts.map +1 -0
- package/dist/src/ruvector/ast-analyzer.js +277 -0
- package/dist/src/ruvector/ast-analyzer.js.map +1 -0
- package/dist/src/ruvector/coverage-router.d.ts +145 -0
- package/dist/src/ruvector/coverage-router.d.ts.map +1 -0
- package/dist/src/ruvector/coverage-router.js +451 -0
- package/dist/src/ruvector/coverage-router.js.map +1 -0
- package/dist/src/ruvector/coverage-tools.d.ts +33 -0
- package/dist/src/ruvector/coverage-tools.d.ts.map +1 -0
- package/dist/src/ruvector/coverage-tools.js +157 -0
- package/dist/src/ruvector/coverage-tools.js.map +1 -0
- package/dist/src/ruvector/diff-classifier.d.ts +154 -0
- package/dist/src/ruvector/diff-classifier.d.ts.map +1 -0
- package/dist/src/ruvector/diff-classifier.js +508 -0
- package/dist/src/ruvector/diff-classifier.js.map +1 -0
- package/dist/src/ruvector/graph-analyzer.d.ts +174 -0
- package/dist/src/ruvector/graph-analyzer.d.ts.map +1 -0
- package/dist/src/ruvector/graph-analyzer.js +878 -0
- package/dist/src/ruvector/graph-analyzer.js.map +1 -0
- package/dist/src/ruvector/index.d.ts +27 -0
- package/dist/src/ruvector/index.d.ts.map +1 -0
- package/dist/src/ruvector/index.js +47 -0
- package/dist/src/ruvector/index.js.map +1 -0
- package/dist/src/ruvector/q-learning-router.d.ts +211 -0
- package/dist/src/ruvector/q-learning-router.d.ts.map +1 -0
- package/dist/src/ruvector/q-learning-router.js +681 -0
- package/dist/src/ruvector/q-learning-router.js.map +1 -0
- package/dist/src/ruvector/vector-db.d.ts +69 -0
- package/dist/src/ruvector/vector-db.d.ts.map +1 -0
- package/dist/src/ruvector/vector-db.js +243 -0
- package/dist/src/ruvector/vector-db.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +13 -1

package/dist/src/ruvector/q-learning-router.js (new file)
@@ -0,0 +1,681 @@
/**
 * Q-Learning Router for Task Routing
 *
 * Uses reinforcement learning to optimize task routing decisions
 * based on historical performance and context.
 *
 * Features:
 * - Caching for repeated task patterns (LRU cache)
 * - Optimized state space with feature hashing
 * - Epsilon decay with exponential annealing
 * - Experience replay buffer for stable learning
 * - Model persistence to .swarm/q-learning-model.json
 *
 * @module q-learning-router
 */
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { dirname } from 'path';
/**
 * Default configuration
 */
const DEFAULT_CONFIG = {
    learningRate: 0.1,
    gamma: 0.99,
    explorationInitial: 1.0,
    explorationFinal: 0.01,
    explorationDecay: 10000,
    explorationDecayType: 'exponential',
    maxStates: 10000,
    numActions: 8,
    replayBufferSize: 1000,
    replayBatchSize: 32,
    enableReplay: true,
    cacheSize: 256,
    cacheTTL: 300000,
    modelPath: '.swarm/q-learning-model.json',
    autoSaveInterval: 100,
    stateSpaceDim: 64,
};
/**
 * Route names mapping
 */
const ROUTE_NAMES = [
    'coder',
    'tester',
    'reviewer',
    'architect',
    'researcher',
    'optimizer',
    'debugger',
    'documenter',
];
/**
 * Task feature keywords for state representation
 */
const FEATURE_KEYWORDS = [
    // Code-related
    'implement', 'code', 'write', 'create', 'build', 'develop',
    // Testing-related
    'test', 'spec', 'coverage', 'unit', 'integration', 'e2e',
    // Review-related
    'review', 'check', 'audit', 'analyze', 'inspect',
    // Architecture-related
    'architect', 'design', 'structure', 'pattern', 'system',
    // Research-related
    'research', 'investigate', 'explore', 'find', 'search',
    // Optimization-related
    'optimize', 'performance', 'speed', 'memory', 'improve',
    // Debug-related
    'debug', 'fix', 'bug', 'error', 'issue', 'problem',
    // Documentation-related
    'document', 'docs', 'readme', 'comment', 'explain',
];
/**
 * Q-Learning Router for intelligent task routing
 *
 * Optimized with:
 * - LRU cache for repeated task patterns
 * - Feature hashing for efficient state space
 * - Exponential epsilon decay
 * - Prioritized experience replay
 * - Model persistence
 */
export class QLearningRouter {
    config;
    qTable = new Map();
    epsilon;
    stepCount = 0;
    updateCount = 0;
    avgTDError = 0;
    ruvectorEngine = null;
    useNative = false;
    // Experience replay buffer (circular buffer)
    replayBuffer = [];
    replayBufferIdx = 0;
    totalExperiences = 0;
    // LRU cache for route decisions
    routeCache = new Map();
    cacheOrder = [];
    cacheHits = 0;
    cacheMisses = 0;
    // Feature hash cache for state representation
    featureHashCache = new Map();
    constructor(config = {}) {
        this.config = { ...DEFAULT_CONFIG, ...config };
        this.epsilon = this.config.explorationInitial;
    }
    /**
     * Initialize the router, attempting to load ruvector native module
     * and restore persisted model if available
     */
    async initialize() {
        try {
            const ruvector = await import('@ruvector/core');
            this.ruvectorEngine = ruvector.createQLearning?.(this.config);
            this.useNative = !!this.ruvectorEngine;
        }
        catch {
            // Fallback to JS implementation
            this.useNative = false;
        }
        // Try to load persisted model
        await this.loadModel();
    }
    /**
     * Load model from persistence file
     */
    async loadModel(path) {
        const modelPath = path || this.config.modelPath;
        try {
            if (!existsSync(modelPath)) {
                return false;
            }
            const data = readFileSync(modelPath, 'utf-8');
            const model = JSON.parse(data);
            // Validate version compatibility
            if (!model.version || !model.version.startsWith('1.')) {
                console.warn(`[Q-Learning] Incompatible model version: ${model.version}`);
                return false;
            }
            // Import Q-table
            this.import(model.qTable);
            // Restore stats
            this.stepCount = model.stats.stepCount || 0;
            this.updateCount = model.stats.updateCount || 0;
            this.avgTDError = model.stats.avgTDError || 0;
            this.epsilon = model.stats.epsilon || this.config.explorationInitial;
            this.totalExperiences = model.metadata?.totalExperiences || 0;
            return true;
        }
        catch (err) {
            console.warn(`[Q-Learning] Failed to load model: ${err}`);
            return false;
        }
    }
    /**
     * Save model to persistence file
     */
    async saveModel(path) {
        const modelPath = path || this.config.modelPath;
        try {
            // Ensure directory exists
            const dir = dirname(modelPath);
            if (!existsSync(dir)) {
                mkdirSync(dir, { recursive: true });
            }
            const model = {
                version: '1.0.0',
                config: {
                    learningRate: this.config.learningRate,
                    gamma: this.config.gamma,
                    explorationDecayType: this.config.explorationDecayType,
                    numActions: this.config.numActions,
                },
                qTable: this.export(),
                stats: {
                    stepCount: this.stepCount,
                    updateCount: this.updateCount,
                    avgTDError: this.avgTDError,
                    epsilon: this.epsilon,
                },
                metadata: {
                    savedAt: new Date().toISOString(),
                    totalExperiences: this.totalExperiences,
                },
            };
            writeFileSync(modelPath, JSON.stringify(model, null, 2));
            return true;
        }
        catch (err) {
            console.warn(`[Q-Learning] Failed to save model: ${err}`);
            return false;
        }
    }
    /**
     * Route a task based on its context
     * Uses LRU cache for repeated task patterns
     */
    route(taskContext, explore = true) {
        const stateKey = this.hashStateOptimized(taskContext);
        // Check cache first (only for exploitation, not exploration)
        if (!explore) {
            const cached = this.getCachedRoute(stateKey);
            if (cached) {
                this.cacheHits++;
                return cached;
            }
            this.cacheMisses++;
        }
        // Check if we should explore using decayed epsilon
        const shouldExplore = explore && Math.random() < this.epsilon;
        let actionIdx;
        let qValues;
        if (shouldExplore) {
            // Random exploration
            actionIdx = Math.floor(Math.random() * this.config.numActions);
            qValues = this.getQValues(stateKey);
        }
        else {
            // Exploit - choose best action
            qValues = this.getQValues(stateKey);
            actionIdx = this.argmax(qValues);
        }
        // Calculate confidence from softmax of Q-values
        const confidence = this.softmaxConfidence(qValues, actionIdx);
        // Get alternatives sorted by Q-value
        const alternatives = ROUTE_NAMES
            .map((route, idx) => ({ route, score: qValues[idx] }))
            .sort((a, b) => b.score - a.score)
            .slice(1, 4); // Top 3 alternatives
        const decision = {
            route: ROUTE_NAMES[actionIdx] || 'coder',
            confidence,
            qValues,
            explored: shouldExplore,
            alternatives,
        };
        // Cache the decision for exploitation queries
        if (!shouldExplore) {
            this.cacheRoute(stateKey, decision);
        }
        return decision;
    }
    /**
     * Get cached route decision (LRU cache)
     */
    getCachedRoute(stateKey) {
        const entry = this.routeCache.get(stateKey);
        if (!entry) {
            return null;
        }
        // Check TTL
        if (Date.now() - entry.timestamp > this.config.cacheTTL) {
            this.routeCache.delete(stateKey);
            this.cacheOrder = this.cacheOrder.filter(k => k !== stateKey);
            return null;
        }
        // Update LRU order
        this.cacheOrder = this.cacheOrder.filter(k => k !== stateKey);
        this.cacheOrder.push(stateKey);
        entry.hits++;
        return entry.decision;
    }
    /**
     * Cache a route decision (LRU eviction)
     */
    cacheRoute(stateKey, decision) {
        // Evict oldest if cache is full
        while (this.routeCache.size >= this.config.cacheSize && this.cacheOrder.length > 0) {
            const oldest = this.cacheOrder.shift();
            if (oldest) {
                this.routeCache.delete(oldest);
            }
        }
        this.routeCache.set(stateKey, {
            decision,
            timestamp: Date.now(),
            hits: 0,
        });
        this.cacheOrder.push(stateKey);
    }
    /**
     * Invalidate cache (call after significant Q-table updates)
     */
    invalidateCache() {
        this.routeCache.clear();
        this.cacheOrder = [];
    }
    /**
     * Update Q-values based on feedback
     * Includes experience replay for stable learning
     */
    update(taskContext, action, reward, nextContext) {
        const stateKey = this.hashStateOptimized(taskContext);
        const actionIdx = ROUTE_NAMES.indexOf(action);
        if (actionIdx === -1) {
            return 0;
        }
        const nextStateKey = nextContext ? this.hashStateOptimized(nextContext) : null;
        // Store experience in replay buffer
        if (this.config.enableReplay) {
            const experience = {
                stateKey,
                actionIdx,
                reward,
                nextStateKey,
                timestamp: Date.now(),
                priority: Math.abs(reward) + 0.1, // Initial priority based on reward magnitude
            };
            this.addToReplayBuffer(experience);
        }
        // Perform direct update
        const tdError = this.updateQValue(stateKey, actionIdx, reward, nextStateKey);
        // Perform experience replay
        if (this.config.enableReplay && this.replayBuffer.length >= this.config.replayBatchSize) {
            this.experienceReplay();
        }
        // Decay exploration using configured strategy
        this.stepCount++;
        this.epsilon = this.calculateEpsilon();
        // Prune Q-table if needed
        if (this.qTable.size > this.config.maxStates) {
            this.pruneQTable();
        }
        this.updateCount++;
        this.avgTDError = (this.avgTDError * (this.updateCount - 1) + Math.abs(tdError)) / this.updateCount;
        // Auto-save periodically
        if (this.config.autoSaveInterval > 0 && this.updateCount % this.config.autoSaveInterval === 0) {
            this.saveModel().catch(() => { }); // Fire and forget
        }
        // Invalidate cache periodically to reflect Q-table changes
        if (this.updateCount % 50 === 0) {
            this.invalidateCache();
        }
        return tdError;
    }
    /**
     * Internal Q-value update
     */
    updateQValue(stateKey, actionIdx, reward, nextStateKey) {
        const entry = this.getOrCreateEntry(stateKey);
        const currentQ = entry.qValues[actionIdx];
        // Calculate target Q-value
        let targetQ;
        if (nextStateKey) {
            const nextQValues = this.getQValues(nextStateKey);
            const maxNextQ = Math.max(...nextQValues);
            targetQ = reward + this.config.gamma * maxNextQ;
        }
        else {
            // Terminal state
            targetQ = reward;
        }
        // TD error
        const tdError = targetQ - currentQ;
        // Update Q-value
        entry.qValues[actionIdx] += this.config.learningRate * tdError;
        entry.visits++;
        entry.lastUpdate = Date.now();
        return tdError;
    }
    /**
     * Add experience to circular replay buffer
     */
    addToReplayBuffer(experience) {
        if (this.replayBuffer.length < this.config.replayBufferSize) {
            this.replayBuffer.push(experience);
        }
        else {
            this.replayBuffer[this.replayBufferIdx] = experience;
        }
        this.replayBufferIdx = (this.replayBufferIdx + 1) % this.config.replayBufferSize;
        this.totalExperiences++;
    }
    /**
     * Perform prioritized experience replay
     * Samples mini-batch from buffer and updates Q-values
     */
    experienceReplay() {
        if (this.replayBuffer.length < this.config.replayBatchSize) {
            return;
        }
        // Prioritized sampling based on TD error magnitude
        const batch = this.samplePrioritizedBatch(this.config.replayBatchSize);
        for (const exp of batch) {
            const tdError = this.updateQValue(exp.stateKey, exp.actionIdx, exp.reward, exp.nextStateKey);
            // Update priority for future sampling
            exp.priority = Math.abs(tdError) + 0.01; // Small constant to avoid zero priority
        }
    }
    /**
     * Sample a prioritized batch from replay buffer
     * Uses proportional prioritization
     */
    samplePrioritizedBatch(batchSize) {
        const totalPriority = this.replayBuffer.reduce((sum, exp) => sum + exp.priority, 0);
        const batch = [];
        const selected = new Set();
        while (batch.length < batchSize && selected.size < this.replayBuffer.length) {
            let threshold = Math.random() * totalPriority;
            let cumSum = 0;
            for (let i = 0; i < this.replayBuffer.length; i++) {
                if (selected.has(i))
                    continue;
                cumSum += this.replayBuffer[i].priority;
                if (cumSum >= threshold) {
                    batch.push(this.replayBuffer[i]);
                    selected.add(i);
                    break;
                }
            }
        }
        return batch;
    }
    /**
     * Calculate epsilon using configured decay strategy
     */
    calculateEpsilon() {
        const { explorationInitial, explorationFinal, explorationDecay, explorationDecayType } = this.config;
        const progress = Math.min(this.stepCount / explorationDecay, 1.0);
        switch (explorationDecayType) {
            case 'linear':
                return explorationFinal + (explorationInitial - explorationFinal) * (1 - progress);
            case 'exponential':
                // Exponential decay: epsilon = final + (initial - final) * exp(-decay_rate * step)
                const decayRate = -Math.log((explorationFinal / explorationInitial) + 1e-8) / explorationDecay;
                return explorationFinal + (explorationInitial - explorationFinal) * Math.exp(-decayRate * this.stepCount);
            case 'cosine':
                // Cosine annealing: smooth transition
                return explorationFinal + (explorationInitial - explorationFinal) * 0.5 * (1 + Math.cos(Math.PI * progress));
            default:
                return Math.max(explorationFinal, explorationInitial - this.stepCount / explorationDecay);
        }
    }
    /**
     * Get statistics including cache and replay buffer metrics
     */
    getStats() {
        const cacheHitRate = this.cacheHits + this.cacheMisses > 0
            ? this.cacheHits / (this.cacheHits + this.cacheMisses)
            : 0;
        return {
            updateCount: this.updateCount,
            qTableSize: this.qTable.size,
            epsilon: this.epsilon,
            avgTDError: this.avgTDError,
            stepCount: this.stepCount,
            useNative: this.useNative ? 1 : 0,
            // Cache metrics
            cacheSize: this.routeCache.size,
            cacheHits: this.cacheHits,
            cacheMisses: this.cacheMisses,
            cacheHitRate,
            // Replay buffer metrics
            replayBufferSize: this.replayBuffer.length,
            totalExperiences: this.totalExperiences,
            // Feature hash cache
            featureHashCacheSize: this.featureHashCache.size,
        };
    }
    /**
     * Reset the router (clears all learned data)
     */
    reset() {
        this.qTable.clear();
        this.epsilon = this.config.explorationInitial;
        this.stepCount = 0;
        this.updateCount = 0;
        this.avgTDError = 0;
        // Reset replay buffer
        this.replayBuffer = [];
        this.replayBufferIdx = 0;
        this.totalExperiences = 0;
        // Reset cache
        this.routeCache.clear();
        this.cacheOrder = [];
        this.cacheHits = 0;
        this.cacheMisses = 0;
        // Reset feature hash cache
        this.featureHashCache.clear();
    }
    /**
     * Export Q-table for persistence
     */
    export() {
        const result = {};
        for (const [key, entry] of this.qTable) {
            result[key] = {
                qValues: Array.from(entry.qValues),
                visits: entry.visits,
            };
        }
        return result;
    }
    /**
     * Import Q-table from persistence
     */
    import(data) {
        this.qTable.clear();
        for (const [key, entry] of Object.entries(data)) {
            this.qTable.set(key, {
                qValues: new Float32Array(entry.qValues),
                visits: entry.visits,
                lastUpdate: Date.now(),
            });
        }
    }
    // Private methods
    /**
     * Legacy hash function (kept for backward compatibility)
     */
    hashState(context) {
        // Simple hash for context string
        let hash = 0;
        for (let i = 0; i < context.length; i++) {
            const char = context.charCodeAt(i);
            hash = ((hash << 5) - hash) + char;
            hash = hash & hash; // Convert to 32-bit integer
        }
        return `state_${hash}`;
    }
    /**
     * Optimized state hashing using feature extraction
     * Creates a more semantic representation of the task context
     */
    hashStateOptimized(context) {
        // Check feature hash cache first
        if (this.featureHashCache.has(context)) {
            const cached = this.featureHashCache.get(context);
            return this.featureVectorToKey(cached);
        }
        // Extract features from context
        const features = this.extractFeatures(context);
        // Cache the feature vector
        if (this.featureHashCache.size < 1000) { // Limit cache size
            this.featureHashCache.set(context, features);
        }
        return this.featureVectorToKey(features);
    }
    /**
     * Extract feature vector from task context
     * Uses keyword matching and n-gram hashing
     */
    extractFeatures(context) {
        const features = new Float32Array(this.config.stateSpaceDim);
        const lowerContext = context.toLowerCase();
        const words = lowerContext.split(/\s+/);
        // Feature 1-32: Keyword presence (binary features)
        for (let i = 0; i < FEATURE_KEYWORDS.length && i < 32; i++) {
            if (lowerContext.includes(FEATURE_KEYWORDS[i])) {
                features[i] = 1.0;
            }
        }
        // Feature 33-40: Context length buckets
        const lengthBucket = Math.min(Math.floor(context.length / 50), 7);
        features[32 + lengthBucket] = 1.0;
        // Feature 41-48: Word count buckets
        const wordBucket = Math.min(Math.floor(words.length / 5), 7);
        features[40 + wordBucket] = 1.0;
        // Feature 49-56: File extension hints
        const extPatterns = ['.ts', '.js', '.py', '.go', '.rs', '.java', '.md', '.json'];
        for (let i = 0; i < extPatterns.length; i++) {
            if (lowerContext.includes(extPatterns[i])) {
                features[48 + i] = 1.0;
            }
        }
        // Feature 57-64: N-gram hash features (for capturing unique patterns)
        for (let i = 0; i < words.length - 1 && i < 8; i++) {
            const bigram = `${words[i]}_${words[i + 1]}`;
            const hash = this.murmurhash3(bigram) % 8;
            features[56 + hash] += 0.25;
        }
        // Normalize features
        let norm = 0;
        for (let i = 0; i < features.length; i++) {
            norm += features[i] * features[i];
        }
        norm = Math.sqrt(norm) || 1;
        for (let i = 0; i < features.length; i++) {
            features[i] /= norm;
        }
        return features;
    }
    /**
     * Convert feature vector to state key
     * Uses locality-sensitive hashing for similar contexts
     */
    featureVectorToKey(features) {
        // Quantize features to create discrete state
        const quantized = [];
        for (let i = 0; i < features.length; i += 4) {
            let bucket = 0;
            for (let j = 0; j < 4 && i + j < features.length; j++) {
                if (features[i + j] > 0.25) {
                    bucket |= (1 << j);
                }
            }
            quantized.push(bucket);
        }
        // Create hash from quantized values
        let hash = 0;
        for (let i = 0; i < quantized.length; i++) {
            hash = ((hash << 4) ^ quantized[i]) & 0x7fffffff;
        }
        return `fstate_${hash.toString(36)}`;
    }
    /**
     * MurmurHash3 32-bit implementation for n-gram hashing
     */
    murmurhash3(str) {
        let h1 = 0xdeadbeef;
        const c1 = 0xcc9e2d51;
        const c2 = 0x1b873593;
        for (let i = 0; i < str.length; i++) {
            let k1 = str.charCodeAt(i);
            k1 = Math.imul(k1, c1);
            k1 = (k1 << 15) | (k1 >>> 17);
            k1 = Math.imul(k1, c2);
            h1 ^= k1;
            h1 = (h1 << 13) | (h1 >>> 19);
            h1 = Math.imul(h1, 5) + 0xe6546b64;
        }
        h1 ^= str.length;
        h1 ^= h1 >>> 16;
        h1 = Math.imul(h1, 0x85ebca6b);
        h1 ^= h1 >>> 13;
        h1 = Math.imul(h1, 0xc2b2ae35);
        h1 ^= h1 >>> 16;
        return h1 >>> 0;
    }
    getQValues(stateKey) {
        const entry = this.qTable.get(stateKey);
        if (!entry) {
            return new Array(this.config.numActions).fill(0);
        }
        return Array.from(entry.qValues);
    }
    getOrCreateEntry(stateKey) {
        let entry = this.qTable.get(stateKey);
        if (!entry) {
            entry = {
                qValues: new Float32Array(this.config.numActions),
                visits: 0,
                lastUpdate: Date.now(),
            };
            this.qTable.set(stateKey, entry);
        }
        return entry;
    }
    argmax(values) {
        let maxIdx = 0;
        let maxVal = values[0];
        for (let i = 1; i < values.length; i++) {
            if (values[i] > maxVal) {
                maxVal = values[i];
                maxIdx = i;
            }
        }
        return maxIdx;
    }
    softmaxConfidence(qValues, actionIdx) {
        const maxQ = Math.max(...qValues);
        const expValues = qValues.map(q => Math.exp(q - maxQ)); // Subtract max for numerical stability
        const sumExp = expValues.reduce((a, b) => a + b, 0);
        return expValues[actionIdx] / sumExp;
    }
    pruneQTable() {
        const entries = Array.from(this.qTable.entries())
            .sort((a, b) => a[1].lastUpdate - b[1].lastUpdate);
        const toRemove = entries.length - Math.floor(this.config.maxStates * 0.8);
        for (let i = 0; i < toRemove; i++) {
            this.qTable.delete(entries[i][0]);
        }
    }
}
/**
 * Factory function
 */
export function createQLearningRouter(config) {
    return new QLearningRouter(config);
}
//# sourceMappingURL=q-learning-router.js.map
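
For orientation, here is a minimal usage sketch of the API visible in this hunk (createQLearningRouter, initialize, route, update, getStats, saveModel). It is a hypothetical example, not taken from the package's documentation: the import path, task strings, and reward value are assumptions made for illustration only.

// Hypothetical usage sketch; import path and values are illustrative, not confirmed by this diff.
import { createQLearningRouter } from './dist/src/ruvector/q-learning-router.js';

async function demo() {
    // Start from DEFAULT_CONFIG; any field can be overridden here.
    const router = createQLearningRouter({ explorationDecayType: 'cosine' });
    await router.initialize(); // tries @ruvector/core, falls back to JS, then loads any saved model

    // Ask for a route. With explore=true, epsilon-greedy exploration may pick a random agent.
    const decision = router.route('implement unit tests for parser.ts', true);
    console.log(decision.route, decision.confidence, decision.alternatives);

    // Report how the chosen agent performed as a scalar reward (here 0.8);
    // this drives the Q-value update and prioritized experience replay.
    router.update('implement unit tests for parser.ts', decision.route, 0.8);

    console.log(router.getStats()); // epsilon, Q-table size, cache hit rate, replay buffer size, ...
    await router.saveModel();       // explicit save; .swarm/q-learning-model.json by default
}

demo().catch(console.error);

Note that update() also decays epsilon, prunes the Q-table once it exceeds maxStates, and auto-saves every autoSaveInterval updates, so a caller normally only needs route() and update(); the explicit saveModel() call above is just a clean-shutdown step.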