web-agent-bridge 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +12 -4
- package/public/commander-dashboard.html +243 -0
- package/public/css/premium.css +317 -317
- package/public/demo.html +259 -259
- package/public/index.html +644 -644
- package/public/mesh-dashboard.html +309 -382
- package/public/premium-dashboard.html +2487 -2487
- package/public/premium.html +791 -791
- package/public/script/wab.min.js +124 -87
- package/script/ai-agent-bridge.js +154 -84
- package/sdk/agent-mesh.js +287 -171
- package/sdk/commander.js +262 -0
- package/sdk/index.js +260 -260
- package/server/index.js +8 -1
- package/server/migrations/002_premium_features.sql +418 -418
- package/server/models/db.js +24 -5
- package/server/routes/admin-premium.js +671 -671
- package/server/routes/commander.js +316 -0
- package/server/routes/mesh.js +370 -201
- package/server/routes/premium-v2.js +686 -686
- package/server/routes/premium.js +724 -724
- package/server/services/agent-learning.js +230 -77
- package/server/services/agent-memory.js +625 -625
- package/server/services/agent-mesh.js +260 -67
- package/server/services/agent-symphony.js +548 -518
- package/server/services/commander.js +738 -0
- package/server/services/edge-compute.js +440 -0
- package/server/services/local-ai.js +389 -0
- package/server/services/plugins.js +747 -747
- package/server/services/self-healing.js +843 -843
- package/server/services/swarm.js +788 -788
- package/server/services/vision.js +871 -871
- package/public/admin/dashboard.html +0 -848
- package/public/admin/login.html +0 -84
- package/public/video/tutorial.mp4 +0 -0
|
@@ -9,10 +9,11 @@
|
|
|
9
9
|
* - Prediction accuracy over time
|
|
10
10
|
*
|
|
11
11
|
* Learning algorithms:
|
|
12
|
-
* - Multi-armed bandit for
|
|
13
|
-
* -
|
|
14
|
-
* -
|
|
15
|
-
* -
|
|
12
|
+
* - Multi-armed bandit (UCB1) for exploration/exploitation
|
|
13
|
+
* - Linear policy model with sigmoid activation and gradient descent
|
|
14
|
+
* - Temporal discount for preference freshness (recent > old)
|
|
15
|
+
* - Sequential pattern mining for behavior chains
|
|
16
|
+
* - Confidence estimation: weighted sum of volume (0.3), accuracy (0.5) and recency (0.2)
|
|
16
17
|
*/
|
|
17
18
|
|
|
18
19
|
const crypto = require('crypto');
|
|
@@ -70,7 +71,7 @@ db.exec(`
|
|
|
70
71
|
pulls INTEGER DEFAULT 0,
|
|
71
72
|
total_reward REAL DEFAULT 0.0,
|
|
72
73
|
avg_reward REAL DEFAULT 0.0,
|
|
73
|
-
ucb_score REAL DEFAULT
|
|
74
|
+
ucb_score REAL DEFAULT 0.0,
|
|
74
75
|
created_at TEXT DEFAULT (datetime('now')),
|
|
75
76
|
updated_at TEXT DEFAULT (datetime('now')),
|
|
76
77
|
UNIQUE(site_id, agent_id, domain, action)
|
|
@@ -98,40 +99,41 @@ db.exec(`
|
|
|
98
99
|
// ─── Config ──────────────────────────────────────────────────────────
|
|
99
100
|
|
|
100
101
|
const LEARNING_RATE = 0.1;
|
|
101
|
-
const DISCOUNT_FACTOR = 0.95;
|
|
102
|
-
const DECAY_RATE = 0.01;
|
|
103
|
-
const UCB_EXPLORATION = 1.414;
|
|
102
|
+
const DISCOUNT_FACTOR = 0.95; // Temporal discount per decision step
|
|
103
|
+
const DECAY_RATE = 0.01; // Recency decay per hour
|
|
104
|
+
const UCB_EXPLORATION = 1.414; // √2 for UCB1
|
|
104
105
|
const MIN_CONFIDENCE = 0.01;
|
|
105
106
|
const MAX_SEQUENCE_LENGTH = 5;
|
|
106
107
|
|
|
107
108
|
// ─── Prepared Statements ─────────────────────────────────────────────
|
|
108
109
|
|
|
109
110
|
const stmts = {
|
|
110
|
-
insertDecision: db.prepare(
|
|
111
|
-
updateOutcome: db.prepare(
|
|
112
|
-
getDecision: db.prepare(
|
|
113
|
-
getRecentDecisions: db.prepare(
|
|
114
|
-
getDecisionsByOutcome: db.prepare(
|
|
115
|
-
getAllDomainDecisions: db.prepare(
|
|
116
|
-
countDecisions: db.prepare(
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
111
|
+
insertDecision: db.prepare('INSERT INTO learning_decisions (id, site_id, agent_id, domain, action, context, predicted_reward, features) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'),
|
|
112
|
+
updateOutcome: db.prepare('UPDATE learning_decisions SET outcome = ?, reward = ? WHERE id = ?'),
|
|
113
|
+
getDecision: db.prepare('SELECT * FROM learning_decisions WHERE id = ?'),
|
|
114
|
+
getRecentDecisions: db.prepare('SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC LIMIT ?'),
|
|
115
|
+
getDecisionsByOutcome: db.prepare("SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = ? ORDER BY created_at DESC LIMIT ?"),
|
|
116
|
+
getAllDomainDecisions: db.prepare('SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC'),
|
|
117
|
+
countDecisions: db.prepare('SELECT COUNT(*) as count FROM learning_decisions WHERE site_id = ? AND agent_id = ?'),
|
|
118
|
+
getRecentRewards: db.prepare("SELECT reward, created_at FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending' ORDER BY created_at DESC LIMIT ?"),
|
|
119
|
+
|
|
120
|
+
upsertPolicy: db.prepare("INSERT INTO learning_policies (id, site_id, agent_id, domain, feature, weight) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, feature) DO UPDATE SET weight = ?, update_count = update_count + 1, last_error = ?, updated_at = datetime('now')"),
|
|
121
|
+
getPolicies: db.prepare('SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ABS(weight) DESC'),
|
|
122
|
+
getPolicy: db.prepare('SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? AND feature = ?'),
|
|
123
|
+
|
|
124
|
+
insertPattern: db.prepare('INSERT INTO learning_patterns (id, site_id, agent_id, pattern_type, sequence, confidence) VALUES (?, ?, ?, ?, ?, ?)'),
|
|
125
|
+
findPattern: db.prepare('SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND sequence = ?'),
|
|
126
|
+
updatePattern: db.prepare("UPDATE learning_patterns SET frequency = frequency + 1, confidence = ?, last_seen = datetime('now') WHERE id = ?"),
|
|
127
|
+
getTopPatterns: db.prepare('SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND pattern_type = ? ORDER BY frequency DESC, confidence DESC LIMIT ?'),
|
|
128
|
+
|
|
129
|
+
upsertArm: db.prepare('INSERT INTO learning_bandit_arms (id, site_id, agent_id, domain, action) VALUES (?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, action) DO NOTHING'),
|
|
130
|
+
getArms: db.prepare('SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ucb_score DESC'),
|
|
131
|
+
getArm: db.prepare('SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?'),
|
|
132
|
+
updateArm: db.prepare("UPDATE learning_bandit_arms SET pulls = pulls + 1, total_reward = total_reward + ?, avg_reward = ?, ucb_score = ?, updated_at = datetime('now') WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?"),
|
|
133
|
+
|
|
134
|
+
insertSession: db.prepare('INSERT INTO learning_sessions (id, site_id, agent_id) VALUES (?, ?, ?)'),
|
|
135
|
+
updateSession: db.prepare("UPDATE learning_sessions SET decisions_made = ?, correct_predictions = ?, accuracy = ?, ended_at = datetime('now') WHERE id = ?"),
|
|
136
|
+
getSessionHistory: db.prepare('SELECT * FROM learning_sessions WHERE site_id = ? AND agent_id = ? ORDER BY started_at DESC LIMIT ?'),
|
|
135
137
|
|
|
136
138
|
getStats: db.prepare(`SELECT
|
|
137
139
|
(SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ?) as total_decisions,
|
|
@@ -140,6 +142,11 @@ const stmts = {
|
|
|
140
142
|
(SELECT AVG(reward) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending') as avg_reward,
|
|
141
143
|
(SELECT COUNT(DISTINCT domain) FROM learning_policies WHERE site_id = ? AND agent_id = ?) as policy_domains,
|
|
142
144
|
(SELECT COUNT(*) FROM learning_patterns WHERE site_id = ? AND agent_id = ?) as total_patterns`),
|
|
145
|
+
|
|
146
|
+
deletePolicies: db.prepare('DELETE FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ?'),
|
|
147
|
+
deletePatterns: db.prepare('DELETE FROM learning_patterns WHERE site_id = ? AND agent_id = ?'),
|
|
148
|
+
deleteArms: db.prepare('DELETE FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ?'),
|
|
149
|
+
deleteDecisions: db.prepare('DELETE FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ?'),
|
|
143
150
|
};
|
|
144
151
|
|
|
145
152
|
// ─── Core Learning API ───────────────────────────────────────────────
|
|
@@ -149,9 +156,11 @@ const stmts = {
|
|
|
149
156
|
*/
|
|
150
157
|
function recordDecision(siteId, agentId, domain, action, context = {}, features = {}) {
|
|
151
158
|
const id = crypto.randomUUID();
|
|
152
|
-
const
|
|
159
|
+
const extractedFeatures = { ..._extractFeatures(context), ...features };
|
|
160
|
+
const predictedReward = _predict(siteId, agentId, domain, extractedFeatures);
|
|
153
161
|
|
|
154
|
-
stmts.insertDecision.run(id, siteId, agentId, domain, action,
|
|
162
|
+
stmts.insertDecision.run(id, siteId, agentId, domain, action,
|
|
163
|
+
JSON.stringify(context), predictedReward, JSON.stringify(extractedFeatures));
|
|
155
164
|
|
|
156
165
|
// Ensure bandit arm exists
|
|
157
166
|
stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
|
|
@@ -172,49 +181,106 @@ function feedback(decisionId, outcome, reward) {
|
|
|
172
181
|
const features = JSON.parse(decision.features || '{}');
|
|
173
182
|
const predError = reward - (decision.predicted_reward || 0);
|
|
174
183
|
|
|
175
|
-
// Update policy weights via gradient descent
|
|
184
|
+
// Update policy weights via gradient descent with temporal discount
|
|
176
185
|
_updatePolicies(decision.site_id, decision.agent_id, decision.domain, features, predError);
|
|
177
186
|
|
|
178
|
-
// Update bandit arm
|
|
187
|
+
// Update bandit arm with actual reward
|
|
179
188
|
_updateBanditArm(decision.site_id, decision.agent_id, decision.domain, decision.action, reward);
|
|
180
189
|
|
|
181
190
|
// Mine patterns from recent decisions
|
|
182
191
|
_minePatterns(decision.site_id, decision.agent_id, decision.domain);
|
|
183
192
|
|
|
184
193
|
return {
|
|
185
|
-
|
|
194
|
+
decisionId,
|
|
195
|
+
predictionError: Math.round(predError * 1000) / 1000,
|
|
186
196
|
updatedConfidence: _getConfidence(decision.site_id, decision.agent_id, decision.domain),
|
|
197
|
+
accuracy: Math.round((1 - Math.abs(predError)) * 1000) / 1000,
|
|
187
198
|
};
|
|
188
199
|
}
|
|
189
200
|
|
|
201
|
+
/**
 * Batch feedback — provide multiple outcomes at once.
 *
 * All entries are processed inside one outer transaction. Each entry is
 * additionally wrapped in its own nested transaction so that an entry whose
 * `feedback()` call throws has its partial writes undone instead of being
 * committed alongside the successful entries.
 * NOTE(review): relies on `db` being a better-sqlite3 Database, where a
 * transaction function invoked inside another one runs as a savepoint.
 *
 * @param {Array<{decisionId: string, outcome: string, reward: number}>} feedbackList
 *   Entries to apply, in order.
 * @returns {Array<object>} One result per entry, in input order: either the
 *   value returned by `feedback()` or `{ decisionId, error }` when it threw.
 */
function batchFeedback(feedbackList) {
  const results = [];

  // Per-entry unit of work; a throw here rolls back only this entry's writes.
  const applyOne = db.transaction((fb) => feedback(fb.decisionId, fb.outcome, fb.reward));

  const txn = db.transaction(() => {
    for (const fb of feedbackList) {
      try {
        results.push(applyOne(fb));
      } catch (err) {
        // `fb` itself may be null/undefined, so read the id defensively.
        results.push({ decisionId: fb?.decisionId, error: err.message });
      }
    }
  });
  txn();

  return results;
}
|
|
218
|
+
|
|
190
219
|
/**
 * Get the best action for a domain using learned policies + bandit scores.
 *
 * For every candidate action a policy prediction is blended with a bandit
 * score. Among arms that have been pulled, UCB scores are min-max normalized
 * to [0,1]; never-pulled arms get 1.0 so they are explored. When the pulled
 * arms share a single UCB value (no range to normalize over) the arm's raw
 * `avg_reward` is used instead — that value is not clamped.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @param {string[]} availableActions - Candidate actions to rank.
 * @param {object} [context={}] - Raw context passed to `_extractFeatures`.
 * @returns {{recommended: (string|undefined), rankings: object[], confidence: number,
 *   explorationLevel: string, patterns: object[]}} Rankings are sorted by
 *   blended score, highest first.
 */
function recommend(siteId, agentId, domain, availableActions, context = {}) {
  const features = _extractFeatures(context);

  // Index existing arms by action. A Map is used (not a plain object) so that
  // action names such as "constructor" cannot resolve to Object.prototype members.
  const allArms = stmts.getArms.all(siteId, agentId, domain);
  const armsByAction = new Map();
  for (const arm of allArms) armsByAction.set(arm.action, arm);

  // Normalization bounds come from arms that have been pulled at least once.
  let minUCB = Infinity;
  let maxUCB = -Infinity;
  for (const arm of allArms) {
    if (arm.pulls > 0) {
      if (arm.ucb_score < minUCB) minUCB = arm.ucb_score;
      if (arm.ucb_score > maxUCB) maxUCB = arm.ucb_score;
    }
  }
  const ucbRange = maxUCB - minUCB;

  // Confidence is the same for every action, so compute it once.
  // Blend: as confidence grows, lean more on policy, less on exploration.
  const confidence = _getConfidence(siteId, agentId, domain);
  const policyWeight = 0.4 + confidence * 0.4; // [0.4, 0.8]
  const banditWeight = 1 - policyWeight; // [0.2, 0.6]

  const scored = availableActions.map((action) => {
    const arm = armsByAction.get(action) || _getOrCreateArm(siteId, agentId, domain, action);
    const policyScore = _predict(siteId, agentId, domain, { ...features, [`action:${action}`]: 1 });

    let normalizedBandit;
    if (arm.pulls === 0) {
      normalizedBandit = 1.0; // unexplored arms get maximum exploration bonus
    } else if (ucbRange > 0) {
      normalizedBandit = (arm.ucb_score - minUCB) / ucbRange;
    } else {
      normalizedBandit = arm.avg_reward; // no UCB spread — use raw avg
    }

    const blended = policyWeight * policyScore + banditWeight * normalizedBandit;

    return {
      action,
      score: Math.round(blended * 1000) / 1000,
      policyScore: Math.round(policyScore * 1000) / 1000,
      banditScore: Math.round(normalizedBandit * 1000) / 1000,
      pulls: arm.pulls,
      avgReward: Math.round((arm.avg_reward || 0) * 1000) / 1000,
    };
  });

  scored.sort((a, b) => b.score - a.score);

  const topPatterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 5);

  return {
    recommended: scored[0]?.action || availableActions[0],
    rankings: scored,
    confidence,
    explorationLevel: confidence < 0.3 ? 'high' : confidence < 0.6 ? 'medium' : 'low',
    patterns: topPatterns.map((p) => ({
      sequence: p.sequence, frequency: p.frequency, confidence: p.confidence
    })),
  };
}
|
|
220
286
|
|
|
@@ -229,7 +295,7 @@ function getPreferences(siteId, agentId, domain) {
|
|
|
229
295
|
const accepted = decisions.filter((d) => d.outcome === 'accepted');
|
|
230
296
|
const rejected = decisions.filter((d) => d.outcome === 'rejected');
|
|
231
297
|
|
|
232
|
-
// Build preference profile
|
|
298
|
+
// Build preference profile from weights
|
|
233
299
|
const profile = {};
|
|
234
300
|
for (const p of policies) {
|
|
235
301
|
if (Math.abs(p.weight) > 0.05) {
|
|
@@ -242,17 +308,37 @@ function getPreferences(siteId, agentId, domain) {
|
|
|
242
308
|
}
|
|
243
309
|
}
|
|
244
310
|
|
|
311
|
+
// Compute action frequencies
|
|
312
|
+
const actionFreqs = {};
|
|
313
|
+
for (const d of decisions) {
|
|
314
|
+
actionFreqs[d.action] = (actionFreqs[d.action] || 0) + 1;
|
|
315
|
+
}
|
|
316
|
+
|
|
245
317
|
return {
|
|
246
318
|
domain,
|
|
247
319
|
profile,
|
|
248
|
-
acceptRate: decisions.length > 0 ? accepted.length / decisions.length : 0,
|
|
320
|
+
acceptRate: decisions.length > 0 ? Math.round((accepted.length / decisions.length) * 1000) / 1000 : 0,
|
|
321
|
+
rejectRate: decisions.length > 0 ? Math.round((rejected.length / decisions.length) * 1000) / 1000 : 0,
|
|
249
322
|
totalDecisions: decisions.length,
|
|
250
|
-
avgReward: decisions.length > 0
|
|
323
|
+
avgReward: decisions.length > 0
|
|
324
|
+
? Math.round((decisions.reduce((s, d) => s + d.reward, 0) / decisions.length) * 1000) / 1000
|
|
325
|
+
: 0,
|
|
326
|
+
topActions: Object.entries(actionFreqs)
|
|
327
|
+
.sort(([, a], [, b]) => b - a)
|
|
328
|
+
.slice(0, 5)
|
|
329
|
+
.map(([action, count]) => ({ action, count, percentage: Math.round((count / decisions.length) * 100) })),
|
|
251
330
|
topPatterns: patterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency })),
|
|
252
331
|
confidence: _getConfidence(siteId, agentId, domain),
|
|
253
332
|
};
|
|
254
333
|
}
|
|
255
334
|
|
|
335
|
+
/**
 * Reward history for charting.
 *
 * Reads the newest `limit` non-pending decisions (reward + created_at) for
 * the site/agent pair and hands them back oldest-first.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {number} [limit=30] - Maximum number of rows to read.
 * @returns {Array<{reward: number, created_at: string}>}
 */
function getRewardHistory(siteId, agentId, limit = 30) {
  // The statement orders newest-first; flip in place for chronological order.
  const rows = stmts.getRecentRewards.all(siteId, agentId, limit);
  rows.reverse();
  return rows;
}
|
|
341
|
+
|
|
256
342
|
// ─── Learning Sessions ───────────────────────────────────────────────
|
|
257
343
|
|
|
258
344
|
function startSession(siteId, agentId) {
|
|
@@ -264,7 +350,30 @@ function startSession(siteId, agentId) {
|
|
|
264
350
|
/**
 * Close a learning session and persist its tallies.
 *
 * The unrounded accuracy is what gets stored; the returned value is rounded
 * to three decimals.
 *
 * @param {string} sessionId
 * @param {number} decisionsMade
 * @param {number} correctPredictions
 * @returns {{accuracy: number}}
 */
function endSession(sessionId, decisionsMade, correctPredictions) {
  let accuracy = 0;
  if (decisionsMade > 0) {
    accuracy = correctPredictions / decisionsMade;
  }

  stmts.updateSession.run(decisionsMade, correctPredictions, accuracy, sessionId);

  const rounded = Math.round(accuracy * 1000) / 1000;
  return { accuracy: rounded };
}
|
|
355
|
+
|
|
356
|
+
// ─── Reset ───────────────────────────────────────────────────────────
|
|
357
|
+
|
|
358
|
+
/**
 * Reset learned data for one domain of a site/agent pair.
 *
 * Removes the domain's policies, bandit arms and decisions in a single
 * transaction. Patterns are not touched here.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @returns {{reset: boolean, domain: string}}
 */
function resetDomain(siteId, agentId, domain) {
  const scope = [siteId, agentId, domain];

  const wipe = db.transaction(() => {
    stmts.deletePolicies.run(...scope);
    stmts.deleteArms.run(...scope);
    stmts.deleteDecisions.run(...scope);
  });
  wipe();

  return { reset: true, domain };
}
|
|
370
|
+
|
|
371
|
+
/**
 * Delete every stored pattern belonging to a site/agent pair.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @returns {{reset: boolean}}
 */
function resetPatterns(siteId, agentId) {
  const { deletePatterns } = stmts;
  deletePatterns.run(siteId, agentId);

  const result = { reset: true };
  return result;
}
|
|
269
378
|
|
|
270
379
|
// ─── Stats ───────────────────────────────────────────────────────────
|
|
@@ -273,12 +382,17 @@ function getStats(siteId, agentId) {
|
|
|
273
382
|
const row = stmts.getStats.get(siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId);
|
|
274
383
|
const sessions = stmts.getSessionHistory.all(siteId, agentId, 10);
|
|
275
384
|
const recentAccuracy = sessions.length > 0 ? sessions.reduce((s, sess) => s + sess.accuracy, 0) / sessions.length : 0;
|
|
385
|
+
const rewardHistory = stmts.getRecentRewards.all(siteId, agentId, 30).reverse();
|
|
276
386
|
|
|
277
387
|
return {
|
|
278
388
|
...row,
|
|
389
|
+
avg_reward: row.avg_reward !== null ? Math.round(row.avg_reward * 1000) / 1000 : 0,
|
|
279
390
|
recentAccuracy: Math.round(recentAccuracy * 1000) / 1000,
|
|
280
391
|
sessionsCount: sessions.length,
|
|
281
|
-
acceptRate: row.total_decisions > 0
|
|
392
|
+
acceptRate: row.total_decisions > 0
|
|
393
|
+
? Math.round((row.accepted / row.total_decisions) * 1000) / 1000
|
|
394
|
+
: 0,
|
|
395
|
+
rewardHistory,
|
|
282
396
|
};
|
|
283
397
|
}
|
|
284
398
|
|
|
@@ -289,11 +403,16 @@ function _predict(siteId, agentId, domain, features) {
|
|
|
289
403
|
if (policies.length === 0) return 0.5; // No data yet — neutral prediction
|
|
290
404
|
|
|
291
405
|
let score = 0;
|
|
406
|
+
let matchedFeatures = 0;
|
|
292
407
|
for (const p of policies) {
|
|
293
408
|
const featureVal = features[p.feature];
|
|
294
409
|
if (featureVal !== undefined) {
|
|
295
410
|
const fv = typeof featureVal === 'number' ? featureVal : (featureVal ? 1 : 0);
|
|
296
|
-
|
|
411
|
+
|
|
412
|
+
// Discount sparsely-updated policies: scale by DISCOUNT_FACTOR^(10 - update_count) until 10 updates (update_count of 0 is left unscaled)
|
|
413
|
+
const recencyBoost = p.update_count > 0 ? Math.pow(DISCOUNT_FACTOR, Math.max(0, 10 - p.update_count)) : 1;
|
|
414
|
+
score += p.weight * fv * recencyBoost;
|
|
415
|
+
matchedFeatures++;
|
|
297
416
|
}
|
|
298
417
|
}
|
|
299
418
|
|
|
@@ -304,14 +423,21 @@ function _predict(siteId, agentId, domain, features) {
|
|
|
304
423
|
/**
 * One gradient step on the policy weights of a domain.
 *
 * For each non-zero feature: decay the stored weight by DISCOUNT_FACTOR, add
 * error * featureValue * LEARNING_RATE, clamp to [-5, 5] and upsert. The
 * absolute error is passed to the upsert as `last_error`.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @param {object} features - Feature name → value; non-numbers are treated as 0/1.
 * @param {number} error - Actual reward minus predicted reward.
 */
function _updatePolicies(siteId, agentId, domain, features, error) {
  Object.entries(features).forEach(([feature, value]) => {
    let fv;
    if (typeof value === 'number') {
      fv = value;
    } else {
      fv = value ? 1 : 0;
    }
    // A zero-valued feature contributes no gradient, so leave its weight alone.
    if (fv === 0) return;

    const gradient = error * fv * LEARNING_RATE;
    const stored = stmts.getPolicy.get(siteId, agentId, domain, feature);

    // Decay the previous weight before applying the step to limit growth.
    const decayed = stored ? stored.weight * DISCOUNT_FACTOR : 0;
    const stepped = decayed + gradient;
    const clampedWeight = Math.max(-5, Math.min(5, stepped));

    // Weight appears twice: once for INSERT, once for the ON CONFLICT update.
    stmts.upsertPolicy.run(
      crypto.randomUUID(), siteId, agentId, domain, feature, clampedWeight,
      clampedWeight, Math.abs(error)
    );
  });
}
|
|
@@ -320,24 +446,30 @@ function _updatePolicies(siteId, agentId, domain, features, error) {
|
|
|
320
446
|
|
|
321
447
|
/**
 * Make sure a bandit arm row exists for the action and return it.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @param {string} action
 * @returns {object} The stored arm row, or a zeroed placeholder if the row
 *   cannot be read back.
 */
function _getOrCreateArm(siteId, agentId, domain, action) {
  // Insert is a no-op when the arm already exists (ON CONFLICT DO NOTHING).
  stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);

  const row = stmts.getArm.get(siteId, agentId, domain, action);
  if (row) {
    return row;
  }
  return { pulls: 0, ucb_score: 0, avg_reward: 0, total_reward: 0 };
}
|
|
326
452
|
|
|
327
453
|
/**
 * Record one observed reward on a bandit arm and refresh its UCB1 score.
 *
 * If the arm row does not exist yet it is created and the reward is then
 * applied to it, so the first observation for an arm is never dropped.
 *
 * UCB1: avg_reward + C * sqrt(ln(N) / n_i), where N is the total number of
 * pulls across all arms of the domain (including this one) and n_i is this
 * arm's pull count after the update.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @param {string} action
 * @param {number} reward - Observed reward for this pull.
 */
function _updateBanditArm(siteId, agentId, domain, action, reward) {
  let arm = stmts.getArm.get(siteId, agentId, domain, action);
  if (!arm) {
    stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
    // Fall back to a zeroed arm if the row cannot be read back.
    arm = stmts.getArm.get(siteId, agentId, domain, action) || { pulls: 0, total_reward: 0 };
  }

  const newPulls = arm.pulls + 1;
  const newTotalReward = arm.total_reward + reward;
  const newAvgReward = newTotalReward / newPulls;

  // Total pulls across all arms in this domain, +1 for the pull being recorded.
  const arms = stmts.getArms.all(siteId, agentId, domain);
  const totalPulls = arms.reduce((s, a) => s + a.pulls, 0) + 1;

  const exploration = UCB_EXPLORATION * Math.sqrt(Math.log(totalPulls) / newPulls);
  const ucbScore = newAvgReward + exploration;

  // The statement itself increments pulls and total_reward; pass only the delta.
  stmts.updateArm.run(reward, newAvgReward, ucbScore, siteId, agentId, domain, action);
}
|
|
342
474
|
|
|
343
475
|
// ─── Internal: Pattern Mining ────────────────────────────────────────
|
|
@@ -352,6 +484,7 @@ function _minePatterns(siteId, agentId, domain) {
|
|
|
352
484
|
const existing = stmts.findPattern.get(siteId, agentId, sequence);
|
|
353
485
|
|
|
354
486
|
if (existing) {
|
|
487
|
+
// Asymptotic approach to 1.0 — confidence grows slower as it increases
|
|
355
488
|
const newConf = Math.min(0.99, existing.confidence + 0.05 * (1 - existing.confidence));
|
|
356
489
|
stmts.updatePattern.run(newConf, existing.id);
|
|
357
490
|
} else {
|
|
@@ -365,10 +498,25 @@ function _minePatterns(siteId, agentId, domain) {
|
|
|
365
498
|
function _extractFeatures(context) {
|
|
366
499
|
const features = {};
|
|
367
500
|
|
|
368
|
-
if (context.price !== undefined)
|
|
501
|
+
if (context.price !== undefined) {
|
|
502
|
+
features.price = context.price;
|
|
503
|
+
// Bucketize price for discrete learning
|
|
504
|
+
if (context.price < 10) features['price_bucket:cheap'] = 1;
|
|
505
|
+
else if (context.price < 50) features['price_bucket:moderate'] = 1;
|
|
506
|
+
else if (context.price < 200) features['price_bucket:premium'] = 1;
|
|
507
|
+
else features['price_bucket:luxury'] = 1;
|
|
508
|
+
}
|
|
369
509
|
if (context.quantity !== undefined) features.quantity = context.quantity;
|
|
370
|
-
if (context.discount !== undefined)
|
|
510
|
+
if (context.discount !== undefined) {
|
|
511
|
+
features.discount = context.discount;
|
|
512
|
+
features.has_discount = context.discount > 0 ? 1 : 0;
|
|
513
|
+
}
|
|
514
|
+
if (context.rating !== undefined) {
|
|
515
|
+
features.rating = context.rating;
|
|
516
|
+
features.high_rated = context.rating >= 4.0 ? 1 : 0;
|
|
517
|
+
}
|
|
371
518
|
if (context.category) features[`category:${context.category}`] = 1;
|
|
519
|
+
if (context.brand) features[`brand:${context.brand}`] = 1;
|
|
372
520
|
if (context.timeOfDay !== undefined) {
|
|
373
521
|
features.morning = context.timeOfDay < 12 ? 1 : 0;
|
|
374
522
|
features.afternoon = context.timeOfDay >= 12 && context.timeOfDay < 18 ? 1 : 0;
|
|
@@ -376,8 +524,9 @@ function _extractFeatures(context) {
|
|
|
376
524
|
}
|
|
377
525
|
if (context.isRepeat !== undefined) features.repeat_visit = context.isRepeat ? 1 : 0;
|
|
378
526
|
if (context.urgency !== undefined) features.urgency = context.urgency;
|
|
527
|
+
if (context.inStock !== undefined) features.in_stock = context.inStock ? 1 : 0;
|
|
379
528
|
|
|
380
|
-
// Pass through any raw features
|
|
529
|
+
// Pass through any raw numeric features
|
|
381
530
|
for (const [k, v] of Object.entries(context)) {
|
|
382
531
|
if (features[k] === undefined && typeof v === 'number') {
|
|
383
532
|
features[k] = v;
|
|
@@ -396,9 +545,10 @@ function _getConfidence(siteId, agentId, domain) {
|
|
|
396
545
|
const withOutcome = decisions.filter((d) => d.outcome !== 'pending');
|
|
397
546
|
if (withOutcome.length === 0) return MIN_CONFIDENCE;
|
|
398
547
|
|
|
399
|
-
//
|
|
548
|
+
// Volume component: linear in the number of resolved decisions, saturates at 30
|
|
400
549
|
const volumeConf = Math.min(1, withOutcome.length / 30);
|
|
401
550
|
|
|
551
|
+
// Accuracy component: how close predictions were to actual rewards
|
|
402
552
|
let accuracySum = 0;
|
|
403
553
|
for (const d of withOutcome) {
|
|
404
554
|
if (d.predicted_reward !== null) {
|
|
@@ -408,15 +558,18 @@ function _getConfidence(siteId, agentId, domain) {
|
|
|
408
558
|
}
|
|
409
559
|
const accuracyConf = withOutcome.length > 0 ? accuracySum / withOutcome.length : 0.5;
|
|
410
560
|
|
|
411
|
-
// Recency
|
|
561
|
+
// Recency component: exponential decay based on age of newest data
|
|
412
562
|
const latestTs = new Date(withOutcome[0].created_at).getTime();
|
|
413
563
|
const ageHours = (Date.now() - latestTs) / 3600000;
|
|
414
564
|
const recencyConf = Math.exp(-DECAY_RATE * ageHours);
|
|
415
565
|
|
|
416
|
-
return Math.max(MIN_CONFIDENCE, Math.min(0.99,
|
|
566
|
+
return Math.max(MIN_CONFIDENCE, Math.min(0.99,
|
|
567
|
+
volumeConf * 0.3 + accuracyConf * 0.5 + recencyConf * 0.2
|
|
568
|
+
));
|
|
417
569
|
}
|
|
418
570
|
|
|
419
571
|
module.exports = {
|
|
420
|
-
recordDecision, feedback, recommend, getPreferences,
|
|
421
|
-
startSession, endSession,
|
|
572
|
+
recordDecision, feedback, batchFeedback, recommend, getPreferences,
|
|
573
|
+
getRewardHistory, startSession, endSession,
|
|
574
|
+
resetDomain, resetPatterns, getStats,
|
|
422
575
|
};
|