web-agent-bridge 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/package.json +12 -4
  2. package/public/commander-dashboard.html +243 -0
  3. package/public/css/premium.css +317 -317
  4. package/public/demo.html +259 -259
  5. package/public/index.html +644 -644
  6. package/public/mesh-dashboard.html +309 -382
  7. package/public/premium-dashboard.html +2487 -2487
  8. package/public/premium.html +791 -791
  9. package/public/script/wab.min.js +124 -87
  10. package/script/ai-agent-bridge.js +154 -84
  11. package/sdk/agent-mesh.js +287 -171
  12. package/sdk/commander.js +262 -0
  13. package/sdk/index.js +260 -260
  14. package/server/index.js +8 -1
  15. package/server/migrations/002_premium_features.sql +418 -418
  16. package/server/models/db.js +24 -5
  17. package/server/routes/admin-premium.js +671 -671
  18. package/server/routes/commander.js +316 -0
  19. package/server/routes/mesh.js +370 -201
  20. package/server/routes/premium-v2.js +686 -686
  21. package/server/routes/premium.js +724 -724
  22. package/server/services/agent-learning.js +230 -77
  23. package/server/services/agent-memory.js +625 -625
  24. package/server/services/agent-mesh.js +260 -67
  25. package/server/services/agent-symphony.js +548 -518
  26. package/server/services/commander.js +738 -0
  27. package/server/services/edge-compute.js +440 -0
  28. package/server/services/local-ai.js +389 -0
  29. package/server/services/plugins.js +747 -747
  30. package/server/services/self-healing.js +843 -843
  31. package/server/services/swarm.js +788 -788
  32. package/server/services/vision.js +871 -871
  33. package/public/admin/dashboard.html +0 -848
  34. package/public/admin/login.html +0 -84
  35. package/public/video/tutorial.mp4 +0 -0
@@ -9,10 +9,11 @@
9
9
  * - Prediction accuracy over time
10
10
  *
11
11
  * Learning algorithms:
12
- * - Multi-armed bandit for action selection
13
- * - Exponential decay for preference freshness
14
- * - Bayesian confidence updates
15
- * - Pattern sequence mining for behavior chains
12
+ * - Multi-armed bandit (UCB1) for exploration/exploitation
13
+ * - Linear policy model with sigmoid activation and gradient descent
14
+ * - Temporal discount for preference freshness (recent > old)
15
+ * - Sequential pattern mining for behavior chains
16
+ * - Confidence estimation: volume × accuracy × recency
16
17
  */
17
18
 
18
19
  const crypto = require('crypto');
@@ -70,7 +71,7 @@ db.exec(`
70
71
  pulls INTEGER DEFAULT 0,
71
72
  total_reward REAL DEFAULT 0.0,
72
73
  avg_reward REAL DEFAULT 0.0,
73
- ucb_score REAL DEFAULT 1000.0,
74
+ ucb_score REAL DEFAULT 0.0,
74
75
  created_at TEXT DEFAULT (datetime('now')),
75
76
  updated_at TEXT DEFAULT (datetime('now')),
76
77
  UNIQUE(site_id, agent_id, domain, action)
@@ -98,40 +99,41 @@ db.exec(`
98
99
  // ─── Config ──────────────────────────────────────────────────────────
99
100
 
100
101
  const LEARNING_RATE = 0.1;
101
- const DISCOUNT_FACTOR = 0.95;
102
- const DECAY_RATE = 0.01;
103
- const UCB_EXPLORATION = 1.414;
102
+ const DISCOUNT_FACTOR = 0.95; // Temporal discount per decision step
103
+ const DECAY_RATE = 0.01; // Recency decay per hour
104
+ const UCB_EXPLORATION = 1.414; // √2 for UCB1
104
105
  const MIN_CONFIDENCE = 0.01;
105
106
  const MAX_SEQUENCE_LENGTH = 5;
106
107
 
107
108
  // ─── Prepared Statements ─────────────────────────────────────────────
108
109
 
109
110
  const stmts = {
110
- insertDecision: db.prepare(`INSERT INTO learning_decisions (id, site_id, agent_id, domain, action, context, predicted_reward, features) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`),
111
- updateOutcome: db.prepare(`UPDATE learning_decisions SET outcome = ?, reward = ? WHERE id = ?`),
112
- getDecision: db.prepare(`SELECT * FROM learning_decisions WHERE id = ?`),
113
- getRecentDecisions: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC LIMIT ?`),
114
- getDecisionsByOutcome: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = ? ORDER BY created_at DESC LIMIT ?`),
115
- getAllDomainDecisions: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC`),
116
- countDecisions: db.prepare(`SELECT COUNT(*) as count FROM learning_decisions WHERE site_id = ? AND agent_id = ?`),
117
-
118
- upsertPolicy: db.prepare(`INSERT INTO learning_policies (id, site_id, agent_id, domain, feature, weight) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, feature) DO UPDATE SET weight = ?, update_count = update_count + 1, last_error = ?, updated_at = datetime('now')`),
119
- getPolicies: db.prepare(`SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ABS(weight) DESC`),
120
- getPolicy: db.prepare(`SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? AND feature = ?`),
121
-
122
- insertPattern: db.prepare(`INSERT INTO learning_patterns (id, site_id, agent_id, pattern_type, sequence, confidence) VALUES (?, ?, ?, ?, ?, ?)`),
123
- findPattern: db.prepare(`SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND sequence = ?`),
124
- updatePattern: db.prepare(`UPDATE learning_patterns SET frequency = frequency + 1, confidence = ?, last_seen = datetime('now') WHERE id = ?`),
125
- getTopPatterns: db.prepare(`SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND pattern_type = ? ORDER BY frequency DESC, confidence DESC LIMIT ?`),
126
-
127
- upsertArm: db.prepare(`INSERT INTO learning_bandit_arms (id, site_id, agent_id, domain, action) VALUES (?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, action) DO NOTHING`),
128
- getArms: db.prepare(`SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ucb_score DESC`),
129
- updateArm: db.prepare(`UPDATE learning_bandit_arms SET pulls = pulls + 1, total_reward = total_reward + ?, avg_reward = (total_reward + ?) / (pulls + 1), ucb_score = ?, updated_at = datetime('now') WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?`),
130
- getTotalPulls: db.prepare(`SELECT SUM(pulls) as total FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ?`),
131
-
132
- insertSession: db.prepare(`INSERT INTO learning_sessions (id, site_id, agent_id) VALUES (?, ?, ?)`),
133
- updateSession: db.prepare(`UPDATE learning_sessions SET decisions_made = ?, correct_predictions = ?, accuracy = ?, ended_at = datetime('now') WHERE id = ?`),
134
- getSessionHistory: db.prepare(`SELECT * FROM learning_sessions WHERE site_id = ? AND agent_id = ? ORDER BY started_at DESC LIMIT ?`),
111
+ insertDecision: db.prepare('INSERT INTO learning_decisions (id, site_id, agent_id, domain, action, context, predicted_reward, features) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'),
112
+ updateOutcome: db.prepare('UPDATE learning_decisions SET outcome = ?, reward = ? WHERE id = ?'),
113
+ getDecision: db.prepare('SELECT * FROM learning_decisions WHERE id = ?'),
114
+ getRecentDecisions: db.prepare('SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC LIMIT ?'),
115
+ getDecisionsByOutcome: db.prepare("SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = ? ORDER BY created_at DESC LIMIT ?"),
116
+ getAllDomainDecisions: db.prepare('SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC'),
117
+ countDecisions: db.prepare('SELECT COUNT(*) as count FROM learning_decisions WHERE site_id = ? AND agent_id = ?'),
118
+ getRecentRewards: db.prepare("SELECT reward, created_at FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending' ORDER BY created_at DESC LIMIT ?"),
119
+
120
+ upsertPolicy: db.prepare("INSERT INTO learning_policies (id, site_id, agent_id, domain, feature, weight) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, feature) DO UPDATE SET weight = ?, update_count = update_count + 1, last_error = ?, updated_at = datetime('now')"),
121
+ getPolicies: db.prepare('SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ABS(weight) DESC'),
122
+ getPolicy: db.prepare('SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? AND feature = ?'),
123
+
124
+ insertPattern: db.prepare('INSERT INTO learning_patterns (id, site_id, agent_id, pattern_type, sequence, confidence) VALUES (?, ?, ?, ?, ?, ?)'),
125
+ findPattern: db.prepare('SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND sequence = ?'),
126
+ updatePattern: db.prepare("UPDATE learning_patterns SET frequency = frequency + 1, confidence = ?, last_seen = datetime('now') WHERE id = ?"),
127
+ getTopPatterns: db.prepare('SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND pattern_type = ? ORDER BY frequency DESC, confidence DESC LIMIT ?'),
128
+
129
+ upsertArm: db.prepare('INSERT INTO learning_bandit_arms (id, site_id, agent_id, domain, action) VALUES (?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, action) DO NOTHING'),
130
+ getArms: db.prepare('SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ucb_score DESC'),
131
+ getArm: db.prepare('SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?'),
132
+ updateArm: db.prepare("UPDATE learning_bandit_arms SET pulls = pulls + 1, total_reward = total_reward + ?, avg_reward = ?, ucb_score = ?, updated_at = datetime('now') WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?"),
133
+
134
+ insertSession: db.prepare('INSERT INTO learning_sessions (id, site_id, agent_id) VALUES (?, ?, ?)'),
135
+ updateSession: db.prepare("UPDATE learning_sessions SET decisions_made = ?, correct_predictions = ?, accuracy = ?, ended_at = datetime('now') WHERE id = ?"),
136
+ getSessionHistory: db.prepare('SELECT * FROM learning_sessions WHERE site_id = ? AND agent_id = ? ORDER BY started_at DESC LIMIT ?'),
135
137
 
136
138
  getStats: db.prepare(`SELECT
137
139
  (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ?) as total_decisions,
@@ -140,6 +142,11 @@ const stmts = {
140
142
  (SELECT AVG(reward) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending') as avg_reward,
141
143
  (SELECT COUNT(DISTINCT domain) FROM learning_policies WHERE site_id = ? AND agent_id = ?) as policy_domains,
142
144
  (SELECT COUNT(*) FROM learning_patterns WHERE site_id = ? AND agent_id = ?) as total_patterns`),
145
+
146
+ deletePolicies: db.prepare('DELETE FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ?'),
147
+ deletePatterns: db.prepare('DELETE FROM learning_patterns WHERE site_id = ? AND agent_id = ?'),
148
+ deleteArms: db.prepare('DELETE FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ?'),
149
+ deleteDecisions: db.prepare('DELETE FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ?'),
143
150
  };
144
151
 
145
152
  // ─── Core Learning API ───────────────────────────────────────────────
@@ -149,9 +156,11 @@ const stmts = {
149
156
  */
150
157
  function recordDecision(siteId, agentId, domain, action, context = {}, features = {}) {
151
158
  const id = crypto.randomUUID();
152
- const predictedReward = _predict(siteId, agentId, domain, features);
159
+ const extractedFeatures = { ..._extractFeatures(context), ...features };
160
+ const predictedReward = _predict(siteId, agentId, domain, extractedFeatures);
153
161
 
154
- stmts.insertDecision.run(id, siteId, agentId, domain, action, JSON.stringify(context), predictedReward, JSON.stringify(features));
162
+ stmts.insertDecision.run(id, siteId, agentId, domain, action,
163
+ JSON.stringify(context), predictedReward, JSON.stringify(extractedFeatures));
155
164
 
156
165
  // Ensure bandit arm exists
157
166
  stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
@@ -172,49 +181,106 @@ function feedback(decisionId, outcome, reward) {
172
181
  const features = JSON.parse(decision.features || '{}');
173
182
  const predError = reward - (decision.predicted_reward || 0);
174
183
 
175
- // Update policy weights via gradient descent
184
+ // Update policy weights via gradient descent with temporal discount
176
185
  _updatePolicies(decision.site_id, decision.agent_id, decision.domain, features, predError);
177
186
 
178
- // Update bandit arm
187
+ // Update bandit arm with actual reward
179
188
  _updateBanditArm(decision.site_id, decision.agent_id, decision.domain, decision.action, reward);
180
189
 
181
190
  // Mine patterns from recent decisions
182
191
  _minePatterns(decision.site_id, decision.agent_id, decision.domain);
183
192
 
184
193
  return {
185
- predictionError: predError,
194
+ decisionId,
195
+ predictionError: Math.round(predError * 1000) / 1000,
186
196
  updatedConfidence: _getConfidence(decision.site_id, decision.agent_id, decision.domain),
197
+ accuracy: Math.round((1 - Math.abs(predError)) * 1000) / 1000,
187
198
  };
188
199
  }
189
200
 
201
+ /**
202
+ * Batch feedback — provide multiple outcomes at once.
203
+ */
204
+ function batchFeedback(feedbackList) {
205
+ const results = [];
206
+ const txn = db.transaction(() => {
207
+ for (const fb of feedbackList) {
208
+ try {
209
+ results.push(feedback(fb.decisionId, fb.outcome, fb.reward));
210
+ } catch (err) {
211
+ results.push({ decisionId: fb.decisionId, error: err.message });
212
+ }
213
+ }
214
+ });
215
+ txn();
216
+ return results;
217
+ }
218
+
190
219
  /**
191
220
  * Get the best action for a domain using learned policies + bandit scores.
221
+ * UCB scores are normalized to [0,1] before blending with policy prediction.
192
222
  */
193
223
  function recommend(siteId, agentId, domain, availableActions, context = {}) {
194
224
  const features = _extractFeatures(context);
195
225
 
196
- // Score each action
197
- const scored = availableActions.map((action) => {
198
- const arm = _getOrCreateArm(siteId, agentId, domain, action);
199
- const policyScore = _predict(siteId, agentId, domain, { ...features, action });
200
- const banditScore = arm.ucb_score || 0;
226
+ // Get all arms to find normalization bounds
227
+ const allArms = stmts.getArms.all(siteId, agentId, domain);
228
+ const armMap = {};
229
+ for (const arm of allArms) armMap[arm.action] = arm;
230
+
231
+ // Normalize UCB scores to [0,1]
232
+ let minUCB = Infinity, maxUCB = -Infinity;
233
+ for (const arm of allArms) {
234
+ if (arm.pulls > 0) {
235
+ if (arm.ucb_score < minUCB) minUCB = arm.ucb_score;
236
+ if (arm.ucb_score > maxUCB) maxUCB = arm.ucb_score;
237
+ }
238
+ }
239
+ const ucbRange = maxUCB - minUCB;
201
240
 
202
- // Blend policy prediction with bandit exploration
203
- const blended = 0.6 * policyScore + 0.4 * banditScore;
241
+ const scored = availableActions.map((action) => {
242
+ const arm = armMap[action] || _getOrCreateArm(siteId, agentId, domain, action);
243
+ const policyScore = _predict(siteId, agentId, domain, { ...features, [`action:${action}`]: 1 });
244
+
245
+ // Normalize bandit score to [0,1]
246
+ let normalizedBandit;
247
+ if (arm.pulls === 0) {
248
+ normalizedBandit = 1.0; // unexplored arms get maximum exploration bonus
249
+ } else if (ucbRange > 0) {
250
+ normalizedBandit = (arm.ucb_score - minUCB) / ucbRange;
251
+ } else {
252
+ normalizedBandit = arm.avg_reward; // single arm — use raw avg
253
+ }
204
254
 
205
- return { action, score: blended, policyScore, banditScore, pulls: arm.pulls };
255
+ // Blend: as confidence grows, lean more on policy, less on exploration
256
+ const confidence = _getConfidence(siteId, agentId, domain);
257
+ const policyWeight = 0.4 + confidence * 0.4; // [0.4, 0.8]
258
+ const banditWeight = 1 - policyWeight; // [0.2, 0.6]
259
+ const blended = policyWeight * policyScore + banditWeight * normalizedBandit;
260
+
261
+ return {
262
+ action,
263
+ score: Math.round(blended * 1000) / 1000,
264
+ policyScore: Math.round(policyScore * 1000) / 1000,
265
+ banditScore: Math.round(normalizedBandit * 1000) / 1000,
266
+ pulls: arm.pulls,
267
+ avgReward: Math.round((arm.avg_reward || 0) * 1000) / 1000,
268
+ };
206
269
  });
207
270
 
208
271
  scored.sort((a, b) => b.score - a.score);
209
272
 
210
273
  const confidence = _getConfidence(siteId, agentId, domain);
211
- const topPatterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 3);
274
+ const topPatterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 5);
212
275
 
213
276
  return {
214
277
  recommended: scored[0]?.action || availableActions[0],
215
278
  rankings: scored,
216
279
  confidence,
217
- patterns: topPatterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency, confidence: p.confidence })),
280
+ explorationLevel: confidence < 0.3 ? 'high' : confidence < 0.6 ? 'medium' : 'low',
281
+ patterns: topPatterns.map((p) => ({
282
+ sequence: p.sequence, frequency: p.frequency, confidence: p.confidence
283
+ })),
218
284
  };
219
285
  }
220
286
 
@@ -229,7 +295,7 @@ function getPreferences(siteId, agentId, domain) {
229
295
  const accepted = decisions.filter((d) => d.outcome === 'accepted');
230
296
  const rejected = decisions.filter((d) => d.outcome === 'rejected');
231
297
 
232
- // Build preference profile
298
+ // Build preference profile from weights
233
299
  const profile = {};
234
300
  for (const p of policies) {
235
301
  if (Math.abs(p.weight) > 0.05) {
@@ -242,17 +308,37 @@ function getPreferences(siteId, agentId, domain) {
242
308
  }
243
309
  }
244
310
 
311
+ // Compute action frequencies
312
+ const actionFreqs = {};
313
+ for (const d of decisions) {
314
+ actionFreqs[d.action] = (actionFreqs[d.action] || 0) + 1;
315
+ }
316
+
245
317
  return {
246
318
  domain,
247
319
  profile,
248
- acceptRate: decisions.length > 0 ? accepted.length / decisions.length : 0,
320
+ acceptRate: decisions.length > 0 ? Math.round((accepted.length / decisions.length) * 1000) / 1000 : 0,
321
+ rejectRate: decisions.length > 0 ? Math.round((rejected.length / decisions.length) * 1000) / 1000 : 0,
249
322
  totalDecisions: decisions.length,
250
- avgReward: decisions.length > 0 ? decisions.reduce((s, d) => s + d.reward, 0) / decisions.length : 0,
323
+ avgReward: decisions.length > 0
324
+ ? Math.round((decisions.reduce((s, d) => s + d.reward, 0) / decisions.length) * 1000) / 1000
325
+ : 0,
326
+ topActions: Object.entries(actionFreqs)
327
+ .sort(([, a], [, b]) => b - a)
328
+ .slice(0, 5)
329
+ .map(([action, count]) => ({ action, count, percentage: Math.round((count / decisions.length) * 100) })),
251
330
  topPatterns: patterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency })),
252
331
  confidence: _getConfidence(siteId, agentId, domain),
253
332
  };
254
333
  }
255
334
 
335
+ /**
336
+ * Get reward history — recent rewards over time for charting.
337
+ */
338
+ function getRewardHistory(siteId, agentId, limit = 30) {
339
+ return stmts.getRecentRewards.all(siteId, agentId, limit).reverse();
340
+ }
341
+
256
342
  // ─── Learning Sessions ───────────────────────────────────────────────
257
343
 
258
344
  function startSession(siteId, agentId) {
@@ -264,7 +350,30 @@ function startSession(siteId, agentId) {
264
350
  function endSession(sessionId, decisionsMade, correctPredictions) {
265
351
  const accuracy = decisionsMade > 0 ? correctPredictions / decisionsMade : 0;
266
352
  stmts.updateSession.run(decisionsMade, correctPredictions, accuracy, sessionId);
267
- return { accuracy };
353
+ return { accuracy: Math.round(accuracy * 1000) / 1000 };
354
+ }
355
+
356
+ // ─── Reset ───────────────────────────────────────────────────────────
357
+
358
+ /**
359
+ * Reset all learned data for a specific domain.
360
+ */
361
+ function resetDomain(siteId, agentId, domain) {
362
+ const txn = db.transaction(() => {
363
+ stmts.deletePolicies.run(siteId, agentId, domain);
364
+ stmts.deleteArms.run(siteId, agentId, domain);
365
+ stmts.deleteDecisions.run(siteId, agentId, domain);
366
+ });
367
+ txn();
368
+ return { reset: true, domain };
369
+ }
370
+
371
+ /**
372
+ * Reset all patterns for an agent.
373
+ */
374
+ function resetPatterns(siteId, agentId) {
375
+ stmts.deletePatterns.run(siteId, agentId);
376
+ return { reset: true };
268
377
  }
269
378
 
270
379
  // ─── Stats ───────────────────────────────────────────────────────────
@@ -273,12 +382,17 @@ function getStats(siteId, agentId) {
273
382
  const row = stmts.getStats.get(siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId);
274
383
  const sessions = stmts.getSessionHistory.all(siteId, agentId, 10);
275
384
  const recentAccuracy = sessions.length > 0 ? sessions.reduce((s, sess) => s + sess.accuracy, 0) / sessions.length : 0;
385
+ const rewardHistory = stmts.getRecentRewards.all(siteId, agentId, 30).reverse();
276
386
 
277
387
  return {
278
388
  ...row,
389
+ avg_reward: row.avg_reward !== null ? Math.round(row.avg_reward * 1000) / 1000 : 0,
279
390
  recentAccuracy: Math.round(recentAccuracy * 1000) / 1000,
280
391
  sessionsCount: sessions.length,
281
- acceptRate: row.total_decisions > 0 ? Math.round((row.accepted / row.total_decisions) * 1000) / 1000 : 0,
392
+ acceptRate: row.total_decisions > 0
393
+ ? Math.round((row.accepted / row.total_decisions) * 1000) / 1000
394
+ : 0,
395
+ rewardHistory,
282
396
  };
283
397
  }
284
398
 
@@ -289,11 +403,16 @@ function _predict(siteId, agentId, domain, features) {
289
403
  if (policies.length === 0) return 0.5; // No data yet — neutral prediction
290
404
 
291
405
  let score = 0;
406
+ let matchedFeatures = 0;
292
407
  for (const p of policies) {
293
408
  const featureVal = features[p.feature];
294
409
  if (featureVal !== undefined) {
295
410
  const fv = typeof featureVal === 'number' ? featureVal : (featureVal ? 1 : 0);
296
- score += p.weight * fv;
411
+
412
+ // Discount by update count, not age: weights with 1-9 updates are scaled by DISCOUNT_FACTOR^(10 - update_count); update_count of 0 or >= 10 is left unscaled
413
+ const recencyBoost = p.update_count > 0 ? Math.pow(DISCOUNT_FACTOR, Math.max(0, 10 - p.update_count)) : 1;
414
+ score += p.weight * fv * recencyBoost;
415
+ matchedFeatures++;
297
416
  }
298
417
  }
299
418
 
@@ -304,14 +423,21 @@ function _predict(siteId, agentId, domain, features) {
304
423
  function _updatePolicies(siteId, agentId, domain, features, error) {
305
424
  for (const [feature, value] of Object.entries(features)) {
306
425
  const fv = typeof value === 'number' ? value : (value ? 1 : 0);
307
- const gradient = error * fv * LEARNING_RATE;
426
+ if (fv === 0) continue; // Skip zero-valued features
308
427
 
428
+ const gradient = error * fv * LEARNING_RATE;
309
429
  const existing = stmts.getPolicy.get(siteId, agentId, domain, feature);
310
- const newWeight = existing ? existing.weight + gradient : gradient;
430
+
431
+ // Apply weight decay to prevent unbounded growth
432
+ const currentWeight = existing ? existing.weight * DISCOUNT_FACTOR : 0;
433
+ const newWeight = currentWeight + gradient;
434
+
435
+ // Clamp weights to [-5, 5] to prevent extreme values
436
+ const clampedWeight = Math.max(-5, Math.min(5, newWeight));
311
437
 
312
438
  stmts.upsertPolicy.run(
313
- crypto.randomUUID(), siteId, agentId, domain, feature, newWeight,
314
- newWeight, Math.abs(error)
439
+ crypto.randomUUID(), siteId, agentId, domain, feature, clampedWeight,
440
+ clampedWeight, Math.abs(error)
315
441
  );
316
442
  }
317
443
  }
@@ -320,24 +446,30 @@ function _updatePolicies(siteId, agentId, domain, features, error) {
320
446
 
321
447
  function _getOrCreateArm(siteId, agentId, domain, action) {
322
448
  stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
323
- const arms = stmts.getArms.all(siteId, agentId, domain);
324
- return arms.find((a) => a.action === action) || { pulls: 0, ucb_score: 1000, avg_reward: 0 };
449
+ const arm = stmts.getArm.get(siteId, agentId, domain, action);
450
+ return arm || { pulls: 0, ucb_score: 0, avg_reward: 0, total_reward: 0 };
325
451
  }
326
452
 
327
453
  function _updateBanditArm(siteId, agentId, domain, action, reward) {
328
- const totalPullsRow = stmts.getTotalPulls.get(siteId, agentId, domain);
329
- const totalPulls = (totalPullsRow?.total || 0) + 1;
454
+ const arm = stmts.getArm.get(siteId, agentId, domain, action);
455
+ if (!arm) {
456
+ stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
457
+ return;
458
+ }
459
+
460
+ const newPulls = arm.pulls + 1;
461
+ const newTotalReward = arm.total_reward + reward;
462
+ const newAvgReward = newTotalReward / newPulls;
330
463
 
464
+ // UCB1: avg_reward + C * sqrt(ln(N) / n_i)
465
+ // We need total pulls across all arms in this domain
331
466
  const arms = stmts.getArms.all(siteId, agentId, domain);
332
- const arm = arms.find((a) => a.action === action);
333
- const armPulls = arm ? arm.pulls + 1 : 1;
467
+ const totalPulls = arms.reduce((s, a) => s + a.pulls, 0) + 1; // +1 for this pull
334
468
 
335
- // UCB1 formula
336
- const avgReward = arm ? (arm.total_reward + reward) / armPulls : reward;
337
- const exploration = UCB_EXPLORATION * Math.sqrt(Math.log(totalPulls) / armPulls);
338
- const ucbScore = avgReward + exploration;
469
+ const exploration = UCB_EXPLORATION * Math.sqrt(Math.log(totalPulls) / newPulls);
470
+ const ucbScore = newAvgReward + exploration;
339
471
 
340
- stmts.updateArm.run(reward, reward, ucbScore, siteId, agentId, domain, action);
472
+ stmts.updateArm.run(reward, newAvgReward, ucbScore, siteId, agentId, domain, action);
341
473
  }
342
474
 
343
475
  // ─── Internal: Pattern Mining ────────────────────────────────────────
@@ -352,6 +484,7 @@ function _minePatterns(siteId, agentId, domain) {
352
484
  const existing = stmts.findPattern.get(siteId, agentId, sequence);
353
485
 
354
486
  if (existing) {
487
+ // Asymptotic approach to 1.0 — confidence grows slower as it increases
355
488
  const newConf = Math.min(0.99, existing.confidence + 0.05 * (1 - existing.confidence));
356
489
  stmts.updatePattern.run(newConf, existing.id);
357
490
  } else {
@@ -365,10 +498,25 @@ function _minePatterns(siteId, agentId, domain) {
365
498
  function _extractFeatures(context) {
366
499
  const features = {};
367
500
 
368
- if (context.price !== undefined) features.price = context.price;
501
+ if (context.price !== undefined) {
502
+ features.price = context.price;
503
+ // Bucketize price for discrete learning
504
+ if (context.price < 10) features['price_bucket:cheap'] = 1;
505
+ else if (context.price < 50) features['price_bucket:moderate'] = 1;
506
+ else if (context.price < 200) features['price_bucket:premium'] = 1;
507
+ else features['price_bucket:luxury'] = 1;
508
+ }
369
509
  if (context.quantity !== undefined) features.quantity = context.quantity;
370
- if (context.discount !== undefined) features.discount = context.discount;
510
+ if (context.discount !== undefined) {
511
+ features.discount = context.discount;
512
+ features.has_discount = context.discount > 0 ? 1 : 0;
513
+ }
514
+ if (context.rating !== undefined) {
515
+ features.rating = context.rating;
516
+ features.high_rated = context.rating >= 4.0 ? 1 : 0;
517
+ }
371
518
  if (context.category) features[`category:${context.category}`] = 1;
519
+ if (context.brand) features[`brand:${context.brand}`] = 1;
372
520
  if (context.timeOfDay !== undefined) {
373
521
  features.morning = context.timeOfDay < 12 ? 1 : 0;
374
522
  features.afternoon = context.timeOfDay >= 12 && context.timeOfDay < 18 ? 1 : 0;
@@ -376,8 +524,9 @@ function _extractFeatures(context) {
376
524
  }
377
525
  if (context.isRepeat !== undefined) features.repeat_visit = context.isRepeat ? 1 : 0;
378
526
  if (context.urgency !== undefined) features.urgency = context.urgency;
527
+ if (context.inStock !== undefined) features.in_stock = context.inStock ? 1 : 0;
379
528
 
380
- // Pass through any raw features
529
+ // Pass through any raw numeric features
381
530
  for (const [k, v] of Object.entries(context)) {
382
531
  if (features[k] === undefined && typeof v === 'number') {
383
532
  features[k] = v;
@@ -396,9 +545,10 @@ function _getConfidence(siteId, agentId, domain) {
396
545
  const withOutcome = decisions.filter((d) => d.outcome !== 'pending');
397
546
  if (withOutcome.length === 0) return MIN_CONFIDENCE;
398
547
 
399
- // Confidence = f(data volume, prediction accuracy, recency)
548
+ // Volume component: linear in the number of resolved decisions, capped at 1.0 once there are 30
400
549
  const volumeConf = Math.min(1, withOutcome.length / 30);
401
550
 
551
+ // Accuracy component: how close predictions were to actual rewards
402
552
  let accuracySum = 0;
403
553
  for (const d of withOutcome) {
404
554
  if (d.predicted_reward !== null) {
@@ -408,15 +558,18 @@ function _getConfidence(siteId, agentId, domain) {
408
558
  }
409
559
  const accuracyConf = withOutcome.length > 0 ? accuracySum / withOutcome.length : 0.5;
410
560
 
411
- // Recency decay confidence for old data
561
+ // Recency component: exponential decay based on age of newest data
412
562
  const latestTs = new Date(withOutcome[0].created_at).getTime();
413
563
  const ageHours = (Date.now() - latestTs) / 3600000;
414
564
  const recencyConf = Math.exp(-DECAY_RATE * ageHours);
415
565
 
416
- return Math.max(MIN_CONFIDENCE, Math.min(0.99, volumeConf * 0.3 + accuracyConf * 0.5 + recencyConf * 0.2));
566
+ return Math.max(MIN_CONFIDENCE, Math.min(0.99,
567
+ volumeConf * 0.3 + accuracyConf * 0.5 + recencyConf * 0.2
568
+ ));
417
569
  }
418
570
 
419
571
  module.exports = {
420
- recordDecision, feedback, recommend, getPreferences,
421
- startSession, endSession, getStats,
572
+ recordDecision, feedback, batchFeedback, recommend, getPreferences,
573
+ getRewardHistory, startSession, endSession,
574
+ resetDomain, resetPatterns, getStats,
422
575
  };