npm - web-agent-bridge - Versions diffs - 2.3.0 → 2.3.1 - Mend

web-agent-bridge 2.3.0 → 2.3.1

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.

Files changed (35)

package/package.json +12 -4
package/public/commander-dashboard.html +243 -0
package/public/css/premium.css +317 -317
package/public/demo.html +259 -259
package/public/index.html +644 -644
package/public/mesh-dashboard.html +309 -382
package/public/premium-dashboard.html +2487 -2487
package/public/premium.html +791 -791
package/public/script/wab.min.js +124 -87
package/script/ai-agent-bridge.js +154 -84
package/sdk/agent-mesh.js +287 -171
package/sdk/commander.js +262 -0
package/sdk/index.js +260 -260
package/server/index.js +8 -1
package/server/migrations/002_premium_features.sql +418 -418
package/server/models/db.js +24 -5
package/server/routes/admin-premium.js +671 -671
package/server/routes/commander.js +316 -0
package/server/routes/mesh.js +370 -201
package/server/routes/premium-v2.js +686 -686
package/server/routes/premium.js +724 -724
package/server/services/agent-learning.js +230 -77
package/server/services/agent-memory.js +625 -625
package/server/services/agent-mesh.js +260 -67
package/server/services/agent-symphony.js +548 -518
package/server/services/commander.js +738 -0
package/server/services/edge-compute.js +440 -0
package/server/services/local-ai.js +389 -0
package/server/services/plugins.js +747 -747
package/server/services/self-healing.js +843 -843
package/server/services/swarm.js +788 -788
package/server/services/vision.js +871 -871
package/public/admin/dashboard.html +0 -848
package/public/admin/login.html +0 -84
package/public/video/tutorial.mp4 +0 -0

package/server/services/agent-learning.js (changed)

@@ -9,10 +9,11 @@
  *   - Prediction accuracy over time
  *
  * Learning algorithms:
- *   - Multi-armed bandit for action selection
- *   - Exponential decay for preference freshness
- *   - Bayesian confidence updates
- *   - Pattern sequence mining for behavior chains
+ *   - Multi-armed bandit (UCB1) for exploration/exploitation
+ *   - Linear policy model with sigmoid activation and gradient descent
+ *   - Temporal discount for preference freshness (recent > old)
+ *   - Sequential pattern mining for behavior chains
+ *   - Confidence estimation: volume × accuracy × recency
  */
 const crypto = require('crypto');
@@ -70,7 +71,7 @@ db.exec(`
     pulls INTEGER DEFAULT 0,
     total_reward REAL DEFAULT 0.0,
     avg_reward REAL DEFAULT 0.0,
-    ucb_score REAL DEFAULT 1000.0,
+    ucb_score REAL DEFAULT 0.0,
     created_at TEXT DEFAULT (datetime('now')),
     updated_at TEXT DEFAULT (datetime('now')),
     UNIQUE(site_id, agent_id, domain, action)
@@ -98,40 +99,41 @@ db.exec(`
 // ─── Config ──────────────────────────────────────────────────────────
 const LEARNING_RATE = 0.1;
-const DISCOUNT_FACTOR = 0.95;
-const DECAY_RATE = 0.01;
-const UCB_EXPLORATION = 1.414;
+const DISCOUNT_FACTOR = 0.95;     // Temporal discount per decision step
+const DECAY_RATE = 0.01;          // Recency decay per hour
+const UCB_EXPLORATION = 1.414;    // √2 for UCB1
 const MIN_CONFIDENCE = 0.01;
 const MAX_SEQUENCE_LENGTH = 5;
 // ─── Prepared Statements ─────────────────────────────────────────────
 const stmts = {
-  insertDecision: db.prepare(`INSERT INTO learning_decisions (id, site_id, agent_id, domain, action, context, predicted_reward, features) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`),
-  updateOutcome: db.prepare(`UPDATE learning_decisions SET outcome = ?, reward = ? WHERE id = ?`),
-  getDecision: db.prepare(`SELECT * FROM learning_decisions WHERE id = ?`),
-  getRecentDecisions: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC LIMIT ?`),
-  getDecisionsByOutcome: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = ? ORDER BY created_at DESC LIMIT ?`),
-  getAllDomainDecisions: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC`),
-  countDecisions: db.prepare(`SELECT COUNT(*) as count FROM learning_decisions WHERE site_id = ? AND agent_id = ?`),
-  upsertPolicy: db.prepare(`INSERT INTO learning_policies (id, site_id, agent_id, domain, feature, weight) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, feature) DO UPDATE SET weight = ?, update_count = update_count + 1, last_error = ?, updated_at = datetime('now')`),
-  getPolicies: db.prepare(`SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ABS(weight) DESC`),
-  getPolicy: db.prepare(`SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? AND feature = ?`),
-  insertPattern: db.prepare(`INSERT INTO learning_patterns (id, site_id, agent_id, pattern_type, sequence, confidence) VALUES (?, ?, ?, ?, ?, ?)`),
-  findPattern: db.prepare(`SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND sequence = ?`),
-  updatePattern: db.prepare(`UPDATE learning_patterns SET frequency = frequency + 1, confidence = ?, last_seen = datetime('now') WHERE id = ?`),
-  getTopPatterns: db.prepare(`SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND pattern_type = ? ORDER BY frequency DESC, confidence DESC LIMIT ?`),
-  upsertArm: db.prepare(`INSERT INTO learning_bandit_arms (id, site_id, agent_id, domain, action) VALUES (?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, action) DO NOTHING`),
-  getArms: db.prepare(`SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ucb_score DESC`),
-  updateArm: db.prepare(`UPDATE learning_bandit_arms SET pulls = pulls + 1, total_reward = total_reward + ?, avg_reward = (total_reward + ?) / (pulls + 1), ucb_score = ?, updated_at = datetime('now') WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?`),
-  getTotalPulls: db.prepare(`SELECT SUM(pulls) as total FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ?`),
-  insertSession: db.prepare(`INSERT INTO learning_sessions (id, site_id, agent_id) VALUES (?, ?, ?)`),
-  updateSession: db.prepare(`UPDATE learning_sessions SET decisions_made = ?, correct_predictions = ?, accuracy = ?, ended_at = datetime('now') WHERE id = ?`),
-  getSessionHistory: db.prepare(`SELECT * FROM learning_sessions WHERE site_id = ? AND agent_id = ? ORDER BY started_at DESC LIMIT ?`),
+  insertDecision: db.prepare('INSERT INTO learning_decisions (id, site_id, agent_id, domain, action, context, predicted_reward, features) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'),
+  updateOutcome: db.prepare('UPDATE learning_decisions SET outcome = ?, reward = ? WHERE id = ?'),
+  getDecision: db.prepare('SELECT * FROM learning_decisions WHERE id = ?'),
+  getRecentDecisions: db.prepare('SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC LIMIT ?'),
+  getDecisionsByOutcome: db.prepare("SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = ? ORDER BY created_at DESC LIMIT ?"),
+  getAllDomainDecisions: db.prepare('SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC'),
+  countDecisions: db.prepare('SELECT COUNT(*) as count FROM learning_decisions WHERE site_id = ? AND agent_id = ?'),
+  getRecentRewards: db.prepare("SELECT reward, created_at FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending' ORDER BY created_at DESC LIMIT ?"),
+  upsertPolicy: db.prepare("INSERT INTO learning_policies (id, site_id, agent_id, domain, feature, weight) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, feature) DO UPDATE SET weight = ?, update_count = update_count + 1, last_error = ?, updated_at = datetime('now')"),
+  getPolicies: db.prepare('SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ABS(weight) DESC'),
+  getPolicy: db.prepare('SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? AND feature = ?'),
+  insertPattern: db.prepare('INSERT INTO learning_patterns (id, site_id, agent_id, pattern_type, sequence, confidence) VALUES (?, ?, ?, ?, ?, ?)'),
+  findPattern: db.prepare('SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND sequence = ?'),
+  updatePattern: db.prepare("UPDATE learning_patterns SET frequency = frequency + 1, confidence = ?, last_seen = datetime('now') WHERE id = ?"),
+  getTopPatterns: db.prepare('SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND pattern_type = ? ORDER BY frequency DESC, confidence DESC LIMIT ?'),
+  upsertArm: db.prepare('INSERT INTO learning_bandit_arms (id, site_id, agent_id, domain, action) VALUES (?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, action) DO NOTHING'),
+  getArms: db.prepare('SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ucb_score DESC'),
+  getArm: db.prepare('SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?'),
+  updateArm: db.prepare("UPDATE learning_bandit_arms SET pulls = pulls + 1, total_reward = total_reward + ?, avg_reward = ?, ucb_score = ?, updated_at = datetime('now') WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?"),
+  insertSession: db.prepare('INSERT INTO learning_sessions (id, site_id, agent_id) VALUES (?, ?, ?)'),
+  updateSession: db.prepare("UPDATE learning_sessions SET decisions_made = ?, correct_predictions = ?, accuracy = ?, ended_at = datetime('now') WHERE id = ?"),
+  getSessionHistory: db.prepare('SELECT * FROM learning_sessions WHERE site_id = ? AND agent_id = ? ORDER BY started_at DESC LIMIT ?'),
   getStats: db.prepare(`SELECT
     (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ?) as total_decisions,
@@ -140,6 +142,11 @@ const stmts = {
     (SELECT AVG(reward) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending') as avg_reward,
     (SELECT COUNT(DISTINCT domain) FROM learning_policies WHERE site_id = ? AND agent_id = ?) as policy_domains,
     (SELECT COUNT(*) FROM learning_patterns WHERE site_id = ? AND agent_id = ?) as total_patterns`),
+  deletePolicies: db.prepare('DELETE FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ?'),
+  deletePatterns: db.prepare('DELETE FROM learning_patterns WHERE site_id = ? AND agent_id = ?'),
+  deleteArms: db.prepare('DELETE FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ?'),
+  deleteDecisions: db.prepare('DELETE FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ?'),
 };
 // ─── Core Learning API ───────────────────────────────────────────────
@@ -149,9 +156,11 @@ const stmts = {
  */
 function recordDecision(siteId, agentId, domain, action, context = {}, features = {}) {
   const id = crypto.randomUUID();
-  const predictedReward = _predict(siteId, agentId, domain, features);
+  const extractedFeatures = { ..._extractFeatures(context), ...features };
+  const predictedReward = _predict(siteId, agentId, domain, extractedFeatures);
-  stmts.insertDecision.run(id, siteId, agentId, domain, action, JSON.stringify(context), predictedReward, JSON.stringify(features));
+  stmts.insertDecision.run(id, siteId, agentId, domain, action,
+    JSON.stringify(context), predictedReward, JSON.stringify(extractedFeatures));
   // Ensure bandit arm exists
   stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
@@ -172,49 +181,106 @@ function feedback(decisionId, outcome, reward) {
   const features = JSON.parse(decision.features || '{}');
   const predError = reward - (decision.predicted_reward || 0);
-  // Update policy weights via gradient descent
+  // Update policy weights via gradient descent with temporal discount
   _updatePolicies(decision.site_id, decision.agent_id, decision.domain, features, predError);
-  // Update bandit arm
+  // Update bandit arm with actual reward
   _updateBanditArm(decision.site_id, decision.agent_id, decision.domain, decision.action, reward);
   // Mine patterns from recent decisions
   _minePatterns(decision.site_id, decision.agent_id, decision.domain);
   return {
-    predictionError: predError,
+    decisionId,
+    predictionError: Math.round(predError * 1000) / 1000,
     updatedConfidence: _getConfidence(decision.site_id, decision.agent_id, decision.domain),
+    accuracy: Math.round((1 - Math.abs(predError)) * 1000) / 1000,
   };
 }
+/**
+ * Batch feedback — provide multiple outcomes at once.
+ */
+function batchFeedback(feedbackList) {
+  const results = [];
+  const txn = db.transaction(() => {
+    for (const fb of feedbackList) {
+      try {
+        results.push(feedback(fb.decisionId, fb.outcome, fb.reward));
+      } catch (err) {
+        results.push({ decisionId: fb.decisionId, error: err.message });
+      }
+    }
+  });
+  txn();
+  return results;
+}
 /**
  * Get the best action for a domain using learned policies + bandit scores.
+ * UCB scores are normalized to [0,1] before blending with policy prediction.
  */
 function recommend(siteId, agentId, domain, availableActions, context = {}) {
   const features = _extractFeatures(context);
-  // Score each action
-  const scored = availableActions.map((action) => {
-    const arm = _getOrCreateArm(siteId, agentId, domain, action);
-    const policyScore = _predict(siteId, agentId, domain, { ...features, action });
-    const banditScore = arm.ucb_score || 0;
+  // Get all arms to find normalization bounds
+  const allArms = stmts.getArms.all(siteId, agentId, domain);
+  const armMap = {};
+  for (const arm of allArms) armMap[arm.action] = arm;
+  // Normalize UCB scores to [0,1]
+  let minUCB = Infinity, maxUCB = -Infinity;
+  for (const arm of allArms) {
+    if (arm.pulls > 0) {
+      if (arm.ucb_score < minUCB) minUCB = arm.ucb_score;
+      if (arm.ucb_score > maxUCB) maxUCB = arm.ucb_score;
+    }
+  }
+  const ucbRange = maxUCB - minUCB;
-    // Blend policy prediction with bandit exploration
-    const blended = 0.6 * policyScore + 0.4 * banditScore;
+  const scored = availableActions.map((action) => {
+    const arm = armMap[action] || _getOrCreateArm(siteId, agentId, domain, action);
+    const policyScore = _predict(siteId, agentId, domain, { ...features, [`action:${action}`]: 1 });
+    // Normalize bandit score to [0,1]
+    let normalizedBandit;
+    if (arm.pulls === 0) {
+      normalizedBandit = 1.0; // unexplored arms get maximum exploration bonus
+    } else if (ucbRange > 0) {
+      normalizedBandit = (arm.ucb_score - minUCB) / ucbRange;
+    } else {
+      normalizedBandit = arm.avg_reward; // single arm — use raw avg
+    }
-    return { action, score: blended, policyScore, banditScore, pulls: arm.pulls };
+    // Blend: as confidence grows, lean more on policy, less on exploration
+    const confidence = _getConfidence(siteId, agentId, domain);
+    const policyWeight = 0.4 + confidence * 0.4; // [0.4, 0.8]
+    const banditWeight = 1 - policyWeight;        // [0.2, 0.6]
+    const blended = policyWeight * policyScore + banditWeight * normalizedBandit;
+    return {
+      action,
+      score: Math.round(blended * 1000) / 1000,
+      policyScore: Math.round(policyScore * 1000) / 1000,
+      banditScore: Math.round(normalizedBandit * 1000) / 1000,
+      pulls: arm.pulls,
+      avgReward: Math.round((arm.avg_reward || 0) * 1000) / 1000,
+    };
   });
   scored.sort((a, b) => b.score - a.score);
   const confidence = _getConfidence(siteId, agentId, domain);
-  const topPatterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 3);
+  const topPatterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 5);
   return {
     recommended: scored[0]?.action || availableActions[0],
     rankings: scored,
     confidence,
-    patterns: topPatterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency, confidence: p.confidence })),
+    explorationLevel: confidence < 0.3 ? 'high' : confidence < 0.6 ? 'medium' : 'low',
+    patterns: topPatterns.map((p) => ({
+      sequence: p.sequence, frequency: p.frequency, confidence: p.confidence
+    })),
   };
 }
@@ -229,7 +295,7 @@ function getPreferences(siteId, agentId, domain) {
   const accepted = decisions.filter((d) => d.outcome === 'accepted');
   const rejected = decisions.filter((d) => d.outcome === 'rejected');
-  // Build preference profile
+  // Build preference profile from weights
   const profile = {};
   for (const p of policies) {
     if (Math.abs(p.weight) > 0.05) {
@@ -242,17 +308,37 @@ function getPreferences(siteId, agentId, domain) {
     }
   }
+  // Compute action frequencies
+  const actionFreqs = {};
+  for (const d of decisions) {
+    actionFreqs[d.action] = (actionFreqs[d.action] || 0) + 1;
+  }
   return {
     domain,
     profile,
-    acceptRate: decisions.length > 0 ? accepted.length / decisions.length : 0,
+    acceptRate: decisions.length > 0 ? Math.round((accepted.length / decisions.length) * 1000) / 1000 : 0,
+    rejectRate: decisions.length > 0 ? Math.round((rejected.length / decisions.length) * 1000) / 1000 : 0,
     totalDecisions: decisions.length,
-    avgReward: decisions.length > 0 ? decisions.reduce((s, d) => s + d.reward, 0) / decisions.length : 0,
+    avgReward: decisions.length > 0
+      ? Math.round((decisions.reduce((s, d) => s + d.reward, 0) / decisions.length) * 1000) / 1000
+      : 0,
+    topActions: Object.entries(actionFreqs)
+      .sort(([, a], [, b]) => b - a)
+      .slice(0, 5)
+      .map(([action, count]) => ({ action, count, percentage: Math.round((count / decisions.length) * 100) })),
     topPatterns: patterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency })),
     confidence: _getConfidence(siteId, agentId, domain),
   };
 }
+/**
+ * Get reward history — recent rewards over time for charting.
+ */
+function getRewardHistory(siteId, agentId, limit = 30) {
+  return stmts.getRecentRewards.all(siteId, agentId, limit).reverse();
+}
 // ─── Learning Sessions ───────────────────────────────────────────────
 function startSession(siteId, agentId) {
@@ -264,7 +350,30 @@ function startSession(siteId, agentId) {
 function endSession(sessionId, decisionsMade, correctPredictions) {
   const accuracy = decisionsMade > 0 ? correctPredictions / decisionsMade : 0;
   stmts.updateSession.run(decisionsMade, correctPredictions, accuracy, sessionId);
-  return { accuracy };
+  return { accuracy: Math.round(accuracy * 1000) / 1000 };
+}
+// ─── Reset ───────────────────────────────────────────────────────────
+/**
+ * Reset all learned data for a specific domain.
+ */
+function resetDomain(siteId, agentId, domain) {
+  const txn = db.transaction(() => {
+    stmts.deletePolicies.run(siteId, agentId, domain);
+    stmts.deleteArms.run(siteId, agentId, domain);
+    stmts.deleteDecisions.run(siteId, agentId, domain);
+  });
+  txn();
+  return { reset: true, domain };
+}
+/**
+ * Reset all patterns for an agent.
+ */
+function resetPatterns(siteId, agentId) {
+  stmts.deletePatterns.run(siteId, agentId);
+  return { reset: true };
 }
 // ─── Stats ───────────────────────────────────────────────────────────
@@ -273,12 +382,17 @@ function getStats(siteId, agentId) {
   const row = stmts.getStats.get(siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId);
   const sessions = stmts.getSessionHistory.all(siteId, agentId, 10);
   const recentAccuracy = sessions.length > 0 ? sessions.reduce((s, sess) => s + sess.accuracy, 0) / sessions.length : 0;
+  const rewardHistory = stmts.getRecentRewards.all(siteId, agentId, 30).reverse();
   return {
     ...row,
+    avg_reward: row.avg_reward !== null ? Math.round(row.avg_reward * 1000) / 1000 : 0,
     recentAccuracy: Math.round(recentAccuracy * 1000) / 1000,
     sessionsCount: sessions.length,
-    acceptRate: row.total_decisions > 0 ? Math.round((row.accepted / row.total_decisions) * 1000) / 1000 : 0,
+    acceptRate: row.total_decisions > 0
+      ? Math.round((row.accepted / row.total_decisions) * 1000) / 1000
+      : 0,
+    rewardHistory,
   };
 }
@@ -289,11 +403,16 @@ function _predict(siteId, agentId, domain, features) {
   if (policies.length === 0) return 0.5; // No data yet — neutral prediction
   let score = 0;
+  let matchedFeatures = 0;
   for (const p of policies) {
     const featureVal = features[p.feature];
     if (featureVal !== undefined) {
       const fv = typeof featureVal === 'number' ? featureVal : (featureVal ? 1 : 0);
-      score += p.weight * fv;
+      // Apply temporal discount: older policies (fewer recent updates) matter less
+      const recencyBoost = p.update_count > 0 ? Math.pow(DISCOUNT_FACTOR, Math.max(0, 10 - p.update_count)) : 1;
+      score += p.weight * fv * recencyBoost;
+      matchedFeatures++;
     }
   }
@@ -304,14 +423,21 @@ function _predict(siteId, agentId, domain, features) {
 function _updatePolicies(siteId, agentId, domain, features, error) {
   for (const [feature, value] of Object.entries(features)) {
     const fv = typeof value === 'number' ? value : (value ? 1 : 0);
-    const gradient = error * fv * LEARNING_RATE;
+    if (fv === 0) continue; // Skip zero-valued features
+    const gradient = error * fv * LEARNING_RATE;
     const existing = stmts.getPolicy.get(siteId, agentId, domain, feature);
-    const newWeight = existing ? existing.weight + gradient : gradient;
+    // Apply weight decay to prevent unbounded growth
+    const currentWeight = existing ? existing.weight * DISCOUNT_FACTOR : 0;
+    const newWeight = currentWeight + gradient;
+    // Clamp weights to [-5, 5] to prevent extreme values
+    const clampedWeight = Math.max(-5, Math.min(5, newWeight));
     stmts.upsertPolicy.run(
-      crypto.randomUUID(), siteId, agentId, domain, feature, newWeight,
-      newWeight, Math.abs(error)
+      crypto.randomUUID(), siteId, agentId, domain, feature, clampedWeight,
+      clampedWeight, Math.abs(error)
     );
   }
 }
@@ -320,24 +446,30 @@ function _updatePolicies(siteId, agentId, domain, features, error) {
 function _getOrCreateArm(siteId, agentId, domain, action) {
   stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
-  const arms = stmts.getArms.all(siteId, agentId, domain);
-  return arms.find((a) => a.action === action) || { pulls: 0, ucb_score: 1000, avg_reward: 0 };
+  const arm = stmts.getArm.get(siteId, agentId, domain, action);
+  return arm || { pulls: 0, ucb_score: 0, avg_reward: 0, total_reward: 0 };
 }
 function _updateBanditArm(siteId, agentId, domain, action, reward) {
-  const totalPullsRow = stmts.getTotalPulls.get(siteId, agentId, domain);
-  const totalPulls = (totalPullsRow?.total || 0) + 1;
+  const arm = stmts.getArm.get(siteId, agentId, domain, action);
+  if (!arm) {
+    stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
+    return;
+  }
+  const newPulls = arm.pulls + 1;
+  const newTotalReward = arm.total_reward + reward;
+  const newAvgReward = newTotalReward / newPulls;
+  // UCB1: avg_reward + C * sqrt(ln(N) / n_i)
+  // We need total pulls across all arms in this domain
   const arms = stmts.getArms.all(siteId, agentId, domain);
-  const arm = arms.find((a) => a.action === action);
-  const armPulls = arm ? arm.pulls + 1 : 1;
+  const totalPulls = arms.reduce((s, a) => s + a.pulls, 0) + 1; // +1 for this pull
-  // UCB1 formula
-  const avgReward = arm ? (arm.total_reward + reward) / armPulls : reward;
-  const exploration = UCB_EXPLORATION * Math.sqrt(Math.log(totalPulls) / armPulls);
-  const ucbScore = avgReward + exploration;
+  const exploration = UCB_EXPLORATION * Math.sqrt(Math.log(totalPulls) / newPulls);
+  const ucbScore = newAvgReward + exploration;
-  stmts.updateArm.run(reward, reward, ucbScore, siteId, agentId, domain, action);
+  stmts.updateArm.run(reward, newAvgReward, ucbScore, siteId, agentId, domain, action);
 }
 // ─── Internal: Pattern Mining ────────────────────────────────────────
@@ -352,6 +484,7 @@ function _minePatterns(siteId, agentId, domain) {
     const existing = stmts.findPattern.get(siteId, agentId, sequence);
     if (existing) {
+      // Asymptotic approach to 1.0 — confidence grows slower as it increases
       const newConf = Math.min(0.99, existing.confidence + 0.05 * (1 - existing.confidence));
       stmts.updatePattern.run(newConf, existing.id);
     } else {
@@ -365,10 +498,25 @@ function _minePatterns(siteId, agentId, domain) {
 function _extractFeatures(context) {
   const features = {};
-  if (context.price !== undefined) features.price = context.price;
+  if (context.price !== undefined) {
+    features.price = context.price;
+    // Bucketize price for discrete learning
+    if (context.price < 10) features['price_bucket:cheap'] = 1;
+    else if (context.price < 50) features['price_bucket:moderate'] = 1;
+    else if (context.price < 200) features['price_bucket:premium'] = 1;
+    else features['price_bucket:luxury'] = 1;
+  }
   if (context.quantity !== undefined) features.quantity = context.quantity;
-  if (context.discount !== undefined) features.discount = context.discount;
+  if (context.discount !== undefined) {
+    features.discount = context.discount;
+    features.has_discount = context.discount > 0 ? 1 : 0;
+  }
+  if (context.rating !== undefined) {
+    features.rating = context.rating;
+    features.high_rated = context.rating >= 4.0 ? 1 : 0;
+  }
   if (context.category) features[`category:${context.category}`] = 1;
+  if (context.brand) features[`brand:${context.brand}`] = 1;
   if (context.timeOfDay !== undefined) {
     features.morning = context.timeOfDay < 12 ? 1 : 0;
     features.afternoon = context.timeOfDay >= 12 && context.timeOfDay < 18 ? 1 : 0;
@@ -376,8 +524,9 @@ function _extractFeatures(context) {
   }
   if (context.isRepeat !== undefined) features.repeat_visit = context.isRepeat ? 1 : 0;
   if (context.urgency !== undefined) features.urgency = context.urgency;
+  if (context.inStock !== undefined) features.in_stock = context.inStock ? 1 : 0;
-  // Pass through any raw features
+  // Pass through any raw numeric features
   for (const [k, v] of Object.entries(context)) {
     if (features[k] === undefined && typeof v === 'number') {
       features[k] = v;
@@ -396,9 +545,10 @@ function _getConfidence(siteId, agentId, domain) {
   const withOutcome = decisions.filter((d) => d.outcome !== 'pending');
   if (withOutcome.length === 0) return MIN_CONFIDENCE;
-  // Confidence = f(data volume, prediction accuracy, recency)
+  // Volume component: log scale, saturates around 30 decisions
   const volumeConf = Math.min(1, withOutcome.length / 30);
+  // Accuracy component: how close predictions were to actual rewards
   let accuracySum = 0;
   for (const d of withOutcome) {
     if (d.predicted_reward !== null) {
@@ -408,15 +558,18 @@ function _getConfidence(siteId, agentId, domain) {
   }
   const accuracyConf = withOutcome.length > 0 ? accuracySum / withOutcome.length : 0.5;
-  // Recency — decay confidence for old data
+  // Recency component: exponential decay based on age of newest data
   const latestTs = new Date(withOutcome[0].created_at).getTime();
   const ageHours = (Date.now() - latestTs) / 3600000;
   const recencyConf = Math.exp(-DECAY_RATE * ageHours);
-  return Math.max(MIN_CONFIDENCE, Math.min(0.99, volumeConf * 0.3 + accuracyConf * 0.5 + recencyConf * 0.2));
+  return Math.max(MIN_CONFIDENCE, Math.min(0.99,
+    volumeConf * 0.3 + accuracyConf * 0.5 + recencyConf * 0.2
+  ));
 }
 module.exports = {
-  recordDecision, feedback, recommend, getPreferences,
-  startSession, endSession, getStats,
+  recordDecision, feedback, batchFeedback, recommend, getPreferences,
+  getRewardHistory, startSession, endSession,
+  resetDomain, resetPatterns, getStats,
 };