web-agent-bridge 3.4.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. package/LICENSE +84 -84
  2. package/README.ar.md +1565 -1304
  3. package/README.md +171 -298
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -237
  6. package/bin/wab-init.js +244 -223
  7. package/bin/wab.js +80 -80
  8. package/examples/azure-dns-wab.js +83 -83
  9. package/examples/bidi-agent.js +119 -119
  10. package/examples/cloudflare-wab-dns.js +121 -121
  11. package/examples/cpanel-wab-dns.js +114 -114
  12. package/examples/cross-site-agent.js +91 -91
  13. package/examples/dns-discovery-agent.js +166 -166
  14. package/examples/gcp-dns-wab.js +76 -76
  15. package/examples/governance-agent.js +169 -169
  16. package/examples/mcp-agent.js +94 -94
  17. package/examples/next-app-router/README.md +44 -44
  18. package/examples/plesk-wab-dns.js +103 -103
  19. package/examples/puppeteer-agent.js +108 -108
  20. package/examples/route53-wab-dns.js +144 -144
  21. package/examples/saas-dashboard/README.md +55 -55
  22. package/examples/safe-mode-agent.js +96 -96
  23. package/examples/self-discovery.js +106 -0
  24. package/examples/shopify-hydrogen/README.md +74 -74
  25. package/examples/vision-agent.js +171 -171
  26. package/examples/wab-sign.js +74 -74
  27. package/examples/wab-verify.js +60 -60
  28. package/examples/wordpress-elementor/README.md +77 -77
  29. package/package.json +93 -93
  30. package/public/.well-known/agent-tools.json +180 -180
  31. package/public/.well-known/ai-assets.json +59 -59
  32. package/public/.well-known/security.txt +8 -8
  33. package/public/.well-known/wab.json +28 -28
  34. package/public/activate.html +448 -368
  35. package/public/adopt.html +236 -0
  36. package/public/adoption-metrics.html +188 -188
  37. package/public/agent-workspace.html +359 -349
  38. package/public/ai.html +198 -198
  39. package/public/api.html +397 -413
  40. package/public/atp.html +171 -0
  41. package/public/azure-dns-integration.html +289 -289
  42. package/public/browser.html +486 -486
  43. package/public/cloudflare-integration.html +380 -380
  44. package/public/commander-dashboard.html +243 -243
  45. package/public/cookies.html +210 -210
  46. package/public/cpanel-integration.html +398 -398
  47. package/public/css/agent-workspace.css +1713 -1713
  48. package/public/css/premium.css +317 -317
  49. package/public/css/styles.css +1401 -1263
  50. package/public/dashboard-shieldlink.html +295 -0
  51. package/public/dashboard.html +711 -707
  52. package/public/dns.html +436 -436
  53. package/public/docs.html +588 -588
  54. package/public/enterprise-mesh.ar.html +80 -0
  55. package/public/enterprise-mesh.html +81 -0
  56. package/public/feed.xml +89 -89
  57. package/public/gcp-dns-integration.html +318 -318
  58. package/public/governance.ar.html +70 -0
  59. package/public/governance.html +69 -0
  60. package/public/growth.html +465 -465
  61. package/public/index.html +1372 -1266
  62. package/public/integrations.html +556 -556
  63. package/public/js/activate.js +449 -145
  64. package/public/js/agent-workspace.js +1740 -1740
  65. package/public/js/auth-nav.js +117 -65
  66. package/public/js/auth-redirect.js +12 -12
  67. package/public/js/cookie-consent.js +56 -56
  68. package/public/js/dns.js +438 -438
  69. package/public/js/wab-demo-page.js +721 -721
  70. package/public/js/ws-client.js +74 -74
  71. package/public/l-preview.html +242 -0
  72. package/public/llms-full.txt +360 -360
  73. package/public/llms.txt +125 -125
  74. package/public/login.html +85 -85
  75. package/public/mesh-dashboard.html +328 -328
  76. package/public/milestones.html +346 -0
  77. package/public/one-click.html +779 -0
  78. package/public/openapi.json +669 -669
  79. package/public/partners.ar.html +145 -0
  80. package/public/partners.html +143 -0
  81. package/public/phone-shield.html +281 -281
  82. package/public/plesk-integration.html +375 -375
  83. package/public/premium-dashboard.html +2489 -2489
  84. package/public/premium.html +793 -793
  85. package/public/privacy.html +297 -297
  86. package/public/provider-onboarding.html +172 -172
  87. package/public/provider-sandbox.html +134 -134
  88. package/public/providers.html +359 -359
  89. package/public/refusals.html +172 -0
  90. package/public/register.html +105 -105
  91. package/public/registrar-integrations.html +141 -141
  92. package/public/ring4.html +292 -0
  93. package/public/robots.txt +99 -99
  94. package/public/route53-integration.html +531 -531
  95. package/public/score.html +263 -0
  96. package/public/script/wab-consent.d.ts +36 -36
  97. package/public/script/wab-consent.js +104 -104
  98. package/public/script/wab-schema.js +131 -131
  99. package/public/script/wab.d.ts +108 -108
  100. package/public/script/wab.min.js +580 -580
  101. package/public/security.txt +8 -8
  102. package/public/shieldlink.html +244 -0
  103. package/public/shieldqr.html +231 -231
  104. package/public/sitemap.xml +13 -1
  105. package/public/terms.html +256 -256
  106. package/public/trust-graph-api.ar.html +92 -0
  107. package/public/trust-graph-api.html +91 -0
  108. package/public/wab-features.html +560 -0
  109. package/public/wab-trust.html +200 -200
  110. package/public/wab-truth.html +375 -0
  111. package/public/wab-vs-protocols.html +210 -210
  112. package/public/whitepaper.html +449 -449
  113. package/script/ai-agent-bridge.js +1754 -1754
  114. package/sdk/README.md +99 -99
  115. package/sdk/agent-mesh.js +449 -449
  116. package/sdk/atp.js +103 -0
  117. package/sdk/auto-discovery.js +301 -288
  118. package/sdk/commander.js +262 -262
  119. package/sdk/governance.js +262 -262
  120. package/sdk/index.d.ts +464 -464
  121. package/sdk/index.js +653 -649
  122. package/sdk/multi-agent.js +318 -318
  123. package/sdk/safe-mode.js +221 -221
  124. package/sdk/safety-shield.js +219 -219
  125. package/sdk/schema-discovery.js +83 -83
  126. package/server/adapters/index.js +520 -520
  127. package/server/config/plans.js +412 -367
  128. package/server/config/secrets.js +102 -102
  129. package/server/control-plane/index.js +301 -301
  130. package/server/data-plane/index.js +354 -354
  131. package/server/index.js +793 -670
  132. package/server/llm/index.js +404 -404
  133. package/server/middleware/adminAuth.js +35 -35
  134. package/server/middleware/api-tier.js +170 -0
  135. package/server/middleware/auth.js +50 -50
  136. package/server/middleware/featureGate.js +88 -88
  137. package/server/middleware/rateLimits.js +100 -100
  138. package/server/middleware/sensitiveAction.js +157 -157
  139. package/server/middleware/wab-trust.js +141 -0
  140. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  141. package/server/migrations/002_premium_features.sql +418 -418
  142. package/server/migrations/003_ads_integer_cents.sql +33 -33
  143. package/server/migrations/004_agent_os.sql +158 -158
  144. package/server/migrations/005_marketplace_metering.sql +126 -126
  145. package/server/migrations/006_growth_suite.sql +138 -0
  146. package/server/migrations/007_governance.sql +106 -106
  147. package/server/migrations/008_plans.sql +144 -144
  148. package/server/migrations/009_shieldqr.sql +30 -30
  149. package/server/migrations/010_extended_trust.sql +33 -33
  150. package/server/migrations/011_outreach.sql +47 -0
  151. package/server/migrations/012_shieldlink.sql +116 -0
  152. package/server/migrations/013_ct_monitor.sql +13 -0
  153. package/server/migrations/014_wab_advanced_features.sql +128 -0
  154. package/server/migrations/015_wab_truth_layer.sql +101 -0
  155. package/server/migrations/016_ring4_external_trust.sql +84 -0
  156. package/server/migrations/017_ring4_extensions.sql +69 -0
  157. package/server/migrations/018_commercial_foundations.sql +167 -0
  158. package/server/migrations/019_unify_tier_constraints.sql +133 -0
  159. package/server/migrations/020_agent_transaction_primitive.sql +119 -0
  160. package/server/models/adapters/index.js +33 -33
  161. package/server/models/adapters/mysql.js +183 -183
  162. package/server/models/adapters/postgresql.js +172 -172
  163. package/server/models/adapters/sqlite.js +7 -7
  164. package/server/models/db.js +740 -740
  165. package/server/observability/failure-analysis.js +337 -337
  166. package/server/observability/index.js +394 -394
  167. package/server/protocol/capabilities.js +223 -223
  168. package/server/protocol/index.js +243 -243
  169. package/server/protocol/schema.js +584 -584
  170. package/server/registry/certification.js +271 -271
  171. package/server/registry/index.js +326 -326
  172. package/server/routes/activate.js +478 -0
  173. package/server/routes/admin-outreach.js +239 -0
  174. package/server/routes/admin-plans.js +76 -76
  175. package/server/routes/admin-premium.js +674 -673
  176. package/server/routes/admin-shieldlink.js +137 -0
  177. package/server/routes/admin-shieldqr.js +90 -90
  178. package/server/routes/admin-trust-monitor.js +139 -83
  179. package/server/routes/admin.js +550 -549
  180. package/server/routes/adopt.js +61 -0
  181. package/server/routes/ads.js +130 -130
  182. package/server/routes/agent-workspace.js +540 -540
  183. package/server/routes/api-keys.js +127 -0
  184. package/server/routes/api.js +150 -150
  185. package/server/routes/auth.js +71 -71
  186. package/server/routes/billing.js +57 -57
  187. package/server/routes/commander.js +316 -316
  188. package/server/routes/customer-shieldlink.js +133 -0
  189. package/server/routes/demo-showcase.js +332 -332
  190. package/server/routes/demo-store.js +154 -154
  191. package/server/routes/diagnose.js +373 -0
  192. package/server/routes/discovery.js +2348 -2348
  193. package/server/routes/enterprise-mesh.js +170 -0
  194. package/server/routes/gateway.js +173 -173
  195. package/server/routes/governance-saas.js +203 -0
  196. package/server/routes/governance.js +208 -208
  197. package/server/routes/growth.js +1048 -0
  198. package/server/routes/intent.js +328 -0
  199. package/server/routes/license.js +251 -251
  200. package/server/routes/mesh.js +469 -469
  201. package/server/routes/noscript.js +543 -543
  202. package/server/routes/partners.js +201 -0
  203. package/server/routes/plans.js +33 -33
  204. package/server/routes/premium-v2.js +686 -686
  205. package/server/routes/premium.js +724 -724
  206. package/server/routes/providers.js +650 -650
  207. package/server/routes/reputation.js +411 -0
  208. package/server/routes/ring4.js +885 -0
  209. package/server/routes/runtime.js +2148 -2148
  210. package/server/routes/shieldlink.js +70 -0
  211. package/server/routes/shieldqr.js +88 -88
  212. package/server/routes/sovereign.js +465 -465
  213. package/server/routes/transactions.js +233 -0
  214. package/server/routes/truth-layer.js +670 -0
  215. package/server/routes/universal.js +200 -200
  216. package/server/routes/unsubscribe.js +51 -0
  217. package/server/routes/wab-api.js +850 -850
  218. package/server/routes/wab-cache.js +282 -0
  219. package/server/runtime/container-worker.js +111 -111
  220. package/server/runtime/container.js +448 -448
  221. package/server/runtime/distributed-worker.js +362 -362
  222. package/server/runtime/event-bus.js +210 -210
  223. package/server/runtime/index.js +253 -253
  224. package/server/runtime/queue.js +599 -599
  225. package/server/runtime/replay.js +666 -666
  226. package/server/runtime/sandbox.js +266 -266
  227. package/server/runtime/scheduler.js +534 -534
  228. package/server/runtime/session-engine.js +293 -293
  229. package/server/runtime/state-manager.js +188 -188
  230. package/server/secrets/wab-signing-key.pem +3 -0
  231. package/server/secrets/wab-signing-pub.pem +3 -0
  232. package/server/security/cross-site-redactor.js +196 -196
  233. package/server/security/dry-run.js +180 -180
  234. package/server/security/human-gate-rate-limit.js +147 -147
  235. package/server/security/human-gate-transports.js +178 -178
  236. package/server/security/human-gate.js +281 -281
  237. package/server/security/index.js +368 -368
  238. package/server/security/intent-engine.js +245 -245
  239. package/server/security/reward-guard.js +171 -171
  240. package/server/security/rollback-store.js +239 -239
  241. package/server/security/token-scope.js +404 -404
  242. package/server/security/url-policy.js +139 -139
  243. package/server/services/adoption-agent.js +182 -0
  244. package/server/services/agent-chat.js +506 -506
  245. package/server/services/agent-learning.js +601 -601
  246. package/server/services/agent-memory.js +625 -625
  247. package/server/services/agent-mesh.js +555 -555
  248. package/server/services/agent-symphony.js +717 -717
  249. package/server/services/agent-tasks.js +1807 -1807
  250. package/server/services/api-key-engine.js +292 -292
  251. package/server/services/cluster.js +894 -894
  252. package/server/services/commander.js +738 -738
  253. package/server/services/edge-compute.js +440 -440
  254. package/server/services/email.js +233 -233
  255. package/server/services/fairness-engine.js +409 -0
  256. package/server/services/fairness.js +420 -0
  257. package/server/services/governance.js +466 -466
  258. package/server/services/hosted-runtime.js +205 -205
  259. package/server/services/lfd.js +635 -635
  260. package/server/services/local-ai.js +389 -389
  261. package/server/services/marketplace.js +270 -270
  262. package/server/services/metering.js +182 -182
  263. package/server/services/modules/affiliate-intelligence.js +93 -93
  264. package/server/services/modules/agent-firewall.js +90 -90
  265. package/server/services/modules/bounty.js +89 -89
  266. package/server/services/modules/collective-bargaining.js +92 -92
  267. package/server/services/modules/dark-pattern.js +66 -66
  268. package/server/services/modules/gov-intelligence.js +45 -45
  269. package/server/services/modules/neural.js +55 -55
  270. package/server/services/modules/notary.js +49 -49
  271. package/server/services/modules/price-time-machine.js +86 -86
  272. package/server/services/modules/protocol.js +104 -104
  273. package/server/services/negotiation.js +439 -439
  274. package/server/services/outreach-agent.js +312 -0
  275. package/server/services/plans.js +214 -214
  276. package/server/services/plugins.js +771 -771
  277. package/server/services/price-intelligence.js +566 -566
  278. package/server/services/price-shield.js +1137 -1137
  279. package/server/services/provider-clients.js +740 -740
  280. package/server/services/reputation.js +465 -465
  281. package/server/services/search-engine.js +357 -357
  282. package/server/services/security.js +513 -513
  283. package/server/services/self-healing.js +843 -843
  284. package/server/services/shieldlink.js +492 -0
  285. package/server/services/shieldqr.js +322 -322
  286. package/server/services/sovereign-shield.js +542 -542
  287. package/server/services/ssl-ct-monitor.js +224 -0
  288. package/server/services/ssl-inspector.js +42 -42
  289. package/server/services/ssl-monitor.js +167 -167
  290. package/server/services/stripe.js +206 -205
  291. package/server/services/swarm.js +788 -788
  292. package/server/services/transactions.js +525 -0
  293. package/server/services/universal-scraper.js +662 -662
  294. package/server/services/verification.js +481 -481
  295. package/server/services/vision.js +1163 -1163
  296. package/server/services/wab-crypto.js +178 -178
  297. package/server/utils/cache.js +125 -125
  298. package/server/utils/migrate.js +81 -81
  299. package/server/utils/safe-fetch.js +228 -228
  300. package/server/utils/secureFields.js +50 -50
  301. package/server/ws.js +161 -161
  302. package/templates/artisan-marketplace.yaml +104 -104
  303. package/templates/book-price-scout.yaml +98 -98
  304. package/templates/electronics-price-tracker.yaml +108 -108
  305. package/templates/flight-deal-hunter.yaml +113 -113
  306. package/templates/freelancer-direct.yaml +116 -116
  307. package/templates/grocery-price-compare.yaml +93 -93
  308. package/templates/hotel-direct-booking.yaml +113 -113
  309. package/templates/local-services.yaml +98 -98
  310. package/templates/olive-oil-tunisia.yaml +88 -88
  311. package/templates/organic-farm-fresh.yaml +101 -101
  312. package/templates/restaurant-direct.yaml +97 -97
  313. package/templates/ring4/banking-sovereign.yaml +55 -0
  314. package/templates/ring4/ecommerce-sovereign.yaml +58 -0
  315. package/templates/ring4/healthcare-sovereign.yaml +60 -0
@@ -1,601 +1,601 @@
1
- /**
2
- * Agent Learning Engine — Local Reinforcement Learning
3
- *
4
- * Agents learn from user decisions, building behavioral models locally
5
- * without sending data to external LLMs. The engine tracks:
6
- * - Decision patterns (what the user chooses and when)
7
- * - Reward signals (accepted/rejected/modified outcomes)
8
- * - Policy weights (which factors matter most to this user)
9
- * - Prediction accuracy over time
10
- *
11
- * Learning algorithms:
12
- * - Multi-armed bandit (UCB1) for exploration/exploitation
13
- * - Linear policy model with sigmoid activation and gradient descent
14
- * - Temporal discount for preference freshness (recent > old)
15
- * - Sequential pattern mining for behavior chains
16
- * - Confidence estimation: volume × accuracy × recency
17
- */
18
-
19
- const crypto = require('crypto');
20
- const { db } = require('../models/db');
21
- let rewardGuard;
22
- try { rewardGuard = require('../security/reward-guard'); } catch { rewardGuard = null; }
23
-
24
- // ─── Schema ──────────────────────────────────────────────────────────
25
-
26
// Bootstrap the learning tables and their lookup indexes. Every statement is
// guarded with IF NOT EXISTS, so running this at module load is repeatable.
//
//   learning_decisions   one row per recorded decision: context, extracted
//                        features, predicted reward, and (after feedback)
//                        the outcome and actual reward
//   learning_policies    one weight per (site, agent, domain, feature)
//   learning_patterns    observed sequences with frequency and confidence
//   learning_bandit_arms per-action pull/reward statistics and UCB score,
//                        unique per (site, agent, domain, action)
//   learning_sessions    per-session prediction accuracy bookkeeping
db.exec(`
  CREATE TABLE IF NOT EXISTS learning_decisions (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    domain TEXT NOT NULL,
    action TEXT NOT NULL,
    context TEXT DEFAULT '{}',
    outcome TEXT DEFAULT 'pending',
    reward REAL DEFAULT 0.0,
    predicted_reward REAL,
    features TEXT DEFAULT '{}',
    created_at TEXT DEFAULT (datetime('now'))
  );

  CREATE TABLE IF NOT EXISTS learning_policies (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    domain TEXT NOT NULL,
    feature TEXT NOT NULL,
    weight REAL DEFAULT 0.0,
    update_count INTEGER DEFAULT 0,
    last_error REAL DEFAULT 0.0,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now')),
    UNIQUE(site_id, agent_id, domain, feature)
  );

  CREATE TABLE IF NOT EXISTS learning_patterns (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    pattern_type TEXT NOT NULL,
    sequence TEXT NOT NULL,
    frequency INTEGER DEFAULT 1,
    confidence REAL DEFAULT 0.5,
    last_seen TEXT DEFAULT (datetime('now')),
    created_at TEXT DEFAULT (datetime('now'))
  );

  CREATE TABLE IF NOT EXISTS learning_bandit_arms (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    domain TEXT NOT NULL,
    action TEXT NOT NULL,
    pulls INTEGER DEFAULT 0,
    total_reward REAL DEFAULT 0.0,
    avg_reward REAL DEFAULT 0.0,
    ucb_score REAL DEFAULT 0.0,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now')),
    UNIQUE(site_id, agent_id, domain, action)
  );

  CREATE TABLE IF NOT EXISTS learning_sessions (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    decisions_made INTEGER DEFAULT 0,
    correct_predictions INTEGER DEFAULT 0,
    accuracy REAL DEFAULT 0.0,
    started_at TEXT DEFAULT (datetime('now')),
    ended_at TEXT
  );

  CREATE INDEX IF NOT EXISTS idx_learn_dec_site ON learning_decisions(site_id, agent_id);
  CREATE INDEX IF NOT EXISTS idx_learn_dec_domain ON learning_decisions(domain);
  CREATE INDEX IF NOT EXISTS idx_learn_dec_outcome ON learning_decisions(outcome);
  CREATE INDEX IF NOT EXISTS idx_learn_pol_lookup ON learning_policies(site_id, agent_id, domain);
  CREATE INDEX IF NOT EXISTS idx_learn_pat_seq ON learning_patterns(site_id, agent_id, pattern_type);
  CREATE INDEX IF NOT EXISTS idx_learn_bandit ON learning_bandit_arms(site_id, agent_id, domain);
`);
100
-
101
- // ─── Config ──────────────────────────────────────────────────────────
102
-
103
/** Step size for policy-weight updates. */
const LEARNING_RATE = 0.1;

/** Temporal discount applied per decision step. */
const DISCOUNT_FACTOR = 0.95;

/** Recency decay applied per hour. */
const DECAY_RATE = 0.01;

/** UCB1 exploration coefficient (approximately √2). */
const UCB_EXPLORATION = 1.414;

/** Confidence floor. NOTE(review): consumers are outside this excerpt — confirm usage. */
const MIN_CONFIDENCE = 0.01;

/** Sequence-length cap. NOTE(review): presumably bounds mined pattern length — confirm. */
const MAX_SEQUENCE_LENGTH = 5;
109
-
110
- // ─── Prepared Statements ─────────────────────────────────────────────
111
-
112
// Prepared statements, compiled once at module load and grouped by the table
// they touch. Callers bind positional parameters in the order of the `?`
// placeholders.
const stmts = {
  // ── learning_decisions ──
  insertDecision: db.prepare(
    'INSERT INTO learning_decisions (id, site_id, agent_id, domain, action, context, predicted_reward, features) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
  ),
  updateOutcome: db.prepare(
    'UPDATE learning_decisions SET outcome = ?, reward = ? WHERE id = ?'
  ),
  getDecision: db.prepare(
    'SELECT * FROM learning_decisions WHERE id = ?'
  ),
  getRecentDecisions: db.prepare(
    'SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC LIMIT ?'
  ),
  getDecisionsByOutcome: db.prepare(
    "SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = ? ORDER BY created_at DESC LIMIT ?"
  ),
  getAllDomainDecisions: db.prepare(
    'SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC'
  ),
  countDecisions: db.prepare(
    'SELECT COUNT(*) as count FROM learning_decisions WHERE site_id = ? AND agent_id = ?'
  ),
  // Only resolved decisions (outcome other than 'pending') carry a reward signal.
  getRecentRewards: db.prepare(
    "SELECT reward, created_at FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending' ORDER BY created_at DESC LIMIT ?"
  ),

  // ── learning_policies ──
  // Binds 8 values: the six insert columns, then the new weight and last_error
  // used by the ON CONFLICT update branch.
  upsertPolicy: db.prepare(
    "INSERT INTO learning_policies (id, site_id, agent_id, domain, feature, weight) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, feature) DO UPDATE SET weight = ?, update_count = update_count + 1, last_error = ?, updated_at = datetime('now')"
  ),
  getPolicies: db.prepare(
    'SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ABS(weight) DESC'
  ),
  getPolicy: db.prepare(
    'SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? AND feature = ?'
  ),

  // ── learning_patterns ──
  insertPattern: db.prepare(
    'INSERT INTO learning_patterns (id, site_id, agent_id, pattern_type, sequence, confidence) VALUES (?, ?, ?, ?, ?, ?)'
  ),
  findPattern: db.prepare(
    'SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND sequence = ?'
  ),
  updatePattern: db.prepare(
    "UPDATE learning_patterns SET frequency = frequency + 1, confidence = ?, last_seen = datetime('now') WHERE id = ?"
  ),
  getTopPatterns: db.prepare(
    'SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND pattern_type = ? ORDER BY frequency DESC, confidence DESC LIMIT ?'
  ),

  // ── learning_bandit_arms ──
  // Insert-if-missing: an existing arm is left untouched.
  upsertArm: db.prepare(
    'INSERT INTO learning_bandit_arms (id, site_id, agent_id, domain, action) VALUES (?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, action) DO NOTHING'
  ),
  getArms: db.prepare(
    'SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ucb_score DESC'
  ),
  getArm: db.prepare(
    'SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?'
  ),
  updateArm: db.prepare(
    "UPDATE learning_bandit_arms SET pulls = pulls + 1, total_reward = total_reward + ?, avg_reward = ?, ucb_score = ?, updated_at = datetime('now') WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?"
  ),

  // ── learning_sessions ──
  insertSession: db.prepare(
    'INSERT INTO learning_sessions (id, site_id, agent_id) VALUES (?, ?, ?)'
  ),
  updateSession: db.prepare(
    "UPDATE learning_sessions SET decisions_made = ?, correct_predictions = ?, accuracy = ?, ended_at = datetime('now') WHERE id = ?"
  ),
  getSessionHistory: db.prepare(
    'SELECT * FROM learning_sessions WHERE site_id = ? AND agent_id = ? ORDER BY started_at DESC LIMIT ?'
  ),

  // ── aggregate stats ──
  // Six scalar sub-selects; the (site_id, agent_id) pair must be bound once
  // per sub-select, i.e. 12 positional values in total.
  getStats: db.prepare(`SELECT
    (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ?) as total_decisions,
    (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = 'accepted') as accepted,
    (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = 'rejected') as rejected,
    (SELECT AVG(reward) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending') as avg_reward,
    (SELECT COUNT(DISTINCT domain) FROM learning_policies WHERE site_id = ? AND agent_id = ?) as policy_domains,
    (SELECT COUNT(*) FROM learning_patterns WHERE site_id = ? AND agent_id = ?) as total_patterns`),

  // ── deletion ──
  // Patterns have no domain column, so deletePatterns is scoped to
  // (site, agent) only, unlike the other three deletes.
  deletePolicies: db.prepare(
    'DELETE FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ?'
  ),
  deletePatterns: db.prepare(
    'DELETE FROM learning_patterns WHERE site_id = ? AND agent_id = ?'
  ),
  deleteArms: db.prepare(
    'DELETE FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ?'
  ),
  deleteDecisions: db.prepare(
    'DELETE FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ?'
  ),
};
153
-
154
- // ─── Core Learning API ───────────────────────────────────────────────
155
-
156
/**
 * Log a decision the agent is about to take, together with the reward the
 * current model predicts for it, and make sure a bandit arm exists for the
 * chosen action.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @param {string} action
 * @param {object} [context={}] - Stored as JSON; also fed to the feature extractor.
 * @param {object} [features={}] - Explicit features; these override extracted ones on key clash.
 * @returns {{decisionId: string, predictedReward: number, confidence: number}}
 */
function recordDecision(siteId, agentId, domain, action, context = {}, features = {}) {
  const decisionId = crypto.randomUUID();

  // Later sources win, so caller-supplied features take precedence.
  const mergedFeatures = Object.assign({}, _extractFeatures(context), features);
  const predictedReward = _predict(siteId, agentId, domain, mergedFeatures);

  stmts.insertDecision.run(
    decisionId,
    siteId,
    agentId,
    domain,
    action,
    JSON.stringify(context),
    predictedReward,
    JSON.stringify(mergedFeatures),
  );

  // Insert-if-missing: an already-known arm is left as it is.
  stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);

  const confidence = _getConfidence(siteId, agentId, domain);
  return { decisionId, predictedReward, confidence };
}
172
-
173
/**
 * Provide feedback on a decision — the outcome and the actual reward.
 * This is the core learning signal: it stores the outcome, then updates the
 * policy weights, the bandit arm and the mined patterns for the decision's
 * (site, agent, domain).
 *
 * When the optional reward guard is loaded, the reward is passed through
 * `rewardGuard.sanitizeReward` first. A `'blocked'` verdict records the
 * outcome but is NOT propagated to the policy or bandit.
 *
 * @param {string} decisionId - Id returned by `recordDecision`.
 * @param {string} outcome - Outcome label stored on the decision row.
 * @param {number|string} reward - Actual reward; numeric strings are coerced.
 * @param {{actorId?: string, approvedBy?: string}} [opts={}] - Forwarded to the reward guard.
 * @returns {object} Either `{ decisionId, blocked: true, reason, guardDecision }`
 *   or `{ decisionId, predictionError, updatedConfidence, accuracy, guardDecision, appliedReward }`.
 * @throws {Error} If the decision does not exist.
 * @throws {TypeError} If the (sanitized) reward is not a finite number.
 */
function feedback(decisionId, outcome, reward, opts = {}) {
  const decision = stmts.getDecision.get(decisionId);
  if (!decision) throw new Error('Decision not found');

  // Tolerate an explicit `null` for opts.
  const { actorId, approvedBy } = opts ?? {};

  // ── Reward guard: clamp / block / flag malicious or anomalous rewards ──
  let safeReward = reward;
  let guardDecision = 'accepted';
  if (rewardGuard) {
    const sanitized = rewardGuard.sanitizeReward({
      siteId: decision.site_id,
      agentId: decision.agent_id,
      domain: decision.domain,
      action: decision.action,
      reward,
      actorId,
      approvedBy,
    });
    safeReward = sanitized.reward;
    guardDecision = sanitized.decision;
    if (guardDecision === 'blocked') {
      // Don't propagate to policy / bandit.
      stmts.updateOutcome.run(outcome, safeReward, decisionId);
      return { decisionId, blocked: true, reason: sanitized.reason, guardDecision };
    }
  }

  // The guard is optional, so validate here as well: a NaN/Infinity reward
  // would otherwise be written into the weights and arm totals permanently.
  // Validation happens before any write so a bad reward leaves no trace.
  safeReward = Number(safeReward);
  if (!Number.isFinite(safeReward)) {
    throw new TypeError('Reward must be a finite number');
  }

  stmts.updateOutcome.run(outcome, safeReward, decisionId);

  const features = JSON.parse(decision.features || '{}');
  const predError = safeReward - (decision.predicted_reward ?? 0);

  // Update policy weights from the prediction error
  _updatePolicies(decision.site_id, decision.agent_id, decision.domain, features, predError);

  // Update bandit arm with actual reward
  _updateBanditArm(decision.site_id, decision.agent_id, decision.domain, decision.action, safeReward);

  // Mine patterns from recent decisions
  _minePatterns(decision.site_id, decision.agent_id, decision.domain);

  return {
    decisionId,
    predictionError: Math.round(predError * 1000) / 1000,
    updatedConfidence: _getConfidence(decision.site_id, decision.agent_id, decision.domain),
    // Floor at 0: an error larger than 1 must not report a negative accuracy.
    accuracy: Math.round(Math.max(0, 1 - Math.abs(predError)) * 1000) / 1000,
    guardDecision,
    appliedReward: safeReward,
  };
}
226
-
227
/**
 * Batch feedback — apply multiple outcomes inside a single transaction.
 *
 * Each entry is processed independently: a failure for one entry is captured
 * as `{ decisionId, error }` in the result list and does not stop the rest.
 *
 * @param {Iterable<{decisionId: string, outcome: string, reward: number}>} feedbackList
 * @param {{actorId?: string, approvedBy?: string}} [opts={}] - Forwarded
 *   unchanged to every `feedback` call. It is taken from the caller rather
 *   than from the individual entries, so list items cannot supply their own
 *   approval metadata.
 * @returns {object[]} One result per entry, in input order.
 */
function batchFeedback(feedbackList, opts = {}) {
  const results = [];
  const applyAll = db.transaction(() => {
    for (const entry of feedbackList) {
      // Read the id defensively so a null/undefined entry cannot throw from
      // inside the catch handler and abort the whole batch.
      const decisionId = entry?.decisionId;
      try {
        if (entry == null) throw new TypeError('Feedback entry must be an object');
        results.push(feedback(decisionId, entry.outcome, entry.reward, opts));
      } catch (err) {
        results.push({ decisionId, error: err.message });
      }
    }
  });
  applyAll();
  return results;
}
244
-
245
/**
 * Rank the available actions for a domain by blending the learned policy
 * prediction with the bandit (UCB) score of each action.
 *
 * UCB scores are min-max normalized to [0,1] across all arms of the domain
 * that have been pulled at least once. Arms never pulled get the maximum
 * bandit score (1.0). When the pulled arms span no range, the arm's raw
 * average reward is used instead.
 *
 * Blend: `policyWeight = 0.4 + confidence * 0.4`, `banditWeight = 1 - policyWeight`,
 * so higher confidence leans more on the policy and less on exploration.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @param {string[]} availableActions - Candidate actions to rank.
 * @param {object} [context={}] - Passed to the feature extractor.
 * @returns {{recommended: (string|undefined), rankings: object[], confidence: number,
 *   explorationLevel: ('high'|'medium'|'low'), patterns: object[]}}
 */
function recommend(siteId, agentId, domain, availableActions, context = {}) {
  const features = _extractFeatures(context);

  // Index arms in a Map: action names are free-form text, and a plain object
  // would resolve names such as "constructor" or "toString" to inherited
  // Object.prototype members instead of "no arm yet".
  const allArms = stmts.getArms.all(siteId, agentId, domain);
  const armsByAction = new Map(allArms.map((arm) => [arm.action, arm]));

  // Normalization bounds over arms that have actually been pulled.
  let minUCB = Infinity;
  let maxUCB = -Infinity;
  for (const arm of allArms) {
    if (arm.pulls > 0) {
      if (arm.ucb_score < minUCB) minUCB = arm.ucb_score;
      if (arm.ucb_score > maxUCB) maxUCB = arm.ucb_score;
    }
  }
  const ucbRange = maxUCB - minUCB;

  // Resolve (or create) every arm first, then read the confidence once: it is
  // requested with the same arguments for every action, so there is no need
  // to recompute it per candidate.
  const arms = availableActions.map(
    (action) => armsByAction.get(action) ?? _getOrCreateArm(siteId, agentId, domain, action),
  );
  const confidence = _getConfidence(siteId, agentId, domain);
  const policyWeight = 0.4 + confidence * 0.4; // [0.4, 0.8]
  const banditWeight = 1 - policyWeight; // [0.2, 0.6]

  const round3 = (value) => Math.round(value * 1000) / 1000;

  const scored = availableActions.map((action, index) => {
    const arm = arms[index];
    const policyScore = _predict(siteId, agentId, domain, { ...features, [`action:${action}`]: 1 });

    let normalizedBandit;
    if (arm.pulls === 0) {
      normalizedBandit = 1.0; // unexplored arms get maximum exploration bonus
    } else if (ucbRange > 0) {
      normalizedBandit = (arm.ucb_score - minUCB) / ucbRange;
    } else {
      normalizedBandit = arm.avg_reward; // no spread to normalize over — use raw avg
    }

    const blended = policyWeight * policyScore + banditWeight * normalizedBandit;

    return {
      action,
      score: round3(blended),
      policyScore: round3(policyScore),
      banditScore: round3(normalizedBandit),
      pulls: arm.pulls,
      avgReward: round3(arm.avg_reward || 0),
    };
  });

  scored.sort((a, b) => b.score - a.score);

  const topPatterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 5);

  return {
    recommended: scored[0]?.action || availableActions[0],
    rankings: scored,
    confidence,
    explorationLevel: confidence < 0.3 ? 'high' : confidence < 0.6 ? 'medium' : 'low',
    patterns: topPatterns.map((p) => ({
      sequence: p.sequence, frequency: p.frequency, confidence: p.confidence,
    })),
  };
}
312
-
313
/**
 * Get the learned preference summary for a domain.
 *
 * Combines the policy weights, the 50 most recent decisions and the top 10
 * `action_sequence` patterns into a single report.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @returns {{domain: string, profile: object, acceptRate: number, rejectRate: number,
 *   totalDecisions: number, avgReward: number, topActions: object[],
 *   topPatterns: object[], confidence: number}}
 *   `profile` maps each feature whose |weight| exceeds 0.05 to its rounded
 *   weight, direction, strength and update count. Rates and `avgReward` are
 *   computed over the recent decisions fetched here and are 0 when there are none.
 */
function getPreferences(siteId, agentId, domain) {
  const policies = stmts.getPolicies.all(siteId, agentId, domain);
  const decisions = stmts.getRecentDecisions.all(siteId, agentId, domain, 50);
  const patterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 10);

  const round3 = (value) => Math.round(value * 1000) / 1000;
  const total = decisions.length;

  const accepted = decisions.filter((d) => d.outcome === 'accepted');
  const rejected = decisions.filter((d) => d.outcome === 'rejected');

  // Build the preference profile from the weights. Object.fromEntries defines
  // own data properties, so a feature literally named "__proto__" becomes a
  // normal entry instead of replacing the object's prototype.
  const profile = Object.fromEntries(
    policies
      .filter((p) => Math.abs(p.weight) > 0.05)
      .map((p) => {
        const magnitude = Math.abs(p.weight);
        return [p.feature, {
          weight: round3(p.weight),
          direction: p.weight > 0 ? 'preferred' : 'avoided',
          strength: magnitude > 0.5 ? 'strong' : magnitude > 0.2 ? 'moderate' : 'weak',
          updates: p.update_count,
        }];
      }),
  );

  // Count actions in a Map: with a plain object, an action named like an
  // Object.prototype member ("constructor", "toString", ...) would start from
  // the inherited function instead of 0 and produce a garbage count.
  const actionCounts = new Map();
  for (const { action } of decisions) {
    actionCounts.set(action, (actionCounts.get(action) ?? 0) + 1);
  }

  return {
    domain,
    profile,
    acceptRate: total > 0 ? round3(accepted.length / total) : 0,
    rejectRate: total > 0 ? round3(rejected.length / total) : 0,
    totalDecisions: total,
    avgReward: total > 0
      ? round3(decisions.reduce((sum, d) => sum + d.reward, 0) / total)
      : 0,
    topActions: [...actionCounts.entries()]
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([action, count]) => ({ action, count, percentage: Math.round((count / total) * 100) })),
    topPatterns: patterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency })),
    confidence: _getConfidence(siteId, agentId, domain),
  };
}
360
-
361
- /**
362
- * Get reward history — recent rewards over time for charting.
363
- */
364
- function getRewardHistory(siteId, agentId, limit = 30) {
365
- return stmts.getRecentRewards.all(siteId, agentId, limit).reverse();
366
- }
367
-
368
- // ─── Learning Sessions ───────────────────────────────────────────────
369
-
370
- function startSession(siteId, agentId) {
371
- const id = crypto.randomUUID();
372
- stmts.insertSession.run(id, siteId, agentId);
373
- return { sessionId: id };
374
- }
375
-
376
- function endSession(sessionId, decisionsMade, correctPredictions) {
377
- const accuracy = decisionsMade > 0 ? correctPredictions / decisionsMade : 0;
378
- stmts.updateSession.run(decisionsMade, correctPredictions, accuracy, sessionId);
379
- return { accuracy: Math.round(accuracy * 1000) / 1000 };
380
- }
381
-
382
- // ─── Reset ───────────────────────────────────────────────────────────
383
-
384
- /**
385
- * Reset all learned data for a specific domain.
386
- */
387
- function resetDomain(siteId, agentId, domain) {
388
- const txn = db.transaction(() => {
389
- stmts.deletePolicies.run(siteId, agentId, domain);
390
- stmts.deleteArms.run(siteId, agentId, domain);
391
- stmts.deleteDecisions.run(siteId, agentId, domain);
392
- });
393
- txn();
394
- return { reset: true, domain };
395
- }
396
-
397
- /**
398
- * Reset all patterns for an agent.
399
- */
400
- function resetPatterns(siteId, agentId) {
401
- stmts.deletePatterns.run(siteId, agentId);
402
- return { reset: true };
403
- }
404
-
405
- // ─── Stats ───────────────────────────────────────────────────────────
406
-
407
- function getStats(siteId, agentId) {
408
- const row = stmts.getStats.get(siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId);
409
- const sessions = stmts.getSessionHistory.all(siteId, agentId, 10);
410
- const recentAccuracy = sessions.length > 0 ? sessions.reduce((s, sess) => s + sess.accuracy, 0) / sessions.length : 0;
411
- const rewardHistory = stmts.getRecentRewards.all(siteId, agentId, 30).reverse();
412
-
413
- return {
414
- ...row,
415
- avg_reward: row.avg_reward !== null ? Math.round(row.avg_reward * 1000) / 1000 : 0,
416
- recentAccuracy: Math.round(recentAccuracy * 1000) / 1000,
417
- sessionsCount: sessions.length,
418
- acceptRate: row.total_decisions > 0
419
- ? Math.round((row.accepted / row.total_decisions) * 1000) / 1000
420
- : 0,
421
- rewardHistory,
422
- };
423
- }
424
-
425
- // ─── Internal: Prediction via Linear Model ───────────────────────────
426
-
427
- function _predict(siteId, agentId, domain, features) {
428
- const policies = stmts.getPolicies.all(siteId, agentId, domain);
429
- if (policies.length === 0) return 0.5; // No data yet — neutral prediction
430
-
431
- let score = 0;
432
- let matchedFeatures = 0;
433
- for (const p of policies) {
434
- const featureVal = features[p.feature];
435
- if (featureVal !== undefined) {
436
- const fv = typeof featureVal === 'number' ? featureVal : (featureVal ? 1 : 0);
437
-
438
- // Apply temporal discount: older policies (fewer recent updates) matter less
439
- const recencyBoost = p.update_count > 0 ? Math.pow(DISCOUNT_FACTOR, Math.max(0, 10 - p.update_count)) : 1;
440
- score += p.weight * fv * recencyBoost;
441
- matchedFeatures++;
442
- }
443
- }
444
-
445
- // Sigmoid squash to [0, 1]
446
- return 1 / (1 + Math.exp(-score));
447
- }
448
-
449
- function _updatePolicies(siteId, agentId, domain, features, error) {
450
- for (const [feature, value] of Object.entries(features)) {
451
- const fv = typeof value === 'number' ? value : (value ? 1 : 0);
452
- if (fv === 0) continue; // Skip zero-valued features
453
-
454
- const gradient = error * fv * LEARNING_RATE;
455
- const existing = stmts.getPolicy.get(siteId, agentId, domain, feature);
456
-
457
- // Apply weight decay to prevent unbounded growth
458
- const currentWeight = existing ? existing.weight * DISCOUNT_FACTOR : 0;
459
- const newWeight = currentWeight + gradient;
460
-
461
- // Clamp weights to [-5, 5] to prevent extreme values
462
- const clampedWeight = Math.max(-5, Math.min(5, newWeight));
463
-
464
- stmts.upsertPolicy.run(
465
- crypto.randomUUID(), siteId, agentId, domain, feature, clampedWeight,
466
- clampedWeight, Math.abs(error)
467
- );
468
- }
469
- }
470
-
471
- // ─── Internal: Multi-Armed Bandit ────────────────────────────────────
472
-
473
- function _getOrCreateArm(siteId, agentId, domain, action) {
474
- stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
475
- const arm = stmts.getArm.get(siteId, agentId, domain, action);
476
- return arm || { pulls: 0, ucb_score: 0, avg_reward: 0, total_reward: 0 };
477
- }
478
-
479
- function _updateBanditArm(siteId, agentId, domain, action, reward) {
480
- const arm = stmts.getArm.get(siteId, agentId, domain, action);
481
- if (!arm) {
482
- stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
483
- return;
484
- }
485
-
486
- const newPulls = arm.pulls + 1;
487
- const newTotalReward = arm.total_reward + reward;
488
- const newAvgReward = newTotalReward / newPulls;
489
-
490
- // UCB1: avg_reward + C * sqrt(ln(N) / n_i)
491
- // We need total pulls across all arms in this domain
492
- const arms = stmts.getArms.all(siteId, agentId, domain);
493
- const totalPulls = arms.reduce((s, a) => s + a.pulls, 0) + 1; // +1 for this pull
494
-
495
- const exploration = UCB_EXPLORATION * Math.sqrt(Math.log(totalPulls) / newPulls);
496
- const ucbScore = newAvgReward + exploration;
497
-
498
- stmts.updateArm.run(reward, newAvgReward, ucbScore, siteId, agentId, domain, action);
499
- }
500
-
501
- // ─── Internal: Pattern Mining ────────────────────────────────────────
502
-
503
- function _minePatterns(siteId, agentId, domain) {
504
- const decisions = stmts.getRecentDecisions.all(siteId, agentId, domain, 20);
505
- if (decisions.length < 3) return;
506
-
507
- // Extract action sequences of length 2-5
508
- for (let len = 2; len <= Math.min(MAX_SEQUENCE_LENGTH, decisions.length); len++) {
509
- const sequence = decisions.slice(0, len).map((d) => d.action).reverse().join(' → ');
510
- const existing = stmts.findPattern.get(siteId, agentId, sequence);
511
-
512
- if (existing) {
513
- // Asymptotic approach to 1.0 — confidence grows slower as it increases
514
- const newConf = Math.min(0.99, existing.confidence + 0.05 * (1 - existing.confidence));
515
- stmts.updatePattern.run(newConf, existing.id);
516
- } else {
517
- stmts.insertPattern.run(crypto.randomUUID(), siteId, agentId, 'action_sequence', sequence, 0.3);
518
- }
519
- }
520
- }
521
-
522
- // ─── Internal: Feature Extraction ────────────────────────────────────
523
-
524
- function _extractFeatures(context) {
525
- const features = {};
526
-
527
- if (context.price !== undefined) {
528
- features.price = context.price;
529
- // Bucketize price for discrete learning
530
- if (context.price < 10) features['price_bucket:cheap'] = 1;
531
- else if (context.price < 50) features['price_bucket:moderate'] = 1;
532
- else if (context.price < 200) features['price_bucket:premium'] = 1;
533
- else features['price_bucket:luxury'] = 1;
534
- }
535
- if (context.quantity !== undefined) features.quantity = context.quantity;
536
- if (context.discount !== undefined) {
537
- features.discount = context.discount;
538
- features.has_discount = context.discount > 0 ? 1 : 0;
539
- }
540
- if (context.rating !== undefined) {
541
- features.rating = context.rating;
542
- features.high_rated = context.rating >= 4.0 ? 1 : 0;
543
- }
544
- if (context.category) features[`category:${context.category}`] = 1;
545
- if (context.brand) features[`brand:${context.brand}`] = 1;
546
- if (context.timeOfDay !== undefined) {
547
- features.morning = context.timeOfDay < 12 ? 1 : 0;
548
- features.afternoon = context.timeOfDay >= 12 && context.timeOfDay < 18 ? 1 : 0;
549
- features.evening = context.timeOfDay >= 18 ? 1 : 0;
550
- }
551
- if (context.isRepeat !== undefined) features.repeat_visit = context.isRepeat ? 1 : 0;
552
- if (context.urgency !== undefined) features.urgency = context.urgency;
553
- if (context.inStock !== undefined) features.in_stock = context.inStock ? 1 : 0;
554
-
555
- // Pass through any raw numeric features
556
- for (const [k, v] of Object.entries(context)) {
557
- if (features[k] === undefined && typeof v === 'number') {
558
- features[k] = v;
559
- }
560
- }
561
-
562
- return features;
563
- }
564
-
565
- // ─── Internal: Confidence Estimation ─────────────────────────────────
566
-
567
- function _getConfidence(siteId, agentId, domain) {
568
- const decisions = stmts.getRecentDecisions.all(siteId, agentId, domain, 50);
569
- if (decisions.length === 0) return 0;
570
-
571
- const withOutcome = decisions.filter((d) => d.outcome !== 'pending');
572
- if (withOutcome.length === 0) return MIN_CONFIDENCE;
573
-
574
- // Volume component: log scale, saturates around 30 decisions
575
- const volumeConf = Math.min(1, withOutcome.length / 30);
576
-
577
- // Accuracy component: how close predictions were to actual rewards
578
- let accuracySum = 0;
579
- for (const d of withOutcome) {
580
- if (d.predicted_reward !== null) {
581
- const error = Math.abs(d.reward - d.predicted_reward);
582
- accuracySum += Math.max(0, 1 - error);
583
- }
584
- }
585
- const accuracyConf = withOutcome.length > 0 ? accuracySum / withOutcome.length : 0.5;
586
-
587
- // Recency component: exponential decay based on age of newest data
588
- const latestTs = new Date(withOutcome[0].created_at).getTime();
589
- const ageHours = (Date.now() - latestTs) / 3600000;
590
- const recencyConf = Math.exp(-DECAY_RATE * ageHours);
591
-
592
- return Math.max(MIN_CONFIDENCE, Math.min(0.99,
593
- volumeConf * 0.3 + accuracyConf * 0.5 + recencyConf * 0.2
594
- ));
595
- }
596
-
597
- module.exports = {
598
- recordDecision, feedback, batchFeedback, recommend, getPreferences,
599
- getRewardHistory, startSession, endSession,
600
- resetDomain, resetPatterns, getStats,
601
- };
1
+ /**
2
+ * Agent Learning Engine — Local Reinforcement Learning
3
+ *
4
+ * Agents learn from user decisions, building behavioral models locally
5
+ * without sending data to external LLMs. The engine tracks:
6
+ * - Decision patterns (what the user chooses and when)
7
+ * - Reward signals (accepted/rejected/modified outcomes)
8
+ * - Policy weights (which factors matter most to this user)
9
+ * - Prediction accuracy over time
10
+ *
11
+ * Learning algorithms:
12
+ * - Multi-armed bandit (UCB1) for exploration/exploitation
13
+ * - Linear policy model with sigmoid activation and gradient descent
14
+ * - Temporal discount for preference freshness (recent > old)
15
+ * - Sequential pattern mining for behavior chains
16
+ * - Confidence estimation: volume × accuracy × recency
17
+ */
18
+
19
+ const crypto = require('crypto');
20
+ const { db } = require('../models/db');
21
+ let rewardGuard;
22
+ try { rewardGuard = require('../security/reward-guard'); } catch { rewardGuard = null; }
23
+
24
+ // ─── Schema ──────────────────────────────────────────────────────────
25
+
26
+ db.exec(`
27
+ CREATE TABLE IF NOT EXISTS learning_decisions (
28
+ id TEXT PRIMARY KEY,
29
+ site_id TEXT NOT NULL,
30
+ agent_id TEXT NOT NULL,
31
+ domain TEXT NOT NULL,
32
+ action TEXT NOT NULL,
33
+ context TEXT DEFAULT '{}',
34
+ outcome TEXT DEFAULT 'pending',
35
+ reward REAL DEFAULT 0.0,
36
+ predicted_reward REAL,
37
+ features TEXT DEFAULT '{}',
38
+ created_at TEXT DEFAULT (datetime('now'))
39
+ );
40
+
41
+ CREATE TABLE IF NOT EXISTS learning_policies (
42
+ id TEXT PRIMARY KEY,
43
+ site_id TEXT NOT NULL,
44
+ agent_id TEXT NOT NULL,
45
+ domain TEXT NOT NULL,
46
+ feature TEXT NOT NULL,
47
+ weight REAL DEFAULT 0.0,
48
+ update_count INTEGER DEFAULT 0,
49
+ last_error REAL DEFAULT 0.0,
50
+ created_at TEXT DEFAULT (datetime('now')),
51
+ updated_at TEXT DEFAULT (datetime('now')),
52
+ UNIQUE(site_id, agent_id, domain, feature)
53
+ );
54
+
55
+ CREATE TABLE IF NOT EXISTS learning_patterns (
56
+ id TEXT PRIMARY KEY,
57
+ site_id TEXT NOT NULL,
58
+ agent_id TEXT NOT NULL,
59
+ pattern_type TEXT NOT NULL,
60
+ sequence TEXT NOT NULL,
61
+ frequency INTEGER DEFAULT 1,
62
+ confidence REAL DEFAULT 0.5,
63
+ last_seen TEXT DEFAULT (datetime('now')),
64
+ created_at TEXT DEFAULT (datetime('now'))
65
+ );
66
+
67
+ CREATE TABLE IF NOT EXISTS learning_bandit_arms (
68
+ id TEXT PRIMARY KEY,
69
+ site_id TEXT NOT NULL,
70
+ agent_id TEXT NOT NULL,
71
+ domain TEXT NOT NULL,
72
+ action TEXT NOT NULL,
73
+ pulls INTEGER DEFAULT 0,
74
+ total_reward REAL DEFAULT 0.0,
75
+ avg_reward REAL DEFAULT 0.0,
76
+ ucb_score REAL DEFAULT 0.0,
77
+ created_at TEXT DEFAULT (datetime('now')),
78
+ updated_at TEXT DEFAULT (datetime('now')),
79
+ UNIQUE(site_id, agent_id, domain, action)
80
+ );
81
+
82
+ CREATE TABLE IF NOT EXISTS learning_sessions (
83
+ id TEXT PRIMARY KEY,
84
+ site_id TEXT NOT NULL,
85
+ agent_id TEXT NOT NULL,
86
+ decisions_made INTEGER DEFAULT 0,
87
+ correct_predictions INTEGER DEFAULT 0,
88
+ accuracy REAL DEFAULT 0.0,
89
+ started_at TEXT DEFAULT (datetime('now')),
90
+ ended_at TEXT
91
+ );
92
+
93
+ CREATE INDEX IF NOT EXISTS idx_learn_dec_site ON learning_decisions(site_id, agent_id);
94
+ CREATE INDEX IF NOT EXISTS idx_learn_dec_domain ON learning_decisions(domain);
95
+ CREATE INDEX IF NOT EXISTS idx_learn_dec_outcome ON learning_decisions(outcome);
96
+ CREATE INDEX IF NOT EXISTS idx_learn_pol_lookup ON learning_policies(site_id, agent_id, domain);
97
+ CREATE INDEX IF NOT EXISTS idx_learn_pat_seq ON learning_patterns(site_id, agent_id, pattern_type);
98
+ CREATE INDEX IF NOT EXISTS idx_learn_bandit ON learning_bandit_arms(site_id, agent_id, domain);
99
+ `);
100
+
101
+ // ─── Config ──────────────────────────────────────────────────────────
102
+
103
+ const LEARNING_RATE = 0.1;
104
+ const DISCOUNT_FACTOR = 0.95; // Temporal discount per decision step
105
+ const DECAY_RATE = 0.01; // Recency decay per hour
106
+ const UCB_EXPLORATION = 1.414; // √2 for UCB1
107
+ const MIN_CONFIDENCE = 0.01;
108
+ const MAX_SEQUENCE_LENGTH = 5;
109
+
110
+ // ─── Prepared Statements ─────────────────────────────────────────────
111
+
112
+ const stmts = {
113
+ insertDecision: db.prepare('INSERT INTO learning_decisions (id, site_id, agent_id, domain, action, context, predicted_reward, features) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'),
114
+ updateOutcome: db.prepare('UPDATE learning_decisions SET outcome = ?, reward = ? WHERE id = ?'),
115
+ getDecision: db.prepare('SELECT * FROM learning_decisions WHERE id = ?'),
116
+ getRecentDecisions: db.prepare('SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC LIMIT ?'),
117
+ getDecisionsByOutcome: db.prepare("SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = ? ORDER BY created_at DESC LIMIT ?"),
118
+ getAllDomainDecisions: db.prepare('SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC'),
119
+ countDecisions: db.prepare('SELECT COUNT(*) as count FROM learning_decisions WHERE site_id = ? AND agent_id = ?'),
120
+ getRecentRewards: db.prepare("SELECT reward, created_at FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending' ORDER BY created_at DESC LIMIT ?"),
121
+
122
+ upsertPolicy: db.prepare("INSERT INTO learning_policies (id, site_id, agent_id, domain, feature, weight) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, feature) DO UPDATE SET weight = ?, update_count = update_count + 1, last_error = ?, updated_at = datetime('now')"),
123
+ getPolicies: db.prepare('SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ABS(weight) DESC'),
124
+ getPolicy: db.prepare('SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? AND feature = ?'),
125
+
126
+ insertPattern: db.prepare('INSERT INTO learning_patterns (id, site_id, agent_id, pattern_type, sequence, confidence) VALUES (?, ?, ?, ?, ?, ?)'),
127
+ findPattern: db.prepare('SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND sequence = ?'),
128
+ updatePattern: db.prepare("UPDATE learning_patterns SET frequency = frequency + 1, confidence = ?, last_seen = datetime('now') WHERE id = ?"),
129
+ getTopPatterns: db.prepare('SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND pattern_type = ? ORDER BY frequency DESC, confidence DESC LIMIT ?'),
130
+
131
+ upsertArm: db.prepare('INSERT INTO learning_bandit_arms (id, site_id, agent_id, domain, action) VALUES (?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, action) DO NOTHING'),
132
+ getArms: db.prepare('SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ucb_score DESC'),
133
+ getArm: db.prepare('SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?'),
134
+ updateArm: db.prepare("UPDATE learning_bandit_arms SET pulls = pulls + 1, total_reward = total_reward + ?, avg_reward = ?, ucb_score = ?, updated_at = datetime('now') WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?"),
135
+
136
+ insertSession: db.prepare('INSERT INTO learning_sessions (id, site_id, agent_id) VALUES (?, ?, ?)'),
137
+ updateSession: db.prepare("UPDATE learning_sessions SET decisions_made = ?, correct_predictions = ?, accuracy = ?, ended_at = datetime('now') WHERE id = ?"),
138
+ getSessionHistory: db.prepare('SELECT * FROM learning_sessions WHERE site_id = ? AND agent_id = ? ORDER BY started_at DESC LIMIT ?'),
139
+
140
+ getStats: db.prepare(`SELECT
141
+ (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ?) as total_decisions,
142
+ (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = 'accepted') as accepted,
143
+ (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = 'rejected') as rejected,
144
+ (SELECT AVG(reward) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending') as avg_reward,
145
+ (SELECT COUNT(DISTINCT domain) FROM learning_policies WHERE site_id = ? AND agent_id = ?) as policy_domains,
146
+ (SELECT COUNT(*) FROM learning_patterns WHERE site_id = ? AND agent_id = ?) as total_patterns`),
147
+
148
+ deletePolicies: db.prepare('DELETE FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ?'),
149
+ deletePatterns: db.prepare('DELETE FROM learning_patterns WHERE site_id = ? AND agent_id = ?'),
150
+ deleteArms: db.prepare('DELETE FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ?'),
151
+ deleteDecisions: db.prepare('DELETE FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ?'),
152
+ };
153
+
154
+ // ─── Core Learning API ───────────────────────────────────────────────
155
+
156
+ /**
157
+ * Record a decision the agent is about to make, with predicted reward.
158
+ */
159
+ function recordDecision(siteId, agentId, domain, action, context = {}, features = {}) {
160
+ const id = crypto.randomUUID();
161
+ const extractedFeatures = { ..._extractFeatures(context), ...features };
162
+ const predictedReward = _predict(siteId, agentId, domain, extractedFeatures);
163
+
164
+ stmts.insertDecision.run(id, siteId, agentId, domain, action,
165
+ JSON.stringify(context), predictedReward, JSON.stringify(extractedFeatures));
166
+
167
+ // Ensure bandit arm exists
168
+ stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
169
+
170
+ return { decisionId: id, predictedReward, confidence: _getConfidence(siteId, agentId, domain) };
171
+ }
172
+
173
+ /**
174
+ * Provide feedback on a decision — the outcome and actual reward.
175
+ * This is the core learning signal.
176
+ */
177
+ function feedback(decisionId, outcome, reward, opts = {}) {
178
+ const decision = stmts.getDecision.get(decisionId);
179
+ if (!decision) throw new Error('Decision not found');
180
+
181
+ // ── Reward guard: clamp / block / flag malicious or anomalous rewards ──
182
+ let safeReward = reward;
183
+ let guardDecision = 'accepted';
184
+ if (rewardGuard) {
185
+ const sanitized = rewardGuard.sanitizeReward({
186
+ siteId: decision.site_id,
187
+ agentId: decision.agent_id,
188
+ domain: decision.domain,
189
+ action: decision.action,
190
+ reward,
191
+ actorId: opts.actorId,
192
+ approvedBy: opts.approvedBy,
193
+ });
194
+ safeReward = sanitized.reward;
195
+ guardDecision = sanitized.decision;
196
+ if (guardDecision === 'blocked') {
197
+ // Don't propagate to policy / bandit.
198
+ stmts.updateOutcome.run(outcome, safeReward, decisionId);
199
+ return { decisionId, blocked: true, reason: sanitized.reason, guardDecision };
200
+ }
201
+ }
202
+
203
+ stmts.updateOutcome.run(outcome, safeReward, decisionId);
204
+
205
+ const features = JSON.parse(decision.features || '{}');
206
+ const predError = safeReward - (decision.predicted_reward || 0);
207
+
208
+ // Update policy weights via gradient descent with temporal discount
209
+ _updatePolicies(decision.site_id, decision.agent_id, decision.domain, features, predError);
210
+
211
+ // Update bandit arm with actual reward
212
+ _updateBanditArm(decision.site_id, decision.agent_id, decision.domain, decision.action, safeReward);
213
+
214
+ // Mine patterns from recent decisions
215
+ _minePatterns(decision.site_id, decision.agent_id, decision.domain);
216
+
217
+ return {
218
+ decisionId,
219
+ predictionError: Math.round(predError * 1000) / 1000,
220
+ updatedConfidence: _getConfidence(decision.site_id, decision.agent_id, decision.domain),
221
+ accuracy: Math.round((1 - Math.abs(predError)) * 1000) / 1000,
222
+ guardDecision,
223
+ appliedReward: safeReward,
224
+ };
225
+ }
226
+
227
+ /**
228
+ * Batch feedback — provide multiple outcomes at once.
229
+ */
230
+ function batchFeedback(feedbackList) {
231
+ const results = [];
232
+ const txn = db.transaction(() => {
233
+ for (const fb of feedbackList) {
234
+ try {
235
+ results.push(feedback(fb.decisionId, fb.outcome, fb.reward));
236
+ } catch (err) {
237
+ results.push({ decisionId: fb.decisionId, error: err.message });
238
+ }
239
+ }
240
+ });
241
+ txn();
242
+ return results;
243
+ }
244
+
245
+ /**
246
+ * Get the best action for a domain using learned policies + bandit scores.
247
+ * UCB scores are normalized to [0,1] before blending with policy prediction.
248
+ */
249
+ function recommend(siteId, agentId, domain, availableActions, context = {}) {
250
+ const features = _extractFeatures(context);
251
+
252
+ // Get all arms to find normalization bounds
253
+ const allArms = stmts.getArms.all(siteId, agentId, domain);
254
+ const armMap = {};
255
+ for (const arm of allArms) armMap[arm.action] = arm;
256
+
257
+ // Normalize UCB scores to [0,1]
258
+ let minUCB = Infinity, maxUCB = -Infinity;
259
+ for (const arm of allArms) {
260
+ if (arm.pulls > 0) {
261
+ if (arm.ucb_score < minUCB) minUCB = arm.ucb_score;
262
+ if (arm.ucb_score > maxUCB) maxUCB = arm.ucb_score;
263
+ }
264
+ }
265
+ const ucbRange = maxUCB - minUCB;
266
+
267
+ const scored = availableActions.map((action) => {
268
+ const arm = armMap[action] || _getOrCreateArm(siteId, agentId, domain, action);
269
+ const policyScore = _predict(siteId, agentId, domain, { ...features, [`action:${action}`]: 1 });
270
+
271
+ // Normalize bandit score to [0,1]
272
+ let normalizedBandit;
273
+ if (arm.pulls === 0) {
274
+ normalizedBandit = 1.0; // unexplored arms get maximum exploration bonus
275
+ } else if (ucbRange > 0) {
276
+ normalizedBandit = (arm.ucb_score - minUCB) / ucbRange;
277
+ } else {
278
+ normalizedBandit = arm.avg_reward; // single arm — use raw avg
279
+ }
280
+
281
+ // Blend: as confidence grows, lean more on policy, less on exploration
282
+ const confidence = _getConfidence(siteId, agentId, domain);
283
+ const policyWeight = 0.4 + confidence * 0.4; // [0.4, 0.8]
284
+ const banditWeight = 1 - policyWeight; // [0.2, 0.6]
285
+ const blended = policyWeight * policyScore + banditWeight * normalizedBandit;
286
+
287
+ return {
288
+ action,
289
+ score: Math.round(blended * 1000) / 1000,
290
+ policyScore: Math.round(policyScore * 1000) / 1000,
291
+ banditScore: Math.round(normalizedBandit * 1000) / 1000,
292
+ pulls: arm.pulls,
293
+ avgReward: Math.round((arm.avg_reward || 0) * 1000) / 1000,
294
+ };
295
+ });
296
+
297
+ scored.sort((a, b) => b.score - a.score);
298
+
299
+ const confidence = _getConfidence(siteId, agentId, domain);
300
+ const topPatterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 5);
301
+
302
+ return {
303
+ recommended: scored[0]?.action || availableActions[0],
304
+ rankings: scored,
305
+ confidence,
306
+ explorationLevel: confidence < 0.3 ? 'high' : confidence < 0.6 ? 'medium' : 'low',
307
+ patterns: topPatterns.map((p) => ({
308
+ sequence: p.sequence, frequency: p.frequency, confidence: p.confidence
309
+ })),
310
+ };
311
+ }
312
+
313
+ /**
314
+ * Get learned preference summary for a domain.
315
+ */
316
+ function getPreferences(siteId, agentId, domain) {
317
+ const policies = stmts.getPolicies.all(siteId, agentId, domain);
318
+ const decisions = stmts.getRecentDecisions.all(siteId, agentId, domain, 50);
319
+ const patterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 10);
320
+
321
+ const accepted = decisions.filter((d) => d.outcome === 'accepted');
322
+ const rejected = decisions.filter((d) => d.outcome === 'rejected');
323
+
324
+ // Build preference profile from weights
325
+ const profile = {};
326
+ for (const p of policies) {
327
+ if (Math.abs(p.weight) > 0.05) {
328
+ profile[p.feature] = {
329
+ weight: Math.round(p.weight * 1000) / 1000,
330
+ direction: p.weight > 0 ? 'preferred' : 'avoided',
331
+ strength: Math.abs(p.weight) > 0.5 ? 'strong' : Math.abs(p.weight) > 0.2 ? 'moderate' : 'weak',
332
+ updates: p.update_count,
333
+ };
334
+ }
335
+ }
336
+
337
+ // Compute action frequencies
338
+ const actionFreqs = {};
339
+ for (const d of decisions) {
340
+ actionFreqs[d.action] = (actionFreqs[d.action] || 0) + 1;
341
+ }
342
+
343
+ return {
344
+ domain,
345
+ profile,
346
+ acceptRate: decisions.length > 0 ? Math.round((accepted.length / decisions.length) * 1000) / 1000 : 0,
347
+ rejectRate: decisions.length > 0 ? Math.round((rejected.length / decisions.length) * 1000) / 1000 : 0,
348
+ totalDecisions: decisions.length,
349
+ avgReward: decisions.length > 0
350
+ ? Math.round((decisions.reduce((s, d) => s + d.reward, 0) / decisions.length) * 1000) / 1000
351
+ : 0,
352
+ topActions: Object.entries(actionFreqs)
353
+ .sort(([, a], [, b]) => b - a)
354
+ .slice(0, 5)
355
+ .map(([action, count]) => ({ action, count, percentage: Math.round((count / decisions.length) * 100) })),
356
+ topPatterns: patterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency })),
357
+ confidence: _getConfidence(siteId, agentId, domain),
358
+ };
359
+ }
360
+
361
+ /**
362
+ * Get reward history — recent rewards over time for charting.
363
+ */
364
+ function getRewardHistory(siteId, agentId, limit = 30) {
365
+ return stmts.getRecentRewards.all(siteId, agentId, limit).reverse();
366
+ }
367
+
368
+ // ─── Learning Sessions ───────────────────────────────────────────────
369
+
370
+ function startSession(siteId, agentId) {
371
+ const id = crypto.randomUUID();
372
+ stmts.insertSession.run(id, siteId, agentId);
373
+ return { sessionId: id };
374
+ }
375
+
376
+ function endSession(sessionId, decisionsMade, correctPredictions) {
377
+ const accuracy = decisionsMade > 0 ? correctPredictions / decisionsMade : 0;
378
+ stmts.updateSession.run(decisionsMade, correctPredictions, accuracy, sessionId);
379
+ return { accuracy: Math.round(accuracy * 1000) / 1000 };
380
+ }
381
+
382
+ // ─── Reset ───────────────────────────────────────────────────────────
383
+
384
+ /**
385
+ * Reset all learned data for a specific domain.
386
+ */
387
+ function resetDomain(siteId, agentId, domain) {
388
+ const txn = db.transaction(() => {
389
+ stmts.deletePolicies.run(siteId, agentId, domain);
390
+ stmts.deleteArms.run(siteId, agentId, domain);
391
+ stmts.deleteDecisions.run(siteId, agentId, domain);
392
+ });
393
+ txn();
394
+ return { reset: true, domain };
395
+ }
396
+
397
+ /**
398
+ * Reset all patterns for an agent.
399
+ */
400
+ function resetPatterns(siteId, agentId) {
401
+ stmts.deletePatterns.run(siteId, agentId);
402
+ return { reset: true };
403
+ }
404
+
405
+ // ─── Stats ───────────────────────────────────────────────────────────
406
+
407
+ function getStats(siteId, agentId) {
408
+ const row = stmts.getStats.get(siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId, siteId, agentId);
409
+ const sessions = stmts.getSessionHistory.all(siteId, agentId, 10);
410
+ const recentAccuracy = sessions.length > 0 ? sessions.reduce((s, sess) => s + sess.accuracy, 0) / sessions.length : 0;
411
+ const rewardHistory = stmts.getRecentRewards.all(siteId, agentId, 30).reverse();
412
+
413
+ return {
414
+ ...row,
415
+ avg_reward: row.avg_reward !== null ? Math.round(row.avg_reward * 1000) / 1000 : 0,
416
+ recentAccuracy: Math.round(recentAccuracy * 1000) / 1000,
417
+ sessionsCount: sessions.length,
418
+ acceptRate: row.total_decisions > 0
419
+ ? Math.round((row.accepted / row.total_decisions) * 1000) / 1000
420
+ : 0,
421
+ rewardHistory,
422
+ };
423
+ }
424
+
425
+ // ─── Internal: Prediction via Linear Model ───────────────────────────
426
+
427
/**
 * Predict a reward in (0, 1) for a feature map using the stored linear policy.
 *
 * Each stored policy row contributes `weight * featureValue * recencyBoost`
 * to a score, which is then passed through a sigmoid. Non-numeric feature
 * values are treated as booleans (truthy = 1, falsy = 0).
 *
 * Non-finite inputs (NaN / Infinity feature values or weights) are skipped:
 * a single one would otherwise turn the whole score, and therefore the
 * prediction, into NaN.
 *
 * @param {string} siteId   Site identifier.
 * @param {string} agentId  Agent identifier.
 * @param {string} domain   Learning domain the policies belong to.
 * @param {Object<string, *>} features Feature map keyed by feature name.
 * @returns {number} 0.5 when no policy rows exist, otherwise sigmoid(score).
 */
function _predict(siteId, agentId, domain, features) {
  const policies = stmts.getPolicies.all(siteId, agentId, domain);
  if (policies.length === 0) return 0.5; // No data yet — neutral prediction

  let score = 0;
  for (const p of policies) {
    const featureVal = features[p.feature];
    if (featureVal === undefined) continue;

    const fv = typeof featureVal === 'number' ? featureVal : (featureVal ? 1 : 0);
    if (!Number.isFinite(fv) || !Number.isFinite(p.weight)) continue;

    // Policies with 1-9 updates are scaled by DISCOUNT_FACTOR^(10 - update_count);
    // rows with 10+ updates, or with no updates at all, are used unscaled.
    const recencyBoost = p.update_count > 0
      ? Math.pow(DISCOUNT_FACTOR, Math.max(0, 10 - p.update_count))
      : 1;
    score += p.weight * fv * recencyBoost;
  }

  // Sigmoid squash to [0, 1]
  return 1 / (1 + Math.exp(-score));
}
448
+
449
/**
 * Apply one gradient step to the stored policy weight of every active feature.
 *
 * For each feature with a non-zero value, the existing weight is multiplied
 * by DISCOUNT_FACTOR (weight decay), moved by `error * value * LEARNING_RATE`,
 * clamped to [-5, 5] and written back through the `upsertPolicy` statement.
 * Non-numeric feature values are treated as booleans (truthy = 1, falsy = 0).
 *
 * Non-finite inputs are rejected up front. `Math.max`/`Math.min` propagate
 * NaN, so the clamp alone cannot stop a NaN weight from being persisted and
 * then carried into every later update of that feature.
 *
 * @param {string} siteId   Site identifier.
 * @param {string} agentId  Agent identifier.
 * @param {string} domain   Learning domain the policies belong to.
 * @param {Object<string, *>} features Feature map keyed by feature name.
 * @param {number} error    Prediction error driving the update; the call is a
 *   no-op when this is NaN or infinite.
 * @returns {void}
 */
function _updatePolicies(siteId, agentId, domain, features, error) {
  if (!Number.isFinite(error)) return;

  for (const [feature, value] of Object.entries(features)) {
    const fv = typeof value === 'number' ? value : (value ? 1 : 0);
    if (fv === 0) continue; // Skip zero-valued features
    if (!Number.isFinite(fv)) continue; // NaN / Infinity would corrupt the weight

    const gradient = error * fv * LEARNING_RATE;
    const existing = stmts.getPolicy.get(siteId, agentId, domain, feature);

    // Apply weight decay to prevent unbounded growth. A missing or non-finite
    // stored weight restarts from zero instead of staying NaN.
    const priorWeight = existing && Number.isFinite(existing.weight) ? existing.weight : 0;
    const newWeight = priorWeight * DISCOUNT_FACTOR + gradient;

    // Clamp weights to [-5, 5] to prevent extreme values
    const clampedWeight = Math.max(-5, Math.min(5, newWeight));

    // The weight is bound twice, as in the original call; presumably once for
    // the insert and once for the on-conflict update — verify against the SQL.
    stmts.upsertPolicy.run(
      crypto.randomUUID(), siteId, agentId, domain, feature, clampedWeight,
      clampedWeight, Math.abs(error)
    );
  }
}
470
+
471
+ // ─── Internal: Multi-Armed Bandit ────────────────────────────────────
472
+
473
/**
 * Ensure a bandit arm exists for an action and return it.
 *
 * Runs the `upsertArm` statement with a fresh UUID, then reads the arm back
 * with `getArm`. If the read returns nothing, a zeroed placeholder arm is
 * returned instead.
 *
 * @param {string} siteId  Site identifier.
 * @param {string} agentId Agent identifier.
 * @param {string} domain  Learning domain the arm belongs to.
 * @param {string} action  Action name identifying the arm.
 * @returns {object} The stored arm row, or
 *   `{ pulls: 0, ucb_score: 0, avg_reward: 0, total_reward: 0 }`.
 */
function _getOrCreateArm(siteId, agentId, domain, action) {
  const armKey = [siteId, agentId, domain, action];

  stmts.upsertArm.run(crypto.randomUUID(), ...armKey);

  const stored = stmts.getArm.get(...armKey);
  if (stored) {
    return stored;
  }
  return { pulls: 0, ucb_score: 0, avg_reward: 0, total_reward: 0 };
}
478
+
479
/**
 * Record one reward against a bandit arm and refresh its UCB1 score.
 *
 * The new average is `(total_reward + reward) / (pulls + 1)` and the score is
 * `avg + UCB_EXPLORATION * sqrt(ln(N) / n)`, where `n` is the arm's new pull
 * count and `N` is the pull count of all arms in the domain plus this pull.
 *
 * If the arm does not exist yet it is created and the reward is then applied
 * to it. Previously the function returned right after creating the arm, so
 * the first reward for an unseen arm was silently discarded.
 *
 * @param {string} siteId  Site identifier.
 * @param {string} agentId Agent identifier.
 * @param {string} domain  Learning domain the arm belongs to.
 * @param {string} action  Action name identifying the arm.
 * @param {number} reward  Observed reward for this pull.
 * @returns {void}
 */
function _updateBanditArm(siteId, agentId, domain, action, reward) {
  let arm = stmts.getArm.get(siteId, agentId, domain, action);
  if (!arm) {
    stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);
    // Re-read the freshly created row; fall back to a zeroed arm if the read
    // still comes back empty so the arithmetic below stays well-defined.
    arm = stmts.getArm.get(siteId, agentId, domain, action) || { pulls: 0, total_reward: 0 };
  }

  const newPulls = arm.pulls + 1;
  const newTotalReward = arm.total_reward + reward;
  const newAvgReward = newTotalReward / newPulls;

  // UCB1: avg_reward + C * sqrt(ln(N) / n_i)
  // We need total pulls across all arms in this domain
  const arms = stmts.getArms.all(siteId, agentId, domain);
  const totalPulls = arms.reduce((s, a) => s + a.pulls, 0) + 1; // +1 for this pull

  const exploration = UCB_EXPLORATION * Math.sqrt(Math.log(totalPulls) / newPulls);
  const ucbScore = newAvgReward + exploration;

  // Only the raw reward, new average and new score are bound here; presumably
  // the statement increments pulls and total_reward itself — verify in its SQL.
  stmts.updateArm.run(reward, newAvgReward, ucbScore, siteId, agentId, domain, action);
}
500
+
501
+ // ─── Internal: Pattern Mining ────────────────────────────────────────
502
+
503
/**
 * Derive action-sequence patterns from the 20 most recent decisions.
 *
 * Does nothing when fewer than three decisions are returned. Otherwise, for
 * every length from 2 up to `MAX_SEQUENCE_LENGTH` (capped at the number of
 * rows), the first `len` rows are taken, their actions are reversed and
 * joined with ' → ', and the resulting sequence is either inserted as a new
 * 'action_sequence' pattern at confidence 0.3 or has its confidence nudged
 * 5% of the way towards 1 (capped at 0.99).
 *
 * @param {string} siteId  Site identifier.
 * @param {string} agentId Agent identifier.
 * @param {string} domain  Learning domain whose decisions are mined.
 * @returns {void}
 */
function _minePatterns(siteId, agentId, domain) {
  const recent = stmts.getRecentDecisions.all(siteId, agentId, domain, 20);
  if (recent.length < 3) {
    return;
  }

  const longest = Math.min(MAX_SEQUENCE_LENGTH, recent.length);
  let len = 2;
  while (len <= longest) {
    const head = recent.slice(0, len);
    const actions = head.map((row) => row.action);
    actions.reverse();
    const sequence = actions.join(' → ');

    const known = stmts.findPattern.get(siteId, agentId, sequence);
    if (!known) {
      // First sighting of this sequence: seed it with a low confidence.
      stmts.insertPattern.run(crypto.randomUUID(), siteId, agentId, 'action_sequence', sequence, 0.3);
    } else {
      // Each repeat closes 5% of the remaining gap to 1, capped at 0.99.
      const gap = 1 - known.confidence;
      const raised = Math.min(0.99, known.confidence + 0.05 * gap);
      stmts.updatePattern.run(raised, known.id);
    }

    len += 1;
  }
}
521
+
522
+ // ─── Internal: Feature Extraction ────────────────────────────────────
523
+
524
/**
 * Turn a raw decision context into the flat feature map used by the learner.
 *
 * Known context keys are copied and expanded into derived indicator features
 * (price buckets, `has_discount`, `high_rated`, `category:*`, `brand:*`,
 * time-of-day flags, `repeat_visit`, `in_stock`). Any other numeric context
 * property is passed through under its own key.
 *
 * Two safeguards against bad numeric input:
 *  - a price that does not coerce to a finite number gets no price bucket
 *    (it used to fall through every `<` comparison into `luxury`);
 *  - NaN / Infinity values are dropped from the result.
 *
 * @param {object} context Raw context; every property is optional.
 * @returns {Object<string, *>} Feature map keyed by feature name.
 */
function _extractFeatures(context) {
  const features = {};

  if (context.price !== undefined) {
    features.price = context.price;
    // Bucketize price for discrete learning (only when the price is usable).
    const price = Number(context.price);
    if (Number.isFinite(price)) {
      if (price < 10) features['price_bucket:cheap'] = 1;
      else if (price < 50) features['price_bucket:moderate'] = 1;
      else if (price < 200) features['price_bucket:premium'] = 1;
      else features['price_bucket:luxury'] = 1;
    }
  }
  if (context.quantity !== undefined) features.quantity = context.quantity;
  if (context.discount !== undefined) {
    features.discount = context.discount;
    features.has_discount = context.discount > 0 ? 1 : 0;
  }
  if (context.rating !== undefined) {
    features.rating = context.rating;
    features.high_rated = context.rating >= 4.0 ? 1 : 0;
  }
  if (context.category) features[`category:${context.category}`] = 1;
  if (context.brand) features[`brand:${context.brand}`] = 1;
  if (context.timeOfDay !== undefined) {
    features.morning = context.timeOfDay < 12 ? 1 : 0;
    features.afternoon = context.timeOfDay >= 12 && context.timeOfDay < 18 ? 1 : 0;
    features.evening = context.timeOfDay >= 18 ? 1 : 0;
  }
  if (context.isRepeat !== undefined) features.repeat_visit = context.isRepeat ? 1 : 0;
  if (context.urgency !== undefined) features.urgency = context.urgency;
  if (context.inStock !== undefined) features.in_stock = context.inStock ? 1 : 0;

  // Pass through any raw numeric features
  for (const [k, v] of Object.entries(context)) {
    if (features[k] === undefined && typeof v === 'number') {
      features[k] = v;
    }
  }

  // Drop NaN / Infinity so they cannot reach the score or the stored weights.
  for (const [name, value] of Object.entries(features)) {
    if (typeof value === 'number' && !Number.isFinite(value)) {
      delete features[name];
    }
  }

  return features;
}
564
+
565
+ // ─── Internal: Confidence Estimation ─────────────────────────────────
566
+
567
/**
 * Estimate how much the learner's output for a domain can be trusted.
 *
 * Looks at up to 50 recent decisions and blends three components:
 *  - volume   (weight 0.3): resolved decisions / 30, capped at 1;
 *  - accuracy (weight 0.5): mean of `max(0, 1 - |reward - predicted_reward|)`
 *    over the resolved decisions that carry a prediction, or a neutral 0.5
 *    when none of them do;
 *  - recency  (weight 0.2): `exp(-DECAY_RATE * ageHours)` of the first
 *    resolved row, or 0 when its timestamp cannot be parsed.
 *
 * Fixes over the previous version: decisions without a prediction no longer
 * count as zero accuracy (the mean used to divide by all resolved decisions
 * and its 0.5 fallback was unreachable), and an unparseable `created_at` no
 * longer turns the whole result into NaN.
 *
 * @param {string} siteId  Site identifier.
 * @param {string} agentId Agent identifier.
 * @param {string} domain  Learning domain to assess.
 * @returns {number} 0 when there are no decisions, `MIN_CONFIDENCE` when none
 *   are resolved, otherwise the blend clamped to [MIN_CONFIDENCE, 0.99].
 */
function _getConfidence(siteId, agentId, domain) {
  const decisions = stmts.getRecentDecisions.all(siteId, agentId, domain, 50);
  if (decisions.length === 0) return 0;

  const withOutcome = decisions.filter((d) => d.outcome !== 'pending');
  if (withOutcome.length === 0) return MIN_CONFIDENCE;

  // Volume component: grows linearly and saturates at 30 resolved decisions
  const volumeConf = Math.min(1, withOutcome.length / 30);

  // Accuracy component: how close predictions were to actual rewards
  let accuracySum = 0;
  let scoredCount = 0;
  for (const d of withOutcome) {
    if (d.predicted_reward == null) continue; // no prediction to score
    const error = Math.abs(d.reward - d.predicted_reward);
    accuracySum += Math.max(0, 1 - error);
    scoredCount += 1;
  }
  const accuracyConf = scoredCount > 0 ? accuracySum / scoredCount : 0.5;

  // Recency component: exponential decay based on age of newest data.
  // NOTE(review): assumes getRecentDecisions returns newest rows first and
  // that created_at parses in the intended timezone — confirm against the SQL.
  const latestTs = new Date(withOutcome[0].created_at).getTime();
  const ageHours = (Date.now() - latestTs) / 3600000;
  const recencyConf = Number.isFinite(ageHours) ? Math.exp(-DECAY_RATE * ageHours) : 0;

  return Math.max(MIN_CONFIDENCE, Math.min(0.99,
    volumeConf * 0.3 + accuracyConf * 0.5 + recencyConf * 0.2
  ));
}
596
+
597
+ module.exports = {
598
+ recordDecision, feedback, batchFeedback, recommend, getPreferences,
599
+ getRewardHistory, startSession, endSession,
600
+ resetDomain, resetPatterns, getStats,
601
+ };