web-agent-bridge 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ar.md +7 -0
- package/README.md +7 -0
- package/package.json +1 -1
- package/public/index.html +52 -0
- package/public/llms.txt +1 -0
- package/public/mesh-dashboard.html +401 -0
- package/public/script/wab.min.js +138 -0
- package/script/ai-agent-bridge.js +126 -0
- package/sdk/agent-mesh.js +333 -0
- package/sdk/index.js +2 -1
- package/sdk/package.json +1 -1
- package/server/index.js +5 -0
- package/server/routes/mesh.js +300 -0
- package/server/services/agent-learning.js +422 -0
- package/server/services/agent-mesh.js +346 -0
- package/server/services/agent-symphony.js +681 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Learning Engine — Local Reinforcement Learning
|
|
3
|
+
*
|
|
4
|
+
* Agents learn from user decisions, building behavioral models locally
|
|
5
|
+
* without sending data to external LLMs. The engine tracks:
|
|
6
|
+
* - Decision patterns (what the user chooses and when)
|
|
7
|
+
* - Reward signals (accepted/rejected/modified outcomes)
|
|
8
|
+
* - Policy weights (which factors matter most to this user)
|
|
9
|
+
* - Prediction accuracy over time
|
|
10
|
+
*
|
|
11
|
+
* Learning algorithms:
|
|
12
|
+
* - Multi-armed bandit for action selection
|
|
13
|
+
* - Exponential decay for preference freshness
|
|
14
|
+
* - Bayesian confidence updates
|
|
15
|
+
* - Pattern sequence mining for behavior chains
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
const crypto = require('crypto');
|
|
19
|
+
const { db } = require('../models/db');
|
|
20
|
+
|
|
21
|
+
// ─── Schema ──────────────────────────────────────────────────────────
//
// Five tables back the learning engine (all created idempotently):
//   learning_decisions   — one row per recorded agent decision; outcome starts
//                          as 'pending' and is filled in by feedback().
//   learning_policies    — per-feature linear-model weights, unique per
//                          (site, agent, domain, feature).
//   learning_patterns    — mined action sequences with frequency/confidence.
//   learning_bandit_arms — UCB1 bandit state, unique per
//                          (site, agent, domain, action); ucb_score starts
//                          very high (1000) so unpulled arms are explored first.
//   learning_sessions    — prediction-accuracy bookkeeping per session.

db.exec(`
  CREATE TABLE IF NOT EXISTS learning_decisions (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    domain TEXT NOT NULL,
    action TEXT NOT NULL,
    context TEXT DEFAULT '{}',
    outcome TEXT DEFAULT 'pending',
    reward REAL DEFAULT 0.0,
    predicted_reward REAL,
    features TEXT DEFAULT '{}',
    created_at TEXT DEFAULT (datetime('now'))
  );

  CREATE TABLE IF NOT EXISTS learning_policies (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    domain TEXT NOT NULL,
    feature TEXT NOT NULL,
    weight REAL DEFAULT 0.0,
    update_count INTEGER DEFAULT 0,
    last_error REAL DEFAULT 0.0,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now')),
    UNIQUE(site_id, agent_id, domain, feature)
  );

  CREATE TABLE IF NOT EXISTS learning_patterns (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    pattern_type TEXT NOT NULL,
    sequence TEXT NOT NULL,
    frequency INTEGER DEFAULT 1,
    confidence REAL DEFAULT 0.5,
    last_seen TEXT DEFAULT (datetime('now')),
    created_at TEXT DEFAULT (datetime('now'))
  );

  CREATE TABLE IF NOT EXISTS learning_bandit_arms (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    domain TEXT NOT NULL,
    action TEXT NOT NULL,
    pulls INTEGER DEFAULT 0,
    total_reward REAL DEFAULT 0.0,
    avg_reward REAL DEFAULT 0.0,
    ucb_score REAL DEFAULT 1000.0,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now')),
    UNIQUE(site_id, agent_id, domain, action)
  );

  CREATE TABLE IF NOT EXISTS learning_sessions (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL,
    agent_id TEXT NOT NULL,
    decisions_made INTEGER DEFAULT 0,
    correct_predictions INTEGER DEFAULT 0,
    accuracy REAL DEFAULT 0.0,
    started_at TEXT DEFAULT (datetime('now')),
    ended_at TEXT
  );

  CREATE INDEX IF NOT EXISTS idx_learn_dec_site ON learning_decisions(site_id, agent_id);
  CREATE INDEX IF NOT EXISTS idx_learn_dec_domain ON learning_decisions(domain);
  CREATE INDEX IF NOT EXISTS idx_learn_dec_outcome ON learning_decisions(outcome);
  CREATE INDEX IF NOT EXISTS idx_learn_pol_lookup ON learning_policies(site_id, agent_id, domain);
  CREATE INDEX IF NOT EXISTS idx_learn_pat_seq ON learning_patterns(site_id, agent_id, pattern_type);
  CREATE INDEX IF NOT EXISTS idx_learn_bandit ON learning_bandit_arms(site_id, agent_id, domain);
`);
|
|
97
|
+
|
|
98
|
+
// ─── Config ──────────────────────────────────────────────────────────

const LEARNING_RATE = 0.1;      // gradient step size in _updatePolicies
const DISCOUNT_FACTOR = 0.95;   // NOTE(review): defined but not referenced anywhere in this file — confirm whether it should feed into reward propagation or be removed
const DECAY_RATE = 0.01;        // per-hour exponential decay for the recency term in _getConfidence
const UCB_EXPLORATION = 1.414;  // ≈ sqrt(2), UCB1 exploration coefficient in _updateBanditArm
const MIN_CONFIDENCE = 0.01;    // confidence floor once at least one outcome exists
const MAX_SEQUENCE_LENGTH = 5;  // longest action sequence mined by _minePatterns
|
|
106
|
+
|
|
107
|
+
// ─── Prepared Statements ─────────────────────────────────────────────
//
// All SQL is prepared once at module load. Positional `?` parameters are
// bound in the order listed in each statement — callers must match that
// order exactly (getStats in particular takes the same (site_id, agent_id)
// pair six times, once per sub-select).

const stmts = {
  // Decisions: insert with the policy's predicted reward; outcome/reward are
  // filled in later by feedback() via updateOutcome.
  insertDecision: db.prepare(`INSERT INTO learning_decisions (id, site_id, agent_id, domain, action, context, predicted_reward, features) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`),
  updateOutcome: db.prepare(`UPDATE learning_decisions SET outcome = ?, reward = ? WHERE id = ?`),
  getDecision: db.prepare(`SELECT * FROM learning_decisions WHERE id = ?`),
  getRecentDecisions: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC LIMIT ?`),
  getDecisionsByOutcome: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = ? ORDER BY created_at DESC LIMIT ?`),
  getAllDomainDecisions: db.prepare(`SELECT * FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY created_at DESC`),
  countDecisions: db.prepare(`SELECT COUNT(*) as count FROM learning_decisions WHERE site_id = ? AND agent_id = ?`),

  // Policies: upsert binds (id, site, agent, domain, feature, weight) for the
  // INSERT arm plus (weight, last_error) again for the UPDATE arm — 8 params.
  upsertPolicy: db.prepare(`INSERT INTO learning_policies (id, site_id, agent_id, domain, feature, weight) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, feature) DO UPDATE SET weight = ?, update_count = update_count + 1, last_error = ?, updated_at = datetime('now')`),
  getPolicies: db.prepare(`SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ABS(weight) DESC`),
  getPolicy: db.prepare(`SELECT * FROM learning_policies WHERE site_id = ? AND agent_id = ? AND domain = ? AND feature = ?`),

  // Patterns: mined action sequences (see _minePatterns).
  insertPattern: db.prepare(`INSERT INTO learning_patterns (id, site_id, agent_id, pattern_type, sequence, confidence) VALUES (?, ?, ?, ?, ?, ?)`),
  findPattern: db.prepare(`SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND sequence = ?`),
  updatePattern: db.prepare(`UPDATE learning_patterns SET frequency = frequency + 1, confidence = ?, last_seen = datetime('now') WHERE id = ?`),
  getTopPatterns: db.prepare(`SELECT * FROM learning_patterns WHERE site_id = ? AND agent_id = ? AND pattern_type = ? ORDER BY frequency DESC, confidence DESC LIMIT ?`),

  // Bandit arms: upsertArm is a create-if-missing (DO NOTHING on conflict).
  // updateArm's RHS column references read the pre-update row, so the reward
  // is bound twice: once for total_reward, once for the avg_reward numerator.
  upsertArm: db.prepare(`INSERT INTO learning_bandit_arms (id, site_id, agent_id, domain, action) VALUES (?, ?, ?, ?, ?) ON CONFLICT(site_id, agent_id, domain, action) DO NOTHING`),
  getArms: db.prepare(`SELECT * FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ? ORDER BY ucb_score DESC`),
  updateArm: db.prepare(`UPDATE learning_bandit_arms SET pulls = pulls + 1, total_reward = total_reward + ?, avg_reward = (total_reward + ?) / (pulls + 1), ucb_score = ?, updated_at = datetime('now') WHERE site_id = ? AND agent_id = ? AND domain = ? AND action = ?`),
  getTotalPulls: db.prepare(`SELECT SUM(pulls) as total FROM learning_bandit_arms WHERE site_id = ? AND agent_id = ? AND domain = ?`),

  // Sessions: accuracy bookkeeping for getStats().
  insertSession: db.prepare(`INSERT INTO learning_sessions (id, site_id, agent_id) VALUES (?, ?, ?)`),
  updateSession: db.prepare(`UPDATE learning_sessions SET decisions_made = ?, correct_predictions = ?, accuracy = ?, ended_at = datetime('now') WHERE id = ?`),
  getSessionHistory: db.prepare(`SELECT * FROM learning_sessions WHERE site_id = ? AND agent_id = ? ORDER BY started_at DESC LIMIT ?`),

  // Aggregate dashboard stats; binds (site_id, agent_id) six times.
  getStats: db.prepare(`SELECT
    (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ?) as total_decisions,
    (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = 'accepted') as accepted,
    (SELECT COUNT(*) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome = 'rejected') as rejected,
    (SELECT AVG(reward) FROM learning_decisions WHERE site_id = ? AND agent_id = ? AND outcome != 'pending') as avg_reward,
    (SELECT COUNT(DISTINCT domain) FROM learning_policies WHERE site_id = ? AND agent_id = ?) as policy_domains,
    (SELECT COUNT(*) FROM learning_patterns WHERE site_id = ? AND agent_id = ?) as total_patterns`),
};
|
|
144
|
+
|
|
145
|
+
// ─── Core Learning API ───────────────────────────────────────────────

/**
 * Log an action the agent is about to take, along with the reward the
 * current policy predicts for it.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain - decision domain (e.g. a task category).
 * @param {string} action - the action being taken.
 * @param {object} [context] - arbitrary context, stored as JSON.
 * @param {object} [features] - feature vector used for prediction, stored as JSON.
 * @returns {{decisionId: string, predictedReward: number, confidence: number}}
 */
function recordDecision(siteId, agentId, domain, action, context = {}, features = {}) {
  const decisionId = crypto.randomUUID();
  const predictedReward = _predict(siteId, agentId, domain, features);

  stmts.insertDecision.run(
    decisionId,
    siteId,
    agentId,
    domain,
    action,
    JSON.stringify(context),
    predictedReward,
    JSON.stringify(features),
  );

  // Create the bandit arm if it doesn't exist yet (DO NOTHING on conflict).
  stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);

  return {
    decisionId,
    predictedReward,
    confidence: _getConfidence(siteId, agentId, domain),
  };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Provide feedback on a decision — the outcome and actual reward.
 * This is the core learning signal: it updates the stored decision, the
 * per-feature policy weights, the bandit arm for the chosen action, and
 * the mined action-sequence patterns.
 *
 * @param {string} decisionId - id returned by recordDecision().
 * @param {string} outcome - e.g. 'accepted', 'rejected', 'modified'.
 * @param {number} reward - actual reward signal; must be a finite number.
 * @returns {{predictionError: number, updatedConfidence: number}}
 * @throws {Error} if the decision id is unknown or reward is not a finite number.
 */
function feedback(decisionId, outcome, reward) {
  // Guard: a NaN/undefined/string reward would silently corrupt policy
  // weights and bandit statistics downstream, so reject it up front.
  if (typeof reward !== 'number' || !Number.isFinite(reward)) {
    throw new Error('reward must be a finite number');
  }

  const decision = stmts.getDecision.get(decisionId);
  if (!decision) throw new Error('Decision not found');

  stmts.updateOutcome.run(outcome, reward, decisionId);

  const features = JSON.parse(decision.features || '{}');
  const predError = reward - (decision.predicted_reward || 0);

  // Update policy weights via gradient descent on the prediction error.
  _updatePolicies(decision.site_id, decision.agent_id, decision.domain, features, predError);

  // Credit the chosen action's bandit arm.
  _updateBanditArm(decision.site_id, decision.agent_id, decision.domain, decision.action, reward);

  // Mine patterns from recent decisions.
  _minePatterns(decision.site_id, decision.agent_id, decision.domain);

  return {
    predictionError: predError,
    updatedConfidence: _getConfidence(decision.site_id, decision.agent_id, decision.domain),
  };
}
|
|
189
|
+
|
|
190
|
+
/**
 * Get the best action for a domain using learned policies + bandit scores.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @param {string[]} availableActions - candidate actions to rank.
 * @param {object} [context] - raw context; converted to features internally.
 * @returns {{recommended: string, rankings: object[], confidence: number, patterns: object[]}}
 */
function recommend(siteId, agentId, domain, availableActions, context = {}) {
  const features = _extractFeatures(context);

  // Score every candidate: a 60/40 blend of the linear-policy prediction and
  // the UCB bandit score (which favors under-explored actions).
  const rankings = [];
  for (const action of availableActions) {
    const arm = _getOrCreateArm(siteId, agentId, domain, action);
    const policyScore = _predict(siteId, agentId, domain, { ...features, action });
    const banditScore = arm.ucb_score || 0;

    rankings.push({
      action,
      score: 0.6 * policyScore + 0.4 * banditScore,
      policyScore,
      banditScore,
      pulls: arm.pulls,
    });
  }

  rankings.sort((left, right) => right.score - left.score);

  const confidence = _getConfidence(siteId, agentId, domain);
  const topPatterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 3);

  return {
    recommended: rankings[0]?.action || availableActions[0],
    rankings,
    confidence,
    patterns: topPatterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency, confidence: p.confidence })),
  };
}
|
|
220
|
+
|
|
221
|
+
/**
 * Get learned preference summary for a domain: significant feature weights,
 * accept rate, average reward, and the most frequent action sequences.
 *
 * @param {string} siteId
 * @param {string} agentId
 * @param {string} domain
 * @returns {object} preference profile and aggregate stats.
 */
function getPreferences(siteId, agentId, domain) {
  const policies = stmts.getPolicies.all(siteId, agentId, domain);
  const decisions = stmts.getRecentDecisions.all(siteId, agentId, domain, 50);
  const patterns = stmts.getTopPatterns.all(siteId, agentId, 'action_sequence', 10);

  const accepted = decisions.filter((d) => d.outcome === 'accepted');
  // Fix: the original also filtered 'rejected' decisions into a local that
  // was never used — dead code removed.

  // Build preference profile from weights that are meaningfully non-zero.
  const profile = {};
  for (const p of policies) {
    const magnitude = Math.abs(p.weight);
    if (magnitude <= 0.05) continue;
    profile[p.feature] = {
      weight: Math.round(p.weight * 1000) / 1000,
      direction: p.weight > 0 ? 'preferred' : 'avoided',
      strength: magnitude > 0.5 ? 'strong' : magnitude > 0.2 ? 'moderate' : 'weak',
      updates: p.update_count,
    };
  }

  return {
    domain,
    profile,
    acceptRate: decisions.length > 0 ? accepted.length / decisions.length : 0,
    totalDecisions: decisions.length,
    // Note: averages over all recent decisions, including pending ones
    // (whose reward defaults to 0).
    avgReward: decisions.length > 0 ? decisions.reduce((s, d) => s + d.reward, 0) / decisions.length : 0,
    topPatterns: patterns.map((p) => ({ sequence: p.sequence, frequency: p.frequency })),
    confidence: _getConfidence(siteId, agentId, domain),
  };
}
|
|
255
|
+
|
|
256
|
+
// ─── Learning Sessions ───────────────────────────────────────────────

/**
 * Open a new learning session for accuracy tracking.
 * @returns {{sessionId: string}}
 */
function startSession(siteId, agentId) {
  const sessionId = crypto.randomUUID();
  stmts.insertSession.run(sessionId, siteId, agentId);
  return { sessionId };
}
|
|
263
|
+
|
|
264
|
+
/**
 * Close a session, recording how many of the agent's predictions were correct.
 * @returns {{accuracy: number}} fraction of correct predictions (0 when none made).
 */
function endSession(sessionId, decisionsMade, correctPredictions) {
  let accuracy = 0;
  if (decisionsMade > 0) {
    accuracy = correctPredictions / decisionsMade;
  }
  stmts.updateSession.run(decisionsMade, correctPredictions, accuracy, sessionId);
  return { accuracy };
}
|
|
269
|
+
|
|
270
|
+
// ─── Stats ───────────────────────────────────────────────────────────

/**
 * Aggregate learning stats for an agent: decision counts, accept rate,
 * average reward, and recent session accuracy.
 */
function getStats(siteId, agentId) {
  // getStats binds the same (site_id, agent_id) pair once per sub-select.
  const key = [siteId, agentId];
  const row = stmts.getStats.get(...key, ...key, ...key, ...key, ...key, ...key);
  const sessions = stmts.getSessionHistory.all(siteId, agentId, 10);

  let accuracyTotal = 0;
  for (const session of sessions) {
    accuracyTotal += session.accuracy;
  }
  const recentAccuracy = sessions.length > 0 ? accuracyTotal / sessions.length : 0;

  const acceptRate =
    row.total_decisions > 0
      ? Math.round((row.accepted / row.total_decisions) * 1000) / 1000
      : 0;

  return {
    ...row,
    recentAccuracy: Math.round(recentAccuracy * 1000) / 1000,
    sessionsCount: sessions.length,
    acceptRate,
  };
}
|
|
284
|
+
|
|
285
|
+
// ─── Internal: Prediction via Linear Model ───────────────────────────

/**
 * Predict a reward in [0, 1] for a feature vector using the learned
 * per-feature weights (dot product squashed through a sigmoid).
 * Returns a neutral 0.5 when no policy data exists yet.
 */
function _predict(siteId, agentId, domain, features) {
  const policies = stmts.getPolicies.all(siteId, agentId, domain);
  if (policies.length === 0) return 0.5; // No data yet — neutral prediction

  let logit = 0;
  for (const policy of policies) {
    const raw = features[policy.feature];
    if (raw === undefined) continue;
    // Booleans (and other non-numbers) are coerced to 0/1.
    const value = typeof raw === 'number' ? raw : raw ? 1 : 0;
    logit += policy.weight * value;
  }

  // Sigmoid squash to [0, 1]
  return 1 / (1 + Math.exp(-logit));
}
|
|
303
|
+
|
|
304
|
+
/**
 * Gradient-descent update of per-feature policy weights from a prediction
 * error. Each feature's weight moves by error * value * LEARNING_RATE.
 */
function _updatePolicies(siteId, agentId, domain, features, error) {
  for (const [feature, raw] of Object.entries(features)) {
    const value = typeof raw === 'number' ? raw : raw ? 1 : 0;
    const step = error * value * LEARNING_RATE;

    const current = stmts.getPolicy.get(siteId, agentId, domain, feature);
    const weight = current ? current.weight + step : step;

    // Upsert binds the weight twice: once for INSERT, once for the
    // ON CONFLICT UPDATE arm (plus the absolute error for last_error).
    stmts.upsertPolicy.run(
      crypto.randomUUID(),
      siteId,
      agentId,
      domain,
      feature,
      weight,
      weight,
      Math.abs(error),
    );
  }
}
|
|
318
|
+
|
|
319
|
+
// ─── Internal: Multi-Armed Bandit ────────────────────────────────────

/**
 * Fetch the bandit arm row for (domain, action), creating it first if
 * missing. Falls back to a fresh in-memory arm shape if the row cannot
 * be read back.
 */
function _getOrCreateArm(siteId, agentId, domain, action) {
  // DO NOTHING on conflict, so this is a no-op when the arm already exists.
  stmts.upsertArm.run(crypto.randomUUID(), siteId, agentId, domain, action);

  for (const arm of stmts.getArms.all(siteId, agentId, domain)) {
    if (arm.action === action) return arm;
  }
  return { pulls: 0, ucb_score: 1000, avg_reward: 0 };
}
|
|
326
|
+
|
|
327
|
+
/**
 * Record a pull of one bandit arm and refresh its UCB1 score.
 * The new avg/ucb values are computed here in JS and must agree with the
 * SQL in `stmts.updateArm`, which relies on UPDATE semantics where RHS
 * column references read the pre-update row.
 */
function _updateBanditArm(siteId, agentId, domain, action, reward) {
  // Total pulls across all arms in this domain, counting the pull being
  // recorded now — the "t" term in UCB1.
  const totalPullsRow = stmts.getTotalPulls.get(siteId, agentId, domain);
  const totalPulls = (totalPullsRow?.total || 0) + 1;

  const arms = stmts.getArms.all(siteId, agentId, domain);
  const arm = arms.find((a) => a.action === action);
  const armPulls = arm ? arm.pulls + 1 : 1;

  // UCB1 formula
  const avgReward = arm ? (arm.total_reward + reward) / armPulls : reward;
  const exploration = UCB_EXPLORATION * Math.sqrt(Math.log(totalPulls) / armPulls);
  const ucbScore = avgReward + exploration;

  // NOTE(review): only the pulled arm's ucb_score is refreshed; sibling arms
  // keep a stale score until their next pull — confirm this is intended.
  // The reward is bound twice to match the two `?` placeholders in updateArm
  // (total_reward increment and avg_reward numerator).
  stmts.updateArm.run(reward, reward, ucbScore, siteId, agentId, domain, action);
}
|
|
342
|
+
|
|
343
|
+
// ─── Internal: Pattern Mining ────────────────────────────────────────

/**
 * Mine chronological action sequences (length 2..MAX_SEQUENCE_LENGTH) from
 * the most recent decisions in a domain. Known sequences gain frequency and
 * asymptotically-increasing confidence; new ones start at 0.3.
 */
function _minePatterns(siteId, agentId, domain) {
  const recent = stmts.getRecentDecisions.all(siteId, agentId, domain, 20);
  if (recent.length < 3) return;

  const longest = Math.min(MAX_SEQUENCE_LENGTH, recent.length);
  for (let length = 2; length <= longest; length++) {
    // Rows come back newest-first; reverse the window into chronological order.
    const window = recent.slice(0, length).reverse();
    const sequence = window.map((d) => d.action).join(' → ');

    const existing = stmts.findPattern.get(siteId, agentId, sequence);
    if (existing) {
      // Move confidence 5% of the way toward 1, capped at 0.99.
      const newConf = Math.min(0.99, existing.confidence + 0.05 * (1 - existing.confidence));
      stmts.updatePattern.run(newConf, existing.id);
    } else {
      stmts.insertPattern.run(crypto.randomUUID(), siteId, agentId, 'action_sequence', sequence, 0.3);
    }
  }
}
|
|
362
|
+
|
|
363
|
+
// ─── Internal: Feature Extraction ────────────────────────────────────

/**
 * Convert a raw context object into a flat numeric feature vector.
 * Known keys get dedicated encodings (one-hot for category/time-of-day,
 * 0/1 for isRepeat); any other numeric context value passes through as-is.
 *
 * @param {object} context - raw decision context.
 * @returns {Object<string, number>} feature vector.
 */
function _extractFeatures(context) {
  const features = {};

  if (context.price !== undefined) features.price = context.price;
  if (context.quantity !== undefined) features.quantity = context.quantity;
  if (context.discount !== undefined) features.discount = context.discount;
  if (context.category) features[`category:${context.category}`] = 1;
  if (context.timeOfDay !== undefined) {
    features.morning = context.timeOfDay < 12 ? 1 : 0;
    features.afternoon = context.timeOfDay >= 12 && context.timeOfDay < 18 ? 1 : 0;
    features.evening = context.timeOfDay >= 18 ? 1 : 0;
  }
  if (context.isRepeat !== undefined) features.repeat_visit = context.isRepeat ? 1 : 0;
  if (context.urgency !== undefined) features.urgency = context.urgency;

  // Keys already encoded above must not leak through the raw pass-through.
  // Fix: `timeOfDay` previously slipped through (it is bucketed into
  // morning/afternoon/evening rather than stored under its own key),
  // double-counting the signal in the linear model.
  const handledKeys = new Set(['price', 'quantity', 'discount', 'category', 'timeOfDay', 'isRepeat', 'urgency']);

  // Pass through any other raw numeric features.
  for (const [key, value] of Object.entries(context)) {
    if (!handledKeys.has(key) && features[key] === undefined && typeof value === 'number') {
      features[key] = value;
    }
  }

  return features;
}
|
|
389
|
+
|
|
390
|
+
// ─── Internal: Confidence Estimation ─────────────────────────────────

/**
 * Estimate confidence in the learned model for a domain as a weighted blend
 * of data volume (30%), historical prediction accuracy (50%), and recency of
 * the newest resolved decision (20%). Clamped to [MIN_CONFIDENCE, 0.99];
 * returns 0 when no decisions exist at all.
 */
function _getConfidence(siteId, agentId, domain) {
  const recent = stmts.getRecentDecisions.all(siteId, agentId, domain, 50);
  if (recent.length === 0) return 0;

  const resolved = recent.filter((d) => d.outcome !== 'pending');
  if (resolved.length === 0) return MIN_CONFIDENCE;

  // Volume: saturates once 30 resolved decisions have been seen.
  const volumeConf = Math.min(1, resolved.length / 30);

  // Accuracy: mean of (1 - |reward - predicted|), floored at 0 per decision.
  let accuracySum = 0;
  for (const decision of resolved) {
    if (decision.predicted_reward !== null) {
      const error = Math.abs(decision.reward - decision.predicted_reward);
      accuracySum += Math.max(0, 1 - error);
    }
  }
  const accuracyConf = resolved.length > 0 ? accuracySum / resolved.length : 0.5;

  // Recency: exponential decay in hours since the newest resolved decision
  // (rows are ordered newest-first, so index 0 is the latest).
  const latestTs = new Date(resolved[0].created_at).getTime();
  const ageHours = (Date.now() - latestTs) / 3600000;
  const recencyConf = Math.exp(-DECAY_RATE * ageHours);

  const blended = volumeConf * 0.3 + accuracyConf * 0.5 + recencyConf * 0.2;
  return Math.max(MIN_CONFIDENCE, Math.min(0.99, blended));
}
|
|
418
|
+
|
|
419
|
+
module.exports = {
|
|
420
|
+
recordDecision, feedback, recommend, getPreferences,
|
|
421
|
+
startSession, endSession, getStats,
|
|
422
|
+
};
|