web-agent-bridge 3.3.0 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +84 -72
- package/README.ar.md +1563 -1286
- package/README.md +137 -1764
- package/bin/agent-runner.js +474 -474
- package/bin/cli.js +237 -237
- package/bin/wab-init.js +244 -0
- package/bin/wab.js +80 -80
- package/examples/azure-dns-wab.js +83 -0
- package/examples/bidi-agent.js +119 -119
- package/examples/cloudflare-wab-dns.js +121 -0
- package/examples/cpanel-wab-dns.js +114 -0
- package/examples/cross-site-agent.js +91 -91
- package/examples/dns-discovery-agent.js +166 -0
- package/examples/gcp-dns-wab.js +76 -0
- package/examples/governance-agent.js +169 -0
- package/examples/mcp-agent.js +94 -94
- package/examples/next-app-router/README.md +44 -44
- package/examples/plesk-wab-dns.js +103 -0
- package/examples/puppeteer-agent.js +108 -108
- package/examples/route53-wab-dns.js +144 -0
- package/examples/saas-dashboard/README.md +55 -55
- package/examples/safe-mode-agent.js +96 -0
- package/examples/self-discovery.js +106 -0
- package/examples/shopify-hydrogen/README.md +74 -74
- package/examples/vision-agent.js +171 -171
- package/examples/wab-sign.js +74 -0
- package/examples/wab-verify.js +60 -0
- package/examples/wordpress-elementor/README.md +77 -77
- package/package.json +93 -93
- package/public/.well-known/agent-tools.json +180 -180
- package/public/.well-known/ai-assets.json +59 -59
- package/public/.well-known/security.txt +8 -8
- package/public/.well-known/wab.json +28 -0
- package/public/activate.html +448 -0
- package/public/adopt.html +236 -0
- package/public/adoption-metrics.html +188 -0
- package/public/agent-workspace.html +359 -349
- package/public/ai.html +198 -198
- package/public/api.html +397 -413
- package/public/azure-dns-integration.html +289 -0
- package/public/browser.html +486 -486
- package/public/cloudflare-integration.html +380 -0
- package/public/commander-dashboard.html +243 -243
- package/public/cookies.html +210 -210
- package/public/cpanel-integration.html +398 -0
- package/public/css/agent-workspace.css +1713 -1713
- package/public/css/premium.css +317 -317
- package/public/css/styles.css +1401 -1235
- package/public/dashboard-shieldlink.html +295 -0
- package/public/dashboard.html +711 -706
- package/public/dns.html +436 -507
- package/public/docs.html +588 -587
- package/public/enterprise-mesh.ar.html +80 -0
- package/public/enterprise-mesh.html +81 -0
- package/public/feed.xml +89 -89
- package/public/gcp-dns-integration.html +318 -0
- package/public/governance.ar.html +70 -0
- package/public/governance.html +69 -0
- package/public/growth.html +465 -463
- package/public/index.html +1372 -1070
- package/public/integrations.html +556 -556
- package/public/js/activate.js +449 -0
- package/public/js/agent-workspace.js +1740 -1740
- package/public/js/auth-nav.js +117 -31
- package/public/js/auth-redirect.js +12 -12
- package/public/js/cookie-consent.js +56 -56
- package/public/js/dns.js +438 -0
- package/public/js/wab-demo-page.js +721 -721
- package/public/js/ws-client.js +74 -74
- package/public/l-preview.html +242 -0
- package/public/llms-full.txt +360 -360
- package/public/llms.txt +125 -125
- package/public/login.html +85 -85
- package/public/mesh-dashboard.html +328 -328
- package/public/milestones.html +346 -0
- package/public/one-click.html +779 -0
- package/public/openapi.json +669 -580
- package/public/partners.ar.html +145 -0
- package/public/partners.html +143 -0
- package/public/phone-shield.html +281 -281
- package/public/plesk-integration.html +375 -0
- package/public/premium-dashboard.html +2489 -2489
- package/public/premium.html +793 -793
- package/public/privacy.html +297 -297
- package/public/provider-onboarding.html +172 -0
- package/public/provider-sandbox.html +134 -0
- package/public/providers.html +359 -0
- package/public/refusals.html +172 -0
- package/public/register.html +105 -105
- package/public/registrar-integrations.html +141 -0
- package/public/ring4.html +292 -0
- package/public/robots.txt +99 -87
- package/public/route53-integration.html +531 -0
- package/public/score.html +263 -0
- package/public/script/wab-consent.d.ts +36 -36
- package/public/script/wab-consent.js +104 -104
- package/public/script/wab-schema.js +131 -131
- package/public/script/wab.d.ts +108 -108
- package/public/script/wab.min.js +580 -580
- package/public/security.txt +8 -8
- package/public/shieldlink.html +244 -0
- package/public/shieldqr.html +231 -0
- package/public/sitemap.xml +19 -1
- package/public/terms.html +256 -256
- package/public/trust-graph-api.ar.html +92 -0
- package/public/trust-graph-api.html +91 -0
- package/public/wab-features.html +560 -0
- package/public/wab-trust.html +200 -0
- package/public/wab-truth.html +375 -0
- package/public/wab-vs-protocols.html +210 -0
- package/public/whitepaper.html +449 -0
- package/script/ai-agent-bridge.js +1754 -1754
- package/sdk/README.md +99 -99
- package/sdk/agent-mesh.js +449 -449
- package/sdk/auto-discovery.js +301 -0
- package/sdk/commander.js +262 -262
- package/sdk/governance.js +262 -0
- package/sdk/index.d.ts +464 -464
- package/sdk/index.js +649 -636
- package/sdk/multi-agent.js +318 -318
- package/sdk/package.json +2 -2
- package/sdk/safe-mode.js +221 -0
- package/sdk/safety-shield.js +219 -219
- package/sdk/schema-discovery.js +83 -83
- package/server/adapters/index.js +520 -520
- package/server/config/plans.js +412 -367
- package/server/config/secrets.js +102 -102
- package/server/control-plane/index.js +301 -301
- package/server/data-plane/index.js +354 -354
- package/server/index.js +790 -531
- package/server/llm/index.js +404 -404
- package/server/middleware/adminAuth.js +35 -35
- package/server/middleware/api-tier.js +170 -0
- package/server/middleware/auth.js +50 -50
- package/server/middleware/featureGate.js +88 -88
- package/server/middleware/rateLimits.js +100 -100
- package/server/middleware/sensitiveAction.js +157 -157
- package/server/middleware/wab-trust.js +141 -0
- package/server/migrations/001_add_analytics_indexes.sql +7 -7
- package/server/migrations/002_premium_features.sql +418 -418
- package/server/migrations/003_ads_integer_cents.sql +33 -33
- package/server/migrations/004_agent_os.sql +158 -158
- package/server/migrations/005_marketplace_metering.sql +126 -126
- package/server/migrations/006_growth_suite.sql +138 -0
- package/server/migrations/007_governance.sql +106 -0
- package/server/migrations/008_plans.sql +144 -0
- package/server/migrations/009_shieldqr.sql +30 -0
- package/server/migrations/010_extended_trust.sql +33 -0
- package/server/migrations/011_outreach.sql +47 -0
- package/server/migrations/012_shieldlink.sql +116 -0
- package/server/migrations/013_ct_monitor.sql +13 -0
- package/server/migrations/014_wab_advanced_features.sql +128 -0
- package/server/migrations/015_wab_truth_layer.sql +101 -0
- package/server/migrations/016_ring4_external_trust.sql +84 -0
- package/server/migrations/017_ring4_extensions.sql +69 -0
- package/server/migrations/018_commercial_foundations.sql +167 -0
- package/server/migrations/019_unify_tier_constraints.sql +133 -0
- package/server/models/adapters/index.js +33 -33
- package/server/models/adapters/mysql.js +183 -183
- package/server/models/adapters/postgresql.js +172 -172
- package/server/models/adapters/sqlite.js +7 -7
- package/server/models/db.js +740 -681
- package/server/observability/failure-analysis.js +337 -337
- package/server/observability/index.js +394 -394
- package/server/protocol/capabilities.js +223 -223
- package/server/protocol/index.js +243 -243
- package/server/protocol/schema.js +584 -584
- package/server/registry/certification.js +271 -271
- package/server/registry/index.js +326 -326
- package/server/routes/activate.js +478 -0
- package/server/routes/admin-outreach.js +239 -0
- package/server/routes/admin-plans.js +76 -0
- package/server/routes/admin-premium.js +674 -671
- package/server/routes/admin-shieldlink.js +137 -0
- package/server/routes/admin-shieldqr.js +90 -0
- package/server/routes/admin-trust-monitor.js +139 -0
- package/server/routes/admin.js +550 -261
- package/server/routes/adopt.js +61 -0
- package/server/routes/ads.js +130 -130
- package/server/routes/agent-workspace.js +540 -540
- package/server/routes/api-keys.js +127 -0
- package/server/routes/api.js +150 -150
- package/server/routes/auth.js +71 -71
- package/server/routes/billing.js +57 -45
- package/server/routes/commander.js +316 -316
- package/server/routes/customer-shieldlink.js +133 -0
- package/server/routes/demo-showcase.js +332 -332
- package/server/routes/demo-store.js +154 -154
- package/server/routes/diagnose.js +373 -0
- package/server/routes/discovery.js +2348 -417
- package/server/routes/enterprise-mesh.js +170 -0
- package/server/routes/gateway.js +173 -173
- package/server/routes/governance-saas.js +203 -0
- package/server/routes/governance.js +208 -0
- package/server/routes/growth.js +1048 -0
- package/server/routes/intent.js +328 -0
- package/server/routes/license.js +251 -251
- package/server/routes/mesh.js +469 -469
- package/server/routes/noscript.js +543 -543
- package/server/routes/partners.js +201 -0
- package/server/routes/plans.js +33 -0
- package/server/routes/premium-v2.js +686 -686
- package/server/routes/premium.js +724 -724
- package/server/routes/providers.js +650 -0
- package/server/routes/reputation.js +411 -0
- package/server/routes/ring4.js +885 -0
- package/server/routes/runtime.js +2148 -2148
- package/server/routes/shieldlink.js +70 -0
- package/server/routes/shieldqr.js +88 -0
- package/server/routes/sovereign.js +465 -465
- package/server/routes/truth-layer.js +670 -0
- package/server/routes/universal.js +200 -200
- package/server/routes/unsubscribe.js +51 -0
- package/server/routes/wab-api.js +850 -850
- package/server/routes/wab-cache.js +282 -0
- package/server/runtime/container-worker.js +111 -111
- package/server/runtime/container.js +448 -448
- package/server/runtime/distributed-worker.js +362 -362
- package/server/runtime/event-bus.js +210 -210
- package/server/runtime/index.js +253 -253
- package/server/runtime/queue.js +599 -599
- package/server/runtime/replay.js +666 -666
- package/server/runtime/sandbox.js +266 -266
- package/server/runtime/scheduler.js +534 -534
- package/server/runtime/session-engine.js +293 -293
- package/server/runtime/state-manager.js +188 -188
- package/server/secrets/wab-signing-key.pem +3 -0
- package/server/secrets/wab-signing-pub.pem +3 -0
- package/server/security/cross-site-redactor.js +196 -196
- package/server/security/dry-run.js +180 -180
- package/server/security/human-gate-rate-limit.js +147 -147
- package/server/security/human-gate-transports.js +178 -178
- package/server/security/human-gate.js +281 -281
- package/server/security/index.js +368 -368
- package/server/security/intent-engine.js +245 -245
- package/server/security/reward-guard.js +171 -171
- package/server/security/rollback-store.js +239 -239
- package/server/security/token-scope.js +404 -404
- package/server/security/url-policy.js +139 -139
- package/server/services/adoption-agent.js +182 -0
- package/server/services/agent-chat.js +506 -506
- package/server/services/agent-learning.js +601 -601
- package/server/services/agent-memory.js +625 -625
- package/server/services/agent-mesh.js +555 -555
- package/server/services/agent-symphony.js +717 -717
- package/server/services/agent-tasks.js +1807 -1807
- package/server/services/api-key-engine.js +292 -292
- package/server/services/cluster.js +894 -894
- package/server/services/commander.js +738 -738
- package/server/services/edge-compute.js +440 -440
- package/server/services/email.js +233 -204
- package/server/services/fairness-engine.js +409 -0
- package/server/services/fairness.js +420 -0
- package/server/services/governance.js +466 -0
- package/server/services/hosted-runtime.js +205 -205
- package/server/services/lfd.js +635 -635
- package/server/services/local-ai.js +389 -389
- package/server/services/marketplace.js +270 -270
- package/server/services/metering.js +182 -182
- package/server/services/modules/affiliate-intelligence.js +93 -93
- package/server/services/modules/agent-firewall.js +90 -90
- package/server/services/modules/bounty.js +89 -89
- package/server/services/modules/collective-bargaining.js +92 -92
- package/server/services/modules/dark-pattern.js +66 -66
- package/server/services/modules/gov-intelligence.js +45 -45
- package/server/services/modules/neural.js +55 -55
- package/server/services/modules/notary.js +49 -49
- package/server/services/modules/price-time-machine.js +86 -86
- package/server/services/modules/protocol.js +104 -104
- package/server/services/negotiation.js +439 -439
- package/server/services/outreach-agent.js +312 -0
- package/server/services/plans.js +214 -0
- package/server/services/plugins.js +771 -771
- package/server/services/premium.js +1 -1
- package/server/services/price-intelligence.js +566 -566
- package/server/services/price-shield.js +1137 -1137
- package/server/services/provider-clients.js +740 -0
- package/server/services/reputation.js +465 -465
- package/server/services/search-engine.js +357 -357
- package/server/services/security.js +513 -513
- package/server/services/self-healing.js +843 -843
- package/server/services/shieldlink.js +492 -0
- package/server/services/shieldqr.js +322 -0
- package/server/services/sovereign-shield.js +542 -542
- package/server/services/ssl-ct-monitor.js +224 -0
- package/server/services/ssl-inspector.js +42 -0
- package/server/services/ssl-monitor.js +167 -0
- package/server/services/stripe.js +206 -192
- package/server/services/swarm.js +788 -788
- package/server/services/universal-scraper.js +662 -662
- package/server/services/verification.js +481 -481
- package/server/services/vision.js +1163 -1163
- package/server/services/wab-crypto.js +178 -0
- package/server/utils/cache.js +125 -125
- package/server/utils/migrate.js +81 -81
- package/server/utils/safe-fetch.js +228 -228
- package/server/utils/secureFields.js +50 -50
- package/server/ws.js +161 -161
- package/templates/artisan-marketplace.yaml +104 -104
- package/templates/book-price-scout.yaml +98 -98
- package/templates/electronics-price-tracker.yaml +108 -108
- package/templates/flight-deal-hunter.yaml +113 -113
- package/templates/freelancer-direct.yaml +116 -116
- package/templates/grocery-price-compare.yaml +93 -93
- package/templates/hotel-direct-booking.yaml +113 -113
- package/templates/local-services.yaml +98 -98
- package/templates/olive-oil-tunisia.yaml +88 -88
- package/templates/organic-farm-fresh.yaml +101 -101
- package/templates/restaurant-direct.yaml +97 -97
- package/templates/ring4/banking-sovereign.yaml +55 -0
- package/templates/ring4/ecommerce-sovereign.yaml +58 -0
- package/templates/ring4/healthcare-sovereign.yaml +60 -0
|
@@ -1,171 +1,171 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Reward Guard — defenses against reward-hacking in the local RL engine.
|
|
5
|
-
*
|
|
6
|
-
* Threats addressed:
|
|
7
|
-
* 1. Out-of-bounds rewards (writers stuffing huge positive numbers).
|
|
8
|
-
* 2. Sudden gradient explosions (sequence of large positive rewards on
|
|
9
|
-
* previously-low-confidence actions, indicative of a feedback loop).
|
|
10
|
-
* 3. Per-actor abuse (one user/agent flooding rewards to skew a policy).
|
|
11
|
-
* 4. Rewards on sensitive actions without HITL approval.
|
|
12
|
-
*
|
|
13
|
-
* Defenses:
|
|
14
|
-
* - Clamp reward to [REWARD_MIN, REWARD_MAX].
|
|
15
|
-
* - Per-(site,agent,domain) running mean/variance (Welford) with a z-score anomaly check.
|
|
16
|
-
* - Per-actor rate limit (default 60 reward writes / 5 min).
|
|
17
|
-
* - Block rewards on actions in the SENSITIVE_VERBS set unless an
|
|
18
|
-
* `approvedBy` field is present (expected to be a human user id; the value is recorded in the audit row but not validated here).
|
|
19
|
-
* - Append-only `reward_audit` table for human review.
|
|
20
|
-
*/
|
|
21
|
-
|
|
22
|
-
const crypto = require('crypto');
|
|
23
|
-
const { db } = require('../models/db');
|
|
24
|
-
const { SENSITIVE_VERBS } = require('../middleware/sensitiveAction');
|
|
25
|
-
|
|
26
|
-
// Hard bounds: every reward that is not blocked is clamped into [REWARD_MIN, REWARD_MAX].
const REWARD_MIN = -1.0;
const REWARD_MAX = 1.0;
const ANOMALY_Z_SCORE = 4.0; // |reward - mean| / std > 4 → anomaly
// Per-actor rate limit: at most RATE_LIMIT_MAX reward writes per RATE_LIMIT_WINDOW_MS.
const RATE_LIMIT_WINDOW_MS = 5 * 60 * 1000;
const RATE_LIMIT_MAX = 60;

// Audit table holding one row per sanitizeReward() decision. Created (if missing)
// as a side effect of loading this module.
db.exec(`
  CREATE TABLE IF NOT EXISTS reward_audit (
    id TEXT PRIMARY KEY,
    site_id TEXT,
    agent_id TEXT,
    domain TEXT,
    action TEXT,
    raw_reward REAL,
    final_reward REAL,
    decision TEXT NOT NULL CHECK(decision IN ('accepted','clamped','blocked','flagged')),
    reason TEXT,
    actor_id TEXT,
    approved_by TEXT,
    created_at TEXT DEFAULT (datetime('now'))
  );
  CREATE INDEX IF NOT EXISTS idx_reward_audit_site ON reward_audit(site_id, agent_id);
  CREATE INDEX IF NOT EXISTS idx_reward_audit_decision ON reward_audit(decision);
`);

// In-memory state: lives only in this process and is reset when the module is reloaded.
const _rateBuckets = new Map(); // actorKey → [timestampMs, ...] (numbers from Date.now())
const _emaState = new Map(); // bucketKey → { mean, m2, n } (Welford accumulators; m2 = sum of squared deviations)
|
|
53
|
-
|
|
54
|
-
/**
 * Build the Map key that identifies one (site, agent, domain) reward stream.
 *
 * Falsy parts are treated as the empty string and every part is coerced to a
 * string, so `null`, `undefined` and `''` all address the same bucket.
 *
 * @param {string} [siteId]
 * @param {string} [agentId]
 * @param {string} [domain]
 * @returns {string} an opaque key; only meaningful as a Map key inside this module
 */
function _bucketKey(siteId, agentId, domain) {
  // Encode the tuple as JSON instead of joining with "::". A plain join is
  // ambiguous when a part itself contains "::" (e.g. an IPv6 host such as
  // "::1"), which would let two different streams share one set of statistics.
  return JSON.stringify([String(siteId || ''), String(agentId || ''), String(domain || '')]);
}
|
|
57
|
-
|
|
58
|
-
/**
 * Sliding-window rate limiter for reward writes.
 *
 * Returns true and records the write when the actor has made fewer than
 * RATE_LIMIT_MAX accepted writes in the last RATE_LIMIT_WINDOW_MS; returns
 * false otherwise.
 *
 * @param {string} actorKey - identity the limit is applied to
 * @returns {boolean} whether this write is allowed
 */
function _checkRate(actorKey) {
  const now = Date.now();
  const windowStart = now - RATE_LIMIT_WINDOW_MS;
  const fresh = (_rateBuckets.get(actorKey) || []).filter((t) => t > windowStart);

  // Only accepted writes are recorded. Recording rejected attempts as well
  // would let a flooding actor grow its bucket without bound and would keep
  // it blocked for as long as it keeps sending, rather than enforcing the
  // documented "RATE_LIMIT_MAX writes per window".
  const allowed = fresh.length < RATE_LIMIT_MAX;
  if (allowed) {
    fresh.push(now);
  }

  // Store the pruned list so expired timestamps are dropped on every call.
  _rateBuckets.set(actorKey, fresh);
  return allowed;
}
|
|
66
|
-
|
|
67
|
-
/**
 * Fold one observation into the running statistics of a bucket and return
 * the updated summary.
 *
 * Despite the name this is a cumulative Welford update (count, mean, sum of
 * squared deviations), not an exponential moving average.
 *
 * @param {string} bucketKey - key of the stream in `_emaState`
 * @param {number} x - the new observation
 * @returns {{mean: number, std: number, n: number}} statistics including `x`
 */
function _updateEma(bucketKey, x) {
  let state = _emaState.get(bucketKey);
  if (!state) {
    state = { mean: 0, m2: 0, n: 0 };
  }

  const previousMean = state.mean;
  state.n += 1;
  state.mean = previousMean + (x - previousMean) / state.n;
  state.m2 += (x - previousMean) * (x - state.mean);
  _emaState.set(bucketKey, state);

  // With a single observation there is no sample variance; 1 is used as a placeholder.
  let variance = 1;
  if (state.n > 1) {
    variance = state.m2 / (state.n - 1);
  }

  // The variance is floored at 1e-6 so callers never divide by a zero std.
  const std = Math.sqrt(Math.max(variance, 1e-6));
  return { mean: state.mean, std, n: state.n };
}
|
|
78
|
-
|
|
79
|
-
/**
 * Append one decision to the `reward_audit` table.
 *
 * @param {object} row
 * @param {string} [row.siteId]
 * @param {string} [row.agentId]
 * @param {string} [row.domain]
 * @param {string} [row.action]
 * @param {*} row.rawReward - reward as received; may be any caller-supplied value
 * @param {number} row.finalReward - reward after sanitizing
 * @param {string} row.decision - one of 'accepted', 'clamped', 'blocked', 'flagged'
 * @param {string} [row.reason]
 * @param {string} [row.actorId]
 * @param {string} [row.approvedBy]
 */
function _audit(row) {
  // On the "non-finite reward" path rawReward is whatever the caller sent
  // (undefined, an object, an array, ...). Normalise it to something a SQL
  // driver can bind so that the blocked attempt is still recorded.
  // NOTE(review): assumes a better-sqlite3-style driver that only binds
  // numbers, strings, bigints, buffers and null — confirm against ../models/db.
  const received = row.rawReward;
  let rawForDb;
  if (received === null || received === undefined) {
    rawForDb = null;
  } else if (typeof received === 'number' || typeof received === 'string' || typeof received === 'bigint') {
    rawForDb = received;
  } else {
    // Keep a type tag (e.g. "[object Object]") rather than losing the evidence.
    rawForDb = Object.prototype.toString.call(received);
  }

  db.prepare(`INSERT INTO reward_audit
    (id, site_id, agent_id, domain, action, raw_reward, final_reward, decision, reason, actor_id, approved_by)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(
    crypto.randomUUID(), row.siteId || null, row.agentId || null, row.domain || null,
    row.action || null, rawForDb, row.finalReward, row.decision, row.reason || null,
    row.actorId || null, row.approvedBy || null
  );
}
|
|
88
|
-
|
|
89
|
-
/**
 * Decide whether an action name contains a verb from SENSITIVE_VERBS.
 *
 * @param {string} action - action identifier, e.g. "cart.delete" or "deleteAccount"
 * @returns {boolean} true when any token of the name is a sensitive verb
 */
function _isSensitive(action) {
  if (!action) return false;
  const text = String(action);

  // Original tokenisation, kept so that everything matched before still matches.
  const delimiterTokens = text.toLowerCase().split(/[\s.\-_/:]+/);

  // Additional tokenisation: break camelCase/PascalCase humps and split on any
  // character that is not a letter or digit. Without this, names such as
  // "deleteAccount", "delete(account)" or "pay,now" slip past the check.
  const wordTokens = text
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u);

  return [...delimiterTokens, ...wordTokens].some((t) => SENSITIVE_VERBS.has(t));
}
|
|
94
|
-
|
|
95
|
-
/**
 * Sanitize a reward emitted by an agent and record the decision in
 * `reward_audit`.
 *
 * Checks run in this order: finite-number check, per-actor rate limit,
 * approval requirement for sensitive actions, clamp to
 * [REWARD_MIN, REWARD_MAX], then a z-score anomaly check against the
 * reward history of the (site, agent, domain) stream.
 *
 * @param {object} input
 * @param {string} input.siteId
 * @param {string} input.agentId
 * @param {string} input.domain
 * @param {string} input.action
 * @param {number} input.reward
 * @param {string} [input.actorId] - rate-limit identity; falls back to agentId, then 'anon'
 * @param {string} [input.approvedBy] - human user id approving the reward
 * @returns {{reward: number, decision: string, reason: (string|null)}}
 *   `decision` is 'accepted', 'clamped', 'blocked' or 'flagged'; `reward` is
 *   0 whenever the decision is 'blocked'.
 */
function sanitizeReward(input) {
  // A missing input is handled like a missing reward (blocked as non-finite)
  // instead of throwing while destructuring.
  const { siteId, agentId, domain, action, reward, actorId, approvedBy } = input || {};
  const raw = Number(reward);

  if (!Number.isFinite(raw)) {
    _audit({ ...input, rawReward: reward, finalReward: 0, decision: 'blocked', reason: 'non-finite reward' });
    return { reward: 0, decision: 'blocked', reason: 'non-finite reward' };
  }

  const actorKey = actorId || agentId || 'anon';
  if (!_checkRate(actorKey)) {
    _audit({ ...input, rawReward: raw, finalReward: 0, decision: 'blocked', reason: 'rate limit exceeded' });
    return { reward: 0, decision: 'blocked', reason: 'reward rate limit exceeded' };
  }

  // Only the presence of approvedBy is checked here; its value is stored in the audit row.
  if (_isSensitive(action) && !approvedBy) {
    _audit({ ...input, rawReward: raw, finalReward: 0, decision: 'blocked', reason: 'sensitive action without HITL approval' });
    return { reward: 0, decision: 'blocked', reason: 'sensitive action requires approvedBy' };
  }

  // Clamp.
  let clamped = Math.max(REWARD_MIN, Math.min(REWARD_MAX, raw));
  let decision = clamped === raw ? 'accepted' : 'clamped';
  let reason = decision === 'clamped' ? `clamped from ${raw}` : null;

  // Anomaly detection vs the running distribution.
  //
  // The sample is scored against the statistics as they were BEFORE it was
  // added. Scoring a sample against statistics that already include it bounds
  // |z| by (n - 1) / sqrt(n), so a z > 4 test could never fire for n < 18 and
  // stays heavily masked after that. The snapshot must be taken before
  // _updateEma() runs because that call mutates the stored state in place.
  const key = _bucketKey(siteId, agentId, domain);
  const prior = _emaState.get(key);
  const priorN = prior ? prior.n : 0;
  const priorMean = prior ? prior.mean : 0;
  // Same variance floor as _updateEma so std is never zero.
  const priorStd = priorN > 1 ? Math.sqrt(Math.max(prior.m2 / (priorN - 1), 1e-6)) : 1;

  const stats = _updateEma(key, clamped);
  if (stats.n >= 10) {
    const z = Math.abs(clamped - priorMean) / priorStd;
    if (z > ANOMALY_Z_SCORE) {
      decision = 'flagged';
      reason = `anomaly z=${z.toFixed(2)} (mean=${priorMean.toFixed(3)}, std=${priorStd.toFixed(3)})`;
      // Pull large positive flagged values toward the mean to limit damage.
      // The result stays inside the clamp range: it lies between priorMean
      // and the (already clamped) flagged value.
      if (clamped > priorMean) clamped = priorMean + priorStd * 2;
    }
  }

  _audit({ ...input, rawReward: raw, finalReward: clamped, decision, reason });
  return { reward: clamped, decision, reason };
}
|
|
148
|
-
|
|
149
|
-
/**
 * Fetch the most recent rows of `reward_audit`, newest first.
 *
 * @param {number} [limit=100] - maximum number of rows; values that are not a
 *   finite, non-negative number fall back to 100, fractions are rounded down
 * @param {string} [decision] - when given, only rows with this decision are returned
 * @returns {object[]} audit rows as returned by the database driver
 */
function getRecentAudits(limit = 100, decision) {
  // Normalise the limit before binding it. In SQLite a negative LIMIT means
  // "no limit", so an unchecked value (e.g. taken from a query string) could
  // dump the whole table; strings and fractions would be bound as-is.
  const requested = limit === null ? NaN : Number(limit);
  const rowLimit = Number.isFinite(requested) && requested >= 0 ? Math.floor(requested) : 100;

  if (decision) {
    return db.prepare(`SELECT * FROM reward_audit WHERE decision = ? ORDER BY rowid DESC LIMIT ?`).all(decision, rowLimit);
  }
  return db.prepare(`SELECT * FROM reward_audit ORDER BY rowid DESC LIMIT ?`).all(rowLimit);
}
|
|
155
|
-
|
|
156
|
-
/**
 * Summarise the guard configuration together with the number of audit rows
 * per decision.
 *
 * @returns {{bounds: object, rateLimit: object, counts: object}} `counts`
 *   maps each decision present in `reward_audit` to its row count
 */
function getStats() {
  const rows = db.prepare(`SELECT decision, COUNT(*) as n FROM reward_audit GROUP BY decision`).all();
  const bounds = { min: REWARD_MIN, max: REWARD_MAX, anomalyZ: ANOMALY_Z_SCORE };
  const rateLimit = { windowMs: RATE_LIMIT_WINDOW_MS, max: RATE_LIMIT_MAX };
  const counts = Object.fromEntries(rows.map((row) => [row.decision, row.n]));
  return { bounds, rateLimit, counts };
}
|
|
164
|
-
|
|
165
|
-
// Public surface of the reward guard: the sanitizer, two read-only audit
// helpers, and the clamp bounds.
const rewardGuardApi = { sanitizeReward, getRecentAudits, getStats, REWARD_MIN, REWARD_MAX };

module.exports = rewardGuardApi;
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Reward Guard — defenses against reward-hacking in the local RL engine.
|
|
5
|
+
*
|
|
6
|
+
* Threats addressed:
|
|
7
|
+
* 1. Out-of-bounds rewards (writers stuffing huge positive numbers).
|
|
8
|
+
* 2. Sudden gradient explosions (sequence of large positive rewards on
|
|
9
|
+
* previously-low-confidence actions, indicative of a feedback loop).
|
|
10
|
+
* 3. Per-actor abuse (one user/agent flooding rewards to skew a policy).
|
|
11
|
+
* 4. Rewards on sensitive actions without HITL approval.
|
|
12
|
+
*
|
|
13
|
+
* Defenses:
|
|
14
|
+
* - Clamp reward to [REWARD_MIN, REWARD_MAX].
|
|
15
|
+
* - Per-(site,agent,domain) running mean/variance (Welford) with a z-score anomaly check.
|
|
16
|
+
* - Per-actor rate limit (default 60 reward writes / 5 min).
|
|
17
|
+
* - Block rewards on actions in the SENSITIVE_VERBS set unless an
|
|
18
|
+
* `approvedBy` field is present (expected to be a human user id; the value is recorded in the audit row but not validated here).
|
|
19
|
+
* - Append-only `reward_audit` table for human review.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
const crypto = require('crypto');
|
|
23
|
+
const { db } = require('../models/db');
|
|
24
|
+
const { SENSITIVE_VERBS } = require('../middleware/sensitiveAction');
|
|
25
|
+
|
|
26
|
+
// Hard bounds: every reward that is not blocked is clamped into [REWARD_MIN, REWARD_MAX].
const REWARD_MIN = -1.0;
const REWARD_MAX = 1.0;
const ANOMALY_Z_SCORE = 4.0; // |reward - mean| / std > 4 → anomaly
// Per-actor rate limit: at most RATE_LIMIT_MAX reward writes per RATE_LIMIT_WINDOW_MS.
const RATE_LIMIT_WINDOW_MS = 5 * 60 * 1000;
const RATE_LIMIT_MAX = 60;

// Audit table holding one row per sanitizeReward() decision. Created (if missing)
// as a side effect of loading this module.
db.exec(`
  CREATE TABLE IF NOT EXISTS reward_audit (
    id TEXT PRIMARY KEY,
    site_id TEXT,
    agent_id TEXT,
    domain TEXT,
    action TEXT,
    raw_reward REAL,
    final_reward REAL,
    decision TEXT NOT NULL CHECK(decision IN ('accepted','clamped','blocked','flagged')),
    reason TEXT,
    actor_id TEXT,
    approved_by TEXT,
    created_at TEXT DEFAULT (datetime('now'))
  );
  CREATE INDEX IF NOT EXISTS idx_reward_audit_site ON reward_audit(site_id, agent_id);
  CREATE INDEX IF NOT EXISTS idx_reward_audit_decision ON reward_audit(decision);
`);

// In-memory state: lives only in this process and is reset when the module is reloaded.
const _rateBuckets = new Map(); // actorKey → [timestampMs, ...] (numbers from Date.now())
const _emaState = new Map(); // bucketKey → { mean, m2, n } (Welford accumulators; m2 = sum of squared deviations)
|
|
53
|
+
|
|
54
|
+
/**
 * Build the Map key that identifies one (site, agent, domain) reward stream.
 *
 * Falsy parts are treated as the empty string and every part is coerced to a
 * string, so `null`, `undefined` and `''` all address the same bucket.
 *
 * @param {string} [siteId]
 * @param {string} [agentId]
 * @param {string} [domain]
 * @returns {string} an opaque key; only meaningful as a Map key inside this module
 */
function _bucketKey(siteId, agentId, domain) {
  // Encode the tuple as JSON instead of joining with "::". A plain join is
  // ambiguous when a part itself contains "::" (e.g. an IPv6 host such as
  // "::1"), which would let two different streams share one set of statistics.
  return JSON.stringify([String(siteId || ''), String(agentId || ''), String(domain || '')]);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Sliding-window rate limiter for reward writes.
 *
 * Returns true and records the write when the actor has made fewer than
 * RATE_LIMIT_MAX accepted writes in the last RATE_LIMIT_WINDOW_MS; returns
 * false otherwise.
 *
 * @param {string} actorKey - identity the limit is applied to
 * @returns {boolean} whether this write is allowed
 */
function _checkRate(actorKey) {
  const now = Date.now();
  const windowStart = now - RATE_LIMIT_WINDOW_MS;
  const fresh = (_rateBuckets.get(actorKey) || []).filter((t) => t > windowStart);

  // Only accepted writes are recorded. Recording rejected attempts as well
  // would let a flooding actor grow its bucket without bound and would keep
  // it blocked for as long as it keeps sending, rather than enforcing the
  // documented "RATE_LIMIT_MAX writes per window".
  const allowed = fresh.length < RATE_LIMIT_MAX;
  if (allowed) {
    fresh.push(now);
  }

  // Store the pruned list so expired timestamps are dropped on every call.
  _rateBuckets.set(actorKey, fresh);
  return allowed;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Fold one observation into the running statistics of a bucket and return
 * the updated summary.
 *
 * Despite the name this is a cumulative Welford update (count, mean, sum of
 * squared deviations), not an exponential moving average.
 *
 * @param {string} bucketKey - key of the stream in `_emaState`
 * @param {number} x - the new observation
 * @returns {{mean: number, std: number, n: number}} statistics including `x`
 */
function _updateEma(bucketKey, x) {
  let state = _emaState.get(bucketKey);
  if (!state) {
    state = { mean: 0, m2: 0, n: 0 };
  }

  const previousMean = state.mean;
  state.n += 1;
  state.mean = previousMean + (x - previousMean) / state.n;
  state.m2 += (x - previousMean) * (x - state.mean);
  _emaState.set(bucketKey, state);

  // With a single observation there is no sample variance; 1 is used as a placeholder.
  let variance = 1;
  if (state.n > 1) {
    variance = state.m2 / (state.n - 1);
  }

  // The variance is floored at 1e-6 so callers never divide by a zero std.
  const std = Math.sqrt(Math.max(variance, 1e-6));
  return { mean: state.mean, std, n: state.n };
}
|
|
78
|
+
|
|
79
|
+
/**
 * Append one decision to the `reward_audit` table.
 *
 * @param {object} row
 * @param {string} [row.siteId]
 * @param {string} [row.agentId]
 * @param {string} [row.domain]
 * @param {string} [row.action]
 * @param {*} row.rawReward - reward as received; may be any caller-supplied value
 * @param {number} row.finalReward - reward after sanitizing
 * @param {string} row.decision - one of 'accepted', 'clamped', 'blocked', 'flagged'
 * @param {string} [row.reason]
 * @param {string} [row.actorId]
 * @param {string} [row.approvedBy]
 */
function _audit(row) {
  // On the "non-finite reward" path rawReward is whatever the caller sent
  // (undefined, an object, an array, ...). Normalise it to something a SQL
  // driver can bind so that the blocked attempt is still recorded.
  // NOTE(review): assumes a better-sqlite3-style driver that only binds
  // numbers, strings, bigints, buffers and null — confirm against ../models/db.
  const received = row.rawReward;
  let rawForDb;
  if (received === null || received === undefined) {
    rawForDb = null;
  } else if (typeof received === 'number' || typeof received === 'string' || typeof received === 'bigint') {
    rawForDb = received;
  } else {
    // Keep a type tag (e.g. "[object Object]") rather than losing the evidence.
    rawForDb = Object.prototype.toString.call(received);
  }

  db.prepare(`INSERT INTO reward_audit
    (id, site_id, agent_id, domain, action, raw_reward, final_reward, decision, reason, actor_id, approved_by)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(
    crypto.randomUUID(), row.siteId || null, row.agentId || null, row.domain || null,
    row.action || null, rawForDb, row.finalReward, row.decision, row.reason || null,
    row.actorId || null, row.approvedBy || null
  );
}
|
|
88
|
+
|
|
89
|
+
/**
 * Decide whether an action name contains a verb from SENSITIVE_VERBS.
 *
 * @param {string} action - action identifier, e.g. "cart.delete" or "deleteAccount"
 * @returns {boolean} true when any token of the name is a sensitive verb
 */
function _isSensitive(action) {
  if (!action) return false;
  const text = String(action);

  // Original tokenisation, kept so that everything matched before still matches.
  const delimiterTokens = text.toLowerCase().split(/[\s.\-_/:]+/);

  // Additional tokenisation: break camelCase/PascalCase humps and split on any
  // character that is not a letter or digit. Without this, names such as
  // "deleteAccount", "delete(account)" or "pay,now" slip past the check.
  const wordTokens = text
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u);

  return [...delimiterTokens, ...wordTokens].some((t) => SENSITIVE_VERBS.has(t));
}
|
|
94
|
+
|
|
95
|
+
/**
 * Sanitize a reward emitted by an agent and record the decision in
 * `reward_audit`.
 *
 * Checks run in this order: finite-number check, per-actor rate limit,
 * approval requirement for sensitive actions, clamp to
 * [REWARD_MIN, REWARD_MAX], then a z-score anomaly check against the
 * reward history of the (site, agent, domain) stream.
 *
 * @param {object} input
 * @param {string} input.siteId
 * @param {string} input.agentId
 * @param {string} input.domain
 * @param {string} input.action
 * @param {number} input.reward
 * @param {string} [input.actorId] - rate-limit identity; falls back to agentId, then 'anon'
 * @param {string} [input.approvedBy] - human user id approving the reward
 * @returns {{reward: number, decision: string, reason: (string|null)}}
 *   `decision` is 'accepted', 'clamped', 'blocked' or 'flagged'; `reward` is
 *   0 whenever the decision is 'blocked'.
 */
function sanitizeReward(input) {
  // A missing input is handled like a missing reward (blocked as non-finite)
  // instead of throwing while destructuring.
  const { siteId, agentId, domain, action, reward, actorId, approvedBy } = input || {};
  const raw = Number(reward);

  if (!Number.isFinite(raw)) {
    _audit({ ...input, rawReward: reward, finalReward: 0, decision: 'blocked', reason: 'non-finite reward' });
    return { reward: 0, decision: 'blocked', reason: 'non-finite reward' };
  }

  const actorKey = actorId || agentId || 'anon';
  if (!_checkRate(actorKey)) {
    _audit({ ...input, rawReward: raw, finalReward: 0, decision: 'blocked', reason: 'rate limit exceeded' });
    return { reward: 0, decision: 'blocked', reason: 'reward rate limit exceeded' };
  }

  // Only the presence of approvedBy is checked here; its value is stored in the audit row.
  if (_isSensitive(action) && !approvedBy) {
    _audit({ ...input, rawReward: raw, finalReward: 0, decision: 'blocked', reason: 'sensitive action without HITL approval' });
    return { reward: 0, decision: 'blocked', reason: 'sensitive action requires approvedBy' };
  }

  // Clamp.
  let clamped = Math.max(REWARD_MIN, Math.min(REWARD_MAX, raw));
  let decision = clamped === raw ? 'accepted' : 'clamped';
  let reason = decision === 'clamped' ? `clamped from ${raw}` : null;

  // Anomaly detection vs the running distribution.
  //
  // The sample is scored against the statistics as they were BEFORE it was
  // added. Scoring a sample against statistics that already include it bounds
  // |z| by (n - 1) / sqrt(n), so a z > 4 test could never fire for n < 18 and
  // stays heavily masked after that. The snapshot must be taken before
  // _updateEma() runs because that call mutates the stored state in place.
  const key = _bucketKey(siteId, agentId, domain);
  const prior = _emaState.get(key);
  const priorN = prior ? prior.n : 0;
  const priorMean = prior ? prior.mean : 0;
  // Same variance floor as _updateEma so std is never zero.
  const priorStd = priorN > 1 ? Math.sqrt(Math.max(prior.m2 / (priorN - 1), 1e-6)) : 1;

  const stats = _updateEma(key, clamped);
  if (stats.n >= 10) {
    const z = Math.abs(clamped - priorMean) / priorStd;
    if (z > ANOMALY_Z_SCORE) {
      decision = 'flagged';
      reason = `anomaly z=${z.toFixed(2)} (mean=${priorMean.toFixed(3)}, std=${priorStd.toFixed(3)})`;
      // Pull large positive flagged values toward the mean to limit damage.
      // The result stays inside the clamp range: it lies between priorMean
      // and the (already clamped) flagged value.
      if (clamped > priorMean) clamped = priorMean + priorStd * 2;
    }
  }

  _audit({ ...input, rawReward: raw, finalReward: clamped, decision, reason });
  return { reward: clamped, decision, reason };
}
|
|
148
|
+
|
|
149
|
+
/**
 * Fetch the most recent rows of `reward_audit`, newest first.
 *
 * @param {number} [limit=100] - maximum number of rows; values that are not a
 *   finite, non-negative number fall back to 100, fractions are rounded down
 * @param {string} [decision] - when given, only rows with this decision are returned
 * @returns {object[]} audit rows as returned by the database driver
 */
function getRecentAudits(limit = 100, decision) {
  // Normalise the limit before binding it. In SQLite a negative LIMIT means
  // "no limit", so an unchecked value (e.g. taken from a query string) could
  // dump the whole table; strings and fractions would be bound as-is.
  const requested = limit === null ? NaN : Number(limit);
  const rowLimit = Number.isFinite(requested) && requested >= 0 ? Math.floor(requested) : 100;

  if (decision) {
    return db.prepare(`SELECT * FROM reward_audit WHERE decision = ? ORDER BY rowid DESC LIMIT ?`).all(decision, rowLimit);
  }
  return db.prepare(`SELECT * FROM reward_audit ORDER BY rowid DESC LIMIT ?`).all(rowLimit);
}
|
|
155
|
+
|
|
156
|
+
/**
 * Summarise the guard configuration together with the number of audit rows
 * per decision.
 *
 * @returns {{bounds: object, rateLimit: object, counts: object}} `counts`
 *   maps each decision present in `reward_audit` to its row count
 */
function getStats() {
  const rows = db.prepare(`SELECT decision, COUNT(*) as n FROM reward_audit GROUP BY decision`).all();
  const bounds = { min: REWARD_MIN, max: REWARD_MAX, anomalyZ: ANOMALY_Z_SCORE };
  const rateLimit = { windowMs: RATE_LIMIT_WINDOW_MS, max: RATE_LIMIT_MAX };
  const counts = Object.fromEntries(rows.map((row) => [row.decision, row.n]));
  return { bounds, rateLimit, counts };
}
|
|
164
|
+
|
|
165
|
+
// Public surface of the reward guard: the sanitizer, two read-only audit
// helpers, and the clamp bounds.
const rewardGuardApi = { sanitizeReward, getRecentAudits, getStats, REWARD_MIN, REWARD_MAX };

module.exports = rewardGuardApi;
|