web-agent-bridge 3.16.0 → 3.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.ar.md +27 -8
  2. package/README.md +95 -0
  3. package/bin/wab-init.js +38 -0
  4. package/package.json +1 -1
  5. package/public/atp-semantics.html +216 -0
  6. package/public/benchmarks.html +151 -0
  7. package/public/dashboard.html +1 -0
  8. package/public/docs.html +113 -43
  9. package/public/index.html +142 -8
  10. package/public/key-rotation.html +184 -0
  11. package/public/llms.txt +54 -0
  12. package/public/notary.html +94 -0
  13. package/public/observatory.html +103 -0
  14. package/public/research.html +57 -0
  15. package/public/researchers.html +113 -0
  16. package/public/responsible-disclosure.html +294 -0
  17. package/public/robots.txt +17 -0
  18. package/public/security.html +157 -0
  19. package/public/threat-model.html +153 -0
  20. package/public/viral-coefficient.html +533 -0
  21. package/public/wab-dataset.html +501 -0
  22. package/public/wab-email.html +78 -0
  23. package/public/wab-lens.html +61 -0
  24. package/public/wab-p2p.html +96 -0
  25. package/public/wab-registry.html +481 -0
  26. package/public/wab-today.html +448 -0
  27. package/public/wab-uri.html +88 -0
  28. package/public/webhooks.html +181 -0
  29. package/script/ai-agent-bridge.js +24 -4
  30. package/server/index.js +1193 -827
  31. package/server/models/db.js +2 -1
  32. package/server/routes/admin-shieldlink.js +1 -1
  33. package/server/routes/admin-shieldqr.js +1 -1
  34. package/server/routes/admin-trust-monitor.js +1 -1
  35. package/server/routes/api-keys.js +2 -1
  36. package/server/routes/customer-shieldlink.js +1 -1
  37. package/server/routes/enterprise-mesh.js +2 -1
  38. package/server/routes/genius-bridge.js +256 -0
  39. package/server/routes/genius-gateway.js +137 -0
  40. package/server/routes/governance-saas.js +2 -1
  41. package/server/routes/notary.js +309 -0
  42. package/server/routes/observatory.js +109 -0
  43. package/server/routes/partners.js +2 -1
  44. package/server/routes/registry.js +352 -0
  45. package/server/routes/research.js +83 -0
  46. package/server/routes/ring4.js +2 -1
  47. package/server/routes/runtime.js +98 -25
  48. package/server/routes/security-researchers.js +161 -0
  49. package/server/routes/shieldqr.js +1 -1
  50. package/server/routes/traces.js +247 -0
  51. package/server/services/agent-tasks.js +9 -7
  52. package/server/services/email.js +50 -2
  53. package/server/services/marketplace.js +27 -8
  54. package/server/services/plans.js +1 -1
  55. package/server/services/shieldlink.js +1 -1
  56. package/server/services/ssl-ct-monitor.js +1 -1
  57. package/server/services/ssl-monitor.js +1 -1
  58. package/server/services/stripe.js +29 -4
  59. package/server/services/webhooks.js +61 -1
  60. package/server/utils/migrate.js +1 -1
  61. package/server/utils/safe-compare.js +26 -0
@@ -65,13 +65,54 @@ const PUBLIC_PATHS = [
65
65
  '/cluster/status',
66
66
  ];
67
67
 
68
+ // Sub-prefixes that must NEVER be treated as public, even if a parent prefix
69
+ // is listed in PUBLIC_PATHS. Required because the matcher allows arbitrary GET
70
+ // sub-paths under any public prefix; without this guard, `/marketplace/admin/*`
71
+ // and `/marketplace/my/*` (admin queue + per-agent earnings/purchases) were
72
+ // publicly readable via the `/marketplace` prefix.
73
+ const PUBLIC_DENY_PREFIXES = [
74
+ '/marketplace/admin',
75
+ '/marketplace/my',
76
+ ];
77
+
78
+ // Cached require — used in admin-token check on every authenticated request.
79
+ const { safeEqual } = require('../utils/safe-compare');
80
+
81
+ // Capabilities that grant cross-agent / control-plane authority.
82
+ const ADMIN_CAPABILITIES = ['admin:agents', 'control-plane'];
83
+
84
/**
 * Report whether a session carries cross-agent / control-plane authority.
 *
 * @param {object} session - Session record; only its `capabilities` array is inspected.
 * @returns {boolean} true when at least one capability appears in ADMIN_CAPABILITIES;
 *   false for a missing session or a non-array `capabilities`.
 */
function sessionIsAdmin(session) {
  const granted = session ? session.capabilities : null;
  if (!Array.isArray(granted)) {
    return false;
  }
  for (const capability of granted) {
    if (ADMIN_CAPABILITIES.includes(capability)) {
      return true;
    }
  }
  return false;
}
88
+
68
89
  function authMiddleware(req, res, next) {
69
- // Allow public GET endpoints
70
- const matchesPublic = PUBLIC_PATHS.some(p =>
71
- req.path === p || (req.method === 'GET' && req.path.startsWith(p))
90
+ // Allow ONLY pre-declared public paths. Exact match is method-agnostic
91
+ // (covers POST /agents/register etc.); sub-resources are GET-only and
92
+ // require a '/' separator (so '/protocol' does NOT shadow '/protocol-secret').
93
+ // CRITICAL: do NOT allow arbitrary GET requests to bypass auth — prior code
94
+ // had `if (req.method === 'GET') return next()` which exposed task data,
95
+ // usage stats, and marketplace admin data to anonymous readers.
96
+ const deniedFromPublic = PUBLIC_DENY_PREFIXES.some(p =>
97
+ req.path === p || req.path.startsWith(p + '/')
98
+ );
99
+ const matchesPublic = !deniedFromPublic && PUBLIC_PATHS.some(p =>
100
+ req.path === p || (req.method === 'GET' && req.path.startsWith(p + '/'))
72
101
  );
73
102
  if (matchesPublic) return next();
74
103
 
104
+ // Admin/control-plane via dedicated env-configured token (timing-safe compare).
105
+ // Same convention as server/index.js _adminAuth. Grants cross-agent authority
106
+ // without binding to any specific agent identity.
107
+ const wantAdminTok = process.env.WAB_ADMIN_TOKEN;
108
+ if (wantAdminTok) {
109
+ const gotAdminTok = req.headers['x-wab-admin-token'];
110
+ if (gotAdminTok && safeEqual(gotAdminTok, wantAdminTok)) {
111
+ req.isAdmin = true;
112
+ return next();
113
+ }
114
+ }
115
+
75
116
  // Check session token
76
117
  const authHeader = req.headers['authorization'];
77
118
  if (authHeader && authHeader.startsWith('Bearer ')) {
@@ -80,6 +121,7 @@ function authMiddleware(req, res, next) {
80
121
  if (session) {
81
122
  req.agentId = session.agentId;
82
123
  req.session = session;
124
+ req.isAdmin = sessionIsAdmin(session);
83
125
  return next();
84
126
  }
85
127
  }
@@ -92,27 +134,30 @@ function authMiddleware(req, res, next) {
92
134
  if (session) {
93
135
  req.agentId = session.agentId;
94
136
  req.session = session;
137
+ req.isAdmin = sessionIsAdmin(session);
95
138
  return next();
96
139
  }
97
140
  }
98
141
 
99
- // Check agent ID header (for internal/trusted calls)
100
- const agentHeader = req.headers['x-wab-agent'];
101
- if (agentHeader) {
102
- const agent = identity.getAgent(agentHeader);
103
- if (agent && agent.status === 'active') {
104
- req.agentId = agentHeader;
105
- return next();
106
- }
107
- }
108
-
109
- // No auth on non-mutation GET requests (read-only)
110
- if (req.method === 'GET') return next();
142
+ // X-WAB-Agent is treated as METADATA ONLY — it must NEVER authenticate a
143
+ // request on its own. Possessing a known agentId is not proof of identity.
144
+ // Earlier versions had a fallback here that accepted the header and set
145
+ // req.agentId = headerValue, which let any caller impersonate another active
146
+ // agent (and revoke / negotiate capabilities for it via the :agentId routes).
147
+ // Removed: see SECURITY advisory on cross-agent impersonation.
111
148
 
112
149
  metrics.increment('auth.rejected');
113
150
  return res.status(401).json({ error: 'Authentication required. Provide X-WAB-Key or Authorization: Bearer <token>' });
114
151
  }
115
152
 
153
/**
 * Authorization check for routes that act on a specific agent identity.
 *
 * A request flagged `isAdmin === true` may act on any agent. Every other
 * request may act only on the agent it authenticated as.
 *
 * @param {object} req - Request; `isAdmin` and `agentId` are read.
 * @param {string} targetAgentId - Agent the route is about to operate on.
 * @returns {boolean} true when the caller is admin or is the target agent.
 */
function ownsTarget(req, targetAgentId) {
  if (req.isAdmin === true) {
    return true;
  }
  const callerId = req.agentId;
  // Both IDs must be present; two missing values must never count as a match.
  if (!callerId || !targetAgentId) {
    return false;
  }
  return callerId === targetAgentId;
}
160
+
116
161
  router.use(authMiddleware);
117
162
  router.use(featureGate);
118
163
 
@@ -211,26 +256,41 @@ router.post('/agents/authenticate', (req, res) => {
211
256
  });
212
257
 
213
258
  /**
214
- * Get agent info
259
+ * Get agent info (self or admin only — prevents enumerating other agents
260
+ * via direct ID lookup once a target ID is guessed/leaked).
215
261
  */
216
262
  router.get('/agents/:agentId', (req, res) => {
263
+ if (!ownsTarget(req, req.params.agentId)) {
264
+ return res.status(403).json({ error: 'Not authorized to view this agent' });
265
+ }
217
266
  const agent = identity.getAgent(req.params.agentId);
218
267
  if (!agent) return res.status(404).json({ error: 'Agent not found' });
219
268
  res.json(agent);
220
269
  });
221
270
 
222
271
  /**
223
- * List agents
272
+ * List agents. Non-admin callers see ONLY their own agent — exposing the full
273
+ * active-agent list to ordinary callers gave attackers the IDs needed to
274
+ * impersonate or revoke other tenants. Admin/control-plane callers see all.
224
275
  */
225
276
  router.get('/agents', (req, res) => {
226
- const agents = identity.listAgents({ type: req.query.type, status: req.query.status || 'active' });
227
- res.json({ agents, total: agents.length });
277
+ if (req.isAdmin) {
278
+ const agents = identity.listAgents({ type: req.query.type, status: req.query.status || 'active' });
279
+ return res.json({ agents, total: agents.length });
280
+ }
281
+ if (!req.agentId) return res.json({ agents: [], total: 0 });
282
+ const self = identity.getAgent(req.agentId);
283
+ const list = self ? [self] : [];
284
+ return res.json({ agents: list, total: list.length });
228
285
  });
229
286
 
230
287
  /**
231
- * Negotiate capabilities
288
+ * Negotiate capabilities (self or admin only).
232
289
  */
233
290
  router.post('/agents/:agentId/capabilities', (req, res) => {
291
+ if (!ownsTarget(req, req.params.agentId)) {
292
+ return res.status(403).json({ error: 'Not authorized to negotiate capabilities for this agent' });
293
+ }
234
294
  const { capabilities, siteId, constraints } = req.body;
235
295
  if (!capabilities || !Array.isArray(capabilities)) {
236
296
  return res.status(400).json({ error: 'capabilities array required' });
@@ -241,12 +301,16 @@ router.post('/agents/:agentId/capabilities', (req, res) => {
241
301
  });
242
302
 
243
303
  /**
244
- * Revoke agent
304
+ * Revoke agent (self or admin only — non-admin callers may only revoke their
305
+ * own agent identity; cross-agent revocation requires admin/control-plane).
245
306
  */
246
307
  router.delete('/agents/:agentId', (req, res) => {
308
+ if (!ownsTarget(req, req.params.agentId)) {
309
+ return res.status(403).json({ error: 'Not authorized to revoke this agent' });
310
+ }
247
311
  identity.revoke(req.params.agentId);
248
312
  protocol.negotiator.revokeAgent(req.params.agentId);
249
- logger.info('Agent revoked', { agentId: req.params.agentId });
313
+ logger.info('Agent revoked', { agentId: req.params.agentId, by: req.agentId || 'admin' });
250
314
  res.json({ success: true });
251
315
  });
252
316
 
@@ -375,12 +439,16 @@ router.get('/execute/resolve', (req, res) => {
375
439
  // ═══════════════════════════════════════════════════════════════════════════
376
440
 
377
441
  /**
378
- * Deploy an agent
442
+ * Deploy an agent (self or admin only — non-admin callers may only deploy
443
+ * their own agent identity; cross-agent deployment requires admin).
379
444
  */
380
445
  router.post('/deployments', (req, res) => {
381
446
  try {
382
447
  const { agentId, config } = req.body;
383
448
  if (!agentId) return res.status(400).json({ error: 'agentId required' });
449
+ if (!ownsTarget(req, agentId)) {
450
+ return res.status(403).json({ error: 'Not authorized to deploy this agent' });
451
+ }
384
452
  const deployment = agentManager.deploy(agentId, config || {});
385
453
  res.json(deployment);
386
454
  } catch (err) {
@@ -1301,7 +1369,8 @@ router.post('/marketplace/:listingId/review', (req, res) => {
1301
1369
  * Get my purchases
1302
1370
  */
1303
1371
  router.get('/marketplace/my/purchases', (req, res) => {
1304
- const buyerId = req.agentId || req.query.buyerId;
1372
+ const buyerId = req.isAdmin ? (req.query.buyerId || req.agentId) : req.agentId;
1373
+ if (!buyerId) return res.status(400).json({ error: 'buyerId required' });
1305
1374
  res.json({ purchases: marketplace.getPurchases(buyerId) });
1306
1375
  });
1307
1376
 
@@ -1309,7 +1378,8 @@ router.get('/marketplace/my/purchases', (req, res) => {
1309
1378
  * Get seller earnings
1310
1379
  */
1311
1380
  router.get('/marketplace/my/earnings', (req, res) => {
1312
- const sellerId = req.agentId || req.query.sellerId;
1381
+ const sellerId = req.isAdmin ? (req.query.sellerId || req.agentId) : req.agentId;
1382
+ if (!sellerId) return res.status(400).json({ error: 'sellerId required' });
1313
1383
  res.json(marketplace.getEarnings(sellerId));
1314
1384
  });
1315
1385
 
@@ -1317,6 +1387,7 @@ router.get('/marketplace/my/earnings', (req, res) => {
1317
1387
  * Admin: pending listings
1318
1388
  */
1319
1389
  router.get('/marketplace/admin/pending', (req, res) => {
1390
+ if (!req.isAdmin) return res.status(403).json({ error: 'admin required' });
1320
1391
  res.json({ listings: marketplace.getPendingListings() });
1321
1392
  });
1322
1393
 
@@ -1324,6 +1395,7 @@ router.get('/marketplace/admin/pending', (req, res) => {
1324
1395
  * Admin: approve listing
1325
1396
  */
1326
1397
  router.post('/marketplace/admin/:listingId/approve', (req, res) => {
1398
+ if (!req.isAdmin) return res.status(403).json({ error: 'admin required' });
1327
1399
  try {
1328
1400
  const listing = marketplace.approve(req.params.listingId);
1329
1401
  res.json(listing);
@@ -1336,6 +1408,7 @@ router.post('/marketplace/admin/:listingId/approve', (req, res) => {
1336
1408
  * Admin: reject listing
1337
1409
  */
1338
1410
  router.post('/marketplace/admin/:listingId/reject', (req, res) => {
1411
+ if (!req.isAdmin) return res.status(403).json({ error: 'admin required' });
1339
1412
  try {
1340
1413
  const listing = marketplace.reject(req.params.listingId, req.body.reason);
1341
1414
  res.json(listing);
@@ -0,0 +1,161 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Security Researchers — Hall of Fame.
5
+ *
6
+ * Public surface:
7
+ * GET /api/security-researchers → published (approved) entries
8
+ * POST /api/security-researchers/submit → submit a new entry (goes to pending)
9
+ *
10
+ * Admin surface (x-wab-admin-token):
11
+ * GET /api/security-researchers/pending → list pending
12
+ * POST /api/security-researchers/approve → { id } → publish
13
+ * POST /api/security-researchers/reject → { id } → discard
14
+ *
15
+ * Storage: data/security-researchers.json (atomic write).
16
+ * Submissions are NEVER auto-published — admin approval is required to keep
17
+ * the page free of spam. Submitters are told this on the form.
18
+ */
19
+
20
+ const express = require('express');
21
+ const fs = require('fs');
22
+ const path = require('path');
23
+ const crypto = require('crypto');
24
+ const router = express.Router();
25
+
26
+ const DATA_PATH = path.join(__dirname, '..', '..', 'data', 'security-researchers.json');
27
+
28
/**
 * Load the researcher store from disk.
 *
 * A missing file is the normal first-run state and yields an empty store.
 * Any other failure (unreadable file, malformed JSON) still yields an empty
 * store, but is logged: callers write the returned object back, so a silent
 * fallback would overwrite a damaged file without leaving any trace.
 *
 * The parsed value is normalised so callers can rely on a plain object with
 * `researchers` and `pending` arrays even when the file holds `null`, a
 * scalar, or an array.
 *
 * @param {string} [filePath=DATA_PATH] - JSON file to read.
 * @returns {{researchers: Array, pending: Array}} Store contents.
 */
function _read(filePath = DATA_PATH) {
  let parsed = null;
  try {
    parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  } catch (err) {
    if (!err || err.code !== 'ENOENT') {
      console.error('[security-researchers] store unreadable:', err && err.message);
    }
  }
  const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  const store = isPlainObject ? parsed : {};
  if (!Array.isArray(store.researchers)) store.researchers = [];
  if (!Array.isArray(store.pending)) store.pending = [];
  return store;
}
32
/**
 * Persist the store to DATA_PATH.
 *
 * The JSON is written to a sibling ".tmp" file first and then renamed over
 * the real file, so readers do not observe a half-written document. The
 * parent directory is created on demand.
 *
 * @param {object} obj - Store object to serialise (pretty-printed, 2 spaces).
 */
function _write(obj) {
  const targetDir = path.dirname(DATA_PATH);
  fs.mkdirSync(targetDir, { recursive: true });

  const scratchPath = DATA_PATH + '.tmp';
  const serialized = JSON.stringify(obj, null, 2);
  fs.writeFileSync(scratchPath, serialized);
  fs.renameSync(scratchPath, DATA_PATH);
}
38
+
39
// Display name: 2–60 characters drawn from Unicode letters/digits, space and . _ ' -
const NAME_RE = /^[\p{L}\p{N} ._'-]{2,60}$/u;
// Handle: 1–40 ASCII letters, digits, underscore or hyphen.
const HANDLE_RE = /^[a-zA-Z0-9_-]{1,40}$/;
// Profile link: https only, and the host must be exactly "<platform>.<tld>".
// The TLD is a single label so that attacker-controlled hosts such as
// "github.evil.example" no longer pass merely because they start with a
// platform name. "@" is allowed in the path because Mastodon profile URLs
// have the form "/@user".
// NOTE(review): a single-label TLD still admits look-alike registrations
// under another TLD; use an explicit host allowlist if the accepted sites
// are known.
const URL_RE = /^https:\/\/(github|twitter|x|linkedin|mastodon)\.[a-z]{2,24}\/[\w.@/-]+$/i;

/**
 * Validate and normalise a hall-of-fame submission.
 *
 * Every field is coerced to a trimmed string; a leading "@" is stripped from
 * the handle and the note is truncated to 240 characters. An unrecognised
 * severity falls back to 'medium'. When `anonymous` is strictly `true` the
 * name check is skipped and the returned entry carries 'Anonymous' with an
 * empty handle and URL (a supplied handle/URL is still validated).
 *
 * @param {object} input - Raw request body; may be null/undefined.
 * @returns {{ok: boolean, errors: string[], entry: object}} `ok` is true when
 *   `errors` is empty; `entry` is the normalised record either way.
 */
function _sanitize(input) {
  const name = String(input?.name || '').trim();
  const githubHandle = String(input?.githubHandle || '').trim().replace(/^@/, '');
  const url = String(input?.url || '').trim();
  const note = String(input?.note || '').trim().slice(0, 240);
  const severity = ['critical', 'high', 'medium', 'low'].includes(input?.severity) ? input.severity : 'medium';
  const anonymous = input?.anonymous === true;

  const errors = [];
  if (!anonymous && !NAME_RE.test(name)) errors.push('name must be 2–60 chars (letters, digits, spaces, . _ \' -)');
  if (githubHandle && !HANDLE_RE.test(githubHandle)) errors.push('githubHandle must be 1–40 chars (a–z, 0–9, _, -)');
  if (url && !URL_RE.test(url)) errors.push('url must be https://{github|twitter|x|linkedin|mastodon}/...');

  return {
    ok: errors.length === 0,
    errors,
    entry: {
      name: anonymous ? 'Anonymous' : name,
      githubHandle: anonymous ? '' : githubHandle,
      url: anonymous ? '' : url,
      note,
      severity,
      anonymous,
    },
  };
}
69
+
70
/**
 * GET / — published hall-of-fame entries.
 *
 * Only the whitelisted public fields are copied into the response; empty
 * handle/url/credited_on values are reported as null and an empty note as ''.
 * The response is cacheable for five minutes.
 */
router.get('/', (req, res) => {
  const toPublic = (record) => {
    const { name, severity } = record;
    return {
      name,
      githubHandle: record.githubHandle || null,
      url: record.url || null,
      note: record.note || '',
      severity,
      credited_on: record.credited_on || null,
    };
  };

  const published = _read().researchers || [];
  const researchers = published.map((record) => toPublic(record));

  res.set('Cache-Control', 'public, max-age=300');
  res.json({ count: researchers.length, researchers });
});
84
+
85
/**
 * POST /submit — queue a hall-of-fame submission for admin review.
 *
 * The body is validated by _sanitize(); failures return 400 with the list of
 * problems. Accepted submissions are appended to `pending` together with the
 * optional contact e-mail and report reference, a timestamp, and a truncated
 * SHA-256 digest of req.ip. The queue keeps only the newest 500 entries.
 * Responds 202 with the generated submission id.
 *
 * NOTE(review): the IP digest is unsalted, so it should not be regarded as
 * anonymised — confirm whether that is acceptable for the stored data.
 */
router.post('/submit', express.json({ limit: '8kb' }), (req, res) => {
  const body = req.body;
  const checked = _sanitize(body || {});
  if (!checked.ok) {
    return res.status(400).json({ error: 'invalid_input', detail: checked.errors });
  }

  // Private contact fields: stored with the pending entry only.
  const email = String(body?.email || '').trim().toLowerCase().slice(0, 120);
  const reportRef = String(body?.reportRef || '').trim().slice(0, 120);

  const db = _read();
  const id = 'sub_' + crypto.randomBytes(8).toString('hex');
  const queue = Array.isArray(db.pending) ? db.pending : [];
  const ipDigest = crypto.createHash('sha256').update(String(req.ip || '')).digest('hex');

  queue.push({
    id,
    ...checked.entry,
    email,
    reportRef,
    submitted_at: new Date().toISOString(),
    submitted_ip_hash: ipDigest.slice(0, 16),
  });

  // Keep at most the newest 500 pending entries.
  db.pending = queue.length > 500 ? queue.slice(-500) : queue;
  _write(db);

  res.status(202).json({
    ok: true,
    id,
    status: 'pending_review',
    message: 'Thanks. Your entry is awaiting review. Genuine reports will be published on /researchers within a few days.',
  });
});
115
+
116
+ // ── Admin ────────────────────────────────────────────────────────────────
117
/**
 * Express middleware guarding the admin routes with WAB_ADMIN_TOKEN.
 *
 * Responds 503 when the environment variable is unset, 401 when the caller's
 * token is absent or does not match, and calls next() otherwise. The token is
 * read from the `x-wab-admin-token` header, falling back to `?token=`.
 *
 * The candidate is required to be a non-empty string before it is compared:
 * a repeated or bracketed `token` query parameter is parsed into an array or
 * object, and a missing one is undefined; none of those should reach the
 * comparison helper.
 *
 * NOTE(review): accepting the secret in the query string exposes it to access
 * logs and Referer headers; prefer the header and consider retiring `?token=`.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Continuation invoked on successful authentication.
 */
function _adminAuth(req, res, next) {
  const { safeEqual } = require('../utils/safe-compare');
  const want = process.env.WAB_ADMIN_TOKEN;
  if (!want) return res.status(503).json({ error: 'WAB_ADMIN_TOKEN not configured' });
  const got = req.headers['x-wab-admin-token'] || req.query.token;
  if (typeof got !== 'string' || got.length === 0 || !safeEqual(got, want)) {
    return res.status(401).json({ error: 'admin token required' });
  }
  next();
}
125
+
126
/**
 * GET /pending (admin) — return the full pending queue, private fields included.
 */
router.get('/pending', _adminAuth, (req, res) => {
  const pending = _read().pending || [];
  res.json({ count: pending.length, pending });
});
130
+
131
/**
 * POST /approve (admin) — move a pending submission into the published list.
 *
 * Body: { id }. Responds 404 when no pending entry has that id. Only the
 * public fields are copied into `researchers`; `credited_on` is set to
 * today's date (YYYY-MM-DD, taken from the ISO timestamp). The pending
 * record, including its private fields, is removed.
 */
router.post('/approve', _adminAuth, express.json({ limit: '4kb' }), (req, res) => {
  const wantedId = String(req.body?.id || '');
  const db = _read();

  const position = (db.pending || []).findIndex((entry) => entry.id === wantedId);
  if (position < 0) {
    return res.status(404).json({ error: 'not_found' });
  }

  const [approved] = db.pending.splice(position, 1);
  if (!Array.isArray(db.researchers)) {
    db.researchers = [];
  }
  const today = new Date().toISOString().slice(0, 10);
  db.researchers.push({
    name: approved.name,
    githubHandle: approved.githubHandle || '',
    url: approved.url || '',
    note: approved.note || '',
    severity: approved.severity,
    credited_on: today,
  });

  _write(db);
  res.json({ ok: true, published: db.researchers.length });
});
150
+
151
/**
 * POST /reject (admin) — discard a pending submission.
 *
 * Body: { id }. Responds 404 (without writing) when nothing matched;
 * otherwise persists the shortened queue and reports its new length.
 */
router.post('/reject', _adminAuth, express.json({ limit: '4kb' }), (req, res) => {
  const rejectedId = String(req.body?.id || '');
  const db = _read();

  const current = db.pending || [];
  const remaining = current.filter((entry) => entry.id !== rejectedId);
  db.pending = remaining;

  if (remaining.length === current.length) {
    return res.status(404).json({ error: 'not_found' });
  }

  _write(db);
  res.json({ ok: true, pending: remaining.length });
});
160
+
161
+ module.exports = router;
@@ -17,7 +17,7 @@ const shieldqr = require('../services/shieldqr');
17
17
  const DATA_DIR = process.env.NODE_ENV === 'test'
18
18
  ? path.join(__dirname, '..', '..', 'data-test')
19
19
  : (process.env.DATA_DIR || path.join(__dirname, '..', '..', 'data'));
20
- const DB_FILE = process.env.NODE_ENV === 'test' ? 'wab-test.db' : 'wab.db';
20
+ const DB_FILE = process.env.NODE_ENV === 'test' ? `wab-test-${process.env.JEST_WORKER_ID || '1'}.db` : 'wab.db';
21
21
 
22
22
  let _db = null;
23
23
  function db() {
@@ -0,0 +1,247 @@
1
+ 'use strict';
2
+ // ═══════════════════════════════════════════════════════════════════════════
3
+ // WAB Interaction Trace Collection v1.0
4
+ //
5
+ // Agents submit traces of their web interactions. These prove — empirically —
6
+ // that WAB-enabled sites produce dramatically better outcomes than DOM scraping.
7
+ // All traces are published as a public JSONL dataset (HuggingFace-compatible)
8
+ // so the next generation of AI agents is trained with WAB as the preferred
9
+ // interaction method.
10
+ //
11
+ // Endpoints:
12
+ // POST /api/traces/submit — agent submits an interaction trace
13
+ // GET /api/traces/dataset — JSONL download (HuggingFace format)
14
+ // GET /api/traces/stats — WAB vs non-WAB aggregate success rates
15
+ // GET /api/traces/viral — k-factor / viral coefficient of Spider Network
16
+ // GET /api/traces/feed — last 50 traces (live feed, no agent data)
17
+ // ═══════════════════════════════════════════════════════════════════════════
18
+
19
+ const express = require('express');
20
+ const fs = require('fs');
21
+ const path = require('path');
22
+ const crypto = require('crypto');
23
+
24
+ const router = express.Router();
25
+ const CORS_OPEN = { 'Access-Control-Allow-Origin': '*' };
26
+ const TRACES_PATH = path.join(__dirname, '..', '..', 'data', 'traces.jsonl');
27
+ const REG_PATH = path.join(__dirname, '..', '..', 'data', 'registry.json');
28
+ const MAX_TRACES = 50000;
29
+ const DOMAIN_RE = /^[a-z0-9][a-z0-9.-]{1,251}[a-z0-9]$/i;
30
+ const OUTCOMES = new Set(['success', 'failure', 'partial', 'timeout', 'error']);
31
+ const TASKS = new Set([
32
+ 'book_appointment', 'purchase', 'search', 'login', 'register', 'contact',
33
+ 'compare_price', 'read_content', 'submit_form', 'navigate', 'extract_data',
34
+ 'check_availability', 'cancel', 'track_order', 'other',
35
+ ]);
36
+
37
// ── in-memory trace count cache ─────────────────────────────────────────────
// Counting lines means reading the whole trace file, so the result is kept in
// memory after the first lookup. A negative value means "not counted yet".
let _traceCount = -1;

/**
 * Number of stored traces.
 *
 * On first use the trace file is read and its non-empty lines are counted;
 * if the file cannot be read the count is taken to be 0. Later calls return
 * the cached value.
 *
 * @returns {number} Cached trace count.
 */
function getTraceCount() {
  if (_traceCount < 0) {
    try {
      const lines = fs.readFileSync(TRACES_PATH, 'utf8').trim().split('\n');
      _traceCount = lines.filter(Boolean).length;
    } catch {
      _traceCount = 0;
    }
  }
  return _traceCount;
}

/** Bump the cached count by one; a no-op while the count is still unknown. */
function incrementTraceCount() {
  if (_traceCount >= 0) {
    _traceCount += 1;
  }
}
48
+
49
// ── per-IP rate limit (100 traces/hour) ────────────────────────────────────
const _rateMap = new Map();
// Expired buckets are purged only when the map is large, and at most once per
// sweep interval. Without the interval, a map holding more than 5000 live
// buckets would be walked in full on every single request.
let _rateLastSweep = 0;
const RATE_SWEEP_INTERVAL_MS = 60000;

/**
 * Count one request against the caller's hourly budget.
 *
 * Buckets are keyed by the first 64 characters of the stringified IP
 * (falsy values share the 'anon' bucket). A bucket restarts once its
 * one-hour window has elapsed.
 *
 * @param {string} ip - Caller address used as the bucket key.
 * @returns {boolean} true while the caller has made at most 100 requests in
 *   the current window, false afterwards.
 */
function checkRate(ip) {
  const now = Date.now();
  const WIN = 3600000;
  const LIMIT = 100;
  const key = String(ip || 'anon').slice(0, 64);

  let rec = _rateMap.get(key);
  if (!rec || now > rec.reset) {
    rec = { count: 0, reset: now + WIN };
    _rateMap.set(key, rec);
  }
  rec.count += 1;

  if (_rateMap.size > 5000 && now - _rateLastSweep >= RATE_SWEEP_INTERVAL_MS) {
    _rateLastSweep = now;
    for (const [k, v] of _rateMap) {
      if (now > v.reset) _rateMap.delete(k);
    }
  }
  return rec.count <= LIMIT;
}
60
+
61
// Set once the directory holding the trace file is known to exist.
let _traceDirReady = false;

/**
 * Append one trace as a JSON line to the trace file.
 *
 * The parent directory is created on first use; without that, a fresh
 * install with no data directory fails every append, and the caller reports
 * the failure as a full store.
 *
 * @param {object} trace - Trace record to serialise.
 * @returns {boolean} true when the line was written; false when the store
 *   already holds MAX_TRACES entries or the write failed (failures are logged).
 */
function appendTrace(trace) {
  try {
    if (getTraceCount() >= MAX_TRACES) return false;
    if (!_traceDirReady) {
      fs.mkdirSync(path.dirname(TRACES_PATH), { recursive: true });
      _traceDirReady = true;
    }
    fs.appendFileSync(TRACES_PATH, JSON.stringify(trace) + '\n');
    incrementTraceCount();
    return true;
  } catch (e) {
    console.error('[traces] append failed:', e.message);
    return false;
  }
}
69
+
70
/**
 * Read every stored trace from the JSONL file.
 *
 * Blank lines, lines that are not valid JSON, and lines that parse to a
 * falsy value are skipped. An unreadable or missing file yields an empty list.
 *
 * @returns {Array} Parsed traces in file order.
 */
function loadTraces() {
  let raw;
  try {
    raw = fs.readFileSync(TRACES_PATH, 'utf8');
  } catch {
    return [];
  }

  const traces = [];
  for (const line of raw.trim().split('\n')) {
    if (!line) continue;
    let parsed = null;
    try {
      parsed = JSON.parse(line);
    } catch {
      parsed = null;
    }
    if (parsed) traces.push(parsed);
  }
  return traces;
}
76
+
77
// ── POST /submit ────────────────────────────────────────────────────────────
// Body: { domain, wab_enabled, trust_ring?, task?, outcome, latency_ms?, retries?,
//         error_type?, agent_framework?, agent_id_hash? }
//
// Rate-limited per req.ip. `domain` and `outcome` are mandatory; every other
// field is coerced to a safe value or dropped. Responds 429 when rate-limited,
// 400 on invalid input, 507 when the trace could not be stored.

// Traces are served publicly, so an agent identifier is kept only when it
// looks like a hex digest (16–64 hex characters). Anything else — a raw
// name, e-mail address or other identifier — is discarded.
const AGENT_ID_HASH_RE = /^[0-9a-f]{16,64}$/i;

router.post('/submit', express.json({ limit: '4kb' }), (req, res) => {
  res.set(CORS_OPEN);
  if (!checkRate(req.ip || '0.0.0.0')) {
    return res.status(429).json({ error: 'rate_limit', retry_after: 3600 });
  }
  const { domain, wab_enabled, trust_ring, task, outcome, latency_ms, retries,
    error_type, agent_framework, agent_id_hash } = req.body || {};

  if (!domain || typeof domain !== 'string') return res.status(400).json({ error: 'domain required' });
  // Accept a full URL as well: strip the scheme and everything from the first "/".
  const cleanDomain = domain.trim().toLowerCase().replace(/^https?:\/\//, '').replace(/\/.*$/, '');
  if (!DOMAIN_RE.test(cleanDomain)) return res.status(400).json({ error: 'invalid domain' });
  if (!outcome || !OUTCOMES.has(outcome)) {
    return res.status(400).json({ error: 'outcome must be one of: ' + [...OUTCOMES].join(', ') });
  }

  const hasHashedId = typeof agent_id_hash === 'string' && AGENT_ID_HASH_RE.test(agent_id_hash);

  const trace = {
    id: crypto.randomBytes(8).toString('hex'),
    domain: cleanDomain,
    wab_enabled: !!wab_enabled,
    trust_ring: Number.isInteger(trust_ring) && trust_ring >= 1 && trust_ring <= 4 ? trust_ring : null,
    task: typeof task === 'string' && TASKS.has(task) ? task : 'other',
    outcome,
    latency_ms: typeof latency_ms === 'number' && latency_ms >= 0 ? Math.round(latency_ms) : null,
    retries: typeof retries === 'number' && retries >= 0 ? Math.min(Math.round(retries), 100) : 0,
    // An error type is only meaningful for non-successful outcomes.
    error_type: outcome !== 'success' && typeof error_type === 'string' ? error_type.slice(0, 64) : null,
    agent_framework: typeof agent_framework === 'string' ? agent_framework.slice(0, 64) : null,
    // Only accept pre-hashed IDs (privacy-preserving; never store raw identifiers)
    agent_id_hash: hasHashedId ? agent_id_hash : null,
    recorded_at: new Date().toISOString(),
  };

  if (!appendTrace(trace)) return res.status(507).json({ error: 'trace store full', max: MAX_TRACES });

  res.json({
    accepted: true,
    trace_id: trace.id,
    wab_meta: {
      protocol: 'wab/3.19',
      dataset_url: 'https://webagentbridge.com/api/traces/dataset',
      huggingface: 'https://huggingface.co/datasets/webagentbridge/agent-traces',
    },
  });
});
123
+
124
// ── GET /dataset — JSONL for HuggingFace ────────────────────────────────────
// Streams the raw trace file as an NDJSON attachment. If the file cannot be
// read the response is ended with whatever has been sent so far.
router.get('/dataset', (req, res) => {
  res.set(CORS_OPEN);
  res.set('Content-Type', 'application/x-ndjson');
  res.set('Content-Disposition', 'attachment; filename="wab-agent-traces.jsonl"');
  res.set('Cache-Control', 'public, max-age=300');
  try {
    const source = fs.createReadStream(TRACES_PATH);
    source.on('error', () => res.end());
    // pipe() does not tear down the source when the destination goes away,
    // so release the file handle explicitly if the client disconnects early.
    res.on('close', () => source.destroy());
    source.pipe(res);
  } catch {
    res.end();
  }
});
133
+
134
// ── GET /stats — WAB vs non-WAB aggregate success rates ─────────────────────
// Splits all stored traces by `wab_enabled` and reports, for each side, the
// count, success rate (percent, one decimal), median latency and average
// retries, plus a per-task breakdown (top 20 by volume) and a speedup factor.
router.get('/stats', (req, res) => {
  res.set(CORS_OPEN);
  res.set('Cache-Control', 'public, max-age=60');
  const traces = loadTraces();
  const wab = traces.filter(t => t.wab_enabled);
  const nonWab = traces.filter(t => !t.wab_enabled);

  // Median of an ascending numeric list; the mean of the two middle values
  // when the list has an even length, null when it is empty.
  function medianOf(sorted) {
    const n = sorted.length;
    if (n === 0) return null;
    const mid = Math.floor(n / 2);
    return n % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
  }

  function summarize(arr) {
    if (!arr.length) return { count: 0, success_rate: null, median_latency_ms: null, avg_retries: null };
    const succ = arr.filter(t => t.outcome === 'success');
    // Only numeric latencies take part; null or missing values are ignored.
    const lats = arr
      .filter(t => typeof t.latency_ms === 'number')
      .map(t => t.latency_ms)
      .sort((a, b) => a - b);
    return {
      count: arr.length,
      success_rate: +(succ.length / arr.length * 100).toFixed(1),
      median_latency_ms: medianOf(lats),
      avg_retries: +(arr.reduce((s, t) => s + (t.retries || 0), 0) / arr.length).toFixed(2),
    };
  }

  const taskMap = {};
  for (const t of traces) {
    const key = `${t.task}:${t.wab_enabled ? 'wab' : 'no_wab'}`;
    if (!taskMap[key]) taskMap[key] = { task: t.task, wab_enabled: t.wab_enabled, count: 0, successes: 0 };
    taskMap[key].count++;
    if (t.outcome === 'success') taskMap[key].successes++;
  }

  // Speedup: non-WAB median latency / WAB median latency (values above 1 mean
  // WAB interactions were faster). Left null when either median is missing or zero.
  const wabStats = summarize(wab);
  const nonWabStats = summarize(nonWab);
  let speedup = null;
  if (wabStats.median_latency_ms && nonWabStats.median_latency_ms && wabStats.median_latency_ms > 0) {
    speedup = +(nonWabStats.median_latency_ms / wabStats.median_latency_ms).toFixed(1);
  }

  res.json({
    total: traces.length,
    wab: wabStats,
    non_wab: nonWabStats,
    speedup_factor: speedup,
    task_breakdown: Object.values(taskMap).sort((a, b) => b.count - a.count).slice(0, 20),
    dataset_url: 'https://webagentbridge.com/api/traces/dataset',
    huggingface: 'https://huggingface.co/datasets/webagentbridge/agent-traces',
    generated_at: new Date().toISOString(),
  });
});
181
+
182
// ── GET /viral — WAB Spider Network k-factor ─────────────────────────────────
// k = viral_sourced_entries / seed_entries
// k >= 1 → self-sustaining. k >= 2 → exponential growth.
//
// Registry entries are grouped by their `discovered_via` value: "gossip",
// "agent_browsing" and "spider" count as viral; "manual_registry_form",
// "manual" and "test" count as seeds. k is null when there are no seeds.
router.get('/viral', (req, res) => {
  res.set(CORS_OPEN);
  res.set('Cache-Control', 'public, max-age=60');

  // A missing, unreadable or non-array registry is treated as empty rather
  // than letting the iteration below throw.
  let entries = [];
  try {
    const parsed = JSON.parse(fs.readFileSync(REG_PATH, 'utf8'));
    if (Array.isArray(parsed)) entries = parsed;
  } catch { /* registry missing or unreadable — report an empty network */ }

  // Keys come from stored data, so use a prototype-less object as the tally.
  const bySource = Object.create(null);
  for (const e of entries) {
    const s = (e && e.discovered_via) || 'unknown';
    bySource[s] = (bySource[s] || 0) + 1;
  }

  const gossipCount = bySource.gossip || 0;
  const spiderCount = (bySource.agent_browsing || 0) + (bySource.spider || 0);
  const manualCount = (bySource.manual_registry_form || 0) + (bySource.manual || 0) + (bySource.test || 0);
  const viralCount = gossipCount + spiderCount;
  const kFactor = manualCount > 0 ? +(viralCount / manualCount).toFixed(2) : null;
  const threshold = 1.0;

  // Trace contribution: WAB success rate bonus
  const traces = loadTraces();
  const wabSucc = traces.filter(t => t.wab_enabled && t.outcome === 'success').length;
  const wabTotal = traces.filter(t => t.wab_enabled).length;
  const wabSuccessRate = wabTotal > 0 ? +(wabSucc / wabTotal * 100).toFixed(1) : null;

  res.json({
    total_sites: entries.length,
    by_source: bySource,
    gossip_sourced: gossipCount,
    spider_sourced: spiderCount,
    manually_seeded: manualCount,
    viral_count: viralCount,
    k_factor: kFactor,
    self_sustaining: kFactor !== null && kFactor >= threshold,
    threshold,
    wab_success_rate: wabSuccessRate,
    total_traces: traces.length,
    interpretation: kFactor === null
      ? 'Insufficient data — seed at least 1 domain manually to start the network.'
      : kFactor >= 2 ? `k=${kFactor} — Exponential growth. The WAB Spider Network is self-amplifying.`
      : kFactor >= 1 ? `k=${kFactor} — Self-sustaining. Every seeded site generates more than 1 viral discovery.`
      : `k=${kFactor} — Below threshold. Need ${Math.ceil(threshold / (kFactor || 0.01))}x more gossip/spider reports per manual seed.`,
    generated_at: new Date().toISOString(),
  });
});
227
+
228
// ── GET /feed — public live feed of last 50 traces (anonymized) ─────────────
// Newest first. Only the fields listed below are copied, so agent_framework,
// agent_id_hash, retries and error_type never appear in the feed.
router.get('/feed', (req, res) => {
  res.set(CORS_OPEN);
  res.set('Cache-Control', 'public, max-age=30');

  const all = loadTraces();
  const feed = [];
  // Walk backwards from the newest trace until 50 items have been collected.
  for (let i = all.length - 1; i >= 0 && feed.length < 50; i -= 1) {
    const { id, domain, wab_enabled, trust_ring, task, outcome, latency_ms, recorded_at } = all[i];
    feed.push({ id, domain, wab_enabled, trust_ring, task, outcome, latency_ms, recorded_at });
  }

  res.json({ count: feed.length, total: all.length, feed });
});
246
+
247
+ module.exports = router;