web-agent-bridge 3.4.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (310) hide show
  1. package/LICENSE +84 -84
  2. package/README.ar.md +1563 -1304
  3. package/README.md +137 -298
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -237
  6. package/bin/wab-init.js +244 -223
  7. package/bin/wab.js +80 -80
  8. package/examples/azure-dns-wab.js +83 -83
  9. package/examples/bidi-agent.js +119 -119
  10. package/examples/cloudflare-wab-dns.js +121 -121
  11. package/examples/cpanel-wab-dns.js +114 -114
  12. package/examples/cross-site-agent.js +91 -91
  13. package/examples/dns-discovery-agent.js +166 -166
  14. package/examples/gcp-dns-wab.js +76 -76
  15. package/examples/governance-agent.js +169 -169
  16. package/examples/mcp-agent.js +94 -94
  17. package/examples/next-app-router/README.md +44 -44
  18. package/examples/plesk-wab-dns.js +103 -103
  19. package/examples/puppeteer-agent.js +108 -108
  20. package/examples/route53-wab-dns.js +144 -144
  21. package/examples/saas-dashboard/README.md +55 -55
  22. package/examples/safe-mode-agent.js +96 -96
  23. package/examples/self-discovery.js +106 -0
  24. package/examples/shopify-hydrogen/README.md +74 -74
  25. package/examples/vision-agent.js +171 -171
  26. package/examples/wab-sign.js +74 -74
  27. package/examples/wab-verify.js +60 -60
  28. package/examples/wordpress-elementor/README.md +77 -77
  29. package/package.json +93 -93
  30. package/public/.well-known/agent-tools.json +180 -180
  31. package/public/.well-known/ai-assets.json +59 -59
  32. package/public/.well-known/security.txt +8 -8
  33. package/public/.well-known/wab.json +28 -28
  34. package/public/activate.html +448 -368
  35. package/public/adopt.html +236 -0
  36. package/public/adoption-metrics.html +188 -188
  37. package/public/agent-workspace.html +359 -349
  38. package/public/ai.html +198 -198
  39. package/public/api.html +397 -413
  40. package/public/azure-dns-integration.html +289 -289
  41. package/public/browser.html +486 -486
  42. package/public/cloudflare-integration.html +380 -380
  43. package/public/commander-dashboard.html +243 -243
  44. package/public/cookies.html +210 -210
  45. package/public/cpanel-integration.html +398 -398
  46. package/public/css/agent-workspace.css +1713 -1713
  47. package/public/css/premium.css +317 -317
  48. package/public/css/styles.css +1401 -1263
  49. package/public/dashboard-shieldlink.html +295 -0
  50. package/public/dashboard.html +711 -707
  51. package/public/dns.html +436 -436
  52. package/public/docs.html +588 -588
  53. package/public/enterprise-mesh.ar.html +80 -0
  54. package/public/enterprise-mesh.html +81 -0
  55. package/public/feed.xml +89 -89
  56. package/public/gcp-dns-integration.html +318 -318
  57. package/public/governance.ar.html +70 -0
  58. package/public/governance.html +69 -0
  59. package/public/growth.html +465 -465
  60. package/public/index.html +1372 -1266
  61. package/public/integrations.html +556 -556
  62. package/public/js/activate.js +449 -145
  63. package/public/js/agent-workspace.js +1740 -1740
  64. package/public/js/auth-nav.js +117 -65
  65. package/public/js/auth-redirect.js +12 -12
  66. package/public/js/cookie-consent.js +56 -56
  67. package/public/js/dns.js +438 -438
  68. package/public/js/wab-demo-page.js +721 -721
  69. package/public/js/ws-client.js +74 -74
  70. package/public/l-preview.html +242 -0
  71. package/public/llms-full.txt +360 -360
  72. package/public/llms.txt +125 -125
  73. package/public/login.html +85 -85
  74. package/public/mesh-dashboard.html +328 -328
  75. package/public/milestones.html +346 -0
  76. package/public/one-click.html +779 -0
  77. package/public/openapi.json +669 -669
  78. package/public/partners.ar.html +145 -0
  79. package/public/partners.html +143 -0
  80. package/public/phone-shield.html +281 -281
  81. package/public/plesk-integration.html +375 -375
  82. package/public/premium-dashboard.html +2489 -2489
  83. package/public/premium.html +793 -793
  84. package/public/privacy.html +297 -297
  85. package/public/provider-onboarding.html +172 -172
  86. package/public/provider-sandbox.html +134 -134
  87. package/public/providers.html +359 -359
  88. package/public/refusals.html +172 -0
  89. package/public/register.html +105 -105
  90. package/public/registrar-integrations.html +141 -141
  91. package/public/ring4.html +292 -0
  92. package/public/robots.txt +99 -99
  93. package/public/route53-integration.html +531 -531
  94. package/public/score.html +263 -0
  95. package/public/script/wab-consent.d.ts +36 -36
  96. package/public/script/wab-consent.js +104 -104
  97. package/public/script/wab-schema.js +131 -131
  98. package/public/script/wab.d.ts +108 -108
  99. package/public/script/wab.min.js +580 -580
  100. package/public/security.txt +8 -8
  101. package/public/shieldlink.html +244 -0
  102. package/public/shieldqr.html +231 -231
  103. package/public/sitemap.xml +13 -1
  104. package/public/terms.html +256 -256
  105. package/public/trust-graph-api.ar.html +92 -0
  106. package/public/trust-graph-api.html +91 -0
  107. package/public/wab-features.html +560 -0
  108. package/public/wab-trust.html +200 -200
  109. package/public/wab-truth.html +375 -0
  110. package/public/wab-vs-protocols.html +210 -210
  111. package/public/whitepaper.html +449 -449
  112. package/script/ai-agent-bridge.js +1754 -1754
  113. package/sdk/README.md +99 -99
  114. package/sdk/agent-mesh.js +449 -449
  115. package/sdk/auto-discovery.js +301 -288
  116. package/sdk/commander.js +262 -262
  117. package/sdk/governance.js +262 -262
  118. package/sdk/index.d.ts +464 -464
  119. package/sdk/index.js +649 -649
  120. package/sdk/multi-agent.js +318 -318
  121. package/sdk/safe-mode.js +221 -221
  122. package/sdk/safety-shield.js +219 -219
  123. package/sdk/schema-discovery.js +83 -83
  124. package/server/adapters/index.js +520 -520
  125. package/server/config/plans.js +412 -367
  126. package/server/config/secrets.js +102 -102
  127. package/server/control-plane/index.js +301 -301
  128. package/server/data-plane/index.js +354 -354
  129. package/server/index.js +790 -670
  130. package/server/llm/index.js +404 -404
  131. package/server/middleware/adminAuth.js +35 -35
  132. package/server/middleware/api-tier.js +170 -0
  133. package/server/middleware/auth.js +50 -50
  134. package/server/middleware/featureGate.js +88 -88
  135. package/server/middleware/rateLimits.js +100 -100
  136. package/server/middleware/sensitiveAction.js +157 -157
  137. package/server/middleware/wab-trust.js +141 -0
  138. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  139. package/server/migrations/002_premium_features.sql +418 -418
  140. package/server/migrations/003_ads_integer_cents.sql +33 -33
  141. package/server/migrations/004_agent_os.sql +158 -158
  142. package/server/migrations/005_marketplace_metering.sql +126 -126
  143. package/server/migrations/006_growth_suite.sql +138 -0
  144. package/server/migrations/007_governance.sql +106 -106
  145. package/server/migrations/008_plans.sql +144 -144
  146. package/server/migrations/009_shieldqr.sql +30 -30
  147. package/server/migrations/010_extended_trust.sql +33 -33
  148. package/server/migrations/011_outreach.sql +47 -0
  149. package/server/migrations/012_shieldlink.sql +116 -0
  150. package/server/migrations/013_ct_monitor.sql +13 -0
  151. package/server/migrations/014_wab_advanced_features.sql +128 -0
  152. package/server/migrations/015_wab_truth_layer.sql +101 -0
  153. package/server/migrations/016_ring4_external_trust.sql +84 -0
  154. package/server/migrations/017_ring4_extensions.sql +69 -0
  155. package/server/migrations/018_commercial_foundations.sql +167 -0
  156. package/server/migrations/019_unify_tier_constraints.sql +133 -0
  157. package/server/models/adapters/index.js +33 -33
  158. package/server/models/adapters/mysql.js +183 -183
  159. package/server/models/adapters/postgresql.js +172 -172
  160. package/server/models/adapters/sqlite.js +7 -7
  161. package/server/models/db.js +740 -740
  162. package/server/observability/failure-analysis.js +337 -337
  163. package/server/observability/index.js +394 -394
  164. package/server/protocol/capabilities.js +223 -223
  165. package/server/protocol/index.js +243 -243
  166. package/server/protocol/schema.js +584 -584
  167. package/server/registry/certification.js +271 -271
  168. package/server/registry/index.js +326 -326
  169. package/server/routes/activate.js +478 -0
  170. package/server/routes/admin-outreach.js +239 -0
  171. package/server/routes/admin-plans.js +76 -76
  172. package/server/routes/admin-premium.js +674 -673
  173. package/server/routes/admin-shieldlink.js +137 -0
  174. package/server/routes/admin-shieldqr.js +90 -90
  175. package/server/routes/admin-trust-monitor.js +139 -83
  176. package/server/routes/admin.js +550 -549
  177. package/server/routes/adopt.js +61 -0
  178. package/server/routes/ads.js +130 -130
  179. package/server/routes/agent-workspace.js +540 -540
  180. package/server/routes/api-keys.js +127 -0
  181. package/server/routes/api.js +150 -150
  182. package/server/routes/auth.js +71 -71
  183. package/server/routes/billing.js +57 -57
  184. package/server/routes/commander.js +316 -316
  185. package/server/routes/customer-shieldlink.js +133 -0
  186. package/server/routes/demo-showcase.js +332 -332
  187. package/server/routes/demo-store.js +154 -154
  188. package/server/routes/diagnose.js +373 -0
  189. package/server/routes/discovery.js +2348 -2348
  190. package/server/routes/enterprise-mesh.js +170 -0
  191. package/server/routes/gateway.js +173 -173
  192. package/server/routes/governance-saas.js +203 -0
  193. package/server/routes/governance.js +208 -208
  194. package/server/routes/growth.js +1048 -0
  195. package/server/routes/intent.js +328 -0
  196. package/server/routes/license.js +251 -251
  197. package/server/routes/mesh.js +469 -469
  198. package/server/routes/noscript.js +543 -543
  199. package/server/routes/partners.js +201 -0
  200. package/server/routes/plans.js +33 -33
  201. package/server/routes/premium-v2.js +686 -686
  202. package/server/routes/premium.js +724 -724
  203. package/server/routes/providers.js +650 -650
  204. package/server/routes/reputation.js +411 -0
  205. package/server/routes/ring4.js +885 -0
  206. package/server/routes/runtime.js +2148 -2148
  207. package/server/routes/shieldlink.js +70 -0
  208. package/server/routes/shieldqr.js +88 -88
  209. package/server/routes/sovereign.js +465 -465
  210. package/server/routes/truth-layer.js +670 -0
  211. package/server/routes/universal.js +200 -200
  212. package/server/routes/unsubscribe.js +51 -0
  213. package/server/routes/wab-api.js +850 -850
  214. package/server/routes/wab-cache.js +282 -0
  215. package/server/runtime/container-worker.js +111 -111
  216. package/server/runtime/container.js +448 -448
  217. package/server/runtime/distributed-worker.js +362 -362
  218. package/server/runtime/event-bus.js +210 -210
  219. package/server/runtime/index.js +253 -253
  220. package/server/runtime/queue.js +599 -599
  221. package/server/runtime/replay.js +666 -666
  222. package/server/runtime/sandbox.js +266 -266
  223. package/server/runtime/scheduler.js +534 -534
  224. package/server/runtime/session-engine.js +293 -293
  225. package/server/runtime/state-manager.js +188 -188
  226. package/server/secrets/wab-signing-key.pem +3 -0
  227. package/server/secrets/wab-signing-pub.pem +3 -0
  228. package/server/security/cross-site-redactor.js +196 -196
  229. package/server/security/dry-run.js +180 -180
  230. package/server/security/human-gate-rate-limit.js +147 -147
  231. package/server/security/human-gate-transports.js +178 -178
  232. package/server/security/human-gate.js +281 -281
  233. package/server/security/index.js +368 -368
  234. package/server/security/intent-engine.js +245 -245
  235. package/server/security/reward-guard.js +171 -171
  236. package/server/security/rollback-store.js +239 -239
  237. package/server/security/token-scope.js +404 -404
  238. package/server/security/url-policy.js +139 -139
  239. package/server/services/adoption-agent.js +182 -0
  240. package/server/services/agent-chat.js +506 -506
  241. package/server/services/agent-learning.js +601 -601
  242. package/server/services/agent-memory.js +625 -625
  243. package/server/services/agent-mesh.js +555 -555
  244. package/server/services/agent-symphony.js +717 -717
  245. package/server/services/agent-tasks.js +1807 -1807
  246. package/server/services/api-key-engine.js +292 -292
  247. package/server/services/cluster.js +894 -894
  248. package/server/services/commander.js +738 -738
  249. package/server/services/edge-compute.js +440 -440
  250. package/server/services/email.js +233 -233
  251. package/server/services/fairness-engine.js +409 -0
  252. package/server/services/fairness.js +420 -0
  253. package/server/services/governance.js +466 -466
  254. package/server/services/hosted-runtime.js +205 -205
  255. package/server/services/lfd.js +635 -635
  256. package/server/services/local-ai.js +389 -389
  257. package/server/services/marketplace.js +270 -270
  258. package/server/services/metering.js +182 -182
  259. package/server/services/modules/affiliate-intelligence.js +93 -93
  260. package/server/services/modules/agent-firewall.js +90 -90
  261. package/server/services/modules/bounty.js +89 -89
  262. package/server/services/modules/collective-bargaining.js +92 -92
  263. package/server/services/modules/dark-pattern.js +66 -66
  264. package/server/services/modules/gov-intelligence.js +45 -45
  265. package/server/services/modules/neural.js +55 -55
  266. package/server/services/modules/notary.js +49 -49
  267. package/server/services/modules/price-time-machine.js +86 -86
  268. package/server/services/modules/protocol.js +104 -104
  269. package/server/services/negotiation.js +439 -439
  270. package/server/services/outreach-agent.js +312 -0
  271. package/server/services/plans.js +214 -214
  272. package/server/services/plugins.js +771 -771
  273. package/server/services/price-intelligence.js +566 -566
  274. package/server/services/price-shield.js +1137 -1137
  275. package/server/services/provider-clients.js +740 -740
  276. package/server/services/reputation.js +465 -465
  277. package/server/services/search-engine.js +357 -357
  278. package/server/services/security.js +513 -513
  279. package/server/services/self-healing.js +843 -843
  280. package/server/services/shieldlink.js +492 -0
  281. package/server/services/shieldqr.js +322 -322
  282. package/server/services/sovereign-shield.js +542 -542
  283. package/server/services/ssl-ct-monitor.js +224 -0
  284. package/server/services/ssl-inspector.js +42 -42
  285. package/server/services/ssl-monitor.js +167 -167
  286. package/server/services/stripe.js +206 -205
  287. package/server/services/swarm.js +788 -788
  288. package/server/services/universal-scraper.js +662 -662
  289. package/server/services/verification.js +481 -481
  290. package/server/services/vision.js +1163 -1163
  291. package/server/services/wab-crypto.js +178 -178
  292. package/server/utils/cache.js +125 -125
  293. package/server/utils/migrate.js +81 -81
  294. package/server/utils/safe-fetch.js +228 -228
  295. package/server/utils/secureFields.js +50 -50
  296. package/server/ws.js +161 -161
  297. package/templates/artisan-marketplace.yaml +104 -104
  298. package/templates/book-price-scout.yaml +98 -98
  299. package/templates/electronics-price-tracker.yaml +108 -108
  300. package/templates/flight-deal-hunter.yaml +113 -113
  301. package/templates/freelancer-direct.yaml +116 -116
  302. package/templates/grocery-price-compare.yaml +93 -93
  303. package/templates/hotel-direct-booking.yaml +113 -113
  304. package/templates/local-services.yaml +98 -98
  305. package/templates/olive-oil-tunisia.yaml +88 -88
  306. package/templates/organic-farm-fresh.yaml +101 -101
  307. package/templates/restaurant-direct.yaml +97 -97
  308. package/templates/ring4/banking-sovereign.yaml +55 -0
  309. package/templates/ring4/ecommerce-sovereign.yaml +58 -0
  310. package/templates/ring4/healthcare-sovereign.yaml +60 -0
@@ -1,288 +1,301 @@
1
- /**
2
- * WAB SDK — Auto-Discovery Fallback
3
- *
4
- * For sites that haven't installed WAB yet (no /.well-known/wab.json,
5
- * no _wab DNS TXT), this module produces a normalized capabilities envelope
6
- * by parsing publicly available metadata:
7
- * 1. /.well-known/wab.json (canonical)
8
- * 2. <script type="application/ld+json"> (JSON-LD / Schema.org)
9
- * 3. <meta property="og:*"> (OpenGraph)
10
- * 4. <meta name="description"> / <title>
11
- * 5. /sitemap.xml (URL inventory)
12
- * 6. /robots.txt (allow/disallow + Sitemap directives)
13
- *
14
- * The resulting envelope shape mirrors a minimal wab.json so downstream
15
- * code can treat unsigned sites uniformly:
16
- *
17
- * {
18
- * ok: boolean,
19
- * source: 'wab.json' | 'auto-discovery',
20
- * site: { name, description, url },
21
- * trust: { signed: false, ssl: { ... } },
22
- * actions: [ { name, description, source } ],
23
- * products: [ { name, sku, offers } ],
24
- * sitemap: [ url, ... ],
25
- * robots: { allow: [], disallow: [], sitemaps: [] }
26
- * }
27
- *
28
- * Pure JS, no external deps. Works in Node (with global fetch).
29
- */
30
-
31
- const { extractJsonLdBlocks, extractProductsFromHtml, suggestWabActionsFromProducts } =
32
- require('./schema-discovery');
33
-
34
- /* ------------------------------------------------------------------ */
35
- /* Helpers */
36
- /* ------------------------------------------------------------------ */
37
-
38
- function _abs(base, path) {
39
- try {
40
- return new URL(path, base).toString();
41
- } catch {
42
- return null;
43
- }
44
- }
45
-
46
- async function _fetchText(url, { timeoutMs = 8000 } = {}) {
47
- if (typeof fetch !== 'function') {
48
- throw new Error('global fetch() is required (Node 18+) for auto-discovery');
49
- }
50
- const ctrl = new AbortController();
51
- const t = setTimeout(() => ctrl.abort(), timeoutMs);
52
- try {
53
- const r = await fetch(url, {
54
- redirect: 'follow',
55
- signal: ctrl.signal,
56
- headers: { 'user-agent': 'wab-auto-discovery/1.0 (+https://webagentbridge.com)' }
57
- });
58
- if (!r.ok) return { ok: false, status: r.status, text: '' };
59
- return { ok: true, status: r.status, text: await r.text() };
60
- } catch (e) {
61
- return { ok: false, status: 0, text: '', error: e.message };
62
- } finally {
63
- clearTimeout(t);
64
- }
65
- }
66
-
67
- /* ------------------------------------------------------------------ */
68
- /* HTML metadata extractors */
69
- /* ------------------------------------------------------------------ */
70
-
71
- function extractMetaTags(html) {
72
- const out = { og: {}, twitter: {}, description: null, title: null };
73
- if (!html) return out;
74
-
75
- const titleM = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
76
- if (titleM) out.title = titleM[1].trim();
77
-
78
- const metaRe = /<meta\b[^>]*>/gi;
79
- let m;
80
- while ((m = metaRe.exec(html)) !== null) {
81
- const tag = m[0];
82
- const nameM = tag.match(/\bname\s*=\s*["']([^"']+)["']/i);
83
- const propM = tag.match(/\bproperty\s*=\s*["']([^"']+)["']/i);
84
- const contentM = tag.match(/\bcontent\s*=\s*["']([^"']*)["']/i);
85
- if (!contentM) continue;
86
- const content = contentM[1];
87
- const key = (propM && propM[1]) || (nameM && nameM[1]) || '';
88
- if (!key) continue;
89
- const lk = key.toLowerCase();
90
- if (lk === 'description') out.description = content;
91
- else if (lk.startsWith('og:')) out.og[lk.slice(3)] = content;
92
- else if (lk.startsWith('twitter:')) out.twitter[lk.slice(8)] = content;
93
- }
94
- return out;
95
- }
96
-
97
- /* ------------------------------------------------------------------ */
98
- /* Sitemap / robots */
99
- /* ------------------------------------------------------------------ */
100
-
101
- function parseSitemap(xml, { limit = 200 } = {}) {
102
- if (!xml) return [];
103
- const urls = [];
104
- const re = /<loc>\s*([^<\s]+)\s*<\/loc>/gi;
105
- let m;
106
- while ((m = re.exec(xml)) !== null && urls.length < limit) {
107
- urls.push(m[1]);
108
- }
109
- return urls;
110
- }
111
-
112
- function parseRobots(text) {
113
- const out = { allow: [], disallow: [], sitemaps: [], userAgents: [] };
114
- if (!text) return out;
115
- let currentUA = '*';
116
- for (const raw of text.split(/\r?\n/)) {
117
- const line = raw.replace(/#.*$/, '').trim();
118
- if (!line) continue;
119
- const idx = line.indexOf(':');
120
- if (idx < 0) continue;
121
- const key = line.slice(0, idx).trim().toLowerCase();
122
- const val = line.slice(idx + 1).trim();
123
- if (key === 'user-agent') {
124
- currentUA = val;
125
- if (!out.userAgents.includes(val)) out.userAgents.push(val);
126
- } else if (key === 'allow') out.allow.push({ ua: currentUA, path: val });
127
- else if (key === 'disallow') out.disallow.push({ ua: currentUA, path: val });
128
- else if (key === 'sitemap') out.sitemaps.push(val);
129
- }
130
- return out;
131
- }
132
-
133
- /* ------------------------------------------------------------------ */
134
- /* JSON-LD extras: WebSite + Organization + SearchAction */
135
- /* ------------------------------------------------------------------ */
136
-
137
- function extractSiteIdentity(html) {
138
- const blocks = extractJsonLdBlocks(html);
139
- const out = { name: null, description: null, url: null, search: null, organization: null };
140
- for (const text of blocks) {
141
- let data;
142
- try { data = JSON.parse(text); } catch { continue; }
143
- const items = Array.isArray(data) ? data : Array.isArray(data['@graph']) ? data['@graph'] : [data];
144
- for (const node of items) {
145
- if (!node || typeof node !== 'object') continue;
146
- let types = node['@type'];
147
- if (typeof types === 'string') types = [types];
148
- if (!Array.isArray(types)) types = [];
149
- if (types.includes('WebSite')) {
150
- out.name = out.name || node.name;
151
- out.url = out.url || node.url;
152
- out.description = out.description || node.description;
153
- const action = node.potentialAction;
154
- if (action && (Array.isArray(action) ? action[0] : action)) {
155
- const a = Array.isArray(action) ? action[0] : action;
156
- if (a && (a['@type'] === 'SearchAction' || /SearchAction/.test(String(a['@type'])))) {
157
- out.search = {
158
- target: typeof a.target === 'string' ? a.target : (a.target && a.target.urlTemplate),
159
- queryParam: a['query-input'] || a.queryInput || null
160
- };
161
- }
162
- }
163
- } else if (types.includes('Organization')) {
164
- out.organization = out.organization || { name: node.name, url: node.url, logo: node.logo };
165
- }
166
- }
167
- }
168
- return out;
169
- }
170
-
171
- /* ------------------------------------------------------------------ */
172
- /* Main entry */
173
- /* ------------------------------------------------------------------ */
174
-
175
- /**
176
- * Discover capabilities for a site. Tries /.well-known/wab.json first;
177
- * falls back to HTML/sitemap/robots scraping.
178
- *
179
- * @param {string} siteUrl e.g. "https://example.com"
180
- * @param {object} [opts]
181
- * @param {number} [opts.timeoutMs=8000]
182
- * @param {number} [opts.sitemapLimit=200]
183
- * @param {boolean} [opts.skipWabJson=false]
184
- * @returns {Promise<object>} normalized envelope
185
- */
186
- async function discover(siteUrl, opts = {}) {
187
- const timeoutMs = opts.timeoutMs || 8000;
188
- const sitemapLimit = opts.sitemapLimit || 200;
189
-
190
- let baseUrl;
191
- try { baseUrl = new URL(siteUrl).origin; }
192
- catch { return { ok: false, error: 'invalid_url', source: 'auto-discovery' }; }
193
-
194
- // 1) Canonical wab.json — if present, use as authoritative source.
195
- if (!opts.skipWabJson) {
196
- const wabUrl = _abs(baseUrl, '/.well-known/wab.json');
197
- const wabRes = await _fetchText(wabUrl, { timeoutMs });
198
- if (wabRes.ok && wabRes.text) {
199
- try {
200
- const parsed = JSON.parse(wabRes.text);
201
- return {
202
- ok: true,
203
- source: 'wab.json',
204
- site: { name: parsed.site || parsed.name, description: parsed.description, url: baseUrl },
205
- trust: { signed: !!parsed.sig, ...(parsed.trust || {}) },
206
- actions: Array.isArray(parsed.actions) ? parsed.actions : [],
207
- products: [],
208
- sitemap: [],
209
- robots: null,
210
- raw: parsed
211
- };
212
- } catch { /* fall through to auto-discovery */ }
213
- }
214
- }
215
-
216
- // 2) Fetch homepage HTML in parallel with sitemap + robots.
217
- const [homeRes, sitemapRes, robotsRes] = await Promise.all([
218
- _fetchText(baseUrl, { timeoutMs }),
219
- _fetchText(_abs(baseUrl, '/sitemap.xml'), { timeoutMs }),
220
- _fetchText(_abs(baseUrl, '/robots.txt'), { timeoutMs })
221
- ]);
222
-
223
- const html = homeRes.text || '';
224
- const meta = extractMetaTags(html);
225
- const ident = extractSiteIdentity(html);
226
- const products = extractProductsFromHtml(html);
227
- const robots = parseRobots(robotsRes.text || '');
228
- let sitemap = parseSitemap(sitemapRes.text || '', { limit: sitemapLimit });
229
-
230
- // Discover additional sitemaps from robots.
231
- if (sitemap.length === 0 && robots.sitemaps.length) {
232
- for (const sm of robots.sitemaps.slice(0, 3)) {
233
- const r = await _fetchText(sm, { timeoutMs });
234
- if (r.ok) sitemap = sitemap.concat(parseSitemap(r.text, { limit: sitemapLimit }));
235
- if (sitemap.length >= sitemapLimit) break;
236
- }
237
- }
238
-
239
- // Build action hints.
240
- const actions = suggestWabActionsFromProducts(products);
241
- if (ident.search && ident.search.target) {
242
- actions.push({
243
- name: 'searchSite',
244
- description: 'Schema.org SearchAction: ' + ident.search.target,
245
- source: 'schema.org/SearchAction',
246
- template: ident.search.target
247
- });
248
- }
249
- if (sitemap.length) {
250
- actions.push({
251
- name: 'browseSitemap',
252
- description: `${sitemap.length} URLs discovered from sitemap.xml`,
253
- source: 'sitemap.xml'
254
- });
255
- }
256
- if (meta.og && meta.og.url) {
257
- actions.push({
258
- name: 'getOpenGraph',
259
- description: 'OpenGraph metadata available',
260
- source: 'opengraph'
261
- });
262
- }
263
-
264
- return {
265
- ok: true,
266
- source: 'auto-discovery',
267
- site: {
268
- name: ident.name || meta.og.site_name || meta.title || baseUrl,
269
- description: ident.description || meta.description || meta.og.description || null,
270
- url: ident.url || meta.og.url || baseUrl
271
- },
272
- trust: { signed: false, auto: true },
273
- actions,
274
- products,
275
- sitemap,
276
- robots,
277
- meta: { og: meta.og, twitter: meta.twitter, title: meta.title, description: meta.description },
278
- organization: ident.organization || null
279
- };
280
- }
281
-
282
- module.exports = {
283
- discover,
284
- extractMetaTags,
285
- parseSitemap,
286
- parseRobots,
287
- extractSiteIdentity
288
- };
1
+ /**
2
+ * WAB SDK — Auto-Discovery Fallback
3
+ *
4
+ * For sites that haven't installed WAB yet (no /.well-known/wab.json,
5
+ * no _wab DNS TXT), this module produces a normalized capabilities envelope
6
+ * by parsing publicly available metadata:
7
+ * 1. /.well-known/wab.json (canonical)
8
+ * 2. <script type="application/ld+json"> (JSON-LD / Schema.org)
9
+ * 3. <meta property="og:*"> (OpenGraph)
10
+ * 4. <meta name="description"> / <title>
11
+ * 5. /sitemap.xml (URL inventory)
12
+ * 6. /robots.txt (allow/disallow + Sitemap directives)
13
+ *
14
+ * The resulting envelope shape mirrors a minimal wab.json so downstream
15
+ * code can treat unsigned sites uniformly:
16
+ *
17
+ * {
18
+ * ok: boolean,
19
+ * source: 'wab.json' | 'auto-discovery',
20
+ * site: { name, description, url },
21
+ * trust: { signed: false, ssl: { ... } },
22
+ * actions: [ { name, description, source } ],
23
+ * products: [ { name, sku, offers } ],
24
+ * sitemap: [ url, ... ],
25
+ * robots: { allow: [], disallow: [], sitemaps: [] }
26
+ * }
27
+ *
28
+ * Pure JS, no external deps. Works in Node (with global fetch).
29
+ */
30
+
31
+ const { extractJsonLdBlocks, extractProductsFromHtml, suggestWabActionsFromProducts } =
32
+ require('./schema-discovery');
33
+
34
+ /* ------------------------------------------------------------------ */
35
+ /* Helpers */
36
+ /* ------------------------------------------------------------------ */
37
+
38
+ function _abs(base, path) {
39
+ try {
40
+ return new URL(path, base).toString();
41
+ } catch {
42
+ return null;
43
+ }
44
+ }
45
+
46
+ async function _fetchText(url, { timeoutMs = 8000 } = {}) {
47
+ if (typeof fetch !== 'function') {
48
+ throw new Error('global fetch() is required (Node 18+) for auto-discovery');
49
+ }
50
+ const ctrl = new AbortController();
51
+ const t = setTimeout(() => ctrl.abort(), timeoutMs);
52
+ try {
53
+ const r = await fetch(url, {
54
+ redirect: 'follow',
55
+ signal: ctrl.signal,
56
+ headers: { 'user-agent': 'wab-auto-discovery/1.0 (+https://webagentbridge.com)' }
57
+ });
58
+ if (!r.ok) return { ok: false, status: r.status, text: '' };
59
+ return { ok: true, status: r.status, text: await r.text() };
60
+ } catch (e) {
61
+ return { ok: false, status: 0, text: '', error: e.message };
62
+ } finally {
63
+ clearTimeout(t);
64
+ }
65
+ }
66
+
67
+ /* ------------------------------------------------------------------ */
68
+ /* HTML metadata extractors */
69
+ /* ------------------------------------------------------------------ */
70
+
71
+ function extractMetaTags(html) {
72
+ const out = { og: {}, twitter: {}, description: null, title: null };
73
+ if (!html) return out;
74
+
75
+ const titleM = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
76
+ if (titleM) out.title = titleM[1].trim();
77
+
78
+ const metaRe = /<meta\b[^>]*>/gi;
79
+ let m;
80
+ while ((m = metaRe.exec(html)) !== null) {
81
+ const tag = m[0];
82
+ const nameM = tag.match(/\bname\s*=\s*["']([^"']+)["']/i);
83
+ const propM = tag.match(/\bproperty\s*=\s*["']([^"']+)["']/i);
84
+ const contentM = tag.match(/\bcontent\s*=\s*["']([^"']*)["']/i);
85
+ if (!contentM) continue;
86
+ const content = contentM[1];
87
+ const key = (propM && propM[1]) || (nameM && nameM[1]) || '';
88
+ if (!key) continue;
89
+ const lk = key.toLowerCase();
90
+ if (lk === 'description') out.description = content;
91
+ else if (lk.startsWith('og:')) out.og[lk.slice(3)] = content;
92
+ else if (lk.startsWith('twitter:')) out.twitter[lk.slice(8)] = content;
93
+ }
94
+ return out;
95
+ }
96
+
97
+ /* ------------------------------------------------------------------ */
98
+ /* Sitemap / robots */
99
+ /* ------------------------------------------------------------------ */
100
+
101
+ function parseSitemap(xml, { limit = 200 } = {}) {
102
+ if (!xml) return [];
103
+ const urls = [];
104
+ const re = /<loc>\s*([^<\s]+)\s*<\/loc>/gi;
105
+ let m;
106
+ while ((m = re.exec(xml)) !== null && urls.length < limit) {
107
+ urls.push(m[1]);
108
+ }
109
+ return urls;
110
+ }
111
+
112
/**
 * Parse a robots.txt document into flat lists of rules.
 *
 * Handling details:
 *   - `#` comments are stripped and blank lines ignored;
 *   - directive names are matched case-insensitively;
 *   - consecutive `User-agent` lines form one group, and every `Allow` /
 *     `Disallow` rule that follows is recorded once per agent in that group;
 *   - rules that appear before any `User-agent` line are attributed to `*`;
 *   - `Sitemap` lines are collected independently of groups and do not end
 *     a run of `User-agent` lines;
 *   - CRLF, LF and lone CR are all accepted as line terminators.
 *
 * @param {string} text Raw robots.txt content; any falsy value yields the empty result.
 * @returns {{allow: Array<{ua: string, path: string}>,
 *            disallow: Array<{ua: string, path: string}>,
 *            sitemaps: string[],
 *            userAgents: string[]}} Parsed directives in document order.
 */
function parseRobots(text) {
  const out = { allow: [], disallow: [], sitemaps: [], userAgents: [] };
  if (!text) return out;

  let group = ['*'];
  // True while the most recent group directive was a User-agent line, i.e.
  // the next User-agent line extends the current group instead of starting one.
  let collectingAgents = false;

  for (const raw of text.split(/\r\n|\r|\n/)) {
    const line = raw.replace(/#.*$/, '').trim();
    if (!line) continue;
    const idx = line.indexOf(':');
    if (idx < 0) continue;
    const key = line.slice(0, idx).trim().toLowerCase();
    const val = line.slice(idx + 1).trim();

    if (key === 'user-agent') {
      if (!collectingAgents) group = [];
      if (!group.includes(val)) group.push(val);
      collectingAgents = true;
      if (!out.userAgents.includes(val)) out.userAgents.push(val);
    } else if (key === 'allow' || key === 'disallow') {
      collectingAgents = false;
      for (const ua of group) out[key].push({ ua, path: val });
    } else if (key === 'sitemap') {
      out.sitemaps.push(val);
    }
  }
  return out;
}
132
+
133
+ /* ------------------------------------------------------------------ */
134
+ /* JSON-LD extras: WebSite + Organization + SearchAction */
135
+ /* ------------------------------------------------------------------ */
136
+
137
/**
 * Derive site identity information from the JSON-LD blocks of a page.
 *
 * Every block returned by `extractJsonLdBlocks(html)` is parsed as JSON.
 * Blocks that fail to parse, or that parse to a non-object value (for example
 * `null` or a number), are ignored. A top-level array or an `@graph` array is
 * expanded into its nodes.
 *
 *   - `WebSite` nodes supply `name`, `url` and `description` (first non-empty
 *     value wins) and, when one of their `potentialAction` entries is a
 *     SearchAction, the `search` descriptor (a later WebSite node with a
 *     SearchAction overwrites an earlier one).
 *   - The first `Organization` node supplies `organization`.
 *
 * @param {string} html Page HTML handed to `extractJsonLdBlocks`.
 * @returns {{name: ?string, description: ?string, url: ?string,
 *            search: ?{target: (string|undefined), queryParam: ?string},
 *            organization: ?{name: *, url: *, logo: *}}}
 *          Fields that could not be determined are `null`.
 */
function extractSiteIdentity(html) {
  const out = { name: null, description: null, url: null, search: null, organization: null };

  for (const text of extractJsonLdBlocks(html)) {
    let data;
    try { data = JSON.parse(text); } catch { continue; }
    // JSON.parse can legitimately yield null or a primitive; indexing into
    // those would throw, so skip anything that is not an object or array.
    if (!data || typeof data !== 'object') continue;

    const items = Array.isArray(data) ? data
      : Array.isArray(data['@graph']) ? data['@graph']
        : [data];

    for (const node of items) {
      if (!node || typeof node !== 'object') continue;

      const rawTypes = node['@type'];
      const types = typeof rawTypes === 'string' ? [rawTypes]
        : Array.isArray(rawTypes) ? rawTypes
          : [];

      if (types.includes('WebSite')) {
        out.name = out.name || node.name || null;
        out.url = out.url || node.url || null;
        out.description = out.description || node.description || null;

        // potentialAction may be a single object or a list; the SearchAction
        // is not guaranteed to be the first entry.
        const declared = node.potentialAction;
        const candidates = Array.isArray(declared) ? declared : declared ? [declared] : [];
        const searchAction = candidates.find(
          (c) => c && typeof c === 'object' && /SearchAction/.test(String(c['@type']))
        );
        if (searchAction) {
          const { target } = searchAction;
          out.search = {
            target: typeof target === 'string' ? target : (target && target.urlTemplate),
            queryParam: searchAction['query-input'] || searchAction.queryInput || null
          };
        }
      } else if (types.includes('Organization')) {
        out.organization = out.organization || { name: node.name, url: node.url, logo: node.logo };
      }
    }
  }
  return out;
}
170
+
171
+ /* ------------------------------------------------------------------ */
172
+ /* Main entry */
173
+ /* ------------------------------------------------------------------ */
174
+
175
/**
 * Discover capabilities for a site. Tries /.well-known/wab.json first;
 * falls back to HTML/sitemap/robots scraping.
 *
 * Resolution order:
 *   1. Unless `opts.skipWabJson` is set, `<origin>/.well-known/wab.json` is
 *      fetched. If it parses to a JSON object it is treated as authoritative
 *      and returned with `source: 'wab.json'`. Both the flat shape and the
 *      signed envelope `{ payload, signature }` are accepted. Anything else
 *      (fetch failure, invalid JSON, or JSON that is not an object) falls
 *      through to step 2.
 *   2. The homepage, `/sitemap.xml` and `/robots.txt` are fetched in parallel
 *      and combined into an envelope with `source: 'auto-discovery'`. When
 *      `/sitemap.xml` yields no URLs, up to three http(s) sitemaps named in
 *      robots.txt are fetched sequentially; the merged list never exceeds
 *      `sitemapLimit`.
 *
 * Only `http:` and `https:` URLs are accepted; anything else yields
 * `{ ok: false, error: 'invalid_url', source: 'auto-discovery' }`.
 *
 * @param {string} siteUrl e.g. "https://example.com" (only the origin is used)
 * @param {object} [opts]
 * @param {number} [opts.timeoutMs=8000] Passed to every fetch; falsy values use the default.
 * @param {number} [opts.sitemapLimit=200] Maximum sitemap URLs returned; falsy values use the default.
 * @param {boolean} [opts.skipWabJson=false] Skip step 1 and go straight to scraping.
 * @returns {Promise<object>} normalized envelope
 */
async function discover(siteUrl, opts = {}) {
  const timeoutMs = opts.timeoutMs || 8000;
  const sitemapLimit = opts.sitemapLimit || 200;
  const isHttpProtocol = (protocol) => protocol === 'http:' || protocol === 'https:';
  const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

  let siteLocation = null;
  try { siteLocation = new URL(siteUrl); } catch { /* reported as invalid_url below */ }
  // Non-http(s) schemes (file:, data:, javascript:, ...) have the opaque
  // origin "null", which would otherwise be used as the base URL.
  if (!siteLocation || !isHttpProtocol(siteLocation.protocol)) {
    return { ok: false, error: 'invalid_url', source: 'auto-discovery' };
  }
  const baseUrl = siteLocation.origin;

  // 1) Canonical wab.json — if present, use as authoritative source.
  if (!opts.skipWabJson) {
    const wabUrl = _abs(baseUrl, '/.well-known/wab.json');
    const wabRes = await _fetchText(wabUrl, { timeoutMs });
    if (wabRes.ok && wabRes.text) {
      let parsed = null;
      try { parsed = JSON.parse(wabRes.text); } catch { /* fall through to auto-discovery */ }
      // Valid JSON that is not an object (array, number, string) is not a
      // wab.json document and must not short-circuit auto-discovery.
      if (isPlainObject(parsed)) {
        // Support BOTH legacy flat shape AND signed envelope { payload, signature }.
        const inner = isPlainObject(parsed.payload) ? parsed.payload : parsed;
        const signed = !!parsed.signature;
        const actions = Array.isArray(inner.actions) ? inner.actions
          : (inner.capabilities && typeof inner.capabilities === 'object')
            ? Object.entries(inner.capabilities)
              .filter(([, v]) => v === true || typeof v === 'string')
              .map(([k, v]) => ({ name: k, description: typeof v === 'string' ? v : `capability: ${k}` }))
            : [];
        return {
          ok: true,
          source: 'wab.json',
          site: {
            name: inner.site || inner.name || inner.host || baseUrl,
            description: inner.description,
            url: inner.endpoint || inner.url || baseUrl
          },
          trust: { signed, ...(inner.trust || {}) },
          actions,
          products: [],
          sitemap: [],
          robots: null,
          raw: parsed
        };
      }
    }
  }

  // 2) Fetch homepage HTML in parallel with sitemap + robots.
  const [homeRes, sitemapRes, robotsRes] = await Promise.all([
    _fetchText(baseUrl, { timeoutMs }),
    _fetchText(_abs(baseUrl, '/sitemap.xml'), { timeoutMs }),
    _fetchText(_abs(baseUrl, '/robots.txt'), { timeoutMs })
  ]);

  const html = homeRes.text || '';
  const meta = extractMetaTags(html);
  const ident = extractSiteIdentity(html);
  const products = extractProductsFromHtml(html);
  const robots = parseRobots(robotsRes.text || '');
  let sitemap = parseSitemap(sitemapRes.text || '', { limit: sitemapLimit });

  // Discover additional sitemaps from robots.
  if (sitemap.length === 0 && robots.sitemaps.length) {
    // robots.txt is untrusted input: resolve each entry against the site and
    // keep only http(s) targets before fetching anything.
    const sitemapUrls = [];
    for (const candidate of robots.sitemaps) {
      let resolved;
      try { resolved = new URL(candidate, baseUrl); } catch { continue; }
      if (isHttpProtocol(resolved.protocol)) sitemapUrls.push(resolved.href);
    }
    for (const sm of sitemapUrls.slice(0, 3)) {
      const r = await _fetchText(sm, { timeoutMs });
      if (r.ok) {
        // Only ask for what is still missing so the merged list stays within sitemapLimit.
        const remaining = sitemapLimit - sitemap.length;
        sitemap = sitemap.concat(parseSitemap(r.text, { limit: remaining }));
      }
      if (sitemap.length >= sitemapLimit) break;
    }
  }

  // Build action hints.
  const actions = suggestWabActionsFromProducts(products);
  if (ident.search && ident.search.target) {
    actions.push({
      name: 'searchSite',
      description: 'Schema.org SearchAction: ' + ident.search.target,
      source: 'schema.org/SearchAction',
      template: ident.search.target
    });
  }
  if (sitemap.length) {
    actions.push({
      name: 'browseSitemap',
      description: `${sitemap.length} URLs discovered from sitemap.xml`,
      source: 'sitemap.xml'
    });
  }
  if (meta.og && meta.og.url) {
    actions.push({
      name: 'getOpenGraph',
      description: 'OpenGraph metadata available',
      source: 'opengraph'
    });
  }

  return {
    ok: true,
    source: 'auto-discovery',
    site: {
      name: ident.name || meta.og.site_name || meta.title || baseUrl,
      description: ident.description || meta.description || meta.og.description || null,
      url: ident.url || meta.og.url || baseUrl
    },
    trust: { signed: false, auto: true },
    actions,
    products,
    sitemap,
    robots,
    meta: { og: meta.og, twitter: meta.twitter, title: meta.title, description: meta.description },
    organization: ident.organization || null
  };
}
294
+
295
+ module.exports = {
296
+ discover,
297
+ extractMetaTags,
298
+ parseSitemap,
299
+ parseRobots,
300
+ extractSiteIdentity
301
+ };