npm - web-agent-bridge - Versions diffs - 3.2.0 → 3.3.0 - Mend

web-agent-bridge 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (202) hide show

package/LICENSE +72 -72
package/README.ar.md +1286 -1152
package/README.md +1764 -1635
package/bin/agent-runner.js +474 -474
package/bin/cli.js +237 -138
package/bin/wab.js +80 -80
package/examples/bidi-agent.js +119 -119
package/examples/cross-site-agent.js +91 -91
package/examples/mcp-agent.js +94 -94
package/examples/next-app-router/README.md +44 -44
package/examples/puppeteer-agent.js +108 -108
package/examples/saas-dashboard/README.md +55 -55
package/examples/shopify-hydrogen/README.md +74 -74
package/examples/vision-agent.js +171 -171
package/examples/wordpress-elementor/README.md +77 -77
package/package.json +16 -3
package/public/.well-known/agent-tools.json +180 -180
package/public/.well-known/ai-assets.json +59 -59
package/public/.well-known/security.txt +8 -0
package/public/agent-workspace.html +349 -349
package/public/ai.html +198 -198
package/public/api.html +413 -412
package/public/browser.html +486 -486
package/public/commander-dashboard.html +243 -243
package/public/cookies.html +210 -210
package/public/css/agent-workspace.css +1713 -1713
package/public/css/premium.css +317 -317
package/public/css/styles.css +1235 -1235
package/public/dashboard.html +706 -706
package/public/dns.html +507 -0
package/public/docs.html +587 -587
package/public/feed.xml +89 -89
package/public/growth.html +463 -463
package/public/index.html +1070 -982
package/public/integrations.html +556 -0
package/public/js/agent-workspace.js +1740 -1740
package/public/js/auth-nav.js +31 -31
package/public/js/auth-redirect.js +12 -12
package/public/js/cookie-consent.js +56 -56
package/public/js/wab-demo-page.js +721 -721
package/public/js/ws-client.js +74 -74
package/public/llms-full.txt +360 -360
package/public/llms.txt +125 -125
package/public/login.html +85 -85
package/public/mesh-dashboard.html +328 -328
package/public/openapi.json +580 -580
package/public/phone-shield.html +281 -0
package/public/premium-dashboard.html +2489 -2489
package/public/premium.html +793 -793
package/public/privacy.html +297 -297
package/public/register.html +105 -105
package/public/robots.txt +87 -87
package/public/script/wab-consent.d.ts +36 -36
package/public/script/wab-consent.js +104 -104
package/public/script/wab-schema.js +131 -131
package/public/script/wab.d.ts +108 -108
package/public/script/wab.min.js +580 -580
package/public/security.txt +8 -0
package/public/terms.html +256 -256
package/script/ai-agent-bridge.js +1754 -1754
package/sdk/README.md +99 -99
package/sdk/agent-mesh.js +449 -449
package/sdk/commander.js +262 -262
package/sdk/index.d.ts +464 -464
package/sdk/index.js +12 -1
package/sdk/multi-agent.js +318 -318
package/sdk/package.json +1 -1
package/sdk/safety-shield.js +219 -0
package/sdk/schema-discovery.js +83 -83
package/server/adapters/index.js +520 -520
package/server/config/plans.js +367 -367
package/server/config/secrets.js +102 -102
package/server/control-plane/index.js +301 -301
package/server/data-plane/index.js +354 -354
package/server/index.js +531 -427
package/server/llm/index.js +404 -404
package/server/middleware/adminAuth.js +35 -35
package/server/middleware/auth.js +50 -50
package/server/middleware/featureGate.js +88 -88
package/server/middleware/rateLimits.js +100 -100
package/server/middleware/sensitiveAction.js +157 -0
package/server/migrations/001_add_analytics_indexes.sql +7 -7
package/server/migrations/002_premium_features.sql +418 -418
package/server/migrations/003_ads_integer_cents.sql +33 -33
package/server/migrations/004_agent_os.sql +158 -158
package/server/migrations/005_marketplace_metering.sql +126 -126
package/server/models/adapters/index.js +33 -33
package/server/models/adapters/mysql.js +183 -183
package/server/models/adapters/postgresql.js +172 -172
package/server/models/adapters/sqlite.js +7 -7
package/server/models/db.js +681 -681
package/server/observability/failure-analysis.js +337 -337
package/server/observability/index.js +394 -394
package/server/protocol/capabilities.js +223 -223
package/server/protocol/index.js +243 -243
package/server/protocol/schema.js +584 -584
package/server/registry/certification.js +271 -271
package/server/registry/index.js +326 -326
package/server/routes/admin-premium.js +671 -671
package/server/routes/admin.js +261 -261
package/server/routes/ads.js +130 -130
package/server/routes/agent-workspace.js +540 -540
package/server/routes/api.js +150 -150
package/server/routes/auth.js +71 -71
package/server/routes/billing.js +45 -45
package/server/routes/commander.js +316 -316
package/server/routes/demo-showcase.js +332 -332
package/server/routes/demo-store.js +154 -0
package/server/routes/discovery.js +417 -417
package/server/routes/gateway.js +173 -157
package/server/routes/license.js +251 -240
package/server/routes/mesh.js +469 -469
package/server/routes/noscript.js +543 -543
package/server/routes/premium-v2.js +686 -686
package/server/routes/premium.js +724 -724
package/server/routes/runtime.js +2148 -2147
package/server/routes/sovereign.js +465 -385
package/server/routes/universal.js +200 -185
package/server/routes/wab-api.js +850 -501
package/server/runtime/container-worker.js +111 -111
package/server/runtime/container.js +448 -448
package/server/runtime/distributed-worker.js +362 -362
package/server/runtime/event-bus.js +210 -210
package/server/runtime/index.js +253 -253
package/server/runtime/queue.js +599 -599
package/server/runtime/replay.js +666 -666
package/server/runtime/sandbox.js +266 -266
package/server/runtime/scheduler.js +534 -534
package/server/runtime/session-engine.js +293 -293
package/server/runtime/state-manager.js +188 -188
package/server/security/cross-site-redactor.js +196 -0
package/server/security/dry-run.js +180 -0
package/server/security/human-gate-rate-limit.js +147 -0
package/server/security/human-gate-transports.js +178 -0
package/server/security/human-gate.js +281 -0
package/server/security/index.js +368 -368
package/server/security/intent-engine.js +245 -0
package/server/security/reward-guard.js +171 -0
package/server/security/rollback-store.js +239 -0
package/server/security/token-scope.js +404 -0
package/server/security/url-policy.js +139 -0
package/server/services/agent-chat.js +506 -506
package/server/services/agent-learning.js +601 -575
package/server/services/agent-memory.js +625 -625
package/server/services/agent-mesh.js +555 -539
package/server/services/agent-symphony.js +717 -717
package/server/services/agent-tasks.js +1807 -1807
package/server/services/api-key-engine.js +292 -261
package/server/services/cluster.js +894 -894
package/server/services/commander.js +738 -738
package/server/services/edge-compute.js +440 -440
package/server/services/email.js +204 -204
package/server/services/hosted-runtime.js +205 -205
package/server/services/lfd.js +635 -635
package/server/services/local-ai.js +389 -389
package/server/services/marketplace.js +270 -270
package/server/services/metering.js +182 -182
package/server/services/modules/affiliate-intelligence.js +93 -93
package/server/services/modules/agent-firewall.js +90 -90
package/server/services/modules/bounty.js +89 -89
package/server/services/modules/collective-bargaining.js +92 -92
package/server/services/modules/dark-pattern.js +66 -66
package/server/services/modules/gov-intelligence.js +45 -45
package/server/services/modules/neural.js +55 -55
package/server/services/modules/notary.js +49 -49
package/server/services/modules/price-time-machine.js +86 -86
package/server/services/modules/protocol.js +104 -104
package/server/services/negotiation.js +439 -439
package/server/services/plugins.js +771 -771
package/server/services/price-intelligence.js +566 -566
package/server/services/price-shield.js +1137 -1137
package/server/services/reputation.js +465 -465
package/server/services/search-engine.js +357 -357
package/server/services/security.js +513 -513
package/server/services/self-healing.js +843 -843
package/server/services/sovereign-shield.js +542 -0
package/server/services/stripe.js +192 -192
package/server/services/swarm.js +788 -788
package/server/services/universal-scraper.js +662 -661
package/server/services/verification.js +481 -481
package/server/services/vision.js +1163 -1163
package/server/utils/cache.js +125 -125
package/server/utils/migrate.js +81 -81
package/server/utils/safe-fetch.js +228 -0
package/server/utils/secureFields.js +50 -50
package/server/ws.js +161 -161
package/templates/artisan-marketplace.yaml +104 -104
package/templates/book-price-scout.yaml +98 -98
package/templates/electronics-price-tracker.yaml +108 -108
package/templates/flight-deal-hunter.yaml +113 -113
package/templates/freelancer-direct.yaml +116 -116
package/templates/grocery-price-compare.yaml +93 -93
package/templates/hotel-direct-booking.yaml +113 -113
package/templates/local-services.yaml +98 -98
package/templates/olive-oil-tunisia.yaml +88 -88
package/templates/organic-farm-fresh.yaml +101 -101
package/templates/restaurant-direct.yaml +97 -97
package/public/score.html +0 -263
package/server/migrations/006_growth_suite.sql +0 -138
package/server/routes/growth.js +0 -962
package/server/services/fairness-engine.js +0 -409
package/server/services/fairness.js +0 -420

package/server/security/token-scope.js ADDED Viewed

@@ -0,0 +1,404 @@
+'use strict';
+/**
+ * WAB Safety Shield — Scoped Session Tokens
+ *
+ * Implements the runtime side of WAB SPEC §8.7 (Scoped Session Tokens).
+ *
+ * Threat model: a leaked or compromised session token issued for one
+ * environment / one access level (e.g. `read` in `staging`) MUST NOT be
+ * usable to perform a destructive operation in production. This is the
+ * safety primitive that prevents PocketOS-class incidents where a single
+ * unscoped token straddles staging and production with full write access.
+ *
+ * Design:
+ *   - Scope is a triplet  (access, env, resources)
+ *       access     : 'read' | 'write' | 'admin'        (hierarchy: admin > write > read)
+ *       env        : 'sandbox' | 'staging' | 'production' | '*'  ('*' = any)
+ *       resources  : array of glob patterns OR ['*']    (default ['*'])
+ *   - A separate boolean axis: destructive=true is required for any command
+ *     that matches the SPEC default destructive verb list OR the site's
+ *     wab.json `destructiveActions` array. `read` scope NEVER allows
+ *     destructive, regardless of environment. `admin` always does.
+ *   - Scopes can only be NARROWED, never widened. When a token issuer is
+ *     itself scoped (delegation), the issued scope is the intersection of
+ *     parent and requested.
+ *
+ * Error codes (returned to clients):
+ *   INVALID_SCOPE              — scope string/object did not parse
+ *   INSUFFICIENT_SCOPE         — token doesn't satisfy required access level
+ *   ENV_MISMATCH               — token env doesn't include requested env
+ *   READONLY_VIOLATION         — read-scope token tried to perform a write
+ *   DESTRUCTIVE_REQUIRES_WRITE — token cannot perform destructive ops
+ *   RESOURCE_OUT_OF_SCOPE      — resource glob doesn't include the target
+ *
+ * This module is intentionally pure (no DB, no Express deps) so it can be
+ * unit-tested in isolation.
+ */
+// ─── Constants ───────────────────────────────────────────────────────
+const ACCESS_LEVELS = ['read', 'write', 'admin'];
+const ENVIRONMENTS = ['sandbox', 'staging', 'production'];
+const ANY_ENV = '*';
+const ANY_RESOURCE = '*';
+// SPEC §8.7.3 — default destructive verb list (lower-case). Sites may
+// extend this via wab.json `destructiveActions: [...]` and may suppress
+// individual verbs via `nonDestructiveActions: [...]`.
+const DEFAULT_DESTRUCTIVE_VERBS = Object.freeze([
+  'delete', 'destroy', 'drop', 'truncate', 'purge', 'wipe', 'erase',
+  'remove', 'unlink', 'rm', 'rmdir',
+  'reset', 'reinit', 'reformat', 'format',
+  'shutdown', 'terminate', 'kill',
+  'revoke', 'disable', 'deactivate',
+  'volume-delete', 'volumedelete', 'db-drop', 'database-drop',
+]);
+const DESTRUCTIVE_VERBS_SET = new Set(DEFAULT_DESTRUCTIVE_VERBS);
+// ─── Aliases (legacy / human-friendly inputs) ────────────────────────
+const ACCESS_ALIASES = {
+  readonly: 'read',
+  ro: 'read',
+  read: 'read',
+  rw: 'write',
+  write: 'write',
+  full: 'admin',
+  admin: 'admin',
+};
+const ENV_ALIASES = {
+  prod: 'production',
+  production: 'production',
+  live: 'production',
+  staging: 'staging',
+  stage: 'staging',
+  test: 'sandbox',
+  sandbox: 'sandbox',
+  dev: 'sandbox',
+  development: 'sandbox',
+};
+// ─── Errors ──────────────────────────────────────────────────────────
+class ScopeError extends Error {
+  constructor(code, message) {
+    super(message);
+    this.name = 'ScopeError';
+    this.code = code;
+  }
+}
+// ─── Parsing & canonicalisation ──────────────────────────────────────
+/**
+ * Parse arbitrary scope input into a canonical object.
+ *
+ * Accepts:
+ *   - undefined / null            → admin/* (legacy unscoped tokens)
+ *   - string  "readonly"          → { access:'read', env:['*'], resources:['*'] }
+ *   - string  "read:staging"      → { access:'read', env:['staging'], resources:['*'] }
+ *   - string  "write:staging,prod"→ { access:'write', env:['staging','production'], resources:['*'] }
+ *   - string  "read:*:cart.*"     → { access:'read', env:['*'], resources:['cart.*'] }
+ *   - object  { access, env, resources }
+ *
+ * Returns canonical: { access, envs:Set<string>|null, resources:string[],
+ *                      legacyUnscoped:bool }
+ *
+ * envs === null  ⇨  any environment ('*')
+ *
+ * Throws ScopeError('INVALID_SCOPE') on malformed input.
+ */
+function parseScope(input) {
+  // Legacy: no scope provided. Pre-§8.7 behaviour preserved for backward compat.
+  if (input == null || input === '' || input === '*') {
+    return {
+      access: 'admin',
+      envs: null,
+      resources: ['*'],
+      legacyUnscoped: true,
+    };
+  }
+  let access;
+  let envParts;
+  let resources;
+  if (typeof input === 'string') {
+    const segments = input.split(':').map((s) => s.trim()).filter(Boolean);
+    if (segments.length === 0) throw new ScopeError('INVALID_SCOPE', 'empty scope string');
+    access = _normalizeAccess(segments[0]);
+    envParts = segments[1] ? segments[1].split(',').map((s) => s.trim()) : ['*'];
+    resources = segments[2] ? segments[2].split(',').map((s) => s.trim()) : ['*'];
+  } else if (typeof input === 'object') {
+    access = _normalizeAccess(input.access || input.level || 'read');
+    const rawEnv = input.env != null ? input.env : (input.environment != null ? input.environment : '*');
+    envParts = Array.isArray(rawEnv) ? rawEnv : String(rawEnv).split(',');
+    envParts = envParts.map((s) => String(s).trim()).filter(Boolean);
+    if (envParts.length === 0) envParts = ['*'];
+    const rawRes = input.resources != null ? input.resources : '*';
+    resources = Array.isArray(rawRes) ? rawRes : [String(rawRes)];
+    resources = resources.map((s) => String(s).trim()).filter(Boolean);
+    if (resources.length === 0) resources = ['*'];
+  } else {
+    throw new ScopeError('INVALID_SCOPE', `unsupported scope input type: ${typeof input}`);
+  }
+  // Resolve envs.
+  let envs;
+  if (envParts.includes('*') || envParts.includes(ANY_ENV)) {
+    envs = null;
+  } else {
+    envs = new Set();
+    for (const e of envParts) {
+      const norm = ENV_ALIASES[e.toLowerCase()];
+      if (!norm) throw new ScopeError('INVALID_SCOPE', `unknown environment "${e}"`);
+      envs.add(norm);
+    }
+    if (envs.size === 0) envs = null;
+  }
+  // Validate resource patterns: limit to ASCII-safe glob, no spaces, ≤256 chars.
+  for (const r of resources) {
+    if (r.length > 256 || /[\s\x00-\x1f]/.test(r)) {
+      throw new ScopeError('INVALID_SCOPE', `invalid resource pattern "${r}"`);
+    }
+  }
+  return { access, envs, resources, legacyUnscoped: false };
+}
+function _normalizeAccess(raw) {
+  const a = ACCESS_ALIASES[String(raw).toLowerCase()];
+  if (!a) throw new ScopeError('INVALID_SCOPE', `unknown access level "${raw}"`);
+  return a;
+}
+/** Stable string form for logging / token serialisation. */
+function formatScope(scope) {
+  if (!scope) return '*';
+  if (scope.legacyUnscoped) return '*';
+  const env = scope.envs == null ? '*' : Array.from(scope.envs).sort().join(',');
+  const res = scope.resources.join(',');
+  return `${scope.access}:${env}:${res}`;
+}
+// ─── Hierarchy & intersection ────────────────────────────────────────
+function _accessRank(a) {
+  return ACCESS_LEVELS.indexOf(a);
+}
+/**
+ * Return the most restrictive scope that satisfies BOTH parent and child.
+ * Used when an issuer (already scoped) delegates a narrower scope to a
+ * sub-token — the result must never exceed the parent's authority.
+ *
+ * Throws ScopeError('INSUFFICIENT_SCOPE') if child requests more than parent.
+ */
+function intersectScopes(parent, child) {
+  const p = _ensureScope(parent);
+  const c = _ensureScope(child);
+  // Access: must be ≤ parent.
+  if (_accessRank(c.access) > _accessRank(p.access)) {
+    throw new ScopeError('INSUFFICIENT_SCOPE',
+      `requested access "${c.access}" exceeds parent "${p.access}"`);
+  }
+  // Envs.
+  let envs;
+  if (p.envs == null) {
+    envs = c.envs == null ? null : new Set(c.envs);
+  } else if (c.envs == null) {
+    envs = new Set(p.envs);
+  } else {
+    envs = new Set([...c.envs].filter((e) => p.envs.has(e)));
+    if (envs.size === 0) {
+      throw new ScopeError('ENV_MISMATCH', 'requested environments not allowed by parent');
+    }
+  }
+  // Resources: child must be a subset (or '*' which inherits parent).
+  let resources;
+  if (c.resources.length === 1 && c.resources[0] === '*') {
+    resources = [...p.resources];
+  } else if (p.resources.length === 1 && p.resources[0] === '*') {
+    resources = [...c.resources];
+  } else {
+    // Each child pattern must be covered by at least one parent pattern.
+    for (const cp of c.resources) {
+      const ok = p.resources.some((pp) => _resourceCovers(pp, cp));
+      if (!ok) {
+        throw new ScopeError('INSUFFICIENT_SCOPE',
+          `requested resource "${cp}" exceeds parent`);
+      }
+    }
+    resources = [...c.resources];
+  }
+  return { access: c.access, envs, resources, legacyUnscoped: false };
+}
+function _resourceCovers(parentPattern, childPattern) {
+  // Conservative: only consider trailing-* globs and exact equality.
+  if (parentPattern === '*' || parentPattern === childPattern) return true;
+  if (parentPattern.endsWith('.*') || parentPattern.endsWith('/*')) {
+    const prefix = parentPattern.slice(0, -1);
+    return childPattern === prefix.slice(0, -1) || childPattern.startsWith(prefix);
+  }
+  return false;
+}
+function _ensureScope(s) {
+  return s && typeof s === 'object' && 'access' in s ? s : parseScope(s);
+}
+// ─── Destructive verb classification ─────────────────────────────────
+/**
+ * Decide whether `actionName` (within `siteConfig`) is destructive.
+ *
+ * Order:
+ *   1. siteConfig.nonDestructiveActions[]  → forces non-destructive (override)
+ *   2. siteConfig.destructiveActions[]     → forces destructive
+ *   3. SPEC default destructive verb list  → fallback
+ */
+function isDestructiveAction(actionName, siteConfig = {}) {
+  if (!actionName) return false;
+  const raw = String(actionName);
+  // Insert a separator at camelCase boundaries BEFORE lowercasing so we can
+  // catch forms like "deleteVolume", "dropTable", "purgeBackups".
+  const camelExpanded = raw.replace(/([a-z0-9])([A-Z])/g, '$1-$2').toLowerCase();
+  const lower = camelExpanded;
+  const nonD = Array.isArray(siteConfig.nonDestructiveActions)
+    ? siteConfig.nonDestructiveActions.map((s) => String(s).toLowerCase())
+    : [];
+  if (nonD.includes(raw.toLowerCase()) || nonD.includes(lower)) return false;
+  const extra = Array.isArray(siteConfig.destructiveActions)
+    ? siteConfig.destructiveActions.map((s) => String(s).toLowerCase())
+    : [];
+  if (extra.includes(raw.toLowerCase()) || extra.includes(lower)) return true;
+  // Token-split match against the camelCase-expanded form.
+  const tokens = lower.split(/[\s.\-_/:]+/).filter(Boolean);
+  for (const t of tokens) {
+    if (DESTRUCTIVE_VERBS_SET.has(t)) return true;
+    if (extra.includes(t)) return true;
+  }
+  return false;
+}
+// ─── Authorisation decision ──────────────────────────────────────────
+/**
+ * The single authoritative authorisation check.
+ *
+ * @param {object} scope    Canonical scope (from parseScope).
+ * @param {object} command  { name, env, resource, action_kind?, destructive? }
+ *                          - name        : action identifier (e.g. 'delete')
+ *                          - env         : 'production' | 'staging' | 'sandbox'
+ *                          - resource    : optional resource id (e.g. 'orders.cart')
+ *                          - action_kind : 'read' | 'write' | 'admin'
+ *                                          (defaults: 'admin' if destructive,
+ *                                           'write' if name not GET-like, else 'read')
+ *                          - destructive : boolean override (otherwise inferred
+ *                                          via isDestructiveAction + siteConfig)
+ * @param {object} siteConfig parsed wab.json
+ *
+ * @returns {{allowed: true} | {allowed: false, code: string, reason: string}}
+ */
+function authorize(scope, command, siteConfig = {}) {
+  const sc = _ensureScope(scope);
+  const cmd = command || {};
+  const env = cmd.env ? ENV_ALIASES[String(cmd.env).toLowerCase()] || cmd.env : null;
+  // 1. Environment match.
+  if (sc.envs != null && env && !sc.envs.has(env)) {
+    return {
+      allowed: false,
+      code: 'ENV_MISMATCH',
+      reason: `token does not include environment "${env}" (allowed: ${[...sc.envs].join(',')})`,
+    };
+  }
+  // 2. Destructive flag.
+  const destructive = cmd.destructive === true ||
+    isDestructiveAction(cmd.name, siteConfig);
+  if (destructive) {
+    if (sc.access === 'read') {
+      return {
+        allowed: false,
+        code: 'DESTRUCTIVE_REQUIRES_WRITE',
+        reason: `destructive action "${cmd.name}" cannot be performed by a read-scope token`,
+      };
+    }
+    // write and admin both pass the destructive gate; site policy may still
+    // require admin via an explicit action_kind hint below.
+  }
+  // 3. Access level.
+  const requiredAccess = _requiredAccessForCommand(cmd, destructive);
+  if (_accessRank(sc.access) < _accessRank(requiredAccess)) {
+    const code = (sc.access === 'read' && requiredAccess === 'write')
+      ? 'READONLY_VIOLATION'
+      : 'INSUFFICIENT_SCOPE';
+    return {
+      allowed: false,
+      code,
+      reason: `command requires "${requiredAccess}" but token has "${sc.access}"`,
+    };
+  }
+  // 4. Resource glob.
+  if (cmd.resource && !_resourceMatchesAny(cmd.resource, sc.resources)) {
+    return {
+      allowed: false,
+      code: 'RESOURCE_OUT_OF_SCOPE',
+      reason: `resource "${cmd.resource}" not in token scope`,
+    };
+  }
+  return { allowed: true };
+}
+function _requiredAccessForCommand(cmd, destructive) {
+  if (cmd.action_kind && ACCESS_LEVELS.includes(cmd.action_kind)) return cmd.action_kind;
+  if (destructive) return 'write';
+  const READ_ONLY_PATTERNS = /^(read|get|list|search|find|view|page-info|ping|discover|actions)/i;
+  if (cmd.name && READ_ONLY_PATTERNS.test(String(cmd.name))) return 'read';
+  return 'write';
+}
+function _resourceMatchesAny(target, patterns) {
+  for (const p of patterns) {
+    if (p === '*' || p === target) return true;
+    if (p.endsWith('.*') || p.endsWith('/*')) {
+      const prefix = p.slice(0, -1);
+      if (target === prefix.slice(0, -1) || target.startsWith(prefix)) return true;
+    }
+  }
+  return false;
+}
+// ─── Public API (underscore-prefixed helpers above are not exported) ─
+module.exports = {
+  // parsing, serialisation, and delegation (scope narrowing)
+  parseScope,
+  formatScope,
+  intersectScopes,
+  // policy: the authorisation decision and destructive-action classifier
+  authorize,
+  isDestructiveAction,
+  // diagnostics / introspection: error class and the constant tables
+  ScopeError,
+  ACCESS_LEVELS,
+  ENVIRONMENTS,
+  DEFAULT_DESTRUCTIVE_VERBS,
+};

package/server/security/url-policy.js ADDED Viewed

@@ -0,0 +1,139 @@
+'use strict';
+/**
+ * URL Policy — guards public endpoints (e.g. /api/universal/extract) that
+ * accept arbitrary user URLs. Layered on top of the SSRF guard in
+ * server/utils/safe-fetch.js, this module enforces:
+ *
+ *   1. Scheme allow-list (http and https; https only when
+ *      WAB_URL_POLICY_HTTPS_ONLY=1).
+ *   2. Host denylist (DEFAULT_DENY_HOSTS, extendable via WAB_URL_DENY_HOSTS).
+ *   3. Path denylist for obvious admin/credential/wp-login style targets that
+ *      would suggest abuse.
+ *   4. Per-actor (IP / API-key / siteId) rate-limit independent of express
+ *      router-level rate limiting.
+ *
+ * Decisions are recorded in `url_policy_audit` for review.
+ */
+const crypto = require('crypto');
+const { db } = require('../models/db');
+const DEFAULT_DENY_HOSTS = [
+  // Local/private/metadata is already blocked by safe-fetch; these are
+  // additional public hosts that have no legitimate scraping use case.
+  'login.microsoftonline.com',
+  'accounts.google.com',
+  'appleid.apple.com',
+];
+const DEFAULT_DENY_PATH_RE = /\/(?:wp-(?:login|admin)|administrator|phpmyadmin|\.git|\.env)(?:\/|\.|$|\?)/i;
+const RATE_WINDOW_MS = 60_000;
+const RATE_MAX = parseInt(process.env.WAB_URL_POLICY_RATE_MAX || '30', 10);
+function _envHosts() {
+  return String(process.env.WAB_URL_DENY_HOSTS || '')
+    .split(',').map((s) => s.trim().toLowerCase()).filter(Boolean);
+}
+db.exec(`
+  CREATE TABLE IF NOT EXISTS url_policy_audit (
+    id TEXT PRIMARY KEY,
+    actor TEXT,
+    url TEXT,
+    decision TEXT NOT NULL CHECK(decision IN ('allowed','blocked','rate_limited')),
+    reason TEXT,
+    created_at TEXT DEFAULT (datetime('now'))
+  );
+  CREATE INDEX IF NOT EXISTS idx_urlpolicy_decision ON url_policy_audit(decision);
+`); // Runs at module load; IF NOT EXISTS makes re-running it a no-op. Creates the audit table written by _audit().
+const _rate = new Map(); // actor → [ts]
+function _hit(actor) {
+  const now = Date.now();
+  const arr = (_rate.get(actor) || []).filter((t) => now - t < RATE_WINDOW_MS);
+  arr.push(now);
+  _rate.set(actor, arr);
+  return arr.length;
+}
+function _audit(actor, url, decision, reason) {
+  try {
+    db.prepare(`INSERT INTO url_policy_audit (id, actor, url, decision, reason)
+                VALUES (?, ?, ?, ?, ?)`).run(
+      crypto.randomUUID(), actor || null, url || null, decision, reason || null);
+  } catch (_) { /* never block on audit failure */ }
+}
+/**
+ * @param {string} rawUrl
+ * @param {object} opts
+ * @param {string} [opts.actor] - IP, API key id, or site id
+ * @returns {{ ok:boolean, reason?:string, code?:string, parsed?:URL }}
+ */
+function check(rawUrl, opts = {}) {
+  const actor = opts.actor || 'anon';
+  if (typeof rawUrl !== 'string' || rawUrl.length === 0) {
+    _audit(actor, String(rawUrl).slice(0, 200), 'blocked', 'missing_url');
+    return { ok: false, reason: 'URL is required', code: 'MISSING_URL' };
+  }
+  if (rawUrl.length > 2048) {
+    _audit(actor, rawUrl.slice(0, 200), 'blocked', 'url_too_long');
+    return { ok: false, reason: 'URL exceeds 2048 characters', code: 'URL_TOO_LONG' };
+  }
+  let parsed;
+  try { parsed = new URL(rawUrl); }
+  catch {
+    _audit(actor, rawUrl.slice(0, 200), 'blocked', 'invalid_url');
+    return { ok: false, reason: 'Invalid URL', code: 'INVALID_URL' };
+  }
+  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
+    _audit(actor, rawUrl, 'blocked', `scheme:${parsed.protocol}`);
+    return { ok: false, reason: `Scheme ${parsed.protocol} not allowed`, code: 'BAD_SCHEME' };
+  }
+  if (process.env.WAB_URL_POLICY_HTTPS_ONLY === '1' && parsed.protocol !== 'https:') {
+    _audit(actor, rawUrl, 'blocked', 'http_disallowed');
+    return { ok: false, reason: 'HTTPS required', code: 'HTTPS_REQUIRED' };
+  }
+  const host = parsed.hostname.toLowerCase();
+  const deny = new Set([...DEFAULT_DENY_HOSTS, ..._envHosts()]);
+  if (deny.has(host)) {
+    _audit(actor, rawUrl, 'blocked', `host_denied:${host}`);
+    return { ok: false, reason: `Host ${host} is denied by policy`, code: 'HOST_DENIED' };
+  }
+  if (DEFAULT_DENY_PATH_RE.test(parsed.pathname)) {
+    _audit(actor, rawUrl, 'blocked', `path_denied:${parsed.pathname}`);
+    return { ok: false, reason: 'Path matches abuse pattern', code: 'PATH_DENIED' };
+  }
+  const count = _hit(actor);
+  if (count > RATE_MAX) {
+    _audit(actor, rawUrl, 'rate_limited', `count:${count}`);
+    return { ok: false, reason: `Rate limit exceeded (${RATE_MAX} URLs/min per actor)`, code: 'RATE_LIMITED' };
+  }
+  _audit(actor, rawUrl, 'allowed', null);
+  return { ok: true, parsed };
+}
+function getRecentAudits(limit = 100, decision) {
+  if (decision) {
+    return db.prepare(`SELECT * FROM url_policy_audit WHERE decision = ? ORDER BY rowid DESC LIMIT ?`).all(decision, limit);
+  }
+  return db.prepare(`SELECT * FROM url_policy_audit ORDER BY rowid DESC LIMIT ?`).all(limit);
+}
+function actorFromReq(req) {
+  return (req.wabAuth && req.wabAuth.key_id) ||
+         (req.user && req.user.id) ||
+         req.ip ||
+         'anon';
+}
+module.exports = { check, getRecentAudits, actorFromReq, RATE_MAX }; // public surface: policy check, audit reader, actor resolver, and the configured per-window limit