web-agent-bridge 3.3.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. package/LICENSE +84 -72
  2. package/README.ar.md +1563 -1286
  3. package/README.md +137 -1764
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -237
  6. package/bin/wab-init.js +244 -0
  7. package/bin/wab.js +80 -80
  8. package/examples/azure-dns-wab.js +83 -0
  9. package/examples/bidi-agent.js +119 -119
  10. package/examples/cloudflare-wab-dns.js +121 -0
  11. package/examples/cpanel-wab-dns.js +114 -0
  12. package/examples/cross-site-agent.js +91 -91
  13. package/examples/dns-discovery-agent.js +166 -0
  14. package/examples/gcp-dns-wab.js +76 -0
  15. package/examples/governance-agent.js +169 -0
  16. package/examples/mcp-agent.js +94 -94
  17. package/examples/next-app-router/README.md +44 -44
  18. package/examples/plesk-wab-dns.js +103 -0
  19. package/examples/puppeteer-agent.js +108 -108
  20. package/examples/route53-wab-dns.js +144 -0
  21. package/examples/saas-dashboard/README.md +55 -55
  22. package/examples/safe-mode-agent.js +96 -0
  23. package/examples/self-discovery.js +106 -0
  24. package/examples/shopify-hydrogen/README.md +74 -74
  25. package/examples/vision-agent.js +171 -171
  26. package/examples/wab-sign.js +74 -0
  27. package/examples/wab-verify.js +60 -0
  28. package/examples/wordpress-elementor/README.md +77 -77
  29. package/package.json +93 -93
  30. package/public/.well-known/agent-tools.json +180 -180
  31. package/public/.well-known/ai-assets.json +59 -59
  32. package/public/.well-known/security.txt +8 -8
  33. package/public/.well-known/wab.json +28 -0
  34. package/public/activate.html +448 -0
  35. package/public/adopt.html +236 -0
  36. package/public/adoption-metrics.html +188 -0
  37. package/public/agent-workspace.html +359 -349
  38. package/public/ai.html +198 -198
  39. package/public/api.html +397 -413
  40. package/public/azure-dns-integration.html +289 -0
  41. package/public/browser.html +486 -486
  42. package/public/cloudflare-integration.html +380 -0
  43. package/public/commander-dashboard.html +243 -243
  44. package/public/cookies.html +210 -210
  45. package/public/cpanel-integration.html +398 -0
  46. package/public/css/agent-workspace.css +1713 -1713
  47. package/public/css/premium.css +317 -317
  48. package/public/css/styles.css +1401 -1235
  49. package/public/dashboard-shieldlink.html +295 -0
  50. package/public/dashboard.html +711 -706
  51. package/public/dns.html +436 -507
  52. package/public/docs.html +588 -587
  53. package/public/enterprise-mesh.ar.html +80 -0
  54. package/public/enterprise-mesh.html +81 -0
  55. package/public/feed.xml +89 -89
  56. package/public/gcp-dns-integration.html +318 -0
  57. package/public/governance.ar.html +70 -0
  58. package/public/governance.html +69 -0
  59. package/public/growth.html +465 -463
  60. package/public/index.html +1372 -1070
  61. package/public/integrations.html +556 -556
  62. package/public/js/activate.js +449 -0
  63. package/public/js/agent-workspace.js +1740 -1740
  64. package/public/js/auth-nav.js +117 -31
  65. package/public/js/auth-redirect.js +12 -12
  66. package/public/js/cookie-consent.js +56 -56
  67. package/public/js/dns.js +438 -0
  68. package/public/js/wab-demo-page.js +721 -721
  69. package/public/js/ws-client.js +74 -74
  70. package/public/l-preview.html +242 -0
  71. package/public/llms-full.txt +360 -360
  72. package/public/llms.txt +125 -125
  73. package/public/login.html +85 -85
  74. package/public/mesh-dashboard.html +328 -328
  75. package/public/milestones.html +346 -0
  76. package/public/one-click.html +779 -0
  77. package/public/openapi.json +669 -580
  78. package/public/partners.ar.html +145 -0
  79. package/public/partners.html +143 -0
  80. package/public/phone-shield.html +281 -281
  81. package/public/plesk-integration.html +375 -0
  82. package/public/premium-dashboard.html +2489 -2489
  83. package/public/premium.html +793 -793
  84. package/public/privacy.html +297 -297
  85. package/public/provider-onboarding.html +172 -0
  86. package/public/provider-sandbox.html +134 -0
  87. package/public/providers.html +359 -0
  88. package/public/refusals.html +172 -0
  89. package/public/register.html +105 -105
  90. package/public/registrar-integrations.html +141 -0
  91. package/public/ring4.html +292 -0
  92. package/public/robots.txt +99 -87
  93. package/public/route53-integration.html +531 -0
  94. package/public/score.html +263 -0
  95. package/public/script/wab-consent.d.ts +36 -36
  96. package/public/script/wab-consent.js +104 -104
  97. package/public/script/wab-schema.js +131 -131
  98. package/public/script/wab.d.ts +108 -108
  99. package/public/script/wab.min.js +580 -580
  100. package/public/security.txt +8 -8
  101. package/public/shieldlink.html +244 -0
  102. package/public/shieldqr.html +231 -0
  103. package/public/sitemap.xml +19 -1
  104. package/public/terms.html +256 -256
  105. package/public/trust-graph-api.ar.html +92 -0
  106. package/public/trust-graph-api.html +91 -0
  107. package/public/wab-features.html +560 -0
  108. package/public/wab-trust.html +200 -0
  109. package/public/wab-truth.html +375 -0
  110. package/public/wab-vs-protocols.html +210 -0
  111. package/public/whitepaper.html +449 -0
  112. package/script/ai-agent-bridge.js +1754 -1754
  113. package/sdk/README.md +99 -99
  114. package/sdk/agent-mesh.js +449 -449
  115. package/sdk/auto-discovery.js +301 -0
  116. package/sdk/commander.js +262 -262
  117. package/sdk/governance.js +262 -0
  118. package/sdk/index.d.ts +464 -464
  119. package/sdk/index.js +649 -636
  120. package/sdk/multi-agent.js +318 -318
  121. package/sdk/package.json +2 -2
  122. package/sdk/safe-mode.js +221 -0
  123. package/sdk/safety-shield.js +219 -219
  124. package/sdk/schema-discovery.js +83 -83
  125. package/server/adapters/index.js +520 -520
  126. package/server/config/plans.js +412 -367
  127. package/server/config/secrets.js +102 -102
  128. package/server/control-plane/index.js +301 -301
  129. package/server/data-plane/index.js +354 -354
  130. package/server/index.js +790 -531
  131. package/server/llm/index.js +404 -404
  132. package/server/middleware/adminAuth.js +35 -35
  133. package/server/middleware/api-tier.js +170 -0
  134. package/server/middleware/auth.js +50 -50
  135. package/server/middleware/featureGate.js +88 -88
  136. package/server/middleware/rateLimits.js +100 -100
  137. package/server/middleware/sensitiveAction.js +157 -157
  138. package/server/middleware/wab-trust.js +141 -0
  139. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  140. package/server/migrations/002_premium_features.sql +418 -418
  141. package/server/migrations/003_ads_integer_cents.sql +33 -33
  142. package/server/migrations/004_agent_os.sql +158 -158
  143. package/server/migrations/005_marketplace_metering.sql +126 -126
  144. package/server/migrations/006_growth_suite.sql +138 -0
  145. package/server/migrations/007_governance.sql +106 -0
  146. package/server/migrations/008_plans.sql +144 -0
  147. package/server/migrations/009_shieldqr.sql +30 -0
  148. package/server/migrations/010_extended_trust.sql +33 -0
  149. package/server/migrations/011_outreach.sql +47 -0
  150. package/server/migrations/012_shieldlink.sql +116 -0
  151. package/server/migrations/013_ct_monitor.sql +13 -0
  152. package/server/migrations/014_wab_advanced_features.sql +128 -0
  153. package/server/migrations/015_wab_truth_layer.sql +101 -0
  154. package/server/migrations/016_ring4_external_trust.sql +84 -0
  155. package/server/migrations/017_ring4_extensions.sql +69 -0
  156. package/server/migrations/018_commercial_foundations.sql +167 -0
  157. package/server/migrations/019_unify_tier_constraints.sql +133 -0
  158. package/server/models/adapters/index.js +33 -33
  159. package/server/models/adapters/mysql.js +183 -183
  160. package/server/models/adapters/postgresql.js +172 -172
  161. package/server/models/adapters/sqlite.js +7 -7
  162. package/server/models/db.js +740 -681
  163. package/server/observability/failure-analysis.js +337 -337
  164. package/server/observability/index.js +394 -394
  165. package/server/protocol/capabilities.js +223 -223
  166. package/server/protocol/index.js +243 -243
  167. package/server/protocol/schema.js +584 -584
  168. package/server/registry/certification.js +271 -271
  169. package/server/registry/index.js +326 -326
  170. package/server/routes/activate.js +478 -0
  171. package/server/routes/admin-outreach.js +239 -0
  172. package/server/routes/admin-plans.js +76 -0
  173. package/server/routes/admin-premium.js +674 -671
  174. package/server/routes/admin-shieldlink.js +137 -0
  175. package/server/routes/admin-shieldqr.js +90 -0
  176. package/server/routes/admin-trust-monitor.js +139 -0
  177. package/server/routes/admin.js +550 -261
  178. package/server/routes/adopt.js +61 -0
  179. package/server/routes/ads.js +130 -130
  180. package/server/routes/agent-workspace.js +540 -540
  181. package/server/routes/api-keys.js +127 -0
  182. package/server/routes/api.js +150 -150
  183. package/server/routes/auth.js +71 -71
  184. package/server/routes/billing.js +57 -45
  185. package/server/routes/commander.js +316 -316
  186. package/server/routes/customer-shieldlink.js +133 -0
  187. package/server/routes/demo-showcase.js +332 -332
  188. package/server/routes/demo-store.js +154 -154
  189. package/server/routes/diagnose.js +373 -0
  190. package/server/routes/discovery.js +2348 -417
  191. package/server/routes/enterprise-mesh.js +170 -0
  192. package/server/routes/gateway.js +173 -173
  193. package/server/routes/governance-saas.js +203 -0
  194. package/server/routes/governance.js +208 -0
  195. package/server/routes/growth.js +1048 -0
  196. package/server/routes/intent.js +328 -0
  197. package/server/routes/license.js +251 -251
  198. package/server/routes/mesh.js +469 -469
  199. package/server/routes/noscript.js +543 -543
  200. package/server/routes/partners.js +201 -0
  201. package/server/routes/plans.js +33 -0
  202. package/server/routes/premium-v2.js +686 -686
  203. package/server/routes/premium.js +724 -724
  204. package/server/routes/providers.js +650 -0
  205. package/server/routes/reputation.js +411 -0
  206. package/server/routes/ring4.js +885 -0
  207. package/server/routes/runtime.js +2148 -2148
  208. package/server/routes/shieldlink.js +70 -0
  209. package/server/routes/shieldqr.js +88 -0
  210. package/server/routes/sovereign.js +465 -465
  211. package/server/routes/truth-layer.js +670 -0
  212. package/server/routes/universal.js +200 -200
  213. package/server/routes/unsubscribe.js +51 -0
  214. package/server/routes/wab-api.js +850 -850
  215. package/server/routes/wab-cache.js +282 -0
  216. package/server/runtime/container-worker.js +111 -111
  217. package/server/runtime/container.js +448 -448
  218. package/server/runtime/distributed-worker.js +362 -362
  219. package/server/runtime/event-bus.js +210 -210
  220. package/server/runtime/index.js +253 -253
  221. package/server/runtime/queue.js +599 -599
  222. package/server/runtime/replay.js +666 -666
  223. package/server/runtime/sandbox.js +266 -266
  224. package/server/runtime/scheduler.js +534 -534
  225. package/server/runtime/session-engine.js +293 -293
  226. package/server/runtime/state-manager.js +188 -188
  227. package/server/secrets/wab-signing-key.pem +3 -0
  228. package/server/secrets/wab-signing-pub.pem +3 -0
  229. package/server/security/cross-site-redactor.js +196 -196
  230. package/server/security/dry-run.js +180 -180
  231. package/server/security/human-gate-rate-limit.js +147 -147
  232. package/server/security/human-gate-transports.js +178 -178
  233. package/server/security/human-gate.js +281 -281
  234. package/server/security/index.js +368 -368
  235. package/server/security/intent-engine.js +245 -245
  236. package/server/security/reward-guard.js +171 -171
  237. package/server/security/rollback-store.js +239 -239
  238. package/server/security/token-scope.js +404 -404
  239. package/server/security/url-policy.js +139 -139
  240. package/server/services/adoption-agent.js +182 -0
  241. package/server/services/agent-chat.js +506 -506
  242. package/server/services/agent-learning.js +601 -601
  243. package/server/services/agent-memory.js +625 -625
  244. package/server/services/agent-mesh.js +555 -555
  245. package/server/services/agent-symphony.js +717 -717
  246. package/server/services/agent-tasks.js +1807 -1807
  247. package/server/services/api-key-engine.js +292 -292
  248. package/server/services/cluster.js +894 -894
  249. package/server/services/commander.js +738 -738
  250. package/server/services/edge-compute.js +440 -440
  251. package/server/services/email.js +233 -204
  252. package/server/services/fairness-engine.js +409 -0
  253. package/server/services/fairness.js +420 -0
  254. package/server/services/governance.js +466 -0
  255. package/server/services/hosted-runtime.js +205 -205
  256. package/server/services/lfd.js +635 -635
  257. package/server/services/local-ai.js +389 -389
  258. package/server/services/marketplace.js +270 -270
  259. package/server/services/metering.js +182 -182
  260. package/server/services/modules/affiliate-intelligence.js +93 -93
  261. package/server/services/modules/agent-firewall.js +90 -90
  262. package/server/services/modules/bounty.js +89 -89
  263. package/server/services/modules/collective-bargaining.js +92 -92
  264. package/server/services/modules/dark-pattern.js +66 -66
  265. package/server/services/modules/gov-intelligence.js +45 -45
  266. package/server/services/modules/neural.js +55 -55
  267. package/server/services/modules/notary.js +49 -49
  268. package/server/services/modules/price-time-machine.js +86 -86
  269. package/server/services/modules/protocol.js +104 -104
  270. package/server/services/negotiation.js +439 -439
  271. package/server/services/outreach-agent.js +312 -0
  272. package/server/services/plans.js +214 -0
  273. package/server/services/plugins.js +771 -771
  274. package/server/services/premium.js +1 -1
  275. package/server/services/price-intelligence.js +566 -566
  276. package/server/services/price-shield.js +1137 -1137
  277. package/server/services/provider-clients.js +740 -0
  278. package/server/services/reputation.js +465 -465
  279. package/server/services/search-engine.js +357 -357
  280. package/server/services/security.js +513 -513
  281. package/server/services/self-healing.js +843 -843
  282. package/server/services/shieldlink.js +492 -0
  283. package/server/services/shieldqr.js +322 -0
  284. package/server/services/sovereign-shield.js +542 -542
  285. package/server/services/ssl-ct-monitor.js +224 -0
  286. package/server/services/ssl-inspector.js +42 -0
  287. package/server/services/ssl-monitor.js +167 -0
  288. package/server/services/stripe.js +206 -192
  289. package/server/services/swarm.js +788 -788
  290. package/server/services/universal-scraper.js +662 -662
  291. package/server/services/verification.js +481 -481
  292. package/server/services/vision.js +1163 -1163
  293. package/server/services/wab-crypto.js +178 -0
  294. package/server/utils/cache.js +125 -125
  295. package/server/utils/migrate.js +81 -81
  296. package/server/utils/safe-fetch.js +228 -228
  297. package/server/utils/secureFields.js +50 -50
  298. package/server/ws.js +161 -161
  299. package/templates/artisan-marketplace.yaml +104 -104
  300. package/templates/book-price-scout.yaml +98 -98
  301. package/templates/electronics-price-tracker.yaml +108 -108
  302. package/templates/flight-deal-hunter.yaml +113 -113
  303. package/templates/freelancer-direct.yaml +116 -116
  304. package/templates/grocery-price-compare.yaml +93 -93
  305. package/templates/hotel-direct-booking.yaml +113 -113
  306. package/templates/local-services.yaml +98 -98
  307. package/templates/olive-oil-tunisia.yaml +88 -88
  308. package/templates/organic-farm-fresh.yaml +101 -101
  309. package/templates/restaurant-direct.yaml +97 -97
  310. package/templates/ring4/banking-sovereign.yaml +55 -0
  311. package/templates/ring4/ecommerce-sovereign.yaml +58 -0
  312. package/templates/ring4/healthcare-sovereign.yaml +60 -0
@@ -1,662 +1,662 @@
1
- /**
2
- * WAB Universal Scraper Engine
3
- * ═══════════════════════════════════════════════════════════════════
4
- * Works with ANY website — no script installation required.
5
- * Extracts prices, products, availability from raw HTML/DOM.
6
- *
7
- * Three extraction modes:
8
- * 1. Schema.org JSON-LD (structured, most reliable)
9
- * 2. Open Graph / Meta tags (semi-structured)
10
- * 3. DOM pattern matching (heuristic, any site)
11
- *
12
- * Used by: WAB Browser (webview), Chrome Extension (content script),
13
- * Server-side fetch (Node.js)
14
- */
15
-
16
- const crypto = require('crypto');
17
- const { db } = require('../models/db');
18
- const { safeFetch } = require('../utils/safe-fetch');
19
-
20
- // ─── Schema ──────────────────────────────────────────────────────────
21
-
22
- db.exec(`
23
- CREATE TABLE IF NOT EXISTS scraped_prices (
24
- id TEXT PRIMARY KEY,
25
- url TEXT NOT NULL,
26
- domain TEXT NOT NULL,
27
- product_name TEXT,
28
- price REAL,
29
- currency TEXT DEFAULT 'USD',
30
- original_price REAL,
31
- availability TEXT,
32
- rating REAL,
33
- review_count INTEGER,
34
- seller TEXT,
35
- category TEXT,
36
- extraction_method TEXT,
37
- raw_data TEXT DEFAULT '{}',
38
- scraped_at TEXT DEFAULT (datetime('now'))
39
- );
40
-
41
- CREATE TABLE IF NOT EXISTS universal_price_history (
42
- id TEXT PRIMARY KEY,
43
- url_hash TEXT NOT NULL,
44
- domain TEXT NOT NULL,
45
- product_name TEXT,
46
- price REAL,
47
- currency TEXT DEFAULT 'USD',
48
- recorded_at TEXT DEFAULT (datetime('now'))
49
- );
50
-
51
- CREATE INDEX IF NOT EXISTS idx_scraped_domain ON scraped_prices(domain);
52
- CREATE INDEX IF NOT EXISTS idx_scraped_url ON scraped_prices(url);
53
- CREATE INDEX IF NOT EXISTS idx_uph_hash ON universal_price_history(url_hash);
54
- `);
55
-
56
- const stmts = {
57
- insertScraped: db.prepare(`INSERT OR REPLACE INTO scraped_prices
58
- (id, url, domain, product_name, price, currency, original_price,
59
- availability, rating, review_count, seller, category, extraction_method, raw_data)
60
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
61
- getByUrl: db.prepare('SELECT * FROM scraped_prices WHERE url = ? ORDER BY scraped_at DESC LIMIT 1'),
62
- getByDomain: db.prepare('SELECT * FROM scraped_prices WHERE domain = ? ORDER BY scraped_at DESC LIMIT ?'),
63
- insertHistory: db.prepare(`INSERT INTO universal_price_history
64
- (id, url_hash, domain, product_name, price, currency) VALUES (?, ?, ?, ?, ?, ?)`),
65
- getHistory: db.prepare('SELECT * FROM universal_price_history WHERE url_hash = ? ORDER BY recorded_at DESC LIMIT ?'),
66
- };
67
-
68
- // ─── Currency Normalization ──────────────────────────────────────────
69
-
70
- const CURRENCY_MAP = {
71
- '$': 'USD', 'USD': 'USD', 'US$': 'USD',
72
- '€': 'EUR', 'EUR': 'EUR',
73
- '£': 'GBP', 'GBP': 'GBP',
74
- 'SAR': 'SAR', 'ريال': 'SAR', 'ر.س': 'SAR',
75
- 'AED': 'AED', 'درهم': 'AED', 'د.إ': 'AED',
76
- 'TND': 'TND', 'دينار': 'TND', 'د.ت': 'TND',
77
- 'EGP': 'EGP', 'ج.م': 'EGP',
78
- 'MAD': 'MAD', 'د.م': 'MAD',
79
- 'TRY': 'TRY', '₺': 'TRY',
80
- 'JPY': 'JPY', '¥': 'JPY',
81
- 'INR': 'INR', '₹': 'INR',
82
- 'KRW': 'KRW', '₩': 'KRW',
83
- };
84
-
85
- // Approximate USD rates for comparison
86
- const TO_USD = {
87
- USD: 1, EUR: 1.08, GBP: 1.27, SAR: 0.27, AED: 0.27,
88
- TND: 0.32, EGP: 0.032, MAD: 0.10, TRY: 0.031,
89
- JPY: 0.0067, INR: 0.012, KRW: 0.00074,
90
- };
91
-
92
- function normalizeCurrency(symbol) {
93
- if (!symbol) return 'USD';
94
- const s = symbol.trim().toUpperCase();
95
- return CURRENCY_MAP[s] || CURRENCY_MAP[symbol.trim()] || 'USD';
96
- }
97
-
98
- function toUSD(price, currency) {
99
- const rate = TO_USD[currency] || 1;
100
- return Math.round(price * rate * 100) / 100;
101
- }
102
-
103
- // ─── Price Extraction ────────────────────────────────────────────────
104
-
105
- const PRICE_PATTERNS = [
106
- // $123.45 or $ 123.45
107
- /(?<currency>\$|USD|US\$)\s*(?<price>[\d,]+\.?\d*)/gi,
108
- // €123.45
109
- /(?<currency>€|EUR)\s*(?<price>[\d,]+\.?\d*)/gi,
110
- // £123.45
111
- /(?<currency>£|GBP)\s*(?<price>[\d,]+\.?\d*)/gi,
112
- // 123.45 SAR / ريال
113
- /(?<price>[\d,]+\.?\d*)\s*(?<currency>SAR|ريال|ر\.س|AED|درهم|د\.إ|TND|دينار|د\.ت|EGP|ج\.م|MAD|د\.م)/gi,
114
- // ₺ ₹ ¥ ₩ prefixed
115
- /(?<currency>[₺₹¥₩])\s*(?<price>[\d,]+\.?\d*)/gi,
116
- // 123.45$ (suffix dollar)
117
- /(?<price>[\d,]+\.?\d*)\s*(?<currency>\$|€|£)/gi,
118
- ];
119
-
120
- function extractPrices(text) {
121
- const prices = [];
122
- const seen = new Set();
123
-
124
- for (const pattern of PRICE_PATTERNS) {
125
- pattern.lastIndex = 0;
126
- let m;
127
- while ((m = pattern.exec(text)) !== null) {
128
- const raw = m.groups?.price || m[2] || m[1];
129
- const currSymbol = m.groups?.currency || m[1] || m[2];
130
- if (!raw) continue;
131
-
132
- const num = parseFloat(raw.replace(/,/g, ''));
133
- if (isNaN(num) || num <= 0 || num > 1000000) continue;
134
-
135
- const currency = normalizeCurrency(currSymbol);
136
- const key = `${num}-${currency}`;
137
- if (seen.has(key)) continue;
138
- seen.add(key);
139
-
140
- prices.push({ price: num, currency, usd: toUSD(num, currency), raw: m[0].trim() });
141
- }
142
- }
143
-
144
- return prices.sort((a, b) => a.usd - b.usd);
145
- }
146
-
147
- // ─── Schema.org JSON-LD Extraction ───────────────────────────────────
148
-
149
- function extractJsonLd(html) {
150
- const products = [];
151
- const regex = /<script[^>]+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
152
- let m;
153
-
154
- while ((m = regex.exec(html)) !== null) {
155
- try {
156
- let data = JSON.parse(m[1].trim());
157
- if (Array.isArray(data)) data.forEach(d => processJsonLd(d, products));
158
- else processJsonLd(data, products);
159
- } catch (_) {}
160
- }
161
-
162
- return products;
163
- }
164
-
165
- function processJsonLd(data, products) {
166
- if (!data || typeof data !== 'object') return;
167
-
168
- // Handle @graph arrays
169
- if (data['@graph'] && Array.isArray(data['@graph'])) {
170
- data['@graph'].forEach(item => processJsonLd(item, products));
171
- return;
172
- }
173
-
174
- const type = (data['@type'] || '').toLowerCase();
175
-
176
- if (type === 'product' || type === 'hotel' || type === 'hotelroom' ||
177
- type === 'lodgingbusiness' || type === 'offer') {
178
- const offers = data.offers || data.priceSpecification || {};
179
- const offer = Array.isArray(offers) ? offers[0] : offers;
180
-
181
- products.push({
182
- name: data.name || data.headline || null,
183
- price: parseFloat(offer?.price || offer?.lowPrice || data.price) || null,
184
- originalPrice: parseFloat(offer?.highPrice) || null,
185
- currency: offer?.priceCurrency || 'USD',
186
- availability: offer?.availability?.replace('https://schema.org/', '').replace('http://schema.org/', '') || null,
187
- rating: parseFloat(data.aggregateRating?.ratingValue) || null,
188
- reviewCount: parseInt(data.aggregateRating?.reviewCount || data.aggregateRating?.ratingCount) || null,
189
- image: data.image?.url || (typeof data.image === 'string' ? data.image : null),
190
- description: (data.description || '').slice(0, 500),
191
- brand: data.brand?.name || data.brand || null,
192
- sku: data.sku || null,
193
- url: data.url || null,
194
- method: 'json-ld',
195
- });
196
- }
197
-
198
- // Recurse into nested objects
199
- for (const key of Object.keys(data)) {
200
- if (typeof data[key] === 'object' && data[key] !== null && key !== '@context') {
201
- if (Array.isArray(data[key])) {
202
- data[key].forEach(item => {
203
- if (typeof item === 'object') processJsonLd(item, products);
204
- });
205
- } else {
206
- processJsonLd(data[key], products);
207
- }
208
- }
209
- }
210
- }
211
-
212
- // ─── Open Graph / Meta Tag Extraction ────────────────────────────────
213
-
214
- function extractMetaTags(html) {
215
- const meta = {};
216
- const metaRegex = /<meta\s+(?:[^>]*?(?:property|name)=["']([^"']+)["'][^>]*?content=["']([^"']*?)["']|[^>]*?content=["']([^"']*?)["'][^>]*?(?:property|name)=["']([^"']+)["'])[^>]*\/?>/gi;
217
- let m;
218
-
219
- while ((m = metaRegex.exec(html)) !== null) {
220
- const key = (m[1] || m[4] || '').toLowerCase();
221
- const value = m[2] || m[3] || '';
222
- if (key && value) meta[key] = value;
223
- }
224
-
225
- const product = {};
226
- if (meta['og:title']) product.name = meta['og:title'];
227
- if (meta['product:price:amount']) product.price = parseFloat(meta['product:price:amount']);
228
- if (meta['product:price:currency']) product.currency = meta['product:price:currency'];
229
- if (meta['og:description']) product.description = meta['og:description'].slice(0, 500);
230
- if (meta['og:image']) product.image = meta['og:image'];
231
- if (meta['product:availability']) product.availability = meta['product:availability'];
232
- if (meta['product:brand']) product.brand = meta['product:brand'];
233
- if (meta['og:type']) product.type = meta['og:type'];
234
- if (meta['og:url']) product.url = meta['og:url'];
235
-
236
- if (product.name || product.price) {
237
- product.method = 'meta-tags';
238
- return product;
239
- }
240
- return null;
241
- }
242
-
243
- // ─── DOM Heuristic Extraction ────────────────────────────────────────
244
- // This runs either on server (from fetched HTML) or in browser (content script)
245
-
246
- function extractFromHtml(html, url) {
247
- const results = [];
248
- const domain = _extractDomain(url);
249
-
250
- // 1. Try JSON-LD first (most reliable)
251
- const jsonLdProducts = extractJsonLd(html);
252
- if (jsonLdProducts.length > 0) {
253
- results.push(...jsonLdProducts);
254
- }
255
-
256
- // 2. Try Open Graph meta tags
257
- const metaProduct = extractMetaTags(html);
258
- if (metaProduct && metaProduct.price) {
259
- results.push(metaProduct);
260
- }
261
-
262
- // 3. Heuristic price extraction from common patterns
263
- const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
264
- const pageTitle = titleMatch ? titleMatch[1].replace(/\s+/g, ' ').trim() : '';
265
-
266
- // Extract potential product containers
267
- const priceContainerPatterns = [
268
- // Common e-commerce price selectors reflected as class names
269
- /class="[^"]*(?:price|cost|amount|tarif|سعر|ثمن)[^"]*"[^>]*>([^<]{1,100})</g,
270
- /class="[^"]*(?:product-price|item-price|sale-price|offer-price|current-price)[^"]*"[^>]*>([^<]{1,100})</g,
271
- /class="[^"]*(?:room-price|rate-price|nightly-rate|total-price)[^"]*"[^>]*>([^<]{1,100})</g,
272
- // data-price attributes
273
- /data-price=["']([^"']+)["']/gi,
274
- /data-product-price=["']([^"']+)["']/gi,
275
- ];
276
-
277
- const rawPrices = [];
278
- for (const pattern of priceContainerPatterns) {
279
- let m;
280
- while ((m = pattern.exec(html)) !== null) {
281
- const extracted = extractPrices(m[1]);
282
- rawPrices.push(...extracted);
283
- }
284
- }
285
-
286
- // If no structured prices found, do a broad sweep
287
- if (results.length === 0 && rawPrices.length === 0) {
288
- // Extract from visible text areas (skip scripts/styles)
289
- const cleaned = html
290
- .replace(/<script[\s\S]*?<\/script>/gi, '')
291
- .replace(/<style[\s\S]*?<\/style>/gi, '')
292
- .replace(/<[^>]+>/g, ' ')
293
- .replace(/\s+/g, ' ');
294
- const broadPrices = extractPrices(cleaned);
295
- rawPrices.push(...broadPrices);
296
- }
297
-
298
- // Deduplicate and create heuristic results
299
- if (rawPrices.length > 0 && results.length === 0) {
300
- // Filter: likely product prices (not phone numbers, years, etc.)
301
- const validPrices = rawPrices.filter(p =>
302
- p.usd >= 1 && p.usd <= 50000 &&
303
- !`${p.price}`.match(/^(19|20)\d{2}$/) // not a year
304
- );
305
-
306
- if (validPrices.length > 0) {
307
- const sorted = validPrices.sort((a, b) => a.usd - b.usd);
308
- results.push({
309
- name: pageTitle || domain,
310
- price: sorted[0].price,
311
- currency: sorted[0].currency,
312
- originalPrice: sorted.length > 1 ? sorted[sorted.length - 1].price : null,
313
- method: 'heuristic',
314
- allPrices: sorted.slice(0, 10),
315
- });
316
- }
317
- }
318
-
319
- return results;
320
- }
321
-
322
- // ─── Server-side Fetch & Extract ─────────────────────────────────────
323
-
324
- const UA_LIST = [
325
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
326
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
327
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
328
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15',
329
- ];
330
-
331
- async function fetchAndExtract(url, options = {}) {
332
- const domain = _extractDomain(url);
333
-
334
- // Check cache (< 1 hour old)
335
- const cached = stmts.getByUrl.get(url);
336
- if (cached && !options.force) {
337
- const age = Date.now() - new Date(cached.scraped_at).getTime();
338
- if (age < 3600000) return { cached: true, ...JSON.parse(cached.raw_data), products: [cached] };
339
- }
340
-
341
- try {
342
- const ua = UA_LIST[Math.floor(Math.random() * UA_LIST.length)];
343
-
344
- const resp = await safeFetch(url, {
345
- headers: {
346
- 'User-Agent': ua,
347
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
348
- 'Accept-Language': 'en-US,en;q=0.9,ar;q=0.8',
349
- 'Accept-Encoding': 'gzip, deflate, br',
350
- 'Sec-Fetch-Dest': 'document',
351
- 'Sec-Fetch-Mode': 'navigate',
352
- 'Sec-Fetch-Site': 'none',
353
- },
354
- }, {
355
- timeoutMs: options.timeout || 10000,
356
- maxBytes: options.maxBytes || 5 * 1024 * 1024,
357
- maxRedirects: 3,
358
- allowedContentTypes: ['text/html', 'application/xhtml', 'application/xml'],
359
- });
360
-
361
- if (!resp.ok) return { error: `HTTP ${resp.status}`, products: [] };
362
-
363
- const contentType = resp.headers.get('content-type') || '';
364
- if (!contentType.includes('text/html') && !contentType.includes('application/xhtml')) {
365
- return { error: 'Not HTML', products: [] };
366
- }
367
-
368
- const html = await resp.text();
369
- const products = extractFromHtml(html, url);
370
-
371
- // Store results
372
- for (const p of products) {
373
- const id = crypto.randomUUID();
374
- stmts.insertScraped.run(
375
- id, url, domain,
376
- p.name || null, p.price || null, p.currency || 'USD',
377
- p.originalPrice || null, p.availability || null,
378
- p.rating || null, p.reviewCount || null,
379
- p.seller || p.brand || null, p.category || null,
380
- p.method || 'unknown', JSON.stringify(p)
381
- );
382
-
383
- // Record price history
384
- const urlHash = crypto.createHash('sha256').update(url).digest('hex').slice(0, 16);
385
- stmts.insertHistory.run(
386
- crypto.randomUUID(), urlHash, domain,
387
- p.name || null, p.price || null, p.currency || 'USD'
388
- );
389
- }
390
-
391
- return { products, domain, url, fetchedAt: new Date().toISOString() };
392
- } catch (err) {
393
- return { error: err.message, products: [] };
394
- }
395
- }
396
-
397
- // ─── Browser-side extraction (for content script / WAB Browser) ──────
398
- // This generates a script that can be injected into any page via
399
- // webview.executeJavaScript() or chrome content script
400
-
401
/**
 * Build the source text of a self-invoking script that extracts product data
 * from the page it is evaluated in.
 *
 * The script defines `window.__wab_universal` (JSON-LD, meta-tag, visible-price
 * and product-card extractors), posts a `wab-extract` message when
 * `window.__wab_extension_ready` is set, and evaluates to the result of
 * `extract()`.
 *
 * Backslashes inside the template are doubled so the emitted script contains
 * the intended single-backslash regex escapes.
 *
 * @returns {string} JavaScript source to inject into a page.
 */
function getBrowserExtractionScript() {
  return `
(function() {
  'use strict';
  const WAB_EXTRACT = {
    // Extract JSON-LD products
    getJsonLd() {
      const products = [];
      document.querySelectorAll('script[type="application/ld+json"]').forEach(el => {
        try {
          let data = JSON.parse(el.textContent);
          if (Array.isArray(data)) data.forEach(d => this._processLd(d, products));
          else this._processLd(data, products);
        } catch(_) {}
      });
      return products;
    },

    _processLd(data, products) {
      if (!data || typeof data !== 'object') return;
      // Only iterate @graph when it really is an array; a single object is handled by the recursion below
      if (Array.isArray(data['@graph'])) { data['@graph'].forEach(i => this._processLd(i, products)); return; }
      // @type may be one string or an array of strings
      const types = [].concat(data['@type'] || []).map(t => typeof t === 'string' ? t.toLowerCase() : '');
      if (types.some(t => ['product','hotel','hotelroom','lodgingbusiness','offer'].includes(t))) {
        const offers = data.offers || {};
        const offer = Array.isArray(offers) ? offers[0] : offers;
        const availability = typeof offer?.availability === 'string' ? offer.availability : '';
        products.push({
          name: data.name || null,
          price: parseFloat(offer?.price || offer?.lowPrice || data.price) || null,
          originalPrice: parseFloat(offer?.highPrice) || null,
          currency: offer?.priceCurrency || 'USD',
          availability: availability.replace(/https?:\\/\\/schema\\.org\\//,''),
          rating: parseFloat(data.aggregateRating?.ratingValue) || null,
          reviewCount: parseInt(data.aggregateRating?.reviewCount, 10) || null,
          image: typeof data.image === 'string' ? data.image : data.image?.url || null,
          brand: data.brand?.name || data.brand || null,
          method: 'json-ld'
        });
      }
      for (const key of Object.keys(data)) {
        if (typeof data[key] === 'object' && data[key] !== null && key !== '@context') {
          if (Array.isArray(data[key])) data[key].forEach(i => { if (typeof i === 'object') this._processLd(i, products); });
          else this._processLd(data[key], products);
        }
      }
    },

    // Extract Open Graph meta
    getMeta() {
      const m = {};
      document.querySelectorAll('meta[property], meta[name]').forEach(el => {
        const key = (el.getAttribute('property') || el.getAttribute('name') || '').toLowerCase();
        const val = el.getAttribute('content');
        if (key && val) m[key] = val;
      });
      const p = {};
      if (m['og:title']) p.name = m['og:title'];
      if (m['product:price:amount']) p.price = parseFloat(m['product:price:amount']);
      if (m['product:price:currency']) p.currency = m['product:price:currency'];
      if (m['og:description']) p.description = m['og:description'];
      if (m['og:image']) p.image = m['og:image'];
      if (p.name || p.price) { p.method = 'meta-tags'; return p; }
      return null;
    },

    // Extract prices from visible text
    getPrices() {
      const patterns = [
        /(?:\\$|USD|US\\$)\\s*([\\d,]+\\.?\\d*)/g,
        /(?:€|EUR)\\s*([\\d,]+\\.?\\d*)/g,
        /(?:£|GBP)\\s*([\\d,]+\\.?\\d*)/g,
        /([\\d,]+\\.?\\d*)\\s*(?:SAR|ريال|AED|درهم|TND|دينار|EGP)/g,
        /(?:[₺₹¥₩])\\s*([\\d,]+\\.?\\d*)/g,
      ];
      const prices = [];
      const seen = new Set();

      // Target price-like containers first
      const priceEls = document.querySelectorAll(
        '[class*="price"], [class*="cost"], [class*="amount"], [class*="rate"], ' +
        '[data-price], [data-product-price], [itemprop="price"], ' +
        '[class*="tarif"], [class*="سعر"]'
      );
      priceEls.forEach(el => {
        const text = el.textContent || el.getAttribute('data-price') || '';
        for (const pat of patterns) {
          pat.lastIndex = 0;
          let m;
          while ((m = pat.exec(text)) !== null) {
            const num = parseFloat((m[1] || m[0].replace(/[^\\d.,]/g,'')).replace(/,/g,''));
            if (num > 0 && num < 100000 && !seen.has(num)) {
              seen.add(num);
              prices.push({ price: num, raw: m[0].trim(), el: el.className });
            }
          }
        }
      });

      return prices.sort((a,b) => a.price - b.price);
    },

    // Extract product cards (hotels, flights, items)
    getProductCards() {
      const cards = [];
      const selectors = [
        '[class*="product-card"]', '[class*="hotel-card"]', '[class*="listing-card"]',
        '[class*="search-result"]', '[class*="offer-card"]', '[class*="deal-card"]',
        '[class*="property-card"]', '[class*="sr_item"]', '[class*="result-item"]',
        '[data-testid*="property"]', '[data-testid*="product"]', '[data-testid*="listing"]',
      ];
      const allCards = document.querySelectorAll(selectors.join(','));
      allCards.forEach((el, i) => {
        if (i >= 20) return; // limit
        const title = el.querySelector('[class*="title"], [class*="name"], h2, h3, h4')?.textContent?.trim()?.slice(0, 200);
        const priceEl = el.querySelector('[class*="price"], [data-price], [itemprop="price"]');
        const priceText = priceEl?.textContent?.trim() || priceEl?.getAttribute('data-price') || '';
        const ratingEl = el.querySelector('[class*="rating"], [class*="score"], [aria-label*="rating"], [aria-label*="score"]');
        const rating = ratingEl?.textContent?.trim() || ratingEl?.getAttribute('aria-label') || '';
        const link = el.querySelector('a[href]')?.href || '';
        const img = el.querySelector('img')?.src || '';

        if (title || priceText) {
          cards.push({ title, price: priceText, rating, link, image: img, index: i });
        }
      });
      return cards;
    },

    // Full extraction — called by WAB Browser or extension
    extract() {
      const hasWabBridge = typeof window.AICommands !== 'undefined' || typeof window.__wab_bidi !== 'undefined';
      return {
        url: location.href,
        domain: location.hostname,
        title: document.title,
        jsonLd: this.getJsonLd(),
        meta: this.getMeta(),
        prices: this.getPrices(),
        cards: this.getProductCards(),
        timestamp: Date.now(),
        hasWabBridge,
        wabBridgeType: hasWabBridge
          ? (typeof window.__wab_bidi !== 'undefined' ? 'bidi' : 'standard')
          : null,
      };
    }
  };

  // Expose for WAB Browser / extension
  window.__wab_universal = WAB_EXTRACT;

  // Auto-report if WAB extension is present
  if (window.__wab_extension_ready) {
    window.postMessage({ type: 'wab-extract', data: WAB_EXTRACT.extract() }, '*');
  }

  return WAB_EXTRACT.extract();
})();
`;
}
560
-
561
- // ─── Helpers ─────────────────────────────────────────────────────────
562
-
563
/**
 * Return the hostname of `url` without a leading "www.".
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string} Bare hostname, or '' when `url` cannot be parsed.
 */
function _extractDomain(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (_) {
    return '';
  }
  return parsed.hostname.replace(/^www\./, '');
}
566
-
567
/**
 * Look up recorded prices for a URL, newest first.
 *
 * History rows are keyed by the first 16 hex characters of the URL's SHA-256.
 *
 * @param {string} url - URL whose history is requested.
 * @param {number} [limit=30] - Maximum number of rows to return.
 * @returns {Array<object>} Rows from `universal_price_history`.
 */
function getPriceHistory(url, limit = 30) {
  const hasher = crypto.createHash('sha256');
  hasher.update(url);
  const urlHash = hasher.digest('hex').slice(0, 16);
  return stmts.getHistory.all(urlHash, limit);
}
571
-
572
/**
 * List the most recently scraped rows for a domain.
 *
 * @param {string} domain - Hostname; a leading "www." is ignored.
 * @param {number} [limit=50] - Maximum number of rows to return.
 * @returns {Array<object>} Rows from `scraped_prices`, newest first.
 */
function getScrapedByDomain(domain, limit = 50) {
  const bareDomain = domain.replace(/^www\./, '');
  return stmts.getByDomain.all(bareDomain, limit);
}
575
-
576
- // ─── Process browser extraction data ─────────────────────────────────
577
- // Data sent from WAB Browser webview or Chrome extension content script
578
-
579
/**
 * Turn a browser-side extraction payload into product records and store them.
 *
 * Sources are merged in this order: JSON-LD products, the meta-tag product
 * (only when it has a price), then DOM product cards. When none of those
 * yields anything, the first entry of `data.prices` becomes a single
 * fallback product.
 *
 * The payload originates in page context, so list fields are only used when
 * they are arrays and only their object entries are kept.
 *
 * @param {object} data - Payload produced by the injected extraction script.
 * @returns {{products: Array<object>, domain: string, url: string, hasWabBridge: *}|{error: string}}
 */
function processBrowserExtraction(data) {
  if (!data || !data.url) return { error: 'No URL' };

  const domain = _extractDomain(data.url);
  const products = [];
  const objectsIn = (value) =>
    (Array.isArray(value) ? value.filter((entry) => entry && typeof entry === 'object') : []);

  // Process JSON-LD products
  products.push(...objectsIn(data.jsonLd));

  // Process meta tags
  if (data.meta && data.meta.price) {
    products.push(data.meta);
  }

  // Process product cards from DOM
  for (const card of objectsIn(data.cards)) {
    const prices = extractPrices(card.price || '');
    if (prices.length > 0 || card.title) {
      products.push({
        name: card.title || 'Unknown',
        price: prices[0]?.price || null,
        currency: prices[0]?.currency || 'USD',
        rating: parseFloat(card.rating) || null,
        url: card.link || data.url,
        image: card.image || null,
        method: 'dom-cards',
      });
    }
  }

  // Fallback: use raw prices
  const rawPrices = objectsIn(data.prices);
  if (products.length === 0 && rawPrices.length > 0) {
    const first = rawPrices[0];
    // The raw match text still carries the currency symbol; recover it rather than assuming USD
    const recovered = extractPrices(typeof first.raw === 'string' ? first.raw : '')[0];
    products.push({
      name: data.title || domain,
      price: first.price,
      currency: recovered?.currency || 'USD',
      method: 'dom-prices',
      allPrices: rawPrices,
    });
  }

  // Store in database
  let urlHash = null;
  for (const p of products) {
    const id = crypto.randomUUID();
    try {
      stmts.insertScraped.run(
        id, data.url, domain,
        p.name || null, p.price || null, p.currency || 'USD',
        p.originalPrice || null, p.availability || null,
        p.rating || null, p.reviewCount || null,
        p.seller || p.brand || null, p.category || null,
        p.method || 'browser', JSON.stringify(p)
      );

      if (urlHash === null) {
        urlHash = crypto.createHash('sha256').update(data.url).digest('hex').slice(0, 16);
      }
      stmts.insertHistory.run(
        crypto.randomUUID(), urlHash, domain,
        p.name || null, p.price || null, p.currency || 'USD'
      );
    } catch (_) {
      // Persistence is best-effort: a row that cannot be stored must not
      // prevent the extracted products from being returned to the caller.
    }
  }

  return { products, domain, url: data.url, hasWabBridge: data.hasWabBridge };
}
647
-
648
- // ─── Exports ─────────────────────────────────────────────────────────
649
-
650
// Public API of the scraper module: text/HTML extractors, the server-side
// fetcher, the browser script generator and its result processor, stored-data
// lookups, and the currency helpers.
module.exports = {
  extractPrices,
  extractJsonLd,
  extractMetaTags,
  extractFromHtml,
  fetchAndExtract,
  getBrowserExtractionScript,
  processBrowserExtraction,
  getPriceHistory,
  getScrapedByDomain,
  normalizeCurrency,
  toUSD,
};
1
+ /**
2
+ * WAB Universal Scraper Engine
3
+ * ═══════════════════════════════════════════════════════════════════
4
+ * Works with ANY website — no script installation required.
5
+ * Extracts prices, products, availability from raw HTML/DOM.
6
+ *
7
+ * Three extraction modes:
8
+ * 1. Schema.org JSON-LD (structured, most reliable)
9
+ * 2. Open Graph / Meta tags (semi-structured)
10
+ * 3. DOM pattern matching (heuristic, any site)
11
+ *
12
+ * Used by: WAB Browser (webview), Chrome Extension (content script),
13
+ * Server-side fetch (Node.js)
14
+ */
15
+
16
+ const crypto = require('crypto');
17
+ const { db } = require('../models/db');
18
+ const { safeFetch } = require('../utils/safe-fetch');
19
+
20
+ // ─── Schema ──────────────────────────────────────────────────────────
21
+
22
// Create the scraper's tables and indexes when the module is loaded. Every
// statement uses IF NOT EXISTS, so running it against an existing database
// changes nothing.
db.exec(`
  CREATE TABLE IF NOT EXISTS scraped_prices (
    id TEXT PRIMARY KEY,
    url TEXT NOT NULL,
    domain TEXT NOT NULL,
    product_name TEXT,
    price REAL,
    currency TEXT DEFAULT 'USD',
    original_price REAL,
    availability TEXT,
    rating REAL,
    review_count INTEGER,
    seller TEXT,
    category TEXT,
    extraction_method TEXT,
    raw_data TEXT DEFAULT '{}',
    scraped_at TEXT DEFAULT (datetime('now'))
  );

  CREATE TABLE IF NOT EXISTS universal_price_history (
    id TEXT PRIMARY KEY,
    url_hash TEXT NOT NULL,
    domain TEXT NOT NULL,
    product_name TEXT,
    price REAL,
    currency TEXT DEFAULT 'USD',
    recorded_at TEXT DEFAULT (datetime('now'))
  );

  CREATE INDEX IF NOT EXISTS idx_scraped_domain ON scraped_prices(domain);
  CREATE INDEX IF NOT EXISTS idx_scraped_url ON scraped_prices(url);
  CREATE INDEX IF NOT EXISTS idx_uph_hash ON universal_price_history(url_hash);
`);

// Prepared statements shared by every function in this module.
const stmts = {
  // Store one extracted product; scraped_at is left to its column default.
  insertScraped: db.prepare(`INSERT OR REPLACE INTO scraped_prices
    (id, url, domain, product_name, price, currency, original_price,
    availability, rating, review_count, seller, category, extraction_method, raw_data)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
  // Most recent row for an exact URL.
  getByUrl: db.prepare('SELECT * FROM scraped_prices WHERE url = ? ORDER BY scraped_at DESC LIMIT 1'),
  // Newest rows for a domain, capped by the second parameter.
  getByDomain: db.prepare('SELECT * FROM scraped_prices WHERE domain = ? ORDER BY scraped_at DESC LIMIT ?'),
  // Append one price observation; recorded_at is left to its column default.
  insertHistory: db.prepare(`INSERT INTO universal_price_history
    (id, url_hash, domain, product_name, price, currency) VALUES (?, ?, ?, ?, ?, ?)`),
  // Newest observations for a URL hash, capped by the second parameter.
  getHistory: db.prepare('SELECT * FROM universal_price_history WHERE url_hash = ? ORDER BY recorded_at DESC LIMIT ?'),
};
67
+
68
+ // ─── Currency Normalization ──────────────────────────────────────────
69
+
70
// Currency symbols, local-language names and codes mapped to three-letter codes.
const CURRENCY_MAP = {
  '$': 'USD', 'USD': 'USD', 'US$': 'USD',
  '€': 'EUR', 'EUR': 'EUR',
  '£': 'GBP', 'GBP': 'GBP',
  'SAR': 'SAR', 'ريال': 'SAR', 'ر.س': 'SAR',
  'AED': 'AED', 'درهم': 'AED', 'د.إ': 'AED',
  'TND': 'TND', 'دينار': 'TND', 'د.ت': 'TND',
  'EGP': 'EGP', 'ج.م': 'EGP',
  'MAD': 'MAD', 'د.م': 'MAD',
  'TRY': 'TRY', '₺': 'TRY',
  'JPY': 'JPY', '¥': 'JPY',
  'INR': 'INR', '₹': 'INR',
  'KRW': 'KRW', '₩': 'KRW',
};

// Approximate USD rates for comparison
const TO_USD = {
  USD: 1, EUR: 1.08, GBP: 1.27, SAR: 0.27, AED: 0.27,
  TND: 0.32, EGP: 0.032, MAD: 0.10, TRY: 0.031,
  JPY: 0.0067, INR: 0.012, KRW: 0.00074,
};

/**
 * Resolve a currency symbol, name or code to a three-letter currency code.
 *
 * The upper-cased form is tried first, then the value as given. Only the
 * table's own keys are consulted, so inherited names such as "constructor"
 * cannot resolve to anything.
 *
 * @param {string} symbol - Symbol or code as found in page text.
 * @returns {string} Matching code, or 'USD' for anything unrecognised
 *   (including non-string and empty input).
 */
function normalizeCurrency(symbol) {
  if (typeof symbol !== 'string') return 'USD';
  const trimmed = symbol.trim();
  for (const candidate of [trimmed.toUpperCase(), trimmed]) {
    if (Object.prototype.hasOwnProperty.call(CURRENCY_MAP, candidate)) {
      return CURRENCY_MAP[candidate];
    }
  }
  return 'USD';
}

/**
 * Convert an amount to US dollars using the approximate rates in TO_USD.
 *
 * @param {number} price - Amount in `currency`.
 * @param {string} currency - Three-letter currency code.
 * @returns {number} Amount in USD rounded to two decimals; currencies without
 *   a known rate are converted at 1:1.
 */
function toUSD(price, currency) {
  const rate = Object.prototype.hasOwnProperty.call(TO_USD, currency) ? TO_USD[currency] : 1;
  return Math.round(price * rate * 100) / 100;
}
102
+
103
+ // ─── Price Extraction ────────────────────────────────────────────────
104
+
105
// Each pattern exposes the named groups `currency` and `price`.
const PRICE_PATTERNS = [
  // $123.45 or $ 123.45
  /(?<currency>\$|USD|US\$)\s*(?<price>[\d,]+\.?\d*)/gi,
  // €123.45
  /(?<currency>€|EUR)\s*(?<price>[\d,]+\.?\d*)/gi,
  // £123.45
  /(?<currency>£|GBP)\s*(?<price>[\d,]+\.?\d*)/gi,
  // 123.45 SAR / ريال
  /(?<price>[\d,]+\.?\d*)\s*(?<currency>SAR|ريال|ر\.س|AED|درهم|د\.إ|TND|دينار|د\.ت|EGP|ج\.م|MAD|د\.م)/gi,
  // ₺ ₹ ¥ ₩ prefixed
  /(?<currency>[₺₹¥₩])\s*(?<price>[\d,]+\.?\d*)/gi,
  // 123.45$ (suffix dollar)
  /(?<price>[\d,]+\.?\d*)\s*(?<currency>\$|€|£)/gi,
];

/**
 * Find every price mentioned in a piece of text.
 *
 * Commas are treated as thousands separators. Amounts that are not positive
 * or exceed 1,000,000 are dropped, and each amount/currency pair is reported
 * once (first occurrence wins).
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{price: number, currency: string, usd: number, raw: string}>}
 *   Matches ordered by ascending USD value.
 */
function extractPrices(text) {
  const unique = new Map();

  for (const pattern of PRICE_PATTERNS) {
    // The patterns are shared /g regexes, so always restart from the beginning.
    pattern.lastIndex = 0;
    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const amountText = hit.groups?.price || hit[2] || hit[1];
      const symbol = hit.groups?.currency || hit[1] || hit[2];
      if (!amountText) continue;

      const amount = parseFloat(amountText.replace(/,/g, ''));
      const unusable = isNaN(amount) || amount <= 0 || amount > 1000000;
      if (unusable) continue;

      const currency = normalizeCurrency(symbol);
      const dedupeKey = `${amount}-${currency}`;
      if (unique.has(dedupeKey)) continue;

      unique.set(dedupeKey, {
        price: amount,
        currency,
        usd: toUSD(amount, currency),
        raw: hit[0].trim(),
      });
    }
  }

  return [...unique.values()].sort((a, b) => a.usd - b.usd);
}
146
+
147
+ // ─── Schema.org JSON-LD Extraction ───────────────────────────────────
148
+
149
/**
 * Collect product-like entities from every JSON-LD script block in an HTML
 * document.
 *
 * A block that fails to parse or process is skipped; the remaining blocks are
 * still examined.
 *
 * @param {string} html - Raw HTML source.
 * @returns {Array<object>} Products found, in document order.
 */
function extractJsonLd(html) {
  const scriptBlock = /<script[^>]+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
  const found = [];

  for (let hit = scriptBlock.exec(html); hit !== null; hit = scriptBlock.exec(html)) {
    try {
      const parsed = JSON.parse(hit[1].trim());
      const nodes = Array.isArray(parsed) ? parsed : [parsed];
      for (const node of nodes) {
        processJsonLd(node, found);
      }
    } catch (_) {}
  }

  return found;
}
164
+
165
/**
 * Walk one parsed JSON-LD node and append every product-like entity it
 * contains to `products`.
 *
 * A node is product-like when any of its `@type` values (a single string or
 * an array of strings) is Product, Hotel, HotelRoom, LodgingBusiness or
 * Offer, compared case-insensitively. After the node itself is inspected,
 * every nested object and array except `@context` is visited recursively,
 * so an Offer nested in a Product is also emitted as its own entry.
 *
 * @param {*} data - Parsed JSON-LD value; non-objects are ignored.
 * @param {Array<object>} products - Accumulator, mutated in place.
 * @returns {void}
 */
function processJsonLd(data, products) {
  if (!data || typeof data !== 'object') return;

  // Handle @graph arrays
  if (Array.isArray(data['@graph'])) {
    data['@graph'].forEach((item) => processJsonLd(item, products));
    return;
  }

  const productTypes = ['product', 'hotel', 'hotelroom', 'lodgingbusiness', 'offer'];
  // @type may legitimately be an array; calling toLowerCase() on it would throw.
  const declaredTypes = Array.isArray(data['@type']) ? data['@type'] : [data['@type']];
  const isProductLike = declaredTypes.some(
    (t) => typeof t === 'string' && productTypes.includes(t.toLowerCase())
  );

  if (isProductLike) {
    const offers = data.offers || data.priceSpecification || {};
    const offer = Array.isArray(offers) ? offers[0] : offers;
    const textOrNull = (value) => (typeof value === 'string' && value ? value : null);

    // A bare Offer node carries its currency and availability on itself, not on a nested offer.
    const availability = textOrNull(offer?.availability) || textOrNull(data.availability);

    products.push({
      name: data.name || data.headline || null,
      price: parseFloat(offer?.price || offer?.lowPrice || data.price) || null,
      originalPrice: parseFloat(offer?.highPrice) || null,
      currency: offer?.priceCurrency || data.priceCurrency || 'USD',
      availability: availability
        ? availability.replace('https://schema.org/', '').replace('http://schema.org/', '') || null
        : null,
      rating: parseFloat(data.aggregateRating?.ratingValue) || null,
      reviewCount: parseInt(data.aggregateRating?.reviewCount || data.aggregateRating?.ratingCount, 10) || null,
      image: data.image?.url || (typeof data.image === 'string' ? data.image : null),
      description: typeof data.description === 'string' ? data.description.slice(0, 500) : '',
      // Keep brand a string (or null) so it can be bound to a TEXT column.
      brand: textOrNull(data.brand?.name) || textOrNull(data.brand),
      sku: data.sku || null,
      url: data.url || null,
      method: 'json-ld',
    });
  }

  // Recurse into nested objects
  for (const key of Object.keys(data)) {
    const value = data[key];
    if (typeof value !== 'object' || value === null || key === '@context') continue;
    if (Array.isArray(value)) {
      value.forEach((item) => {
        if (typeof item === 'object') processJsonLd(item, products);
      });
    } else {
      processJsonLd(value, products);
    }
  }
}
211
+
212
+ // ─── Open Graph / Meta Tag Extraction ────────────────────────────────
213
+
214
/**
 * Build a product record from Open Graph / product meta tags.
 *
 * Tag names are compared in lower case; when a tag repeats, the last
 * occurrence wins. Tags with empty content are ignored.
 *
 * @param {string} html - Raw HTML source.
 * @returns {object|null} Product with `method: 'meta-tags'`, or null when
 *   neither a name nor a price could be found.
 */
function extractMetaTags(html) {
  const metaRegex = /<meta\s+(?:[^>]*?(?:property|name)=["']([^"']+)["'][^>]*?content=["']([^"']*?)["']|[^>]*?content=["']([^"']*?)["'][^>]*?(?:property|name)=["']([^"']+)["'])[^>]*\/?>/gi;
  const tags = {};

  // Groups 1/2 capture name-then-content order, groups 4/3 the reverse order.
  for (let hit = metaRegex.exec(html); hit !== null; hit = metaRegex.exec(html)) {
    const tagName = (hit[1] || hit[4] || '').toLowerCase();
    const tagContent = hit[2] || hit[3] || '';
    if (tagName && tagContent) tags[tagName] = tagContent;
  }

  const keep = (value) => value;
  const fieldSources = [
    ['name', 'og:title', keep],
    ['price', 'product:price:amount', (value) => parseFloat(value)],
    ['currency', 'product:price:currency', keep],
    ['description', 'og:description', (value) => value.slice(0, 500)],
    ['image', 'og:image', keep],
    ['availability', 'product:availability', keep],
    ['brand', 'product:brand', keep],
    ['type', 'og:type', keep],
    ['url', 'og:url', keep],
  ];

  const product = {};
  for (const [field, tag, convert] of fieldSources) {
    if (tags[tag]) product[field] = convert(tags[tag]);
  }

  if (!product.name && !product.price) return null;

  product.method = 'meta-tags';
  return product;
}
242
+
243
+ // ─── DOM Heuristic Extraction ────────────────────────────────────────
244
+ // This runs either on server (from fetched HTML) or in browser (content script)
245
+
246
/**
 * Extract products from raw HTML, preferring structured data over guesses.
 *
 * JSON-LD products and a priced meta-tag product are returned as-is when
 * present. Only when neither yields anything are prices collected from
 * price-like class names and data attributes and, failing that, from the
 * page's visible text. The heuristic result is a single product named after
 * the page title (or the domain).
 *
 * @param {string} html - Raw HTML source.
 * @param {string} url - URL the HTML came from; used for the fallback name.
 * @returns {Array<object>} Extracted products; empty when nothing was found.
 */
function extractFromHtml(html, url) {
  const results = [];
  const domain = _extractDomain(url);

  // 1. Try JSON-LD first (most reliable)
  results.push(...extractJsonLd(html));

  // 2. Try Open Graph meta tags
  const metaProduct = extractMetaTags(html);
  if (metaProduct && metaProduct.price) {
    results.push(metaProduct);
  }

  // Heuristic results are only ever used when nothing structured was found.
  if (results.length > 0) return results;

  // 3. Heuristic price extraction from common patterns
  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
  const pageTitle = titleMatch ? titleMatch[1].replace(/\s+/g, ' ').trim() : '';

  // Extract potential product containers
  const priceContainerPatterns = [
    // Common e-commerce price selectors reflected as class names
    /class="[^"]*(?:price|cost|amount|tarif|سعر|ثمن)[^"]*"[^>]*>([^<]{1,100})</g,
    /class="[^"]*(?:product-price|item-price|sale-price|offer-price|current-price)[^"]*"[^>]*>([^<]{1,100})</g,
    /class="[^"]*(?:room-price|rate-price|nightly-rate|total-price)[^"]*"[^>]*>([^<]{1,100})</g,
    // data-price attributes
    /data-price=["']([^"']+)["']/gi,
    /data-product-price=["']([^"']+)["']/gi,
  ];

  const rawPrices = [];
  for (const pattern of priceContainerPatterns) {
    let m;
    while ((m = pattern.exec(html)) !== null) {
      rawPrices.push(...extractPrices(m[1]));
    }
  }

  // If no structured prices found, do a broad sweep
  if (rawPrices.length === 0) {
    // Extract from visible text areas (skip scripts/styles)
    const cleaned = html
      .replace(/<script[\s\S]*?<\/script>/gi, '')
      .replace(/<style[\s\S]*?<\/style>/gi, '')
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ');
    rawPrices.push(...extractPrices(cleaned));
  }

  // Deduplicate: the generic "price" class pattern also matches everything the
  // more specific patterns match, so the same element is usually seen twice.
  const unique = new Map();
  for (const p of rawPrices) {
    const key = `${p.price}-${p.currency}`;
    if (!unique.has(key)) unique.set(key, p);
  }

  // Filter: likely product prices (not phone numbers, years, etc.)
  const validPrices = [...unique.values()].filter((p) =>
    p.usd >= 1 && p.usd <= 50000 &&
    !`${p.price}`.match(/^(19|20)\d{2}$/) // not a year
  );
  if (validPrices.length === 0) return results;

  const sorted = validPrices.sort((a, b) => a.usd - b.usd);
  results.push({
    name: pageTitle || domain,
    price: sorted[0].price,
    currency: sorted[0].currency,
    originalPrice: sorted.length > 1 ? sorted[sorted.length - 1].price : null,
    method: 'heuristic',
    allPrices: sorted.slice(0, 10),
  });

  return results;
}
321
+
322
+ // ─── Server-side Fetch & Extract ─────────────────────────────────────
323
+
324
// Desktop browser User-Agent strings; one is picked at random per request.
const UA_LIST = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15',
];

/**
 * Fetch a page, extract its products and store them.
 *
 * A stored row for the same URL that is less than one hour old is returned
 * instead of fetching, unless `options.force` is set. Fetch, extraction and
 * storage failures are reported as `{ error, products: [] }` rather than
 * thrown.
 *
 * @param {string} url - Page to fetch.
 * @param {object} [options]
 * @param {boolean} [options.force] - Skip the cache lookup result.
 * @param {number} [options.timeout=10000] - Request timeout in milliseconds.
 * @param {number} [options.maxBytes=5242880] - Maximum response size in bytes.
 * @returns {Promise<object>} Extraction result, cached result, or error object.
 */
async function fetchAndExtract(url, options = {}) {
  const domain = _extractDomain(url);

  // Check cache (< 1 hour old)
  const cached = stmts.getByUrl.get(url);
  if (cached && !options.force) {
    // scraped_at is written by SQLite datetime('now'): a UTC timestamp shaped
    // "YYYY-MM-DD HH:MM:SS" with no zone marker. Parse it explicitly as UTC;
    // new Date() would read that shape as local time and skew the age.
    const scrapedAtMs = Date.parse(`${String(cached.scraped_at).replace(' ', 'T')}Z`);
    const age = Date.now() - scrapedAtMs;
    if (Number.isFinite(age) && age < 3600000) {
      try {
        return { cached: true, ...JSON.parse(cached.raw_data), products: [cached] };
      } catch (_) {
        // Unreadable raw_data: ignore the cache entry and fetch again.
      }
    }
  }

  try {
    const ua = UA_LIST[Math.floor(Math.random() * UA_LIST.length)];

    const resp = await safeFetch(url, {
      headers: {
        'User-Agent': ua,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9,ar;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
      },
    }, {
      timeoutMs: options.timeout || 10000,
      maxBytes: options.maxBytes || 5 * 1024 * 1024,
      maxRedirects: 3,
      allowedContentTypes: ['text/html', 'application/xhtml', 'application/xml'],
    });

    if (!resp.ok) return { error: `HTTP ${resp.status}`, products: [] };

    const contentType = resp.headers.get('content-type') || '';
    if (!contentType.includes('text/html') && !contentType.includes('application/xhtml')) {
      return { error: 'Not HTML', products: [] };
    }

    const html = await resp.text();
    const products = extractFromHtml(html, url);

    // History rows are keyed by a short hash of the URL; it is the same for every product.
    const urlHash = crypto.createHash('sha256').update(url).digest('hex').slice(0, 16);

    // Store results
    for (const p of products) {
      const id = crypto.randomUUID();
      stmts.insertScraped.run(
        id, url, domain,
        p.name || null, p.price || null, p.currency || 'USD',
        p.originalPrice || null, p.availability || null,
        p.rating || null, p.reviewCount || null,
        p.seller || p.brand || null, p.category || null,
        p.method || 'unknown', JSON.stringify(p)
      );

      // Record price history
      stmts.insertHistory.run(
        crypto.randomUUID(), urlHash, domain,
        p.name || null, p.price || null, p.currency || 'USD'
      );
    }

    return { products, domain, url, fetchedAt: new Date().toISOString() };
  } catch (err) {
    return { error: err.message, products: [] };
  }
}
396
+
397
+ // ─── Browser-side extraction (for content script / WAB Browser) ──────
398
+ // This generates a script that can be injected into any page via
399
+ // webview.executeJavaScript() or chrome content script
400
+
401
/**
 * Build the source text of a self-invoking script that extracts product data
 * from the page it is evaluated in.
 *
 * The script defines `window.__wab_universal` (JSON-LD, meta-tag, visible-price
 * and product-card extractors), posts a `wab-extract` message when
 * `window.__wab_extension_ready` is set, and evaluates to the result of
 * `extract()`.
 *
 * Backslashes inside the template are doubled so the emitted script contains
 * the intended single-backslash regex escapes.
 *
 * @returns {string} JavaScript source to inject into a page.
 */
function getBrowserExtractionScript() {
  return `
(function() {
  'use strict';
  const WAB_EXTRACT = {
    // Extract JSON-LD products
    getJsonLd() {
      const products = [];
      document.querySelectorAll('script[type="application/ld+json"]').forEach(el => {
        try {
          let data = JSON.parse(el.textContent);
          if (Array.isArray(data)) data.forEach(d => this._processLd(d, products));
          else this._processLd(data, products);
        } catch(_) {}
      });
      return products;
    },

    _processLd(data, products) {
      if (!data || typeof data !== 'object') return;
      // Only iterate @graph when it really is an array; a single object is handled by the recursion below
      if (Array.isArray(data['@graph'])) { data['@graph'].forEach(i => this._processLd(i, products)); return; }
      // @type may be one string or an array of strings
      const types = [].concat(data['@type'] || []).map(t => typeof t === 'string' ? t.toLowerCase() : '');
      if (types.some(t => ['product','hotel','hotelroom','lodgingbusiness','offer'].includes(t))) {
        const offers = data.offers || {};
        const offer = Array.isArray(offers) ? offers[0] : offers;
        const availability = typeof offer?.availability === 'string' ? offer.availability : '';
        products.push({
          name: data.name || null,
          price: parseFloat(offer?.price || offer?.lowPrice || data.price) || null,
          originalPrice: parseFloat(offer?.highPrice) || null,
          currency: offer?.priceCurrency || 'USD',
          availability: availability.replace(/https?:\\/\\/schema\\.org\\//,''),
          rating: parseFloat(data.aggregateRating?.ratingValue) || null,
          reviewCount: parseInt(data.aggregateRating?.reviewCount, 10) || null,
          image: typeof data.image === 'string' ? data.image : data.image?.url || null,
          brand: data.brand?.name || data.brand || null,
          method: 'json-ld'
        });
      }
      for (const key of Object.keys(data)) {
        if (typeof data[key] === 'object' && data[key] !== null && key !== '@context') {
          if (Array.isArray(data[key])) data[key].forEach(i => { if (typeof i === 'object') this._processLd(i, products); });
          else this._processLd(data[key], products);
        }
      }
    },

    // Extract Open Graph meta
    getMeta() {
      const m = {};
      document.querySelectorAll('meta[property], meta[name]').forEach(el => {
        const key = (el.getAttribute('property') || el.getAttribute('name') || '').toLowerCase();
        const val = el.getAttribute('content');
        if (key && val) m[key] = val;
      });
      const p = {};
      if (m['og:title']) p.name = m['og:title'];
      if (m['product:price:amount']) p.price = parseFloat(m['product:price:amount']);
      if (m['product:price:currency']) p.currency = m['product:price:currency'];
      if (m['og:description']) p.description = m['og:description'];
      if (m['og:image']) p.image = m['og:image'];
      if (p.name || p.price) { p.method = 'meta-tags'; return p; }
      return null;
    },

    // Extract prices from visible text
    getPrices() {
      const patterns = [
        /(?:\\$|USD|US\\$)\\s*([\\d,]+\\.?\\d*)/g,
        /(?:€|EUR)\\s*([\\d,]+\\.?\\d*)/g,
        /(?:£|GBP)\\s*([\\d,]+\\.?\\d*)/g,
        /([\\d,]+\\.?\\d*)\\s*(?:SAR|ريال|AED|درهم|TND|دينار|EGP)/g,
        /(?:[₺₹¥₩])\\s*([\\d,]+\\.?\\d*)/g,
      ];
      const prices = [];
      const seen = new Set();

      // Target price-like containers first
      const priceEls = document.querySelectorAll(
        '[class*="price"], [class*="cost"], [class*="amount"], [class*="rate"], ' +
        '[data-price], [data-product-price], [itemprop="price"], ' +
        '[class*="tarif"], [class*="سعر"]'
      );
      priceEls.forEach(el => {
        const text = el.textContent || el.getAttribute('data-price') || '';
        for (const pat of patterns) {
          pat.lastIndex = 0;
          let m;
          while ((m = pat.exec(text)) !== null) {
            const num = parseFloat((m[1] || m[0].replace(/[^\\d.,]/g,'')).replace(/,/g,''));
            if (num > 0 && num < 100000 && !seen.has(num)) {
              seen.add(num);
              prices.push({ price: num, raw: m[0].trim(), el: el.className });
            }
          }
        }
      });

      return prices.sort((a,b) => a.price - b.price);
    },

    // Extract product cards (hotels, flights, items)
    getProductCards() {
      const cards = [];
      const selectors = [
        '[class*="product-card"]', '[class*="hotel-card"]', '[class*="listing-card"]',
        '[class*="search-result"]', '[class*="offer-card"]', '[class*="deal-card"]',
        '[class*="property-card"]', '[class*="sr_item"]', '[class*="result-item"]',
        '[data-testid*="property"]', '[data-testid*="product"]', '[data-testid*="listing"]',
      ];
      const allCards = document.querySelectorAll(selectors.join(','));
      allCards.forEach((el, i) => {
        if (i >= 20) return; // limit
        const title = el.querySelector('[class*="title"], [class*="name"], h2, h3, h4')?.textContent?.trim()?.slice(0, 200);
        const priceEl = el.querySelector('[class*="price"], [data-price], [itemprop="price"]');
        const priceText = priceEl?.textContent?.trim() || priceEl?.getAttribute('data-price') || '';
        const ratingEl = el.querySelector('[class*="rating"], [class*="score"], [aria-label*="rating"], [aria-label*="score"]');
        const rating = ratingEl?.textContent?.trim() || ratingEl?.getAttribute('aria-label') || '';
        const link = el.querySelector('a[href]')?.href || '';
        const img = el.querySelector('img')?.src || '';

        if (title || priceText) {
          cards.push({ title, price: priceText, rating, link, image: img, index: i });
        }
      });
      return cards;
    },

    // Full extraction — called by WAB Browser or extension
    extract() {
      const hasWabBridge = typeof window.AICommands !== 'undefined' || typeof window.__wab_bidi !== 'undefined';
      return {
        url: location.href,
        domain: location.hostname,
        title: document.title,
        jsonLd: this.getJsonLd(),
        meta: this.getMeta(),
        prices: this.getPrices(),
        cards: this.getProductCards(),
        timestamp: Date.now(),
        hasWabBridge,
        wabBridgeType: hasWabBridge
          ? (typeof window.__wab_bidi !== 'undefined' ? 'bidi' : 'standard')
          : null,
      };
    }
  };

  // Expose for WAB Browser / extension
  window.__wab_universal = WAB_EXTRACT;

  // Auto-report if WAB extension is present
  if (window.__wab_extension_ready) {
    window.postMessage({ type: 'wab-extract', data: WAB_EXTRACT.extract() }, '*');
  }

  return WAB_EXTRACT.extract();
})();
`;
}
560
+
561
+ // ─── Helpers ─────────────────────────────────────────────────────────
562
+
563
/**
 * Return the hostname of `url` without a leading "www.".
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string} Bare hostname, or '' when `url` cannot be parsed.
 */
function _extractDomain(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (_) {
    return '';
  }
  return parsed.hostname.replace(/^www\./, '');
}
566
+
567
/**
 * Look up recorded prices for a URL, newest first.
 *
 * History rows are keyed by the first 16 hex characters of the URL's SHA-256.
 *
 * @param {string} url - URL whose history is requested.
 * @param {number} [limit=30] - Maximum number of rows to return.
 * @returns {Array<object>} Rows from `universal_price_history`.
 */
function getPriceHistory(url, limit = 30) {
  const hasher = crypto.createHash('sha256');
  hasher.update(url);
  const urlHash = hasher.digest('hex').slice(0, 16);
  return stmts.getHistory.all(urlHash, limit);
}
571
+
572
/**
 * List the most recently scraped rows for a domain.
 *
 * @param {string} domain - Hostname; a leading "www." is ignored.
 * @param {number} [limit=50] - Maximum number of rows to return.
 * @returns {Array<object>} Rows from `scraped_prices`, newest first.
 */
function getScrapedByDomain(domain, limit = 50) {
  const bareDomain = domain.replace(/^www\./, '');
  return stmts.getByDomain.all(bareDomain, limit);
}
575
+
576
+ // ─── Process browser extraction data ─────────────────────────────────
577
+ // Data sent from WAB Browser webview or Chrome extension content script
578
+
579
/**
 * Turn an extraction payload (sent by the WAB Browser webview or the Chrome
 * extension content script) into product records, store them, and return them.
 *
 * Products are collected in this order: JSON-LD entries, the meta-tag record
 * (only when it carries a price), then DOM product cards. The raw DOM prices
 * are used only as a fallback when none of those produced a product.
 *
 * The payload originates outside this module, so list fields are used only
 * when they are real arrays and non-object entries are skipped.
 *
 * Storage is best-effort: a failed insert is logged and the remaining
 * products are still processed and returned.
 *
 * @param {object} data - Payload. `url` (non-empty string) is required;
 *   `jsonLd`, `meta`, `cards`, `prices`, `title` and `hasWabBridge` are optional.
 * @returns {{error: string}|{products: object[], domain: string, url: string, hasWabBridge: *}}
 *   `{ error: 'No URL' }` when the payload or its URL is missing or not a string.
 */
function processBrowserExtraction(data) {
  if (!data || typeof data.url !== 'string' || !data.url) return { error: 'No URL' };

  const domain = _extractDomain(data.url);
  const products = [];

  const isRecord = (value) => value !== null && typeof value === 'object';
  const jsonLd = Array.isArray(data.jsonLd) ? data.jsonLd : [];
  const cards = Array.isArray(data.cards) ? data.cards : [];
  const rawPrices = Array.isArray(data.prices) ? data.prices : [];

  // Process JSON-LD products
  for (const entry of jsonLd) {
    if (isRecord(entry)) products.push(entry);
  }

  // Process meta tags
  if (data.meta && data.meta.price) {
    products.push(data.meta);
  }

  // Process product cards from DOM
  for (const card of cards) {
    if (!isRecord(card)) continue;
    const prices = extractPrices(card.price || '');
    if (prices.length > 0 || card.title) {
      products.push({
        name: card.title || 'Unknown',
        price: prices[0]?.price || null,
        currency: prices[0]?.currency || 'USD',
        rating: parseFloat(card.rating) || null,
        url: card.link || data.url,
        image: card.image || null,
        method: 'dom-cards',
      });
    }
  }

  // Fallback: use raw prices
  if (products.length === 0 && rawPrices.length > 0) {
    products.push({
      name: data.title || domain,
      price: rawPrices[0]?.price,
      currency: 'USD',
      method: 'dom-prices',
      allPrices: rawPrices,
    });
  }

  // Store in database. The history key depends only on the page URL, so it is
  // computed once instead of once per product.
  const urlHash = crypto.createHash('sha256').update(data.url).digest('hex').slice(0, 16);
  for (const p of products) {
    try {
      stmts.insertScraped.run(
        crypto.randomUUID(), data.url, domain,
        p.name || null, p.price || null, p.currency || 'USD',
        p.originalPrice || null, p.availability || null,
        p.rating || null, p.reviewCount || null,
        p.seller || p.brand || null, p.category || null,
        p.method || 'browser', JSON.stringify(p)
      );

      stmts.insertHistory.run(
        crypto.randomUUID(), urlHash, domain,
        p.name || null, p.price || null, p.currency || 'USD'
      );
    } catch (err) {
      // Keep going so one bad record does not lose the rest, but leave a trace
      // instead of hiding the failure.
      console.warn(`[wab] failed to store scraped product for ${data.url}: ${err && err.message}`);
    }
  }

  return { products, domain, url: data.url, hasWabBridge: data.hasWabBridge };
}
647
+
648
+ // ─── Exports ─────────────────────────────────────────────────────────
649
+
650
// Public API of this module (CommonJS). Key order is kept as-is.
const exportedApi = {
  extractPrices,
  extractJsonLd,
  extractMetaTags,
  extractFromHtml,
  fetchAndExtract,
  getBrowserExtractionScript,
  processBrowserExtraction,
  getPriceHistory,
  getScrapedByDomain,
  normalizeCurrency,
  toUSD,
};

module.exports = exportedApi;