@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
 * FetchCache — a bounded, in-memory cache with time-based expiry.
 *
 * Entries are kept in a Map whose insertion order doubles as the recency
 * order: the first key is the least recently used one, the last key the most
 * recently used one. A single TTL applies to every entry of an instance, and
 * hit/miss counters are maintained for `stats()`.
 *
 * Meant for reusing results of identical requests instead of fetching again.
 * Shared instances are exported from this module, e.g.
 * `import { fetchCache } from './fetch-cache.js'`.
 */
export class FetchCache {
    /** key -> entry; iteration order runs from least to most recently used. */
    cache = new Map();
    /** Largest number of entries kept after a `set()` completes. */
    maxEntries;
    /** Lifetime of an entry in milliseconds (constructor takes seconds). */
    defaultTTL;
    /** Number of `get()` calls that returned an entry. */
    hits = 0;
    /** Number of `get()` calls that returned null (absent or expired). */
    misses = 0;
    /**
     * @param maxEntries        capacity before least-recently-used eviction (default 500)
     * @param defaultTTLSeconds entry lifetime in seconds (default 300)
     */
    constructor(maxEntries = 500, defaultTTLSeconds = 300) {
        this.maxEntries = maxEntries;
        this.defaultTTL = defaultTTLSeconds * 1000;
    }
    /**
     * Build the cache key for a request.
     *
     * Only `render`, `stealth` (both reduced to a 0/1 flag) and `budget`
     * (stringified, empty when undefined) take part in the key besides the
     * url; any other option is ignored.
     *
     * @param url     request url
     * @param options fetch options; defaults to an empty object
     * @returns a string of the form `<url>|r:<0|1>|s:<0|1>|b:<budget>`
     */
    getKey(url, options = {}) {
        const asFlag = (on) => (on ? '1' : '0');
        const renderFlag = asFlag(options.render);
        const stealthFlag = asFlag(options.stealth);
        const { budget } = options;
        const budgetPart = budget === undefined ? '' : String(budget);
        return `${url}|r:${renderFlag}|s:${stealthFlag}|b:${budgetPart}`;
    }
    /**
     * Look up an entry.
     *
     * An entry older than the TTL (measured from its `timestamp`, compared
     * against `Date.now()`) is removed and reported as a miss. A live entry
     * is re-inserted so it becomes the most recently used one.
     *
     * @param key cache key, normally produced by `getKey()`
     * @returns the stored entry, or null when absent or expired
     */
    get(key) {
        const cached = this.cache.get(key);
        if (cached) {
            const expired = Date.now() - cached.timestamp > this.defaultTTL;
            // Removal is needed either way: a stale entry is dropped for good,
            // a live one is re-added below so it moves to the end of the Map.
            this.cache.delete(key);
            if (!expired) {
                this.cache.set(key, cached);
                this.hits += 1;
                return cached;
            }
        }
        this.misses += 1;
        return null;
    }
    /**
     * Insert or replace an entry and make it the most recently used one.
     * While the cache holds more than `maxEntries` entries, the least
     * recently used ones are dropped.
     *
     * @param key   cache key
     * @param entry value to store; `get()` reads its `timestamp` field for expiry
     */
    set(key, entry) {
        // Deleting first (a no-op for unknown keys) guarantees the key ends up last.
        this.cache.delete(key);
        this.cache.set(key, entry);
        while (this.cache.size > this.maxEntries) {
            // First key in iteration order is the least recently used.
            const [stalest] = this.cache.keys();
            if (stalest === undefined) {
                break;
            }
            this.cache.delete(stalest);
        }
    }
    /** Drop every entry and zero the hit/miss counters. */
    clear() {
        this.cache.clear();
        this.hits = 0;
        this.misses = 0;
    }
    /**
     * Snapshot of the counters.
     *
     * @returns `{ size, hits, misses, hitRate }` where hitRate is
     *          hits / (hits + misses) rounded to two decimals, or 0 when
     *          no lookup has happened yet
     */
    stats() {
        const { hits, misses } = this;
        const lookups = hits + misses;
        const hitRate = lookups === 0 ? 0 : Math.round((hits / lookups) * 100) / 100;
        return { size: this.cache.size, hits, misses, hitRate };
    }
}
|
|
92
|
+
/**
 * Module-level fetch cache shared by every importer:
 * at most 500 entries, each valid for 5 minutes.
 */
export const fetchCache = new FetchCache(500, 5 * 60);
/**
 * Module-level search cache shared by every importer:
 * at most 500 entries, each valid for 60 seconds — a shorter lifetime
 * because search results change faster.
 */
export const searchCache = new FetchCache(500, 60);
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core fetching — thin re-export layer for backward compatibility.
|
|
3
|
+
*
|
|
4
|
+
* The implementation has been split into focused modules:
|
|
5
|
+
* - http-fetch.ts — Pure HTTP fetching (simpleFetch, SSRF validation, HTTP pool)
|
|
6
|
+
* - browser-pool.ts — Browser lifecycle & page pool (getBrowser, cleanup, warmup)
|
|
7
|
+
* - browser-fetch.ts — Browser-based fetching (browserFetch, browserScreenshot)
|
|
8
|
+
*/
|
|
9
|
+
export { simpleFetch, type FetchResult } from './http-fetch.js';
|
|
10
|
+
export { cleanup, warmup, closePool, closeProfileBrowser, playwrightLoaded } from './browser-pool.js';
|
|
11
|
+
export { browserFetch, browserScreenshot, browserFilmstrip, browserAudit, browserAnimationCapture, browserViewports, browserDesignAudit, browserDiff, retryFetch, scrollAndWait, browserDesignAnalysis } from './browser-fetch.js';
|
|
12
|
+
export type { DesignAuditResult } from './browser-fetch.js';
|
|
13
|
+
export type { DesignAnalysis, EffectInstance } from './design-analysis.js';
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core fetching — thin re-export layer for backward compatibility.
|
|
3
|
+
*
|
|
4
|
+
* The implementation has been split into focused modules:
|
|
5
|
+
* - http-fetch.ts — Pure HTTP fetching (simpleFetch, SSRF validation, HTTP pool)
|
|
6
|
+
* - browser-pool.ts — Browser lifecycle & page pool (getBrowser, cleanup, warmup)
|
|
7
|
+
* - browser-fetch.ts — Browser-based fetching (browserFetch, browserScreenshot)
|
|
8
|
+
*/
|
|
9
|
+
// Re-export everything for backward compatibility
|
|
10
|
+
export { simpleFetch } from './http-fetch.js';
|
|
11
|
+
export { cleanup, warmup, closePool, closeProfileBrowser, playwrightLoaded } from './browser-pool.js';
|
|
12
|
+
export { browserFetch, browserScreenshot, browserFilmstrip, browserAudit, browserAnimationCapture, browserViewports, browserDesignAudit, browserDiff, retryFetch, scrollAndWait, browserDesignAnalysis } from './browser-fetch.js';
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Google Cache fallback fetcher.
|
|
3
|
+
*
|
|
4
|
+
* When a site blocks direct access (Akamai, PerimeterX, Cloudflare, etc.),
|
|
5
|
+
* Google's cache at webcache.googleusercontent.com often has a clean copy
|
|
6
|
+
* that's freely accessible without anti-bot protection.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Value resolved by `fetchGoogleCache` when a cached copy was obtained.
 * NOTE(review): field meanings below are inferred from names and types only —
 * confirm against the implementation.
 */
export interface GoogleCacheResult {
    /** Markup of the retrieved page (presumably the cached HTML document). */
    html: string;
    /** URL associated with the result — TODO confirm whether original or cache URL. */
    url: string;
    /** Optional date string for the cached snapshot, when one is known. */
    cachedDate?: string;
    /** Numeric status code reported for the retrieval. */
    statusCode: number;
    /** Literal tag identifying this result as coming from the Google Cache path. */
    method: 'google-cache';
}
|
|
15
|
+
/**
 * Try to obtain a copy of `url` from Google Cache.
 *
 * Documented contract — the promise resolves to null when:
 * - Google answers 404 (the page is not in the cache),
 * - Google redirects to the live page (no cached copy available), or
 * - the response looks like a Google search page instead of a cached page.
 *
 * @param url     address of the page whose cached copy is wanted
 * @param options optional settings; `timeout` is the abort delay in
 *                milliseconds (the implementation falls back to 10000)
 * @returns the cached copy, or null when none could be retrieved
 */
export declare function fetchGoogleCache(
    url: string,
    options?: { timeout?: number },
): Promise<GoogleCacheResult | null>;
|
|
26
|
+
/**
 * Reports whether the Google Cache fetcher can be used.
 *
 * The fetcher needs no API key or special setup, so nothing gates it.
 */
export declare function isGoogleCacheAvailable(): boolean;
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Google Cache fallback fetcher.
|
|
3
|
+
*
|
|
4
|
+
* When a site blocks direct access (Akamai, PerimeterX, Cloudflare, etc.),
|
|
5
|
+
* Google's cache at webcache.googleusercontent.com often has a clean copy
|
|
6
|
+
* that's freely accessible without anti-bot protection.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Fetch a cached copy of a URL from Google Cache.
 *
 * Returns null if:
 * - Google returns a 404 (page not in cache) or any other non-2xx status
 *   (e.g. a rate-limit or server-error page, which is not cached content)
 * - Google redirects to the live page (cache unavailable)
 * - The response looks like a Google search page rather than a cache
 * - The response body is too small to be real content
 * - The request times out or fails at the network level
 *
 * @param url - The original page URL to look up in the cache.
 * @param options - Optional settings; `timeout` is the overall deadline in
 *   milliseconds (default 10000) and covers both the request and the body read.
 * @returns The cleaned cached page, or null when no usable copy is available.
 */
export async function fetchGoogleCache(url, options) {
    const timeout = options?.timeout ?? 10000;
    // Build the Google Cache URL
    const cacheUrl = `https://webcache.googleusercontent.com/search?q=cache:${encodeURIComponent(url)}`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeout);
    try {
        const response = await fetch(cacheUrl, {
            signal: controller.signal,
            redirect: 'follow',
            headers: {
                // Must look like a real Chrome browser or Google blocks us
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
                Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.9',
                'Accept-Encoding': 'gzip, deflate, br',
                'Cache-Control': 'no-cache',
                Pragma: 'no-cache',
                'Sec-Fetch-Dest': 'document',
                'Sec-Fetch-Mode': 'navigate',
                'Sec-Fetch-Site': 'none',
                'Upgrade-Insecure-Requests': '1',
            },
        });
        // 404 → page not cached; any other non-2xx (429, 5xx, …) is an error
        // page, not cached content, so it must not be reported as a success.
        if (!response.ok) {
            return null;
        }
        // If redirected to the live site (Google doesn't have a cache), return null
        const finalUrl = response.url;
        if (!finalUrl.includes('webcache.googleusercontent.com') &&
            !finalUrl.includes('google.com/search')) {
            // Redirected away from Google cache — cache unavailable
            return null;
        }
        // The abort timer is still armed here, so a stalled body download is
        // cut off by the same deadline as the request itself.
        const html = await response.text();
        // If this looks like a Google search results page (not a cache page), return null
        if (isGoogleSearchPage(html)) {
            return null;
        }
        // If the page is way too small to be real content, return null
        if (html.length < 200) {
            return null;
        }
        // Extract the cache date from Google's notice banner
        const cachedDate = extractCacheDate(html);
        // Remove Google's wrapper elements and return the cleaned HTML
        const cleanedHtml = removeGoogleWrapper(html);
        return {
            html: cleanedHtml,
            url,
            cachedDate,
            statusCode: 200,
            method: 'google-cache',
        };
    }
    catch {
        // Timeout (AbortError) or network failure → treat as "not in cache / unavailable"
        return null;
    }
    finally {
        // Always disarm the timer, on every exit path, only once all I/O is done.
        clearTimeout(timer);
    }
}
|
|
83
|
+
/**
 * Heuristically decide whether the markup is a Google search results page
 * instead of a cached document.
 *
 * @param html - Raw response markup.
 * @returns true when any of the known search-page markers is present.
 */
function isGoogleSearchPage(html) {
    const has = (needle) => html.includes(needle);
    return (
    // Explicit search-page title
    has('<title>Google Search</title>') ||
        // Results container together with at least one result card
        (has('id="search"') && has('class="g"')) ||
        // "Did not match any documents" message
        has('did not match any documents') ||
        // Links back to google.com/search with no cache content at all
        (has('www.google.com/search?') && !has('webcache')));
}
|
|
100
|
+
/**
 * Pull the snapshot date out of Google's cache notice banner.
 *
 * A typical notice reads:
 * "It is a snapshot of the page as it appeared on 15 Jan 2025 09:30:12 GMT."
 *
 * @param html - Markup of the cache page, including Google's banner.
 * @returns The trimmed date text of the first pattern that matches, or
 *   undefined when none does.
 */
function extractCacheDate(html) {
    // Tried in order; covers e.g. "15 Jan 2025 09:30:12 GMT" and "Jan 15, 2025"
    const noticePatterns = [
        /as it appeared on ([^<."]+(?:GMT|UTC))/i,
        /snapshot.*?on\s+([A-Za-z]+ \d+,?\s+\d{4}[^<."]*)/i,
        /cached on:?\s*([^<."]+)/i,
    ];
    for (const noticePattern of noticePatterns) {
        const captured = html.match(noticePattern)?.[1];
        if (captured) {
            return captured.trim();
        }
    }
    return undefined;
}
|
|
122
|
+
/**
 * Remove Google's wrapper elements from the cached page.
 *
 * Google's cache page structure:
 * 1. A <div style="..."> at the very top with the cache notice
 * 2. An <hr> separator
 * 3. The actual cached page content
 *
 * We remove Google's branding/notice and return just the original content.
 *
 * @param html - Full markup of the Google cache response.
 * @returns The markup after the separator with known Google toolbar divs
 *   removed, trimmed. If no cache notice/separator is found, only the
 *   toolbar-div removal and trimming are applied.
 */
function removeGoogleWrapper(html) {
    let cleaned = html;
    // Strategy 1: find the separator that follows the cache notice and keep
    // everything after it.
    const hrIndex = findFirstCacheHr(html);
    if (hrIndex !== -1) {
        // The separator may be written as <hr>, <hr/> or <hr />, so its length
        // varies. Skip to just past the tag's closing '>' instead of assuming
        // four characters, which would leave a stray "/>" or ">" behind.
        const tagEnd = html.indexOf('>', hrIndex);
        cleaned = html.slice(tagEnd + 1);
    }
    // Remove remaining Google-specific elements that might be injected.
    // Google injects a top bar div with id="google-cache-hdr" or similar.
    // NOTE: these lazy patterns stop at the first </div>, so a toolbar that
    // contains nested divs is only partially removed.
    cleaned = cleaned
        .replace(/<div[^>]*id=["']google-cache-hdr["'][^>]*>[\s\S]*?<\/div>/i, '')
        .replace(/<div[^>]*id=["']gbw["'][^>]*>[\s\S]*?<\/div>/i, '')
        .replace(/<div[^>]*id=["']gb["'][^>]*>[\s\S]*?<\/div>/gi, '');
    return cleaned.trim();
}
/**
 * Find the index of the <hr> tag that separates Google's cache notice
 * from the actual page content.
 *
 * The page must contain one of the cache-notice keywords (case-insensitive);
 * if it does, the index of the first <hr>, <hr/> or <hr /> tag in the
 * document is returned.
 *
 * @param html - Full markup of the Google cache response.
 * @returns Index of the separator's opening '<', or -1 when the page has no
 *   cache notice or no <hr> tag.
 */
function findFirstCacheHr(html) {
    // Look for the cache notice keywords to confirm we're in a Google cache page
    const noticeKeywords = [
        'webcache.googleusercontent',
        "Google's cache of",
        'cached version of',
        'It is a snapshot',
    ];
    // Lower-case the document once rather than once per keyword.
    const haystack = html.toLowerCase();
    const hasNotice = noticeKeywords.some((kw) => haystack.includes(kw.toLowerCase()));
    if (!hasNotice) {
        // Not a standard Google cache page — don't strip anything
        return -1;
    }
    // Find the first <hr>, <hr/>, or <hr /> tag — that's the separator
    const hrMatch = html.match(/<hr\s*\/?>/i);
    if (hrMatch && hrMatch.index !== undefined) {
        return hrMatch.index;
    }
    return -1;
}
|
|
175
|
+
/**
 * Reports whether the Google Cache fetcher can be used.
 *
 * The fetcher needs no API key or special setup, so nothing gates it.
 *
 * @returns Always true.
 */
export function isGoogleCacheAvailable() {
    const available = true;
    return available;
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Google SERP Parser — extracts rich structured data from Google search HTML.
|
|
3
|
+
* Supports organic results, knowledge panel, PAA, featured snippets,
|
|
4
|
+
* related searches, shopping, news, images, videos, and local pack.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Structured data extracted from one Google search results page.
 * Only `organicResults` is required; every other section is optional.
 */
export interface GoogleSerpResult {
    /** Organic (non-sponsored) result entries. */
    organicResults: {
        position: number;
        title: string;
        url: string;
        snippet: string;
        sitelinks?: {
            title: string;
            url: string;
        }[];
        date?: string;
        cachedUrl?: string;
    }[];
    /** Knowledge panel. */
    knowledgePanel?: {
        title: string;
        type?: string;
        description?: string;
        source?: string;
        sourceUrl?: string;
        attributes?: Record<string, string>;
        imageUrl?: string;
    };
    /** "People also ask" entries. */
    peopleAlsoAsk?: {
        question: string;
        snippet?: string;
        source?: string;
        sourceUrl?: string;
    }[];
    /** Featured snippet. */
    featuredSnippet?: {
        text: string;
        source: string;
        sourceUrl: string;
        type: 'paragraph' | 'list' | 'table';
    };
    /** Related search suggestions. */
    relatedSearches?: string[];
    /** Shopping results. */
    shoppingResults?: {
        title: string;
        price?: string;
        source?: string;
        url?: string;
        imageUrl?: string;
        rating?: number;
        reviewCount?: number;
    }[];
    /** News results. */
    newsResults?: {
        title: string;
        url: string;
        source: string;
        date?: string;
        snippet?: string;
        imageUrl?: string;
    }[];
    /** Image pack entries. */
    imagePack?: {
        url: string;
        imageUrl: string;
        title?: string;
    }[];
    /** Video results. */
    videoResults?: {
        title: string;
        url: string;
        platform?: string;
        duration?: string;
        date?: string;
        thumbnailUrl?: string;
    }[];
    /** Local pack entries. */
    localPack?: {
        name: string;
        address?: string;
        rating?: number;
        reviewCount?: number;
        type?: string;
        phone?: string;
    }[];
    totalResults?: string;
    searchTime?: string;
}
|
|
82
|
+
/**
 * Parse the HTML of a Google search results page into structured data.
 *
 * @param html - Markup of the Google search results page.
 * @returns The sections that were extracted, as a `GoogleSerpResult`.
 */
export declare function parseGoogleSerp(html: string): GoogleSerpResult;
|