auspex 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/package.json +10 -4
  2. package/readme.md +169 -35
  3. package/dist/agent/actions.d.ts +0 -5
  4. package/dist/agent/actions.d.ts.map +0 -1
  5. package/dist/agent/actions.js +0 -26
  6. package/dist/agent/actions.js.map +0 -1
  7. package/dist/agent/agent.d.ts +0 -12
  8. package/dist/agent/agent.d.ts.map +0 -1
  9. package/dist/agent/agent.js +0 -159
  10. package/dist/agent/agent.js.map +0 -1
  11. package/dist/agent/loop.d.ts +0 -6
  12. package/dist/agent/loop.d.ts.map +0 -1
  13. package/dist/agent/loop.js +0 -175
  14. package/dist/agent/loop.js.map +0 -1
  15. package/dist/agent/report.d.ts +0 -3
  16. package/dist/agent/report.d.ts.map +0 -1
  17. package/dist/agent/report.js +0 -95
  18. package/dist/agent/report.js.map +0 -1
  19. package/dist/browser/executor.d.ts +0 -5
  20. package/dist/browser/executor.d.ts.map +0 -1
  21. package/dist/browser/executor.js +0 -33
  22. package/dist/browser/executor.js.map +0 -1
  23. package/dist/browser/snapshot.d.ts +0 -6
  24. package/dist/browser/snapshot.d.ts.map +0 -1
  25. package/dist/browser/snapshot.js +0 -145
  26. package/dist/browser/snapshot.js.map +0 -1
  27. package/dist/config/defaults.d.ts +0 -11
  28. package/dist/config/defaults.d.ts.map +0 -1
  29. package/dist/config/defaults.js +0 -11
  30. package/dist/config/defaults.js.map +0 -1
  31. package/dist/config/schema.d.ts +0 -62
  32. package/dist/config/schema.d.ts.map +0 -1
  33. package/dist/config/schema.js +0 -24
  34. package/dist/config/schema.js.map +0 -1
  35. package/dist/index.d.ts +0 -7
  36. package/dist/index.d.ts.map +0 -1
  37. package/dist/index.js +0 -8
  38. package/dist/index.js.map +0 -1
  39. package/dist/llm/client.d.ts +0 -23
  40. package/dist/llm/client.d.ts.map +0 -1
  41. package/dist/llm/client.js +0 -51
  42. package/dist/llm/client.js.map +0 -1
  43. package/dist/llm/prompt.d.ts +0 -3
  44. package/dist/llm/prompt.d.ts.map +0 -1
  45. package/dist/llm/prompt.js +0 -36
  46. package/dist/llm/prompt.js.map +0 -1
  47. package/dist/scraper/extractors/content.d.ts +0 -32
  48. package/dist/scraper/extractors/content.d.ts.map +0 -1
  49. package/dist/scraper/extractors/content.js +0 -276
  50. package/dist/scraper/extractors/content.js.map +0 -1
  51. package/dist/scraper/extractors/ssr.d.ts +0 -17
  52. package/dist/scraper/extractors/ssr.d.ts.map +0 -1
  53. package/dist/scraper/extractors/ssr.js +0 -162
  54. package/dist/scraper/extractors/ssr.js.map +0 -1
  55. package/dist/scraper/extractors/to-markdown.d.ts +0 -5
  56. package/dist/scraper/extractors/to-markdown.d.ts.map +0 -1
  57. package/dist/scraper/extractors/to-markdown.js +0 -103
  58. package/dist/scraper/extractors/to-markdown.js.map +0 -1
  59. package/dist/scraper/index.d.ts +0 -35
  60. package/dist/scraper/index.d.ts.map +0 -1
  61. package/dist/scraper/index.js +0 -299
  62. package/dist/scraper/index.js.map +0 -1
  63. package/dist/scraper/tiers/tier1-http.d.ts +0 -5
  64. package/dist/scraper/tiers/tier1-http.d.ts.map +0 -1
  65. package/dist/scraper/tiers/tier1-http.js +0 -122
  66. package/dist/scraper/tiers/tier1-http.js.map +0 -1
  67. package/dist/scraper/tiers/tier2-stealth.d.ts +0 -5
  68. package/dist/scraper/tiers/tier2-stealth.d.ts.map +0 -1
  69. package/dist/scraper/tiers/tier2-stealth.js +0 -108
  70. package/dist/scraper/tiers/tier2-stealth.js.map +0 -1
  71. package/dist/scraper/tiers/tier3-browser.d.ts +0 -10
  72. package/dist/scraper/tiers/tier3-browser.d.ts.map +0 -1
  73. package/dist/scraper/tiers/tier3-browser.js +0 -506
  74. package/dist/scraper/tiers/tier3-browser.js.map +0 -1
  75. package/dist/scraper/types.d.ts +0 -161
  76. package/dist/scraper/types.d.ts.map +0 -1
  77. package/dist/scraper/types.js +0 -3
  78. package/dist/scraper/types.js.map +0 -1
  79. package/dist/security/action-validator.d.ts +0 -83
  80. package/dist/security/action-validator.d.ts.map +0 -1
  81. package/dist/security/action-validator.js +0 -36
  82. package/dist/security/action-validator.js.map +0 -1
  83. package/dist/security/url-validator.d.ts +0 -9
  84. package/dist/security/url-validator.d.ts.map +0 -1
  85. package/dist/security/url-validator.js +0 -69
  86. package/dist/security/url-validator.js.map +0 -1
  87. package/dist/types.d.ts +0 -96
  88. package/dist/types.d.ts.map +0 -1
  89. package/dist/types.js +0 -2
  90. package/dist/types.js.map +0 -1
@@ -1,162 +0,0 @@
1
- import { load } from "cheerio";
2
- // ─── Detectores de dados SSR ───────────────────────────────────────────────
3
- //
4
- // Frameworks modernos embutem dados no HTML inicial para hidratação no cliente.
5
- // Extrair esses dados evita a necessidade de browser em ~60-70% dos sites.
6
- //
7
- // Ordem: do mais específico para o mais genérico.
8
- // ──────────────────────────────────────────────────────────────────────────
9
- /** Parses raw as JSON after trimming it; returns null when raw is missing/blank or when JSON.parse throws. */
10
- function tryParse(raw) {
11
- if (!raw?.trim())
12
- return null;
13
- try {
14
- return JSON.parse(raw.trim());
15
- }
16
- catch {
17
- return null;
18
- }
19
- }
20
- /**
21
- * Tries to extract JSON payloads that SSR frameworks embed in the initial HTML for client hydration.
22
- * Detectors run in a fixed order; the first payload that parses to a truthy value wins and is
23
- * returned as an object with a type tag and the parsed data. Returns null when no detector succeeds.
24
- */
25
- export function extractSSRData(html) {
26
- const $ = load(html);
27
- // Next.js: <script id="__NEXT_DATA__" type="application/json"> (the selector below matches by id only)
28
- const nextRaw = $("#__NEXT_DATA__").text().trim();
29
- const nextData = tryParse(nextRaw);
30
- if (nextData)
31
- return { type: "next", data: nextData };
32
- // Angular Universal: <script id="ng-state" type="application/json">
33
- const ngRaw = $('script#ng-state[type="application/json"]').text().trim();
34
- const ngData = tryParse(ngRaw);
35
- if (ngData)
36
- return { type: "angular", data: ngData };
37
- // SvelteKit: <script type="application/json" data-sveltekit-fetched>
38
- // NOTE(review): text() joins the text of every matching script, so several such tags would yield concatenated JSON that fails to parse - confirm
39
- const svelteFetchedRaw = $('script[data-sveltekit-fetched]').text().trim();
40
- const svelteFetchedData = tryParse(svelteFetchedRaw);
41
- if (svelteFetchedData)
42
- return { type: "sveltekit", data: svelteFetchedData };
43
- // Nuxt 2/3: window.__NUXT__ = ...
44
- // Per the original author, Nuxt may emit plain JSON or the non-JSON devalue format (Nuxt 3)
45
- // Only a payload that JSON.parse accepts is returned; anything else falls through to the next detector
46
- const nuxtMatch = html.match(/window\.__NUXT__\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/);
47
- if (nuxtMatch?.[1]) {
48
- const nuxtData = tryParse(nuxtMatch[1]);
49
- if (nuxtData)
50
- return { type: "nuxt", data: nuxtData };
51
- }
52
- // Alternative Nuxt 3 form: a single-quoted, URI-encoded string assigned to window.__nuxt_state__
53
- const nuxt3Match = html.match(/window\.__nuxt_state__\s*=\s*'([^']+)'/);
54
- if (nuxt3Match?.[1]) {
55
- try {
56
- const decoded = decodeURIComponent(nuxt3Match[1]);
57
- const nuxt3Data = tryParse(decoded);
58
- if (nuxt3Data)
59
- return { type: "nuxt", data: nuxt3Data };
60
- }
61
- catch { } // decodeURIComponent throws on malformed escapes; deliberately ignored so later detectors still run
62
- }
63
- // Gatsby: window.___gatsby or window.___GATSBY
64
- const gatsbyMatch = html.match(/window\.___(?:gatsby|GATSBY)\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/);
65
- if (gatsbyMatch?.[1]) {
66
- const gatsbyData = tryParse(gatsbyMatch[1]);
67
- if (gatsbyData)
68
- return { type: "gatsby", data: gatsbyData };
69
- }
70
- // Remix / React Router v7: window.__remixContext or window.__remixRouterManifest
71
- const remixMatch = html.match(/window\.__remix(?:Context|RouterManifest)\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/);
72
- if (remixMatch?.[1]) {
73
- const remixData = tryParse(remixMatch[1]);
74
- if (remixData)
75
- return { type: "remix", data: remixData };
76
- }
77
- // TanStack Router / Start: window.__TSR_DEHYDRATED__ and the two sibling globals in the pattern below
78
- const tanstackMatch = html.match(/window\.__(?:TSR_DEHYDRATED|TANSTACK_ROUTER_CONTEXT|TRT_DEHYDRATED)__\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/);
79
- if (tanstackMatch?.[1]) {
80
- const tsrData = tryParse(tanstackMatch[1]);
81
- if (tsrData)
82
- return { type: "tanstack", data: tsrData };
83
- }
84
- // Vue SSR: window.__VUE_SSR_CONTEXT__ / window.__VUE_STORE__ / window.__pinia
85
- const vueMatch = html.match(/window\.__(?:VUE_SSR_CONTEXT__|VUE_STORE__|pinia)\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/);
86
- if (vueMatch?.[1]) {
87
- const vueData = tryParse(vueMatch[1]);
88
- if (vueData)
89
- return { type: "vue", data: vueData };
90
- }
91
- // Legacy SvelteKit: window.__SVELTEKIT__ (also lower-case; the second trailing underscore is optional)
92
- const svelteLegacyMatch = html.match(/window\.__(?:SVELTEKIT|sveltekit)__?\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/);
93
- if (svelteLegacyMatch?.[1]) {
94
- const svelteData = tryParse(svelteLegacyMatch[1]);
95
- if (svelteData)
96
- return { type: "sveltekit", data: svelteData };
97
- }
98
- // Generic: window.__INITIAL_STATE__ / __APP_STATE__ / __REDUX_STATE__ and the other names in the pattern below
99
- // Intended by the original author to cover Redux, MobX, Zustand and any manually serialized store
100
- const genericMatch = html.match(/window\.__(?:INITIAL_STATE|APP_STATE|REDUX_STATE|STORE_STATE|DATA|STATE|PROPS)__\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/);
101
- if (genericMatch?.[1]) {
102
- const genericData = tryParse(genericMatch[1]);
103
- if (genericData)
104
- return { type: "generic", data: genericData };
105
- }
106
- return null;
107
- }
108
- /**
109
- * Heuristically decides whether the HTML already carries enough readable content without running JavaScript.
110
- *
111
- * Returns false when:
112
- * - the whitespace-collapsed body text (after removing script/style/noscript/iframe/svg/img) is under 200 chars, or
113
- * - any anti-bot / challenge / loading-screen pattern matches AND that text is under 2,000 chars.
114
- * Returns true in every other case, including long pages that happen to contain one of the phrases.
115
- */
116
- export function hasEnoughContent(html) {
117
- const $ = load(html);
118
- // Drop elements that do not produce readable text; noscript is removed too, so fallback messages inside it are never matched below
119
- $("script, style, noscript, iframe, svg, img").remove();
120
- const bodyText = $("body").text().replace(/\s+/g, " ").trim();
121
- // Basic heuristic: very short text means an SPA without SSR or an empty page
122
- if (bodyText.length < 200)
123
- return false;
124
- // Anti-bot / challenge page patterns
125
- // Each entry is a phrase the original author associates with a service blocking the request.
126
- const antiBotPatterns = [
127
- // Cloudflare (most common)
128
- /just a moment/i,
129
- /checking your browser/i,
130
- /ddos protection by cloudflare/i,
131
- /ray id:/i, // Cloudflare request identifier
132
- // DDoS-Guard
133
- /ddos-guard/i,
134
- // Imperva / Incapsula
135
- /incapsula incident id/i,
136
- /powered by imperva/i,
137
- // DataDome
138
- /datadome/i,
139
- // hCaptcha / reCAPTCHA challenges
140
- /complete the security check/i,
141
- /prove you are human/i,
142
- /please complete the captcha/i,
143
- // Loading screens / generic SPA shell
144
- /please wait/i,
145
- /enable javascript/i, // already covers the two longer enable-javascript phrases listed below
146
- /you need to enable javascript/i,
147
- /javascript is required/i,
148
- /javascript is disabled/i,
149
- /please enable javascript/i,
150
- // Generic
151
- /access denied/i,
152
- /403 forbidden/i,
153
- /bot detected/i,
154
- ];
155
- const lowerText = bodyText.toLowerCase(); // redundant with the i flag on every pattern, but harmless
156
- const isAntiBot = antiBotPatterns.some((p) => p.test(lowerText));
157
- // A match only counts on short pages (under 2,000 chars), so long articles mentioning these phrases still pass
158
- if (isAntiBot && bodyText.length < 2_000)
159
- return false;
160
- return true;
161
- }
162
- //# sourceMappingURL=ssr.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"ssr.js","sourceRoot":"","sources":["../../../src/scraper/extractors/ssr.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAG/B,8EAA8E;AAC9E,EAAE;AACF,gFAAgF;AAChF,2EAA2E;AAC3E,EAAE;AACF,kDAAkD;AAClD,6EAA6E;AAE7E,qEAAqE;AACrE,SAAS,QAAQ,CAAC,GAAW;IAC3B,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IAErB,yEAAyE;IACzE,MAAM,OAAO,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAClD,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IACnC,IAAI,QAAQ;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAEtD,yEAAyE;IACzE,MAAM,KAAK,GAAG,CAAC,CAAC,0CAA0C,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC1E,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,IAAI,MAAM;QAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAErD,0EAA0E;IAC1E,gFAAgF;IAChF,MAAM,gBAAgB,GAAG,CAAC,CAAC,gCAAgC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3E,MAAM,iBAAiB,GAAG,QAAQ,CAAC,gBAAgB,CAAC,CAAC;IACrD,IAAI,iBAAiB;QAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,iBAAiB,EAAE,CAAC;IAE7E,wEAAwE;IACxE,2EAA2E;IAC3E,uEAAuE;IACvE,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;IACxF,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACnB,MAAM,QAAQ,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,IAAI,QAAQ;YAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IACxD,CAAC;IAED,yEAAyE;IACzE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,wCAAwC,CAAC,CAAC;IACxE,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,kBAAkB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;YACpC,IAAI,SAAS;gBAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC1D,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;IAED,yEAAyE;IACzE,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAC5B,qEAAqE,CACtE,CAAC;IACF,IAAI,WAAW,EAAE,CAAC,CAAC,CAAC,EAA
E,CAAC;QACrB,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5C,IAAI,UAAU;YAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IAC9D,CAAC;IAED,yEAAyE;IACzE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAC3B,kFAAkF,CACnF,CAAC;IACF,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACpB,MAAM,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,IAAI,SAAS;YAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAC3D,CAAC;IAED,wEAAwE;IACxE,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAC9B,8GAA8G,CAC/G,CAAC;IACF,IAAI,aAAa,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3C,IAAI,OAAO;YAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IAC1D,CAAC;IAED,wEAAwE;IACxE,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CACzB,0FAA0F,CAC3F,CAAC;IACF,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAClB,MAAM,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACtC,IAAI,OAAO;YAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IACrD,CAAC;IAED,yEAAyE;IACzE,MAAM,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAClC,6EAA6E,CAC9E,CAAC;IACF,IAAI,iBAAiB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,IAAI,UAAU;YAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IACjE,CAAC;IAED,4EAA4E;IAC5E,sEAAsE;IACtE,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAC7B,yHAAyH,CAC1H,CAAC;IACF,IAAI,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACtB,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,IAAI,WAAW;YAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;IACjE,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IAErB,kDAAkD;IAClD,CAAC,CAAC,2CAA2C,CAAC,CAAC,MAAM,EAAE,CAAC;IAExD,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAE9D,qEAAqE;IACrE,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG;QAAE,OAAO,KAAK,CAAC;IAExC,wEAAwE;IACxE,+EAA+E;IAC/E,MAAM,eAAe,GAAa;QAChC,0BAA0B;QAC1B,gBAAgB;QAChB,wBAAwB;QACxB,gCAAgC;QAChC,UAAU,EAA8B,yBAAyB;QAEjE,aAAa;QA
Cb,aAAa;QAEb,sBAAsB;QACtB,wBAAwB;QACxB,qBAAqB;QAErB,WAAW;QACX,WAAW;QAEX,kCAAkC;QAClC,8BAA8B;QAC9B,sBAAsB;QACtB,8BAA8B;QAE9B,uCAAuC;QACvC,cAAc;QACd,oBAAoB;QACpB,gCAAgC;QAChC,yBAAyB;QACzB,yBAAyB;QACzB,2BAA2B;QAE3B,WAAW;QACX,gBAAgB;QAChB,gBAAgB;QAChB,eAAe;KAChB,CAAC;IAEF,MAAM,SAAS,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;IACzC,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAEjE,2DAA2D;IAC3D,IAAI,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,KAAK;QAAE,OAAO,KAAK,CAAC;IAEvD,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -1,5 +0,0 @@
1
- /**
2
- * Converts an HTML string into clean Markdown meant to be readable by humans and LLMs.
3
- */
4
- export declare function htmlToMarkdown(html: string): string;
5
- //# sourceMappingURL=to-markdown.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"to-markdown.d.ts","sourceRoot":"","sources":["../../../src/scraper/extractors/to-markdown.ts"],"names":[],"mappings":"AA6FA;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAwBnD"}
@@ -1,103 +0,0 @@
1
- // ─── Conversor HTML → Markdown ─────────────────────────────────────────────
2
- // Usa Turndown (CJS) + plugin GFM para tabelas pipe nativas
3
- import TurndownService from "turndown";
4
- import { tables, strikethrough } from "turndown-plugin-gfm";
5
- let _td = null; // module-level cache for the configured TurndownService; filled on the first getTurndown() call
6
- function getTurndown() { // returns the shared TurndownService, building and configuring it only once
7
- if (_td)
8
- return _td;
9
- _td = new TurndownService({
10
- headingStyle: "atx", // hash-prefixed headings instead of underlined ones
11
- bulletListMarker: "-",
12
- codeBlockStyle: "fenced", // fenced code blocks instead of indented ones
13
- hr: "---",
14
- strongDelimiter: "**",
15
- emDelimiter: "_",
16
- linkStyle: "inlined",
17
- });
18
- // GFM plugin: pipe tables and strikethrough
19
- // Per the original author, this turns <table> into pipe-delimited rows instead of raw HTML
20
- _td.use(tables);
21
- _td.use(strikethrough);
22
- // Custom rules
23
- // Completely removes elements that do not yield useful content
24
- // Note from the original author: Turndown remove() takes tag names only, not CSS selectors
25
- _td.remove([
26
- "script",
27
- "style",
28
- "noscript",
29
- "iframe",
30
- "nav",
31
- "footer",
32
- "header",
33
- "button",
34
- "form",
35
- ]);
36
- // Drops a figcaption whose direct parent is a figure (avoids a stray caption line in the Markdown)
37
- _td.addRule("removeFigcaption", {
38
- filter(node) {
39
- return (node.nodeName === "FIGCAPTION" &&
40
- node.parentNode?.nodeName === "FIGURE");
41
- },
42
- replacement: () => "",
43
- });
44
- // Drops elements whose class attribute contains the word ad or ads (class matching needs a rule, not remove())
45
- _td.addRule("removeAds", {
46
- filter(node) {
47
- if (node.nodeType !== 1) // 1 = element node; other node types have no class attribute to inspect
48
- return false;
49
- const cls = node.getAttribute("class") ?? "";
50
- return /\bad\b|\bads\b/.test(cls); // NOTE(review): a hyphen counts as a word boundary, so classes like ad-free or no-ads also match
51
- },
52
- replacement: () => "",
53
- });
54
- // Images: emits Markdown image syntax from alt and src; images without src are dropped, missing alt falls back to the word image
55
- _td.addRule("images", {
56
- filter: "img",
57
- replacement(_content, node) {
58
- const img = node;
59
- const alt = img.getAttribute("alt")?.trim() ?? "";
60
- const src = img.getAttribute("src") ?? "";
61
- if (!src)
62
- return "";
63
- return alt ? `![${alt}](${src})` : `![image](${src})`;
64
- },
65
- });
66
- // Links: unwraps anchors whose href is missing, empty, a bare hash, or starts with javascript:
67
- _td.addRule("cleanLinks", {
68
- filter(node) {
69
- return (node.nodeName === "A" &&
70
- (!node.getAttribute("href") ||
71
- node.getAttribute("href") === "#" ||
72
- node.getAttribute("href")?.startsWith("javascript:") === true));
73
- },
74
- replacement(content) {
75
- return content; // keeps only the anchor text, without the link
76
- },
77
- });
78
- return _td;
79
- }
80
- /**
81
- * Converts HTML into clean Markdown via the shared Turndown instance, then tidies whitespace. Blank input returns an empty string.
82
- */
83
- export function htmlToMarkdown(html) {
84
- if (!html.trim())
85
- return "";
86
- const td = getTurndown();
87
- let markdown = td.turndown(html);
88
- // Post-conversion cleanup
89
- // Drops lines that contain only whitespace; truly empty lines are kept (punctuation-only lines are NOT removed)
90
- markdown = markdown
91
- .split("\n")
92
- .filter((line) => line.trim().length > 0 || line === "")
93
- .join("\n");
94
- // Collapses runs of three or more newlines into exactly two, i.e. at most one blank line between blocks
95
- markdown = markdown.replace(/\n{3,}/g, "\n\n");
96
- // Strips trailing whitespace from every line
97
- markdown = markdown
98
- .split("\n")
99
- .map((l) => l.trimEnd())
100
- .join("\n");
101
- return markdown.trim();
102
- }
103
- //# sourceMappingURL=to-markdown.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"to-markdown.js","sourceRoot":"","sources":["../../../src/scraper/extractors/to-markdown.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4DAA4D;AAE5D,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAE5D,IAAI,GAAG,GAA2B,IAAI,CAAC;AAEvC,SAAS,WAAW;IAClB,IAAI,GAAG;QAAE,OAAO,GAAG,CAAC;IAEpB,GAAG,GAAG,IAAI,eAAe,CAAC;QACxB,YAAY,EAAE,KAAK,EAAQ,gCAAgC;QAC3D,gBAAgB,EAAE,GAAG;QACrB,cAAc,EAAE,QAAQ,EAAG,iCAAiC;QAC5D,EAAE,EAAE,KAAK;QACT,eAAe,EAAE,IAAI;QACrB,WAAW,EAAE,GAAG;QAChB,SAAS,EAAE,SAAS;KACrB,CAAC,CAAC;IAEH,2EAA2E;IAC3E,0DAA0D;IAC1D,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;IAEvB,4EAA4E;IAE5E,6DAA6D;IAC7D,qEAAqE;IACrE,GAAG,CAAC,MAAM,CAAC;QACT,QAAQ;QACR,OAAO;QACP,UAAU;QACV,QAAQ;QACR,KAAK;QACL,QAAQ;QACR,QAAQ;QACR,QAAQ;QACR,MAAM;KACP,CAAC,CAAC;IAEH,wEAAwE;IACxE,GAAG,CAAC,OAAO,CAAC,kBAAkB,EAAE;QAC9B,MAAM,CAAC,IAAI;YACT,OAAO,CACL,IAAI,CAAC,QAAQ,KAAK,YAAY;gBAC9B,IAAI,CAAC,UAAU,EAAE,QAAQ,KAAK,QAAQ,CACvC,CAAC;QACJ,CAAC;QACD,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;KACtB,CAAC,CAAC;IAEH,8EAA8E;IAC9E,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE;QACvB,MAAM,CAAC,IAAI;YACT,IAAI,IAAI,CAAC,QAAQ,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YACtC,MAAM,GAAG,GAAI,IAAgB,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAC1D,OAAO,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpC,CAAC;QACD,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;KACtB,CAAC,CAAC;IAEH,0CAA0C;IAC1C,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE;QACpB,MAAM,EAAE,KAAK;QACb,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,GAAG,GAAG,IAAwB,CAAC;YACrC,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAClD,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YAC1C,IAAI,CAAC,GAAG;gBAAE,OAAO,EAAE,CAAC;YACpB,OAAO,GAAG,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,YAAY,GAAG,GAAG,CAAC;QACxD,CAAC;KACF,CAAC,CAAC;IAEH,2CAA2C;IAC3C,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE;QACxB,MAAM,CAAC,IAAI;YACT,OAAO,CACL,IAAI,CAAC,QAAQ,KAAK,GAAG;gBACrB,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC;oBACzB,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,KAAK,GAAG;oBACj
C,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,UAAU,CAAC,aAAa,CAAC,KAAK,IAAI,CAAC,CACjE,CAAC;QACJ,CAAC;QACD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC,CAAC,oCAAoC;QACtD,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC;IAE5B,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,IAAI,QAAQ,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAEjC,0EAA0E;IAE1E,6CAA6C;IAC7C,QAAQ,GAAG,QAAQ;SAChB,KAAK,CAAC,IAAI,CAAC;SACX,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,KAAK,EAAE,CAAC;SACvD,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,+CAA+C;IAC/C,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,0BAA0B;IAC1B,QAAQ,GAAG,QAAQ;SAChB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACvB,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC"}
@@ -1,35 +0,0 @@
1
- import type { MapOptions, MapResult, ScrapeOptions, ScrapeResult, ScraperConfig } from "./types.js";
2
- export declare class Scraper {
3
- private readonly fullConfig;
4
- private readonly tier1;
5
- private readonly tier2;
6
- private readonly tier3;
7
- private readonly config;
8
- constructor(fullConfig?: ScraperConfig);
9
- scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
10
- /**
11
- * Maps the links found on a page (URL + anchor text).
12
- * Reuses the tier cascade (HTTP → Stealth → Browser).
13
- *
14
- * @param url - Base URL whose links are extracted
15
- * @param options - Filters and limits
16
- */
17
- map(url: string, options?: MapOptions): Promise<MapResult>;
18
- /**
19
- * Scrapes multiple URLs in parallel with bounded concurrency.
20
- * Errors on individual URLs do not bring down the whole batch.
21
- *
22
- * @param urls - List of URLs to scrape
23
- * @param options - Options applied to every URL
24
- * @param concurrency - Maximum number of simultaneous scrapes. Default: 3
25
- */
26
- scrapeMany(urls: string[], options?: ScrapeOptions, concurrency?: number): Promise<ScrapeResult[]>;
27
- /**
28
- * Closes the Playwright browser (Tier 3).
29
- * Always call this when finished to avoid orphaned Chromium processes.
30
- */
31
- close(): Promise<void>;
32
- private log;
33
- }
34
- export type { ScrapeOptions, ScrapeResult, ScrapeTier, ContentFormat, SSRData, InterceptedAPI, ScraperConfig, TierRawResult, MapLink, MapOptions, MapResult, } from "./types.js";
35
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,UAAU,EACV,SAAS,EACT,aAAa,EACb,YAAY,EACZ,aAAa,EACd,MAAM,YAAY,CAAC;AAepB,qBAAa,OAAO;IAYN,OAAO,CAAC,QAAQ,CAAC,UAAU;IAXvC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAY;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAe;IACrC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAe;IACrC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAMrB;gBAE2B,UAAU,GAAE,aAAkB;IAerD,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;IAqG7E;;;;;;OAMG;IACG,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,UAAe,GAAG,OAAO,CAAC,SAAS,CAAC;IA0HpE;;;;;;;OAOG;IACG,UAAU,CACd,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,aAAkB,EAC3B,WAAW,SAAI,GACd,OAAO,CAAC,YAAY,EAAE,CAAC;IAkC1B;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,OAAO,CAAC,GAAG;CAKZ;AAGD,YAAY,EACV,aAAa,EACb,YAAY,EACZ,UAAU,EACV,aAAa,EACb,OAAO,EACP,cAAc,EACd,aAAa,EACb,aAAa,EACb,OAAO,EACP,UAAU,EACV,SAAS,GACV,MAAM,YAAY,CAAC"}
@@ -1,299 +0,0 @@
1
- import { validateUrl } from "../security/url-validator.js";
2
- import { Tier1HTTP } from "./tiers/tier1-http.js";
3
- import { Tier2Stealth } from "./tiers/tier2-stealth.js";
4
- import { Tier3Browser } from "./tiers/tier3-browser.js";
5
- import { extractLinksWithMetadata } from "./extractors/content.js";
6
- // ─── Scraper ───────────────────────────────────────────────────────────────
7
- //
8
- // Scraper de alta qualidade com fallback automático em 3 tiers:
9
- //
10
- // Tier 1 → HTTP puro (got-scraping) (~100-500ms, sem browser)
11
- // ↓ bloqueado ou conteúdo insuficiente (SPA, anti-bot básico)
12
- // Tier 2 → HTTP Stealth (got-scraping) (~200-800ms, TLS fingerprint)
13
- // ↓ ainda bloqueado ou SPA sem SSR
14
- // Tier 3 → Playwright Chromium + stealth (~2-10s, browser completo)
15
- //
16
- // Anti-SSRF integrado: todas as URLs são validadas antes do scrape.
17
- // ──────────────────────────────────────────────────────────────────────────
18
- export class Scraper {
19
- fullConfig;
20
- tier1;
21
- tier2;
22
- tier3;
23
- config;
24
- constructor(fullConfig = {}) { // builds one instance of each tier up front and snapshots the scraper-level settings
25
- this.fullConfig = fullConfig;
26
- this.tier1 = new Tier1HTTP();
27
- this.tier2 = new Tier2Stealth();
28
- this.tier3 = new Tier3Browser(fullConfig.browserConfig); // only the browser tier receives configuration
29
- this.config = {
30
- timeout: fullConfig.timeout ?? 30_000, // default timeout is 30000 (presumably milliseconds - confirm against the tiers)
31
- verbose: fullConfig.verbose ?? false,
32
- forceTier: fullConfig.forceTier,
33
- allowedDomains: fullConfig.allowedDomains,
34
- blockedDomains: fullConfig.blockedDomains,
35
- };
36
- }
37
- // Scrapes a single URL: validates it, honours a forced tier if one is set, otherwise cascades Tier 1 → Tier 2 → Tier 3
38
- async scrape(url, options = {}) {
39
- // Anti-SSRF validation before any request; it sits outside every try block, so a validation failure rejects this call
40
- const validUrl = await validateUrl(url, {
41
- allowedDomains: this.config.allowedDomains,
42
- blockedDomains: this.config.blockedDomains,
43
- });
44
- const mergedOptions = {
45
- timeout: this.config.timeout,
46
- ...options, // per-call options override the instance timeout
47
- };
48
- // Forced tier: skips the automatic cascade; the tier result or rejection is passed through without the consolidated error below
49
- const forced = options.forceTier ?? this.config.forceTier;
50
- if (forced === "browser") {
51
- this.log("🌐 Tier 3 (Playwright) forçado");
52
- return this.tier3.scrape(validUrl, mergedOptions);
53
- }
54
- if (forced === "stealth") {
55
- this.log("🥷 Tier 2 (Stealth HTTP) forçado");
56
- return this.tier2.scrape(validUrl, mergedOptions);
57
- }
58
- if (forced === "http") {
59
- this.log("🔗 Tier 1 (HTTP) forçado");
60
- return this.tier1.scrape(validUrl, mergedOptions);
61
- }
62
- // Automatic mode: Tier 1 → Tier 2 → Tier 3
63
- // Tier 1: plain HTTP. NOTE(review): the original comment said native fetch while the file header says got-scraping - confirm which
64
- let tier1Error = null;
65
- try {
66
- const result = await this.tier1.scrape(validUrl, mergedOptions);
67
- const content = result.markdown ?? result.text ?? "";
68
- // Fewer than 200 chars and no SSR data is treated as an effectively empty page
69
- // (per the original author: SPA without SSR, Cloudflare challenge, silent block, etc.)
70
- if (content.length < 200 && !result.ssrData) {
71
- tier1Error = "Conteúdo insuficiente após HTTP — provavelmente SPA ou bloqueio silencioso";
72
- this.log(`⚠ Tier 1: ${tier1Error}`);
73
- }
74
- else {
75
- this.log(`✓ Tier 1 (HTTP) — ${result.durationMs}ms`);
76
- return result;
77
- }
78
- }
79
- catch (err) {
80
- tier1Error = err instanceof Error ? err.message : String(err);
81
- this.log(`⚠ Tier 1 falhou: ${tier1Error}`);
82
- }
83
- // Tier 2: stealth HTTP (described by the original author as got-scraping with TLS fingerprinting)
84
- let tier2Error = null;
85
- this.log("🥷 Ativando fallback → Tier 2 (Stealth HTTP)...");
86
- try {
87
- const result = await this.tier2.scrape(validUrl, mergedOptions);
88
- const content = result.markdown ?? result.text ?? "";
89
- // Same 200-char / no-SSR-data check as Tier 1: the page may still be an SPA that needs a browser
90
- if (content.length < 200 && !result.ssrData) {
91
- tier2Error = "Conteúdo insuficiente após Stealth — SPA que precisa de browser";
92
- this.log(`⚠ Tier 2: ${tier2Error}`);
93
- }
94
- else {
95
- this.log(`✓ Tier 2 (Stealth) — ${result.durationMs}ms`);
96
- return result;
97
- }
98
- }
99
- catch (err) {
100
- tier2Error = err instanceof Error ? err.message : String(err);
101
- this.log(`⚠ Tier 2 (Stealth) falhou: ${tier2Error}`);
102
- }
103
- // Tier 3: Playwright Chromium + stealth (final fallback); its result is returned without a content-length check
104
- this.log("🌐 Ativando fallback final → Tier 3 (Playwright)...");
105
- try {
106
- const result = await this.tier3.scrape(validUrl, mergedOptions);
107
- this.log(`✓ Tier 3 (Playwright) — ${result.durationMs}ms`);
108
- return result;
109
- }
110
- catch (err) {
111
- const tier3Error = err instanceof Error ? err.message : String(err);
112
- this.log(`✗ Tier 3 (Playwright) falhou: ${tier3Error}`);
113
- // Every tier failed: resolve (not reject) with a placeholder result carrying the consolidated error text
114
- return {
115
- url: validUrl,
116
- statusCode: 0,
117
- title: "",
118
- tier: "browser",
119
- durationMs: 0,
120
- error: [
121
- "Todos os tiers falharam:",
122
- ` Tier 1 (HTTP): ${tier1Error ?? "não tentado"}`,
123
- ` Tier 2 (Stealth): ${tier2Error ?? "não tentado"}`,
124
- ` Tier 3 (Browser): ${tier3Error}`,
125
- ].join("\n"),
126
- };
127
- }
128
- }
129
- // ── Map: descobrir URLs de um site ──────────────────────────────────────
130
- /**
131
- * Mapeia links de uma página (URL + texto do âncora).
132
- * Reutiliza a cascata de tiers (HTTP → Stealth → Browser).
133
- *
134
- * @param url - URL base para extrair links
135
- * @param options - Filtros e limites
136
- */
137
- async map(url, options = {}) {
138
- const startTime = Date.now();
139
- const limit = options.limit ?? 500;
140
- const includeSubdomains = options.includeSubdomains ?? true;
141
- const ignoreQueryParameters = options.ignoreQueryParameters ?? true;
142
- const searchTerm = options.search?.toLowerCase().trim();
143
- let scrapeResult;
144
- try {
145
- scrapeResult = await this.scrape(url, {
146
- getRawHtml: true,
147
- forceTier: options.forceTier,
148
- });
149
- }
150
- catch (err) {
151
- const errorMsg = err instanceof Error ? err.message : String(err);
152
- return {
153
- url,
154
- links: [],
155
- tier: "http",
156
- durationMs: Date.now() - startTime,
157
- error: `Falha ao carregar a página: ${errorMsg}`,
158
- };
159
- }
160
- if (scrapeResult.error) {
161
- return {
162
- url: scrapeResult.url,
163
- links: [],
164
- tier: scrapeResult.tier,
165
- durationMs: scrapeResult.durationMs,
166
- error: scrapeResult.error,
167
- };
168
- }
169
- const rawHtml = scrapeResult.rawHtml ?? scrapeResult.html ?? "";
170
- if (!rawHtml) {
171
- return {
172
- url: scrapeResult.url,
173
- links: [],
174
- tier: scrapeResult.tier,
175
- durationMs: scrapeResult.durationMs,
176
- error: "HTML não disponível para extração de links",
177
- };
178
- }
179
- const baseUrl = scrapeResult.url;
180
- const baseHostname = new URL(baseUrl).hostname;
181
- const baseDomain = baseHostname.replace(/^www\./, "");
182
- let links = extractLinksWithMetadata(rawHtml, baseUrl);
183
- // Filtrar por mesmo domínio
184
- links = links.filter((link) => {
185
- try {
186
- const linkHost = new URL(link.url).hostname.replace(/^www\./, "");
187
- if (includeSubdomains) {
188
- return linkHost === baseDomain || linkHost.endsWith(`.${baseDomain}`);
189
- }
190
- return linkHost === baseDomain;
191
- }
192
- catch {
193
- return false;
194
- }
195
- });
196
- // Normalizar URL (remover query string) e deduplicar
197
- const normalizeUrl = (href) => {
198
- if (!ignoreQueryParameters)
199
- return href;
200
- try {
201
- const u = new URL(href);
202
- u.search = "";
203
- return u.href;
204
- }
205
- catch {
206
- return href;
207
- }
208
- };
209
- const seen = new Set();
210
- const deduped = [];
211
- for (const link of links) {
212
- const key = ignoreQueryParameters ? normalizeUrl(link.url) : link.url;
213
- if (seen.has(key))
214
- continue;
215
- seen.add(key);
216
- deduped.push({
217
- url: link.url,
218
- title: link.title || undefined,
219
- });
220
- }
221
- links = deduped;
222
- // Filtrar e ordenar por search (relevância simples)
223
- if (searchTerm) {
224
- const escaped = searchTerm.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
225
- const regex = new RegExp(escaped, "gi");
226
- const scored = links
227
- .map((link) => {
228
- const urlLower = link.url.toLowerCase();
229
- const titleLower = (link.title ?? "").toLowerCase();
230
- const urlMatches = (urlLower.match(regex) ?? []).length;
231
- const titleMatches = (titleLower.match(regex) ?? []).length;
232
- const score = urlMatches * 2 + titleMatches * 3; // title tem mais peso
233
- return { link, score };
234
- })
235
- .filter(({ score }) => score > 0)
236
- .sort((a, b) => b.score - a.score)
237
- .map(({ link }) => link);
238
- links = scored;
239
- }
240
- const result = {
241
- url: baseUrl,
242
- links: links.slice(0, limit),
243
- tier: scrapeResult.tier,
244
- durationMs: Date.now() - startTime,
245
- };
246
- this.log(`✓ Map: ${result.links.length} links (${result.tier})`);
247
- return result;
248
- }
249
- // ── Scrape em lote com concorrência controlada ─────────────────────────
250
- /**
251
- * Scrapia múltiplas URLs em paralelo com concorrência limitada.
252
- * Erros em URLs individuais não derrubam o lote inteiro.
253
- *
254
- * @param urls - Lista de URLs a scrapeiar
255
- * @param options - Opções aplicadas a todas as URLs
256
- * @param concurrency - Máximo de scrapes simultâneos. Default: 3
257
- */
258
- async scrapeMany(urls, options = {}, concurrency = 3) {
259
- const results = [];
260
- const queue = [...urls];
261
- while (queue.length > 0) {
262
- const batch = queue.splice(0, concurrency);
263
- const settled = await Promise.allSettled(batch.map((u) => this.scrape(u, options)));
264
- for (const outcome of settled) {
265
- if (outcome.status === "fulfilled") {
266
- results.push(outcome.value);
267
- }
268
- else {
269
- results.push({
270
- url: "unknown",
271
- statusCode: 0,
272
- title: "",
273
- tier: "http",
274
- durationMs: 0,
275
- error: outcome.reason instanceof Error
276
- ? outcome.reason.message
277
- : String(outcome.reason),
278
- });
279
- }
280
- }
281
- }
282
- return results;
283
- }
284
- // ── Encerrar recursos ──────────────────────────────────────────────────
285
- /**
286
- * Fecha o browser Playwright (Tier 3).
287
- * Sempre chamar ao terminar para evitar processos Chromium órfãos.
288
- */
289
- async close() {
290
- await this.tier3.close();
291
- }
292
- // ── Helpers ────────────────────────────────────────────────────────────
293
- log(msg) {
294
- if (this.config.verbose) {
295
- console.log(`[Scraper] ${msg}`);
296
- }
297
- }
298
- }
299
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,8BAA8B,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAUnE,8EAA8E;AAC9E,EAAE;AACF,gEAAgE;AAChE,EAAE;AACF,wEAAwE;AACxE,2EAA2E;AAC3E,4EAA4E;AAC5E,gDAAgD;AAChD,yEAAyE;AACzE,EAAE;AACF,oEAAoE;AACpE,6EAA6E;AAE7E,MAAM,OAAO,OAAO;IAYW;IAXZ,KAAK,CAAY;IACjB,KAAK,CAAe;IACpB,KAAK,CAAe;IACpB,MAAM,CAMrB;IAEF,YAA6B,aAA4B,EAAE;QAA9B,eAAU,GAAV,UAAU,CAAoB;QACzD,IAAI,CAAC,KAAK,GAAG,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,EAAE,CAAC;QAChC,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QACxD,IAAI,CAAC,MAAM,GAAG;YACZ,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM;YACrC,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,KAAK;YACpC,SAAS,EAAE,UAAU,CAAC,SAAS;YAC/B,cAAc,EAAE,UAAU,CAAC,cAAc;YACzC,cAAc,EAAE,UAAU,CAAC,cAAc;SAC1C,CAAC;IACJ,CAAC;IAED,0EAA0E;IAE1E,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,mDAAmD;QACnD,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE;YACtC,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;YAC1C,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;SAC3C,CAAC,CAAC;QAEH,MAAM,aAAa,GAAkB;YACnC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,GAAG,OAAO;SACX,CAAC;QAEF,sEAAsE;QACtE,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;QAE1D,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,IAAI,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,IAAI,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;YAC7C,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YACtB,IAAI,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;YACrC,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,oEAAoE;QAEpE,qEAAqE;QACrE,IAAI,UAAU,GAAkB,IAAI,CAAC;QACrC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;
YAChE,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAErD,mEAAmE;YACnE,iEAAiE;YACjE,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC5C,UAAU,GAAG,4EAA4E,CAAC;gBAC1F,IAAI,CAAC,GAAG,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,qBAAqB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;gBACrD,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC;QAC9C,CAAC;QAED,qEAAqE;QACrE,IAAI,UAAU,GAAkB,IAAI,CAAC;QACrC,IAAI,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;QAC5D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAErD,6DAA6D;YAC7D,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC5C,UAAU,GAAG,iEAAiE,CAAC;gBAC/E,IAAI,CAAC,GAAG,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;gBACxD,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,+BAA+B,UAAU,EAAE,CAAC,CAAC;QACxD,CAAC;QAED,qEAAqE;QACrE,IAAI,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;QAChE,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,IAAI,CAAC,GAAG,CAAC,2BAA2B,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;YAC3D,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACpE,IAAI,CAAC,GAAG,CAAC,iCAAiC,UAAU,EAAE,CAAC,CAAC;YAExD,mEAAmE;YACnE,OAAO;gBACL,GAAG,EAAE,QAAQ;gBACb,UAAU,EAAE,CAAC;gBACb,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,SAAS;gBACf,UAAU,EAAE,CAAC;gBACb,KAAK,EAAE;oBACL,0BAA0B;oBAC1B,uBAAuB,UAAU,IAAI,aAAa,EAAE;oBACpD,uBAAuB,UAAU,IAAI,aAAa,EAAE;oBACpD,uBAAuB,UAAU,EAAE;iBACpC,CAAC,IAAI,CA
AC,IAAI,CAAC;aACb,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAE3E;;;;;;OAMG;IACH,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,UAAsB,EAAE;QAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,GAAG,CAAC;QACnC,MAAM,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC;QAC5D,MAAM,qBAAqB,GAAG,OAAO,CAAC,qBAAqB,IAAI,IAAI,CAAC;QACpE,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAExD,IAAI,YAA0B,CAAC;QAE/B,IAAI,CAAC;YACH,YAAY,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE;gBACpC,UAAU,EAAE,IAAI;gBAChB,SAAS,EAAE,OAAO,CAAC,SAAS;aAC7B,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAClE,OAAO;gBACL,GAAG;gBACH,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,MAAM;gBACZ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAClC,KAAK,EAAE,+BAA+B,QAAQ,EAAE;aACjD,CAAC;QACJ,CAAC;QAED,IAAI,YAAY,CAAC,KAAK,EAAE,CAAC;YACvB,OAAO;gBACL,GAAG,EAAE,YAAY,CAAC,GAAG;gBACrB,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,YAAY,CAAC,IAAI;gBACvB,UAAU,EAAE,YAAY,CAAC,UAAU;gBACnC,KAAK,EAAE,YAAY,CAAC,KAAK;aAC1B,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,IAAI,YAAY,CAAC,IAAI,IAAI,EAAE,CAAC;QAChE,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO;gBACL,GAAG,EAAE,YAAY,CAAC,GAAG;gBACrB,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,YAAY,CAAC,IAAI;gBACvB,UAAU,EAAE,YAAY,CAAC,UAAU;gBACnC,KAAK,EAAE,4CAA4C;aACpD,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,GAAG,CAAC;QACjC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC;QAC/C,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAEtD,IAAI,KAAK,GAAG,wBAAwB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAEvD,4BAA4B;QAC5B,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YAC5B,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;gBAClE,IAAI,iBAAiB,EAAE,CAAC;oBACtB,OAAO,QAAQ,KAAK,UAAU,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,UAAU,EAAE,CAAC,CAAC;gBACxE,CAAC;gBACD,OAAO,QAAQ,KAAK,UAAU,CAAC;YACjC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,qDAAqD;Q
ACrD,MAAM,YAAY,GAAG,CAAC,IAAY,EAAU,EAAE;YAC5C,IAAI,CAAC,qBAAqB;gBAAE,OAAO,IAAI,CAAC;YACxC,IAAI,CAAC;gBACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC,CAAC,MAAM,GAAG,EAAE,CAAC;gBACd,OAAO,CAAC,CAAC,IAAI,CAAC;YAChB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,MAAM,OAAO,GAAc,EAAE,CAAC;QAC9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,qBAAqB,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YACtE,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YAC5B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,OAAO,CAAC,IAAI,CAAC;gBACX,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;aAC/B,CAAC,CAAC;QACL,CAAC;QACD,KAAK,GAAG,OAAO,CAAC;QAEhB,oDAAoD;QACpD,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;YAClE,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YACxC,MAAM,MAAM,GAAG,KAAK;iBACjB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;gBACxC,MAAM,UAAU,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACpD,MAAM,UAAU,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBACxD,MAAM,YAAY,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBAC5D,MAAM,KAAK,GAAG,UAAU,GAAG,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,sBAAsB;gBACvE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;YACzB,CAAC,CAAC;iBACD,MAAM,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,KAAK,GAAG,CAAC,CAAC;iBAChC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;iBACjC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;YAC3B,KAAK,GAAG,MAAM,CAAC;QACjB,CAAC;QAED,MAAM,MAAM,GAAc;YACxB,GAAG,EAAE,OAAO;YACZ,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;YAC5B,IAAI,EAAE,YAAY,CAAC,IAAI;YACvB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;SACnC,CAAC;QAEF,IAAI,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,KAAK,CAAC,MAAM,WAAW,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;QACjE,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,0
EAA0E;IAE1E;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CACd,IAAc,EACd,UAAyB,EAAE,EAC3B,WAAW,GAAG,CAAC;QAEf,MAAM,OAAO,GAAmB,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;QAExB,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;YAC3C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAC1C,CAAC;YAEF,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;gBAC9B,IAAI,OAAO,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;oBACnC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAC9B,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,IAAI,CAAC;wBACX,GAAG,EAAE,SAAS;wBACd,UAAU,EAAE,CAAC;wBACb,KAAK,EAAE,EAAE;wBACT,IAAI,EAAE,MAAM;wBACZ,UAAU,EAAE,CAAC;wBACb,KAAK,EACH,OAAO,CAAC,MAAM,YAAY,KAAK;4BAC7B,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO;4BACxB,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;qBAC7B,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,0EAA0E;IAE1E;;;OAGG;IACH,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED,0EAA0E;IAElE,GAAG,CAAC,GAAW;QACrB,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,aAAa,GAAG,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;CACF"}
@@ -1,5 +0,0 @@
1
- import type { ScrapeOptions, ScrapeResult } from "../types.js";
2
- export declare class Tier1HTTP {
3
- scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
4
- }
5
- //# sourceMappingURL=tier1-http.d.ts.map