auspex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/LICENSE +21 -0
  2. package/dist/agent/actions.d.ts +5 -0
  3. package/dist/agent/actions.d.ts.map +1 -0
  4. package/dist/agent/actions.js +26 -0
  5. package/dist/agent/actions.js.map +1 -0
  6. package/dist/agent/agent.d.ts +12 -0
  7. package/dist/agent/agent.d.ts.map +1 -0
  8. package/dist/agent/agent.js +147 -0
  9. package/dist/agent/agent.js.map +1 -0
  10. package/dist/agent/loop.d.ts +6 -0
  11. package/dist/agent/loop.d.ts.map +1 -0
  12. package/dist/agent/loop.js +165 -0
  13. package/dist/agent/loop.js.map +1 -0
  14. package/dist/agent/report.d.ts +3 -0
  15. package/dist/agent/report.d.ts.map +1 -0
  16. package/dist/agent/report.js +90 -0
  17. package/dist/agent/report.js.map +1 -0
  18. package/dist/browser/executor.d.ts +5 -0
  19. package/dist/browser/executor.d.ts.map +1 -0
  20. package/dist/browser/executor.js +33 -0
  21. package/dist/browser/executor.js.map +1 -0
  22. package/dist/browser/snapshot.d.ts +6 -0
  23. package/dist/browser/snapshot.d.ts.map +1 -0
  24. package/dist/browser/snapshot.js +145 -0
  25. package/dist/browser/snapshot.js.map +1 -0
  26. package/dist/config/defaults.d.ts +10 -0
  27. package/dist/config/defaults.d.ts.map +1 -0
  28. package/dist/config/defaults.js +10 -0
  29. package/dist/config/defaults.js.map +1 -0
  30. package/dist/config/schema.d.ts +59 -0
  31. package/dist/config/schema.d.ts.map +1 -0
  32. package/dist/config/schema.js +23 -0
  33. package/dist/config/schema.js.map +1 -0
  34. package/dist/index.d.ts +7 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +8 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/llm/client.d.ts +23 -0
  39. package/dist/llm/client.d.ts.map +1 -0
  40. package/dist/llm/client.js +51 -0
  41. package/dist/llm/client.js.map +1 -0
  42. package/dist/llm/prompt.d.ts +3 -0
  43. package/dist/llm/prompt.d.ts.map +1 -0
  44. package/dist/llm/prompt.js +36 -0
  45. package/dist/llm/prompt.js.map +1 -0
  46. package/dist/scraper/extractors/content.d.ts +22 -0
  47. package/dist/scraper/extractors/content.d.ts.map +1 -0
  48. package/dist/scraper/extractors/content.js +237 -0
  49. package/dist/scraper/extractors/content.js.map +1 -0
  50. package/dist/scraper/extractors/ssr.d.ts +17 -0
  51. package/dist/scraper/extractors/ssr.d.ts.map +1 -0
  52. package/dist/scraper/extractors/ssr.js +162 -0
  53. package/dist/scraper/extractors/ssr.js.map +1 -0
  54. package/dist/scraper/extractors/to-markdown.d.ts +5 -0
  55. package/dist/scraper/extractors/to-markdown.d.ts.map +1 -0
  56. package/dist/scraper/extractors/to-markdown.js +103 -0
  57. package/dist/scraper/extractors/to-markdown.js.map +1 -0
  58. package/dist/scraper/index.d.ts +27 -0
  59. package/dist/scraper/index.d.ts.map +1 -0
  60. package/dist/scraper/index.js +178 -0
  61. package/dist/scraper/index.js.map +1 -0
  62. package/dist/scraper/tiers/tier1-http.d.ts +5 -0
  63. package/dist/scraper/tiers/tier1-http.d.ts.map +1 -0
  64. package/dist/scraper/tiers/tier1-http.js +120 -0
  65. package/dist/scraper/tiers/tier1-http.js.map +1 -0
  66. package/dist/scraper/tiers/tier2-stealth.d.ts +5 -0
  67. package/dist/scraper/tiers/tier2-stealth.d.ts.map +1 -0
  68. package/dist/scraper/tiers/tier2-stealth.js +106 -0
  69. package/dist/scraper/tiers/tier2-stealth.js.map +1 -0
  70. package/dist/scraper/tiers/tier3-browser.d.ts +10 -0
  71. package/dist/scraper/tiers/tier3-browser.d.ts.map +1 -0
  72. package/dist/scraper/tiers/tier3-browser.js +504 -0
  73. package/dist/scraper/tiers/tier3-browser.js.map +1 -0
  74. package/dist/scraper/types.d.ts +130 -0
  75. package/dist/scraper/types.d.ts.map +1 -0
  76. package/dist/scraper/types.js +3 -0
  77. package/dist/scraper/types.js.map +1 -0
  78. package/dist/security/action-validator.d.ts +83 -0
  79. package/dist/security/action-validator.d.ts.map +1 -0
  80. package/dist/security/action-validator.js +36 -0
  81. package/dist/security/action-validator.js.map +1 -0
  82. package/dist/security/url-validator.d.ts +9 -0
  83. package/dist/security/url-validator.d.ts.map +1 -0
  84. package/dist/security/url-validator.js +69 -0
  85. package/dist/security/url-validator.js.map +1 -0
  86. package/dist/types.d.ts +95 -0
  87. package/dist/types.d.ts.map +1 -0
  88. package/dist/types.js +2 -0
  89. package/dist/types.js.map +1 -0
  90. package/package.json +54 -0
  91. package/readme.md +760 -0
@@ -0,0 +1,178 @@
1
+ import { validateUrl } from "../security/url-validator.js";
2
+ import { Tier1HTTP } from "./tiers/tier1-http.js";
3
+ import { Tier2Stealth } from "./tiers/tier2-stealth.js";
4
+ import { Tier3Browser } from "./tiers/tier3-browser.js";
5
+ // ─── Firecrawl ─────────────────────────────────────────────────────────────
6
+ //
7
+ // Scraper de alta qualidade com fallback automático em 3 tiers:
8
+ //
9
+ // Tier 1 → HTTP puro (fetch nativo) (~100-500ms, sem browser)
10
+ // ↓ bloqueado ou conteúdo insuficiente (SPA, anti-bot básico)
11
+ // Tier 2 → HTTP Stealth (got-scraping) (~200-800ms, TLS fingerprint)
12
+ // ↓ ainda bloqueado ou SPA sem SSR
13
+ // Tier 3 → Playwright Chromium + stealth (~2-10s, browser completo)
14
+ //
15
+ // Anti-SSRF integrado: todas as URLs são validadas antes do scrape.
16
+ // ──────────────────────────────────────────────────────────────────────────
17
+ export class Firecrawl {
18
+ fullConfig;
19
+ tier1;
20
+ tier2;
21
+ tier3;
22
+ config;
23
+ constructor(fullConfig = {}) {
24
+ this.fullConfig = fullConfig;
25
+ this.tier1 = new Tier1HTTP();
26
+ this.tier2 = new Tier2Stealth();
27
+ this.tier3 = new Tier3Browser(fullConfig.browserConfig);
28
+ this.config = {
29
+ timeout: fullConfig.timeout ?? 30_000,
30
+ verbose: fullConfig.verbose ?? false,
31
+ forceTier: fullConfig.forceTier,
32
+ allowedDomains: fullConfig.allowedDomains,
33
+ blockedDomains: fullConfig.blockedDomains,
34
+ };
35
+ }
36
+ // ── Scrape de uma única URL ────────────────────────────────────────────
37
+ async scrape(url, options = {}) {
38
+ // Validação anti-SSRF antes de qualquer requisição
39
+ const validUrl = await validateUrl(url, {
40
+ allowedDomains: this.config.allowedDomains,
41
+ blockedDomains: this.config.blockedDomains,
42
+ });
43
+ const mergedOptions = {
44
+ timeout: this.config.timeout,
45
+ ...options,
46
+ };
47
+ // ── Tier forçado: pula a cascata automática ────────────────────────
48
+ const forced = options.forceTier ?? this.config.forceTier;
49
+ if (forced === "browser") {
50
+ this.log("🌐 Tier 3 (Playwright) forçado");
51
+ return this.tier3.scrape(validUrl, mergedOptions);
52
+ }
53
+ if (forced === "stealth") {
54
+ this.log("🥷 Tier 2 (Stealth HTTP) forçado");
55
+ return this.tier2.scrape(validUrl, mergedOptions);
56
+ }
57
+ if (forced === "http") {
58
+ this.log("🔗 Tier 1 (HTTP) forçado");
59
+ return this.tier1.scrape(validUrl, mergedOptions);
60
+ }
61
+ // ── Modo automático: Tier 1 → Tier 2 → Tier 3 ────────────────────
62
+ // ── Tier 1: HTTP puro (fetch nativo, sem overhead de TLS) ─────────
63
+ let tier1Error = null;
64
+ try {
65
+ const result = await this.tier1.scrape(validUrl, mergedOptions);
66
+ const content = result.markdown ?? result.text ?? "";
67
+ // Menos de 200 chars sem dados SSR = página quase certamente vazia
68
+ // (SPA sem SSR, Cloudflare challenge, bloqueio silencioso, etc.)
69
+ if (content.length < 200 && !result.ssrData) {
70
+ tier1Error = "Conteúdo insuficiente após HTTP — provavelmente SPA ou bloqueio silencioso";
71
+ this.log(`⚠ Tier 1: ${tier1Error}`);
72
+ }
73
+ else {
74
+ this.log(`✓ Tier 1 (HTTP) — ${result.durationMs}ms`);
75
+ return result;
76
+ }
77
+ }
78
+ catch (err) {
79
+ tier1Error = err instanceof Error ? err.message : String(err);
80
+ this.log(`⚠ Tier 1 falhou: ${tier1Error}`);
81
+ }
82
+ // ── Tier 2: HTTP Stealth (got-scraping, TLS fingerprint) ──────────
83
+ let tier2Error = null;
84
+ this.log("🥷 Ativando fallback → Tier 2 (Stealth HTTP)...");
85
+ try {
86
+ const result = await this.tier2.scrape(validUrl, mergedOptions);
87
+ const content = result.markdown ?? result.text ?? "";
88
+ // Mesmo com TLS spoofing pode ser SPA que precisa de browser
89
+ if (content.length < 200 && !result.ssrData) {
90
+ tier2Error = "Conteúdo insuficiente após Stealth — SPA que precisa de browser";
91
+ this.log(`⚠ Tier 2: ${tier2Error}`);
92
+ }
93
+ else {
94
+ this.log(`✓ Tier 2 (Stealth) — ${result.durationMs}ms`);
95
+ return result;
96
+ }
97
+ }
98
+ catch (err) {
99
+ tier2Error = err instanceof Error ? err.message : String(err);
100
+ this.log(`⚠ Tier 2 (Stealth) falhou: ${tier2Error}`);
101
+ }
102
+ // ── Tier 3: Playwright Chromium + stealth (fallback final) ────────
103
+ this.log("🌐 Ativando fallback final → Tier 3 (Playwright)...");
104
+ try {
105
+ const result = await this.tier3.scrape(validUrl, mergedOptions);
106
+ this.log(`✓ Tier 3 (Playwright) — ${result.durationMs}ms`);
107
+ return result;
108
+ }
109
+ catch (err) {
110
+ const tier3Error = err instanceof Error ? err.message : String(err);
111
+ this.log(`✗ Tier 3 (Playwright) falhou: ${tier3Error}`);
112
+ // Todos os tiers falharam — retorna resultado com erro consolidado
113
+ return {
114
+ url: validUrl,
115
+ statusCode: 0,
116
+ title: "",
117
+ tier: "browser",
118
+ durationMs: 0,
119
+ error: [
120
+ "Todos os tiers falharam:",
121
+ ` Tier 1 (HTTP): ${tier1Error ?? "não tentado"}`,
122
+ ` Tier 2 (Stealth): ${tier2Error ?? "não tentado"}`,
123
+ ` Tier 3 (Browser): ${tier3Error}`,
124
+ ].join("\n"),
125
+ };
126
+ }
127
+ }
128
+ // ── Scrape em lote com concorrência controlada ─────────────────────────
129
+ /**
130
+ * Scrapia múltiplas URLs em paralelo com concorrência limitada.
131
+ * Erros em URLs individuais não derrubam o lote inteiro.
132
+ *
133
+ * @param urls - Lista de URLs a scrapeiar
134
+ * @param options - Opções aplicadas a todas as URLs
135
+ * @param concurrency - Máximo de scrapes simultâneos. Default: 3
136
+ */
137
+ async scrapeMany(urls, options = {}, concurrency = 3) {
138
+ const results = [];
139
+ const queue = [...urls];
140
+ while (queue.length > 0) {
141
+ const batch = queue.splice(0, concurrency);
142
+ const settled = await Promise.allSettled(batch.map((u) => this.scrape(u, options)));
143
+ for (const outcome of settled) {
144
+ if (outcome.status === "fulfilled") {
145
+ results.push(outcome.value);
146
+ }
147
+ else {
148
+ results.push({
149
+ url: "unknown",
150
+ statusCode: 0,
151
+ title: "",
152
+ tier: "http",
153
+ durationMs: 0,
154
+ error: outcome.reason instanceof Error
155
+ ? outcome.reason.message
156
+ : String(outcome.reason),
157
+ });
158
+ }
159
+ }
160
+ }
161
+ return results;
162
+ }
163
+ // ── Encerrar recursos ──────────────────────────────────────────────────
164
+ /**
165
+ * Fecha o browser Playwright (Tier 3).
166
+ * Sempre chamar ao terminar para evitar processos Chromium órfãos.
167
+ */
168
+ async close() {
169
+ await this.tier3.close();
170
+ }
171
+ // ── Helpers ────────────────────────────────────────────────────────────
172
+ log(msg) {
173
+ if (this.config.verbose) {
174
+ console.log(`[Firecrawl] ${msg}`);
175
+ }
176
+ }
177
+ }
178
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,8BAA8B,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAOxD,8EAA8E;AAC9E,EAAE;AACF,gEAAgE;AAChE,EAAE;AACF,wEAAwE;AACxE,2EAA2E;AAC3E,4EAA4E;AAC5E,gDAAgD;AAChD,yEAAyE;AACzE,EAAE;AACF,oEAAoE;AACpE,6EAA6E;AAE7E,MAAM,OAAO,SAAS;IAYS;IAXZ,KAAK,CAAY;IACjB,KAAK,CAAe;IACpB,KAAK,CAAe;IACpB,MAAM,CAMrB;IAEF,YAA6B,aAA8B,EAAE;QAAhC,eAAU,GAAV,UAAU,CAAsB;QAC3D,IAAI,CAAC,KAAK,GAAG,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,EAAE,CAAC;QAChC,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QACxD,IAAI,CAAC,MAAM,GAAG;YACZ,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM;YACrC,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,KAAK;YACpC,SAAS,EAAE,UAAU,CAAC,SAAS;YAC/B,cAAc,EAAE,UAAU,CAAC,cAAc;YACzC,cAAc,EAAE,UAAU,CAAC,cAAc;SAC1C,CAAC;IACJ,CAAC;IAED,0EAA0E;IAE1E,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,mDAAmD;QACnD,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE;YACtC,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;YAC1C,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;SAC3C,CAAC,CAAC;QAEH,MAAM,aAAa,GAAkB;YACnC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,GAAG,OAAO;SACX,CAAC;QAEF,sEAAsE;QACtE,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;QAE1D,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,IAAI,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,IAAI,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;YAC7C,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YACtB,IAAI,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;YACrC,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,oEAAoE;QAEpE,qEAAqE;QACrE,IAAI,UAAU,GAAkB,IAAI,CAAC;QACrC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM
,CAAC,IAAI,IAAI,EAAE,CAAC;YAErD,mEAAmE;YACnE,iEAAiE;YACjE,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC5C,UAAU,GAAG,4EAA4E,CAAC;gBAC1F,IAAI,CAAC,GAAG,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,qBAAqB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;gBACrD,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC;QAC9C,CAAC;QAED,qEAAqE;QACrE,IAAI,UAAU,GAAkB,IAAI,CAAC;QACrC,IAAI,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;QAC5D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAErD,6DAA6D;YAC7D,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC5C,UAAU,GAAG,iEAAiE,CAAC;gBAC/E,IAAI,CAAC,GAAG,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;gBACxD,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,+BAA+B,UAAU,EAAE,CAAC,CAAC;QACxD,CAAC;QAED,qEAAqE;QACrE,IAAI,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;QAChE,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,IAAI,CAAC,GAAG,CAAC,2BAA2B,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;YAC3D,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACpE,IAAI,CAAC,GAAG,CAAC,iCAAiC,UAAU,EAAE,CAAC,CAAC;YAExD,mEAAmE;YACnE,OAAO;gBACL,GAAG,EAAE,QAAQ;gBACb,UAAU,EAAE,CAAC;gBACb,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,SAAS;gBACf,UAAU,EAAE,CAAC;gBACb,KAAK,EAAE;oBACL,0BAA0B;oBAC1B,uBAAuB,UAAU,IAAI,aAAa,EAAE;oBACpD,uBAAuB,UAAU,IAAI,aAAa,EAAE;oBACpD,uBAAuB,UAAU,EAAE;iBACpC,CAAC,IAAI,CAAC,IAAI,CAAC;aACb,CAAC;QACJ,CAAC;IACH,CAAC;IA
ED,0EAA0E;IAE1E;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CACd,IAAc,EACd,UAAyB,EAAE,EAC3B,WAAW,GAAG,CAAC;QAEf,MAAM,OAAO,GAAmB,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;QAExB,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;YAC3C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAC1C,CAAC;YAEF,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;gBAC9B,IAAI,OAAO,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;oBACnC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAC9B,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,IAAI,CAAC;wBACX,GAAG,EAAE,SAAS;wBACd,UAAU,EAAE,CAAC;wBACb,KAAK,EAAE,EAAE;wBACT,IAAI,EAAE,MAAM;wBACZ,UAAU,EAAE,CAAC;wBACb,KAAK,EACH,OAAO,CAAC,MAAM,YAAY,KAAK;4BAC7B,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO;4BACxB,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;qBAC7B,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,0EAA0E;IAE1E;;;OAGG;IACH,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED,0EAA0E;IAElE,GAAG,CAAC,GAAW;QACrB,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,eAAe,GAAG,EAAE,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,5 @@
1
+ import type { ScrapeOptions, ScrapeResult } from "../types.js";
2
+ export declare class Tier1HTTP {
3
+ scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
4
+ }
5
+ //# sourceMappingURL=tier1-http.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tier1-http.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier1-http.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAmD/D,qBAAa,SAAS;IACd,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;CAmG9E"}
@@ -0,0 +1,120 @@
1
+ import { gotScraping } from "got-scraping";
2
+ import { extractSSRData, hasEnoughContent } from "../extractors/ssr.js";
3
+ import { extractContent } from "../extractors/content.js";
4
+ import { htmlToMarkdown } from "../extractors/to-markdown.js";
5
// ─── Tier 1: got-scraping request + static HTML extraction ─────────────────
//
// What Tier1HTTP.scrape() does, in order:
//   1. Requests the page with gotScraping()
//   2. Rejects anti-bot statuses, other HTTP errors and non-text content types
//   3. Looks for embedded SSR data via extractSSRData()
//   4. Gives up when the HTML has too little content and no SSR data
//   5. Extracts the main content and renders the requested formats
//
// No JavaScript is executed here. Every failure is thrown as an Error whose
// message starts with "Tier1 HTTP:" — presumably so an orchestrator can move
// on to another tier (confirm against the caller).
// ──────────────────────────────────────────────────────────────────────────

// Headers added to every request. They are spread first, so caller-supplied
// headers (options.headers) win on conflict.
const EXTRA_HEADERS = {
    "Accept-Language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7",
    "Cache-Control": "no-cache",
    "Pragma": "no-cache",
};

// HTTP statuses reported as an anti-bot block rather than a generic HTTP error
const ANTIBOT_STATUS = new Set([403, 429, 503]);

export class Tier1HTTP {
    /**
     * Fetches `url` over HTTP and extracts its content without a browser.
     *
     * @param url - Address to request.
     * @param options - Optional settings read here: `headers`, `timeout`
     *   (default 15 000), `formats` (default ["markdown", "text"]) and
     *   `onlyMainContent` (default true).
     * @returns Result with url, statusCode, title, description, tier "http",
     *   durationMs, links, plus the requested formats and `ssrData` when found.
     * @throws Error on request failure, status >= 400, a Content-Type that is
     *   neither text/html nor text/plain, or insufficient content.
     */
    async scrape(url, options = {}) {
        const startedAt = Date.now();
        // ── HTTP request ───────────────────────────────────────────────────
        let res;
        try {
            res = await gotScraping({
                url,
                headers: { ...EXTRA_HEADERS, ...options.headers },
                // 4xx/5xx are inspected manually below instead of being thrown
                throwHttpErrors: false,
                timeout: { request: options.timeout ?? 15_000 },
                // Body is delivered as a string
                responseType: "text",
            });
        }
        catch (cause) {
            // Anything thrown while requesting (network-level failures included)
            const reason = cause instanceof Error ? cause.message : String(cause);
            throw new Error(`Tier1 HTTP: falha na requisição — ${reason}`);
        }
        // ── Status checks ──────────────────────────────────────────────────
        const { statusCode } = res;
        if (ANTIBOT_STATUS.has(statusCode)) {
            throw new Error(`Tier1 HTTP: status ${statusCode} — bloqueado por anti-bot`);
        }
        if (statusCode >= 400) {
            throw new Error(`Tier1 HTTP: status HTTP ${statusCode}`);
        }
        // ── Content-Type check (header may be a string or an array) ────────
        const ctHeader = res.headers["content-type"];
        const contentType = (Array.isArray(ctHeader) ? ctHeader[0] : ctHeader) ?? "";
        const isTextual = contentType.includes("text/html") || contentType.includes("text/plain");
        if (!isTextual) {
            throw new Error(`Tier1 HTTP: Content-Type "${contentType}" inesperado — esperava text/html`);
        }
        const html = res.body;
        // URL reported by the response (used as base for extraction and in the result)
        const finalUrl = res.url;
        // ── Embedded SSR data, then the "is there any content?" gate ───────
        const ssrData = extractSSRData(html);
        if (!(hasEnoughContent(html) || ssrData)) {
            throw new Error("Tier1 HTTP: conteúdo insuficiente — provavelmente SPA sem SSR ou anti-bot");
        }
        // ── Main-content extraction ────────────────────────────────────────
        const wanted = options.formats ?? ["markdown", "text"];
        const page = extractContent(html, options.onlyMainContent ?? true, finalUrl);
        const hasLinks = page.links.length > 0;
        const result = {
            url: finalUrl,
            statusCode,
            title: page.title,
            description: page.description || undefined,
            tier: "http",
            durationMs: Date.now() - startedAt,
            links: hasLinks ? page.links : undefined,
        };
        // Only the requested representations are attached
        if (wanted.includes("markdown")) {
            result.markdown = htmlToMarkdown(page.html);
        }
        if (wanted.includes("html")) {
            result.html = page.html;
        }
        if (wanted.includes("text")) {
            result.text = page.text;
        }
        if (ssrData) {
            result.ssrData = ssrData;
        }
        return result;
    }
}
120
+ //# sourceMappingURL=tier1-http.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tier1-http.js","sourceRoot":"","sources":["../../../src/scraper/tiers/tier1-http.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAW3C,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAE9D,8EAA8E;AAC9E,EAAE;AACF,YAAY;AACZ,0EAA0E;AAC1E,kEAAkE;AAClE,8CAA8C;AAC9C,0DAA0D;AAC1D,EAAE;AACF,iDAAiD;AACjD,gFAAgF;AAChF,qEAAqE;AACrE,iFAAiF;AACjF,2DAA2D;AAC3D,EAAE;AACF,cAAc;AACd,uDAAuD;AACvD,4EAA4E;AAC5E,6EAA6E;AAE7E,iEAAiE;AACjE,6DAA6D;AAC7D,mEAAmE;AACnE,gEAAgE;AAChE,EAAE;AACF,sEAAsE;AACtE,0CAA0C;AAC1C,sDAAsD;AACtD,qEAAqE;AACrE,gCAAgC;AAChC,MAAM,aAAa,GAA2B;IAC5C,iBAAiB,EAAE,qCAAqC;IACxD,eAAe,EAAE,UAAU;IAC3B,QAAQ,EAAE,UAAU;CACrB,CAAC;AAEF,8EAA8E;AAC9E,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAEhD,MAAM,OAAO,SAAS;IACpB,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,uEAAuE;QACvE,4EAA4E;QAC5E,yEAAyE;QACzE,sEAAsE;QACtE,2DAA2D;QAC3D,IAAI,QAAqB,CAAC;QAE1B,IAAI,CAAC;YACH,QAAQ,GAAG,CAAC,MAAM,WAAW,CAAC;gBAC5B,GAAG;gBACH,qEAAqE;gBACrE,iEAAiE;gBACjE,OAAO,EAAE,EAAE,GAAG,aAAa,EAAE,GAAG,OAAO,CAAC,OAAO,EAAE;gBAEjD,6DAA6D;gBAC7D,eAAe,EAAE,KAAK;gBAEtB,kFAAkF;gBAClF,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,MAAM,EAAE;gBAE/C,qCAAqC;gBACrC,YAAY,EAAE,MAAM;gBACpB,8DAA8D;aAC/D,CAAC,CAA2B,CAAC;QAChC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,uDAAuD;YACvD,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,MAAM,IAAI,KAAK,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;QAC9D,CAAC;QAED,uEAAuE;QACvE,IAAI,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,KAAK,CACb,sBAAsB,QAAQ,CAAC,UAAU,2BAA2B,CACrE,CAAC;QACJ,CAAC;QAED,IAAI,QAAQ,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,2BAA2B,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACpE,CAAC;QAED,wEAAwE;QACxE,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QAC/C,MAAM,WAAW,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,C
AAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAE5E,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC9E,MAAM,IAAI,KAAK,CACb,6BAA6B,WAAW,mCAAmC,CAC5E,CAAC;QACJ,CAAC;QAED,6DAA6D;QAC7D,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAc,CAAC;QACrC,oDAAoD;QACpD,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC;QAE9B,uEAAuE;QACvE,4DAA4D;QAC5D,+DAA+D;QAC/D,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAErC,uEAAuE;QACvE,gFAAgF;QAChF,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,2EAA2E,CAC5E,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,kEAAkE;QAClE,+DAA+D;QAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,cAAc,CAC9B,IAAI,EACJ,OAAO,CAAC,eAAe,IAAI,IAAI,EAC/B,QAAQ,CACT,CAAC;QAEF,MAAM,MAAM,GAAiB;YAC3B,GAAG,EAAE,QAAQ;YACb,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,SAAS;YAC/C,IAAI,EAAE,MAAM;YACZ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;SAChE,CAAC;QAEF,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACnF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAO,SAAS,CAAC,IAAI,CAAC;QACnE,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAO,SAAS,CAAC,IAAI,CAAC;QACnE,IAAI,OAAO;YAAuB,MAAM,CAAC,OAAO,GAAI,OAAO,CAAC;QAE5D,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
@@ -0,0 +1,5 @@
1
+ import type { ScrapeOptions, ScrapeResult } from "../types.js";
2
+ export declare class Tier2Stealth {
3
+ scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
4
+ }
5
+ //# sourceMappingURL=tier2-stealth.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tier2-stealth.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier2-stealth.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAgC/D,qBAAa,YAAY;IACjB,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;CAuG9E"}
@@ -0,0 +1,106 @@
1
+ import { gotScraping } from "got-scraping";
2
+ import { extractSSRData, hasEnoughContent } from "../extractors/ssr.js";
3
+ import { extractContent } from "../extractors/content.js";
4
+ import { htmlToMarkdown } from "../extractors/to-markdown.js";
5
// ─── Tier 2: stealth HTTP request ──────────────────────────────────────────
//
// What Tier2Stealth.scrape() does, in order:
//   1. Requests the page with gotScraping(), passing headerGeneratorOptions
//      (desktop Chrome >= 120 on macOS/Windows) plus a fixed set of headers
//   2. Rejects anti-bot statuses, other HTTP errors and non-text content types
//   3. Looks for embedded SSR data via extractSSRData()
//   4. Gives up when the HTML has too little content and no SSR data
//   5. Extracts the main content and renders the requested formats
//
// Every failure is thrown as an Error whose message starts with
// "Tier2 Stealth:" — presumably so an orchestrator can fall back to the
// browser tier (confirm against the caller).
// ──────────────────────────────────────────────────────────────────────────

// HTTP statuses reported as an anti-bot block
const ANTIBOT_STATUS = new Set([403, 429, 503]);

export class Tier2Stealth {
    /**
     * Fetches `url` with the stealth request profile and extracts its content.
     *
     * @param url - Address to request.
     * @param options - Optional settings read here: `headers` (override the
     *   built-in ones), `timeout` (default 30 000), `formats`
     *   (default ["markdown", "text"]) and `onlyMainContent` (default true).
     * @returns Result with url, statusCode, title, description, tier "stealth",
     *   durationMs, links, plus the requested formats and `ssrData` when found.
     * @throws Error on request failure, status >= 400, a Content-Type that is
     *   neither text/html nor text/plain, or insufficient content.
     */
    async scrape(url, options = {}) {
        const startedAt = Date.now();
        // ── HTTP request ───────────────────────────────────────────────────
        let res;
        try {
            res = await gotScraping({
                url,
                method: "GET",
                // Profile handed to got-scraping's header generator
                headerGeneratorOptions: {
                    browsers: [{ name: "chrome", minVersion: 120 }],
                    operatingSystems: ["macos", "windows"],
                    devices: ["desktop"],
                    locales: ["pt-BR", "pt", "en-US"],
                },
                // Fixed headers; caller-supplied ones are spread last and win
                headers: {
                    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
                    "accept-language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7",
                    "cache-control": "no-cache",
                    pragma: "no-cache",
                    "sec-fetch-dest": "document",
                    "sec-fetch-mode": "navigate",
                    "sec-fetch-site": "none",
                    "sec-fetch-user": "?1",
                    "upgrade-insecure-requests": "1",
                    ...options.headers,
                },
                timeout: { request: options.timeout ?? 30_000 },
                followRedirect: true,
                maxRedirects: 10,
                retry: { limit: 2, methods: ["GET"] },
                // 4xx/5xx are inspected manually below instead of being thrown
                throwHttpErrors: false,
                decompress: true,
            });
        }
        catch (cause) {
            const reason = cause instanceof Error ? cause.message : String(cause);
            throw new Error(`Tier2 Stealth: falha na requisição — ${reason}`);
        }
        const { body: html, statusCode } = res;
        // Fall back to the requested URL when the response does not report one
        const finalUrl = res.url ?? url;
        // ── Status checks ──────────────────────────────────────────────────
        if (ANTIBOT_STATUS.has(statusCode)) {
            throw new Error(`Tier2 Stealth: status ${statusCode} — bloqueado por anti-bot`);
        }
        if (statusCode >= 400) {
            throw new Error(`Tier2 Stealth: status ${statusCode}`);
        }
        // ── Content-Type check ─────────────────────────────────────────────
        const contentType = res.headers["content-type"] ?? "";
        const ctText = String(contentType);
        const isTextual = ctText.includes("text/html") || ctText.includes("text/plain");
        if (!isTextual) {
            throw new Error(`Tier2 Stealth: Content-Type inesperado "${contentType}" — esperava text/html`);
        }
        // ── Embedded SSR data, then the "is there any content?" gate ───────
        const ssrData = extractSSRData(html);
        if (!(hasEnoughContent(html) || ssrData)) {
            throw new Error("Tier2 Stealth: conteúdo insuficiente — página precisa de JavaScript para renderizar");
        }
        // ── Content extraction ─────────────────────────────────────────────
        const wanted = options.formats ?? ["markdown", "text"];
        const page = extractContent(html, options.onlyMainContent ?? true, finalUrl);
        const hasLinks = page.links.length > 0;
        const result = {
            url: finalUrl,
            statusCode,
            title: page.title,
            description: page.description || undefined,
            tier: "stealth",
            durationMs: Date.now() - startedAt,
            links: hasLinks ? page.links : undefined,
        };
        // Only the requested representations are attached
        if (wanted.includes("markdown")) {
            result.markdown = htmlToMarkdown(page.html);
        }
        if (wanted.includes("html")) {
            result.html = page.html;
        }
        if (wanted.includes("text")) {
            result.text = page.text;
        }
        if (ssrData) {
            result.ssrData = ssrData;
        }
        return result;
    }
}
106
+ //# sourceMappingURL=tier2-stealth.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tier2-stealth.js","sourceRoot":"","sources":["../../../src/scraper/tiers/tier2-stealth.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE3C,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAU9D,8EAA8E;AAC9E,EAAE;AACF,YAAY;AACZ,yEAAyE;AACzE,kFAAkF;AAClF,kEAAkE;AAClE,8CAA8C;AAC9C,2DAA2D;AAC3D,EAAE;AACF,4EAA4E;AAC5E,yDAAyD;AACzD,EAAE;AACF,wEAAwE;AACxE,uDAAuD;AACvD,6EAA6E;AAE7E,iDAAiD;AACjD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAEhD,MAAM,OAAO,YAAY;IACvB,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,uEAAuE;QACvE,IAAI,QAAqB,CAAC;QAC1B,IAAI,CAAC;YACH,QAAQ,GAAG,CAAC,MAAM,WAAW,CAAC;gBAC5B,GAAG;gBACH,MAAM,EAAE,KAAK;gBACb,mDAAmD;gBACnD,sBAAsB,EAAE;oBACtB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;oBAC/C,gBAAgB,EAAE,CAAC,OAAO,EAAE,SAAS,CAAC;oBACtC,OAAO,EAAE,CAAC,SAAS,CAAC;oBACpB,OAAO,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC;iBAClC;gBACD,0CAA0C;gBAC1C,OAAO,EAAE;oBACP,MAAM,EACJ,kGAAkG;oBACpG,iBAAiB,EAAE,qCAAqC;oBACxD,eAAe,EAAE,UAAU;oBAC3B,MAAM,EAAE,UAAU;oBAClB,gBAAgB,EAAE,UAAU;oBAC5B,gBAAgB,EAAE,UAAU;oBAC5B,gBAAgB,EAAE,MAAM;oBACxB,gBAAgB,EAAE,IAAI;oBACtB,2BAA2B,EAAE,GAAG;oBAChC,GAAG,OAAO,CAAC,OAAO;iBACnB;gBACD,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,MAAM,EAAE;gBAC/C,cAAc,EAAE,IAAI;gBACpB,YAAY,EAAE,EAAE;gBAChB,KAAK,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,KAAK,CAAC,EAAE;gBACrC,eAAe,EAAE,KAAK;gBACtB,UAAU,EAAE,IAAI;aACjB,CAAC,CAA2B,CAAC;QAChC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,wCAAwC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC3F,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QAC3B,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC;QACvC,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,IAAI,GAAG,CAAC;QAErC,uEAAuE;QACvE,IAAI,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CACb,yBAAyB,UAAU,2BAA
2B,CAC/D,CAAC;QACJ,CAAC;QAED,IAAI,UAAU,IAAI,GAAG,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,yBAAyB,UAAU,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC3D,IACE,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC;YAC1C,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EAC3C,CAAC;YACD,MAAM,IAAI,KAAK,CACb,2CAA2C,WAAW,wBAAwB,CAC/E,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAErC,uEAAuE;QACvE,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,qFAAqF,CACtF,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,cAAc,CAC9B,IAAI,EACJ,OAAO,CAAC,eAAe,IAAI,IAAI,EAC/B,QAAQ,CACT,CAAC;QAEF,MAAM,MAAM,GAAiB;YAC3B,GAAG,EAAE,QAAQ;YACb,UAAU;YACV,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,SAAS;YAC/C,IAAI,EAAE,SAAS;YACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;SAChE,CAAC;QAEF,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACnF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;QAC/D,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;QAC/D,IAAI,OAAO;YAAuB,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;QAE3D,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
@@ -0,0 +1,10 @@
1
+ import type { ScrapeOptions, ScrapeResult, FirecrawlConfig } from "../types.js";
2
+ export declare class Tier3Browser { // Public type declaration for the Tier3Browser scraper class. NOTE(review): presumably the browser-driven scraping tier — confirm against tier3-browser.js.
3
+ private browser; // Private member; its type is not exposed in this declaration.
4
+ private readonly browserConfig; // Private, read-only member. NOTE(review): presumably stores the constructor argument — confirm.
5
+ constructor(browserConfig?: FirecrawlConfig["browserConfig"]); // `browserConfig` is optional and typed as the `browserConfig` property of FirecrawlConfig.
6
+ private getBrowser; // Private member; its signature is not exposed in this declaration.
7
+ scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>; // Takes a URL string and optional ScrapeOptions; resolves to a ScrapeResult.
8
+ close(): Promise<void>; // Asynchronous; resolves with no value. NOTE(review): presumably releases the underlying browser — confirm.
9
+ }
10
+ //# sourceMappingURL=tier3-browser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tier3-browser.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier3-browser.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,aAAa,EACb,YAAY,EAEZ,eAAe,EAChB,MAAM,aAAa,CAAC;AA4RrB,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAgD;gBAElE,aAAa,GAAE,eAAe,CAAC,eAAe,CAAM;YAMlD,UAAU;IAgClB,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;IAkNvE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAM7B"}