auspex 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -4
- package/readme.md +169 -35
- package/dist/agent/actions.d.ts +0 -5
- package/dist/agent/actions.d.ts.map +0 -1
- package/dist/agent/actions.js +0 -26
- package/dist/agent/actions.js.map +0 -1
- package/dist/agent/agent.d.ts +0 -12
- package/dist/agent/agent.d.ts.map +0 -1
- package/dist/agent/agent.js +0 -159
- package/dist/agent/agent.js.map +0 -1
- package/dist/agent/loop.d.ts +0 -6
- package/dist/agent/loop.d.ts.map +0 -1
- package/dist/agent/loop.js +0 -175
- package/dist/agent/loop.js.map +0 -1
- package/dist/agent/report.d.ts +0 -3
- package/dist/agent/report.d.ts.map +0 -1
- package/dist/agent/report.js +0 -95
- package/dist/agent/report.js.map +0 -1
- package/dist/browser/executor.d.ts +0 -5
- package/dist/browser/executor.d.ts.map +0 -1
- package/dist/browser/executor.js +0 -33
- package/dist/browser/executor.js.map +0 -1
- package/dist/browser/snapshot.d.ts +0 -6
- package/dist/browser/snapshot.d.ts.map +0 -1
- package/dist/browser/snapshot.js +0 -145
- package/dist/browser/snapshot.js.map +0 -1
- package/dist/config/defaults.d.ts +0 -11
- package/dist/config/defaults.d.ts.map +0 -1
- package/dist/config/defaults.js +0 -11
- package/dist/config/defaults.js.map +0 -1
- package/dist/config/schema.d.ts +0 -62
- package/dist/config/schema.d.ts.map +0 -1
- package/dist/config/schema.js +0 -24
- package/dist/config/schema.js.map +0 -1
- package/dist/index.d.ts +0 -7
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -8
- package/dist/index.js.map +0 -1
- package/dist/llm/client.d.ts +0 -23
- package/dist/llm/client.d.ts.map +0 -1
- package/dist/llm/client.js +0 -51
- package/dist/llm/client.js.map +0 -1
- package/dist/llm/prompt.d.ts +0 -3
- package/dist/llm/prompt.d.ts.map +0 -1
- package/dist/llm/prompt.js +0 -36
- package/dist/llm/prompt.js.map +0 -1
- package/dist/scraper/extractors/content.d.ts +0 -32
- package/dist/scraper/extractors/content.d.ts.map +0 -1
- package/dist/scraper/extractors/content.js +0 -276
- package/dist/scraper/extractors/content.js.map +0 -1
- package/dist/scraper/extractors/ssr.d.ts +0 -17
- package/dist/scraper/extractors/ssr.d.ts.map +0 -1
- package/dist/scraper/extractors/ssr.js +0 -162
- package/dist/scraper/extractors/ssr.js.map +0 -1
- package/dist/scraper/extractors/to-markdown.d.ts +0 -5
- package/dist/scraper/extractors/to-markdown.d.ts.map +0 -1
- package/dist/scraper/extractors/to-markdown.js +0 -103
- package/dist/scraper/extractors/to-markdown.js.map +0 -1
- package/dist/scraper/index.d.ts +0 -35
- package/dist/scraper/index.d.ts.map +0 -1
- package/dist/scraper/index.js +0 -299
- package/dist/scraper/index.js.map +0 -1
- package/dist/scraper/tiers/tier1-http.d.ts +0 -5
- package/dist/scraper/tiers/tier1-http.d.ts.map +0 -1
- package/dist/scraper/tiers/tier1-http.js +0 -122
- package/dist/scraper/tiers/tier1-http.js.map +0 -1
- package/dist/scraper/tiers/tier2-stealth.d.ts +0 -5
- package/dist/scraper/tiers/tier2-stealth.d.ts.map +0 -1
- package/dist/scraper/tiers/tier2-stealth.js +0 -108
- package/dist/scraper/tiers/tier2-stealth.js.map +0 -1
- package/dist/scraper/tiers/tier3-browser.d.ts +0 -10
- package/dist/scraper/tiers/tier3-browser.d.ts.map +0 -1
- package/dist/scraper/tiers/tier3-browser.js +0 -506
- package/dist/scraper/tiers/tier3-browser.js.map +0 -1
- package/dist/scraper/types.d.ts +0 -161
- package/dist/scraper/types.d.ts.map +0 -1
- package/dist/scraper/types.js +0 -3
- package/dist/scraper/types.js.map +0 -1
- package/dist/security/action-validator.d.ts +0 -83
- package/dist/security/action-validator.d.ts.map +0 -1
- package/dist/security/action-validator.js +0 -36
- package/dist/security/action-validator.js.map +0 -1
- package/dist/security/url-validator.d.ts +0 -9
- package/dist/security/url-validator.d.ts.map +0 -1
- package/dist/security/url-validator.js +0 -69
- package/dist/security/url-validator.js.map +0 -1
- package/dist/types.d.ts +0 -96
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -2
- package/dist/types.js.map +0 -1
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
import { load } from "cheerio";
|
|
2
|
-
// ─── Detectores de dados SSR ───────────────────────────────────────────────
|
|
3
|
-
//
|
|
4
|
-
// Frameworks modernos embutem dados no HTML inicial para hidratação no cliente.
|
|
5
|
-
// Extrair esses dados evita a necessidade de browser em ~60-70% dos sites.
|
|
6
|
-
//
|
|
7
|
-
// Ordem: do mais específico para o mais genérico.
|
|
8
|
-
// ──────────────────────────────────────────────────────────────────────────
|
|
9
|
-
/**
 * Safely parses a JSON string.
 *
 * @param {unknown} raw - Candidate JSON text; surrounding whitespace is ignored.
 * @returns {unknown} The parsed value, or `null` when `raw` is not a string,
 *   is empty/whitespace-only, or is not valid JSON.
 */
function tryParse(raw) {
    // Non-string input (numbers, objects, undefined, ...) has no usable
    // `.trim()`; treat it as "nothing to parse" instead of throwing.
    if (typeof raw !== "string")
        return null;
    const text = raw.trim();
    if (text === "")
        return null;
    try {
        return JSON.parse(text);
    }
    catch {
        // Invalid JSON is an expected outcome for this helper, not an error.
        return null;
    }
}
|
|
20
|
-
/**
 * Extracts the JSON object assigned by an inline script that matches `pattern`.
 *
 * @param {string} html - Raw page HTML.
 * @param {RegExp} pattern - Expression whose first capture group holds the JSON text.
 * @returns {unknown} The parsed JSON, or `null` when nothing matches or parsing fails.
 */
function matchInlineJson(html, pattern) {
    const captured = html.match(pattern)?.[1];
    return captured ? tryParse(captured) : null;
}
/**
 * `window.<global> = {...}` detectors tried after the Nuxt checks, in priority
 * order (most specific first, generic state containers last).
 * Each entry is `[resultType, pattern]`.
 */
const WINDOW_STATE_DETECTORS = [
    // Gatsby: window.___gatsby or window.___GATSBY
    ["gatsby", /window\.___(?:gatsby|GATSBY)\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/],
    // Remix / React Router v7: window.__remixContext
    ["remix", /window\.__remix(?:Context|RouterManifest)\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/],
    // TanStack Router / Start: window.__TSR_DEHYDRATED__
    ["tanstack", /window\.__(?:TSR_DEHYDRATED|TANSTACK_ROUTER_CONTEXT|TRT_DEHYDRATED)__\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/],
    // Vue SSR: window.__VUE_SSR_CONTEXT__ / window.__pinia
    ["vue", /window\.__(?:VUE_SSR_CONTEXT__|VUE_STORE__|pinia)\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/],
    // Legacy SvelteKit: window.__SVELTEKIT__
    ["sveltekit", /window\.__(?:SVELTEKIT|sveltekit)__?\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/],
    // Generic: window.__INITIAL_STATE__ / __APP_STATE__ / __REDUX_STATE__ ...
    // (manually serialized stores)
    ["generic", /window\.__(?:INITIAL_STATE|APP_STATE|REDUX_STATE|STORE_STATE|DATA|STATE|PROPS)__\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/],
];
/**
 * Tries to extract JSON data that SSR frameworks embed in the initial HTML
 * for client-side hydration, so the page can be read without a browser.
 *
 * Detectors run from the most specific to the most generic; the first one
 * that yields parseable, truthy JSON wins.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{ type: string, data: unknown } | null} The detected framework
 *   type and its parsed payload, or `null` when no detector matches.
 */
export function extractSSRData(html) {
    const $ = load(html);
    // Next.js: <script id="__NEXT_DATA__" type="application/json">
    const nextData = tryParse($("#__NEXT_DATA__").text().trim());
    if (nextData)
        return { type: "next", data: nextData };
    // Angular Universal: <script id="ng-state" type="application/json">
    const ngData = tryParse($('script#ng-state[type="application/json"]').text().trim());
    if (ngData)
        return { type: "angular", data: ngData };
    // SvelteKit: <script type="application/json" data-sveltekit-fetched>
    // A page may contain several of these scripts. Calling `.text()` on the
    // whole selection concatenates their contents into invalid JSON, so each
    // script is parsed on its own and the first parseable payload is returned.
    for (const script of $("script[data-sveltekit-fetched]").toArray()) {
        const fetchedData = tryParse($(script).text().trim());
        if (fetchedData)
            return { type: "sveltekit", data: fetchedData };
    }
    // Nuxt 2/3: window.__NUXT__ = ...
    // Only plain JSON is captured; non-JSON serializations fail to parse and
    // fall through to the next detectors.
    const nuxtData = matchInlineJson(html, /window\.__NUXT__\s*=\s*(\{[\s\S]*?\})\s*;?\s*<\/script>/);
    if (nuxtData)
        return { type: "nuxt", data: nuxtData };
    // Nuxt 3 alternative: URI-encoded JSON in window.__nuxt_state__ = '...'
    const encodedState = html.match(/window\.__nuxt_state__\s*=\s*'([^']+)'/)?.[1];
    if (encodedState) {
        try {
            const nuxtState = tryParse(decodeURIComponent(encodedState));
            if (nuxtState)
                return { type: "nuxt", data: nuxtState };
        }
        catch {
            // decodeURIComponent throws URIError on malformed escapes; this
            // detector is best-effort, so fall through to the remaining ones.
        }
    }
    // Remaining `window.<global> = {...}` conventions, in priority order.
    for (const [type, pattern] of WINDOW_STATE_DETECTORS) {
        const data = matchInlineJson(html, pattern);
        if (data)
            return { type, data };
    }
    return null;
}
|
|
108
|
-
/**
 * Decides whether the static HTML already carries enough readable text to be
 * used without executing JavaScript.
 *
 * The answer is `false` when either:
 *   - the visible body text is shorter than 200 characters, or
 *   - the visible body text is shorter than 2000 characters and contains a
 *     phrase typical of anti-bot challenges, "enable JavaScript" notices,
 *     loading screens or access-denied pages.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {boolean} `true` when the page looks like real content.
 */
export function hasEnoughContent(html) {
    const MIN_TEXT_LENGTH = 200;
    const CHALLENGE_MAX_LENGTH = 2_000;
    const doc = load(html);
    // Drop elements that never contribute readable text.
    doc("script, style, noscript, iframe, svg, img").remove();
    const visibleText = doc("body").text().replace(/\s+/g, " ").trim();
    // Too little text: empty page or an SPA shell that has not rendered.
    if (visibleText.length < MIN_TEXT_LENGTH) {
        return false;
    }
    // Long pages are accepted even if they mention one of the phrases below;
    // only short pages are treated as possible challenge pages.
    if (visibleText.length >= CHALLENGE_MAX_LENGTH) {
        return true;
    }
    const challengeSignatures = [
        // Cloudflare
        /just a moment/i,
        /checking your browser/i,
        /ddos protection by cloudflare/i,
        /ray id:/i,
        // DDoS-Guard
        /ddos-guard/i,
        // Imperva / Incapsula
        /incapsula incident id/i,
        /powered by imperva/i,
        // DataDome
        /datadome/i,
        // hCaptcha / reCAPTCHA challenges
        /complete the security check/i,
        /prove you are human/i,
        /please complete the captcha/i,
        // Loading screens / generic SPA shell
        /please wait/i,
        /enable javascript/i,
        /you need to enable javascript/i,
        /javascript is required/i,
        /javascript is disabled/i,
        /please enable javascript/i,
        // Generic
        /access denied/i,
        /403 forbidden/i,
        /bot detected/i,
    ];
    const haystack = visibleText.toLowerCase();
    for (const signature of challengeSignatures) {
        if (signature.test(haystack)) {
            return false;
        }
    }
    return true;
}
|
|
162
|
-
//# sourceMappingURL=ssr.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"ssr.js","sourceRoot":"","sources":["../../../src/scraper/extractors/ssr.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAG/B,8EAA8E;AAC9E,EAAE;AACF,gFAAgF;AAChF,2EAA2E;AAC3E,EAAE;AACF,kDAAkD;AAClD,6EAA6E;AAE7E,qEAAqE;AACrE,SAAS,QAAQ,CAAC,GAAW;IAC3B,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IAErB,yEAAyE;IACzE,MAAM,OAAO,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAClD,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IACnC,IAAI,QAAQ;QAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAEtD,yEAAyE;IACzE,MAAM,KAAK,GAAG,CAAC,CAAC,0CAA0C,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC1E,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,IAAI,MAAM;QAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAErD,0EAA0E;IAC1E,gFAAgF;IAChF,MAAM,gBAAgB,GAAG,CAAC,CAAC,gCAAgC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3E,MAAM,iBAAiB,GAAG,QAAQ,CAAC,gBAAgB,CAAC,CAAC;IACrD,IAAI,iBAAiB;QAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,iBAAiB,EAAE,CAAC;IAE7E,wEAAwE;IACxE,2EAA2E;IAC3E,uEAAuE;IACvE,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;IACxF,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACnB,MAAM,QAAQ,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QACxC,IAAI,QAAQ;YAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IACxD,CAAC;IAED,yEAAyE;IACzE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,wCAAwC,CAAC,CAAC;IACxE,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,kBAAkB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAClD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;YACpC,IAAI,SAAS;gBAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC1D,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;IAED,yEAAyE;IACzE,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAC5B,qEAAqE,CACtE,CAAC;IACF,IAAI,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,
CAAC;QACrB,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5C,IAAI,UAAU;YAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IAC9D,CAAC;IAED,yEAAyE;IACzE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAC3B,kFAAkF,CACnF,CAAC;IACF,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACpB,MAAM,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,IAAI,SAAS;YAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAC3D,CAAC;IAED,wEAAwE;IACxE,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAC9B,8GAA8G,CAC/G,CAAC;IACF,IAAI,aAAa,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3C,IAAI,OAAO;YAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IAC1D,CAAC;IAED,wEAAwE;IACxE,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CACzB,0FAA0F,CAC3F,CAAC;IACF,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAClB,MAAM,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACtC,IAAI,OAAO;YAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IACrD,CAAC;IAED,yEAAyE;IACzE,MAAM,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAClC,6EAA6E,CAC9E,CAAC;IACF,IAAI,iBAAiB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,IAAI,UAAU;YAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IACjE,CAAC;IAED,4EAA4E;IAC5E,sEAAsE;IACtE,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAC7B,yHAAyH,CAC1H,CAAC;IACF,IAAI,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACtB,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,IAAI,WAAW;YAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;IACjE,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IAErB,kDAAkD;IAClD,CAAC,CAAC,2CAA2C,CAAC,CAAC,MAAM,EAAE,CAAC;IAExD,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAE9D,qEAAqE;IACrE,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG;QAAE,OAAO,KAAK,CAAC;IAExC,wEAAwE;IACxE,+EAA+E;IAC/E,MAAM,eAAe,GAAa;QAChC,0BAA0B;QAC1B,gBAAgB;QAChB,wBAAwB;QACxB,gCAAgC;QAChC,UAAU,EAA8B,yBAAyB;QAEjE,aAAa;QACb
,aAAa;QAEb,sBAAsB;QACtB,wBAAwB;QACxB,qBAAqB;QAErB,WAAW;QACX,WAAW;QAEX,kCAAkC;QAClC,8BAA8B;QAC9B,sBAAsB;QACtB,8BAA8B;QAE9B,uCAAuC;QACvC,cAAc;QACd,oBAAoB;QACpB,gCAAgC;QAChC,yBAAyB;QACzB,yBAAyB;QACzB,2BAA2B;QAE3B,WAAW;QACX,gBAAgB;QAChB,gBAAgB;QAChB,eAAe;KAChB,CAAC;IAEF,MAAM,SAAS,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;IACzC,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAEjE,2DAA2D;IAC3D,IAAI,SAAS,IAAI,QAAQ,CAAC,MAAM,GAAG,KAAK;QAAE,OAAO,KAAK,CAAC;IAEvD,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"to-markdown.d.ts","sourceRoot":"","sources":["../../../src/scraper/extractors/to-markdown.ts"],"names":[],"mappings":"AA6FA;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAwBnD"}
|
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
// ─── Conversor HTML → Markdown ─────────────────────────────────────────────
|
|
2
|
-
// Usa Turndown (CJS) + plugin GFM para tabelas pipe nativas
|
|
3
|
-
import TurndownService from "turndown";
|
|
4
|
-
import { tables, strikethrough } from "turndown-plugin-gfm";
|
|
5
|
-
/** Lazily created Turndown instance shared by every call to getTurndown(). */
let _td = null;
/**
 * Returns the shared, fully configured TurndownService, creating it on first use.
 *
 * Configuration: ATX headings, "-" bullets, fenced code blocks, inlined links,
 * the GFM tables and strikethrough plugins, plus custom rules that drop
 * non-content elements and tidy images and links.
 *
 * @returns {TurndownService} The cached converter instance.
 */
function getTurndown() {
    if (_td)
        return _td;
    // Configure a local instance and publish it only once setup has finished,
    // so an exception during configuration cannot leave a half-configured
    // converter in the cache.
    const service = new TurndownService({
        headingStyle: "atx", // "# Title" instead of underlined headings
        bulletListMarker: "-",
        codeBlockStyle: "fenced", // fenced blocks instead of indented code
        hr: "---",
        strongDelimiter: "**",
        emDelimiter: "_",
        linkStyle: "inlined",
    });
    // GFM plugin: pipe tables and strikethrough (tables become "| a | b |"
    // rows rather than raw HTML).
    service.use(tables);
    service.use(strikethrough);
    // Remove elements that carry no useful content.
    // Note: remove() is given tag names here, not CSS selectors.
    service.remove([
        "script",
        "style",
        "noscript",
        "iframe",
        "nav",
        "footer",
        "header",
        "button",
        "form",
    ]);
    // <figcaption> inside <figure>: dropped so no stray caption is left in the output.
    service.addRule("removeFigcaption", {
        filter(node) {
            return (node.nodeName === "FIGCAPTION" &&
                node.parentNode?.nodeName === "FIGURE");
        },
        replacement: () => "",
    });
    // Ad containers (.ad, .ads): matched through the class attribute because
    // the tag-name list above cannot express class selectors.
    service.addRule("removeAds", {
        filter(node) {
            if (node.nodeType !== 1)
                return false;
            const cls = node.getAttribute("class") ?? "";
            return /\bad\b|\bads\b/.test(cls);
        },
        replacement: () => "",
    });
    // Images: emit a Markdown image using the trimmed alt text; images
    // without a src are dropped.
    service.addRule("images", {
        filter: "img",
        replacement(_content, node) {
            const alt = node.getAttribute("alt")?.trim() ?? "";
            const src = node.getAttribute("src") ?? "";
            if (!src)
                return "";
            // Fall back to a generic label when the image has no alt text.
            return alt ? `![${alt}](${src})` : `![image](${src})`;
        },
    });
    // Links: anchors with no href, href="#" or a javascript: URL keep only
    // their text, without the link markup.
    service.addRule("cleanLinks", {
        filter(node) {
            if (node.nodeName !== "A")
                return false;
            const href = node.getAttribute("href");
            return !href || href === "#" || href.startsWith("javascript:");
        },
        replacement(content) {
            return content;
        },
    });
    _td = service;
    return _td;
}
|
|
80
|
-
/**
 * Converts an HTML string into tidy Markdown.
 *
 * After the Turndown conversion the output is cleaned up in three passes:
 * whitespace-only lines are discarded (truly empty lines are kept), runs of
 * three or more newlines are collapsed to two, and trailing whitespace is
 * stripped from every line. The final string is trimmed.
 *
 * @param {string} html - HTML to convert.
 * @returns {string} The resulting Markdown, or "" for blank input.
 */
export function htmlToMarkdown(html) {
    if (!html.trim()) {
        return "";
    }
    const converted = getTurndown().turndown(html);
    // Pass 1: keep empty lines and lines with visible content; drop lines
    // made only of whitespace.
    const keptLines = [];
    for (const line of converted.split("\n")) {
        if (line === "" || line.trim().length > 0) {
            keptLines.push(line);
        }
    }
    // Pass 2: at most one blank line between blocks.
    const collapsed = keptLines.join("\n").replace(/\n{3,}/g, "\n\n");
    // Pass 3: strip trailing whitespace line by line.
    const tidyLines = [];
    for (const line of collapsed.split("\n")) {
        tidyLines.push(line.trimEnd());
    }
    return tidyLines.join("\n").trim();
}
|
|
103
|
-
//# sourceMappingURL=to-markdown.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"to-markdown.js","sourceRoot":"","sources":["../../../src/scraper/extractors/to-markdown.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4DAA4D;AAE5D,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAE5D,IAAI,GAAG,GAA2B,IAAI,CAAC;AAEvC,SAAS,WAAW;IAClB,IAAI,GAAG;QAAE,OAAO,GAAG,CAAC;IAEpB,GAAG,GAAG,IAAI,eAAe,CAAC;QACxB,YAAY,EAAE,KAAK,EAAQ,gCAAgC;QAC3D,gBAAgB,EAAE,GAAG;QACrB,cAAc,EAAE,QAAQ,EAAG,iCAAiC;QAC5D,EAAE,EAAE,KAAK;QACT,eAAe,EAAE,IAAI;QACrB,WAAW,EAAE,GAAG;QAChB,SAAS,EAAE,SAAS;KACrB,CAAC,CAAC;IAEH,2EAA2E;IAC3E,0DAA0D;IAC1D,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;IAEvB,4EAA4E;IAE5E,6DAA6D;IAC7D,qEAAqE;IACrE,GAAG,CAAC,MAAM,CAAC;QACT,QAAQ;QACR,OAAO;QACP,UAAU;QACV,QAAQ;QACR,KAAK;QACL,QAAQ;QACR,QAAQ;QACR,QAAQ;QACR,MAAM;KACP,CAAC,CAAC;IAEH,wEAAwE;IACxE,GAAG,CAAC,OAAO,CAAC,kBAAkB,EAAE;QAC9B,MAAM,CAAC,IAAI;YACT,OAAO,CACL,IAAI,CAAC,QAAQ,KAAK,YAAY;gBAC9B,IAAI,CAAC,UAAU,EAAE,QAAQ,KAAK,QAAQ,CACvC,CAAC;QACJ,CAAC;QACD,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;KACtB,CAAC,CAAC;IAEH,8EAA8E;IAC9E,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE;QACvB,MAAM,CAAC,IAAI;YACT,IAAI,IAAI,CAAC,QAAQ,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YACtC,MAAM,GAAG,GAAI,IAAgB,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAC1D,OAAO,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpC,CAAC;QACD,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;KACtB,CAAC,CAAC;IAEH,0CAA0C;IAC1C,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE;QACpB,MAAM,EAAE,KAAK;QACb,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,GAAG,GAAG,IAAwB,CAAC;YACrC,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAClD,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YAC1C,IAAI,CAAC,GAAG;gBAAE,OAAO,EAAE,CAAC;YACpB,OAAO,GAAG,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,YAAY,GAAG,GAAG,CAAC;QACxD,CAAC;KACF,CAAC,CAAC;IAEH,2CAA2C;IAC3C,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE;QACxB,MAAM,CAAC,IAAI;YACT,OAAO,CACL,IAAI,CAAC,QAAQ,KAAK,GAAG;gBACrB,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC;oBACzB,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,KAAK,GAAG;oBACjC,
IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,UAAU,CAAC,aAAa,CAAC,KAAK,IAAI,CAAC,CACjE,CAAC;QACJ,CAAC;QACD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC,CAAC,oCAAoC;QACtD,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC;IAE5B,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,IAAI,QAAQ,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAEjC,0EAA0E;IAE1E,6CAA6C;IAC7C,QAAQ,GAAG,QAAQ;SAChB,KAAK,CAAC,IAAI,CAAC;SACX,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,KAAK,EAAE,CAAC;SACvD,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,+CAA+C;IAC/C,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,0BAA0B;IAC1B,QAAQ,GAAG,QAAQ;SAChB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACvB,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC"}
|
package/dist/scraper/index.d.ts
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import type { MapOptions, MapResult, ScrapeOptions, ScrapeResult, ScraperConfig } from "./types.js";
|
|
2
|
-
export declare class Scraper {
|
|
3
|
-
private readonly fullConfig;
|
|
4
|
-
private readonly tier1;
|
|
5
|
-
private readonly tier2;
|
|
6
|
-
private readonly tier3;
|
|
7
|
-
private readonly config;
|
|
8
|
-
constructor(fullConfig?: ScraperConfig);
|
|
9
|
-
scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
10
|
-
/**
|
|
11
|
-
* Maps the links of a page (URL + anchor text).
|
|
12
|
-
* Reuses the tier cascade (HTTP → Stealth → Browser).
|
|
13
|
-
*
|
|
14
|
-
* @param url - Base URL to extract links from
|
|
15
|
-
* @param options - Filters and limits
|
|
16
|
-
*/
|
|
17
|
-
map(url: string, options?: MapOptions): Promise<MapResult>;
|
|
18
|
-
/**
|
|
19
|
-
* Scrapes multiple URLs in parallel with bounded concurrency.
|
|
20
|
-
* Errors on individual URLs do not fail the whole batch.
|
|
21
|
-
*
|
|
22
|
-
* @param urls - List of URLs to scrape
|
|
23
|
-
* @param options - Options applied to every URL
|
|
24
|
-
* @param concurrency - Maximum number of simultaneous scrapes. Default: 3
|
|
25
|
-
*/
|
|
26
|
-
scrapeMany(urls: string[], options?: ScrapeOptions, concurrency?: number): Promise<ScrapeResult[]>;
|
|
27
|
-
/**
|
|
28
|
-
* Closes the Playwright browser (Tier 3).
|
|
29
|
-
* Always call when finished to avoid orphaned Chromium processes.
|
|
30
|
-
*/
|
|
31
|
-
close(): Promise<void>;
|
|
32
|
-
private log;
|
|
33
|
-
}
|
|
34
|
-
export type { ScrapeOptions, ScrapeResult, ScrapeTier, ContentFormat, SSRData, InterceptedAPI, ScraperConfig, TierRawResult, MapLink, MapOptions, MapResult, } from "./types.js";
|
|
35
|
-
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,UAAU,EACV,SAAS,EACT,aAAa,EACb,YAAY,EACZ,aAAa,EACd,MAAM,YAAY,CAAC;AAepB,qBAAa,OAAO;IAYN,OAAO,CAAC,QAAQ,CAAC,UAAU;IAXvC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAY;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAe;IACrC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAe;IACrC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAMrB;gBAE2B,UAAU,GAAE,aAAkB;IAerD,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;IAqG7E;;;;;;OAMG;IACG,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,UAAe,GAAG,OAAO,CAAC,SAAS,CAAC;IA0HpE;;;;;;;OAOG;IACG,UAAU,CACd,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,aAAkB,EAC3B,WAAW,SAAI,GACd,OAAO,CAAC,YAAY,EAAE,CAAC;IAkC1B;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,OAAO,CAAC,GAAG;CAKZ;AAGD,YAAY,EACV,aAAa,EACb,YAAY,EACZ,UAAU,EACV,aAAa,EACb,OAAO,EACP,cAAc,EACd,aAAa,EACb,aAAa,EACb,OAAO,EACP,UAAU,EACV,SAAS,GACV,MAAM,YAAY,CAAC"}
|
package/dist/scraper/index.js
DELETED
|
@@ -1,299 +0,0 @@
|
|
|
1
|
-
import { validateUrl } from "../security/url-validator.js";
|
|
2
|
-
import { Tier1HTTP } from "./tiers/tier1-http.js";
|
|
3
|
-
import { Tier2Stealth } from "./tiers/tier2-stealth.js";
|
|
4
|
-
import { Tier3Browser } from "./tiers/tier3-browser.js";
|
|
5
|
-
import { extractLinksWithMetadata } from "./extractors/content.js";
|
|
6
|
-
// ─── Scraper ───────────────────────────────────────────────────────────────
|
|
7
|
-
//
|
|
8
|
-
// Scraper de alta qualidade com fallback automático em 3 tiers:
|
|
9
|
-
//
|
|
10
|
-
// Tier 1 → HTTP puro (got-scraping) (~100-500ms, sem browser)
|
|
11
|
-
// ↓ bloqueado ou conteúdo insuficiente (SPA, anti-bot básico)
|
|
12
|
-
// Tier 2 → HTTP Stealth (got-scraping) (~200-800ms, TLS fingerprint)
|
|
13
|
-
// ↓ ainda bloqueado ou SPA sem SSR
|
|
14
|
-
// Tier 3 → Playwright Chromium + stealth (~2-10s, browser completo)
|
|
15
|
-
//
|
|
16
|
-
// Anti-SSRF integrado: todas as URLs são validadas antes do scrape.
|
|
17
|
-
// ──────────────────────────────────────────────────────────────────────────
|
|
18
|
-
export class Scraper {
|
|
19
|
-
fullConfig;
|
|
20
|
-
tier1;
|
|
21
|
-
tier2;
|
|
22
|
-
tier3;
|
|
23
|
-
config;
|
|
24
|
-
constructor(fullConfig = {}) {
|
|
25
|
-
this.fullConfig = fullConfig;
|
|
26
|
-
this.tier1 = new Tier1HTTP();
|
|
27
|
-
this.tier2 = new Tier2Stealth();
|
|
28
|
-
this.tier3 = new Tier3Browser(fullConfig.browserConfig);
|
|
29
|
-
this.config = {
|
|
30
|
-
timeout: fullConfig.timeout ?? 30_000,
|
|
31
|
-
verbose: fullConfig.verbose ?? false,
|
|
32
|
-
forceTier: fullConfig.forceTier,
|
|
33
|
-
allowedDomains: fullConfig.allowedDomains,
|
|
34
|
-
blockedDomains: fullConfig.blockedDomains,
|
|
35
|
-
};
|
|
36
|
-
}
|
|
37
|
-
// ── Scrape de uma única URL ────────────────────────────────────────────
|
|
38
|
-
async scrape(url, options = {}) {
|
|
39
|
-
// Validação anti-SSRF antes de qualquer requisição
|
|
40
|
-
const validUrl = await validateUrl(url, {
|
|
41
|
-
allowedDomains: this.config.allowedDomains,
|
|
42
|
-
blockedDomains: this.config.blockedDomains,
|
|
43
|
-
});
|
|
44
|
-
const mergedOptions = {
|
|
45
|
-
timeout: this.config.timeout,
|
|
46
|
-
...options,
|
|
47
|
-
};
|
|
48
|
-
// ── Tier forçado: pula a cascata automática ────────────────────────
|
|
49
|
-
const forced = options.forceTier ?? this.config.forceTier;
|
|
50
|
-
if (forced === "browser") {
|
|
51
|
-
this.log("🌐 Tier 3 (Playwright) forçado");
|
|
52
|
-
return this.tier3.scrape(validUrl, mergedOptions);
|
|
53
|
-
}
|
|
54
|
-
if (forced === "stealth") {
|
|
55
|
-
this.log("🥷 Tier 2 (Stealth HTTP) forçado");
|
|
56
|
-
return this.tier2.scrape(validUrl, mergedOptions);
|
|
57
|
-
}
|
|
58
|
-
if (forced === "http") {
|
|
59
|
-
this.log("🔗 Tier 1 (HTTP) forçado");
|
|
60
|
-
return this.tier1.scrape(validUrl, mergedOptions);
|
|
61
|
-
}
|
|
62
|
-
// ── Modo automático: Tier 1 → Tier 2 → Tier 3 ────────────────────
|
|
63
|
-
// ── Tier 1: HTTP puro (fetch nativo, sem overhead de TLS) ─────────
|
|
64
|
-
let tier1Error = null;
|
|
65
|
-
try {
|
|
66
|
-
const result = await this.tier1.scrape(validUrl, mergedOptions);
|
|
67
|
-
const content = result.markdown ?? result.text ?? "";
|
|
68
|
-
// Menos de 200 chars sem dados SSR = página quase certamente vazia
|
|
69
|
-
// (SPA sem SSR, Cloudflare challenge, bloqueio silencioso, etc.)
|
|
70
|
-
if (content.length < 200 && !result.ssrData) {
|
|
71
|
-
tier1Error = "Conteúdo insuficiente após HTTP — provavelmente SPA ou bloqueio silencioso";
|
|
72
|
-
this.log(`⚠ Tier 1: ${tier1Error}`);
|
|
73
|
-
}
|
|
74
|
-
else {
|
|
75
|
-
this.log(`✓ Tier 1 (HTTP) — ${result.durationMs}ms`);
|
|
76
|
-
return result;
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
catch (err) {
|
|
80
|
-
tier1Error = err instanceof Error ? err.message : String(err);
|
|
81
|
-
this.log(`⚠ Tier 1 falhou: ${tier1Error}`);
|
|
82
|
-
}
|
|
83
|
-
// ── Tier 2: HTTP Stealth (got-scraping, TLS fingerprint) ──────────
|
|
84
|
-
let tier2Error = null;
|
|
85
|
-
this.log("🥷 Ativando fallback → Tier 2 (Stealth HTTP)...");
|
|
86
|
-
try {
|
|
87
|
-
const result = await this.tier2.scrape(validUrl, mergedOptions);
|
|
88
|
-
const content = result.markdown ?? result.text ?? "";
|
|
89
|
-
// Mesmo com TLS spoofing pode ser SPA que precisa de browser
|
|
90
|
-
if (content.length < 200 && !result.ssrData) {
|
|
91
|
-
tier2Error = "Conteúdo insuficiente após Stealth — SPA que precisa de browser";
|
|
92
|
-
this.log(`⚠ Tier 2: ${tier2Error}`);
|
|
93
|
-
}
|
|
94
|
-
else {
|
|
95
|
-
this.log(`✓ Tier 2 (Stealth) — ${result.durationMs}ms`);
|
|
96
|
-
return result;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
catch (err) {
|
|
100
|
-
tier2Error = err instanceof Error ? err.message : String(err);
|
|
101
|
-
this.log(`⚠ Tier 2 (Stealth) falhou: ${tier2Error}`);
|
|
102
|
-
}
|
|
103
|
-
// ── Tier 3: Playwright Chromium + stealth (fallback final) ────────
|
|
104
|
-
this.log("🌐 Ativando fallback final → Tier 3 (Playwright)...");
|
|
105
|
-
try {
|
|
106
|
-
const result = await this.tier3.scrape(validUrl, mergedOptions);
|
|
107
|
-
this.log(`✓ Tier 3 (Playwright) — ${result.durationMs}ms`);
|
|
108
|
-
return result;
|
|
109
|
-
}
|
|
110
|
-
catch (err) {
|
|
111
|
-
const tier3Error = err instanceof Error ? err.message : String(err);
|
|
112
|
-
this.log(`✗ Tier 3 (Playwright) falhou: ${tier3Error}`);
|
|
113
|
-
// Todos os tiers falharam — retorna resultado com erro consolidado
|
|
114
|
-
return {
|
|
115
|
-
url: validUrl,
|
|
116
|
-
statusCode: 0,
|
|
117
|
-
title: "",
|
|
118
|
-
tier: "browser",
|
|
119
|
-
durationMs: 0,
|
|
120
|
-
error: [
|
|
121
|
-
"Todos os tiers falharam:",
|
|
122
|
-
` Tier 1 (HTTP): ${tier1Error ?? "não tentado"}`,
|
|
123
|
-
` Tier 2 (Stealth): ${tier2Error ?? "não tentado"}`,
|
|
124
|
-
` Tier 3 (Browser): ${tier3Error}`,
|
|
125
|
-
].join("\n"),
|
|
126
|
-
};
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
// ── Map: descobrir URLs de um site ──────────────────────────────────────
|
|
130
|
-
/**
|
|
131
|
-
* Mapeia links de uma página (URL + texto do âncora).
|
|
132
|
-
* Reutiliza a cascata de tiers (HTTP → Stealth → Browser).
|
|
133
|
-
*
|
|
134
|
-
* @param url - URL base para extrair links
|
|
135
|
-
* @param options - Filtros e limites
|
|
136
|
-
*/
|
|
137
|
-
async map(url, options = {}) {
|
|
138
|
-
const startTime = Date.now();
|
|
139
|
-
const limit = options.limit ?? 500;
|
|
140
|
-
const includeSubdomains = options.includeSubdomains ?? true;
|
|
141
|
-
const ignoreQueryParameters = options.ignoreQueryParameters ?? true;
|
|
142
|
-
const searchTerm = options.search?.toLowerCase().trim();
|
|
143
|
-
let scrapeResult;
|
|
144
|
-
try {
|
|
145
|
-
scrapeResult = await this.scrape(url, {
|
|
146
|
-
getRawHtml: true,
|
|
147
|
-
forceTier: options.forceTier,
|
|
148
|
-
});
|
|
149
|
-
}
|
|
150
|
-
catch (err) {
|
|
151
|
-
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
152
|
-
return {
|
|
153
|
-
url,
|
|
154
|
-
links: [],
|
|
155
|
-
tier: "http",
|
|
156
|
-
durationMs: Date.now() - startTime,
|
|
157
|
-
error: `Falha ao carregar a página: ${errorMsg}`,
|
|
158
|
-
};
|
|
159
|
-
}
|
|
160
|
-
if (scrapeResult.error) {
|
|
161
|
-
return {
|
|
162
|
-
url: scrapeResult.url,
|
|
163
|
-
links: [],
|
|
164
|
-
tier: scrapeResult.tier,
|
|
165
|
-
durationMs: scrapeResult.durationMs,
|
|
166
|
-
error: scrapeResult.error,
|
|
167
|
-
};
|
|
168
|
-
}
|
|
169
|
-
const rawHtml = scrapeResult.rawHtml ?? scrapeResult.html ?? "";
|
|
170
|
-
if (!rawHtml) {
|
|
171
|
-
return {
|
|
172
|
-
url: scrapeResult.url,
|
|
173
|
-
links: [],
|
|
174
|
-
tier: scrapeResult.tier,
|
|
175
|
-
durationMs: scrapeResult.durationMs,
|
|
176
|
-
error: "HTML não disponível para extração de links",
|
|
177
|
-
};
|
|
178
|
-
}
|
|
179
|
-
const baseUrl = scrapeResult.url;
|
|
180
|
-
const baseHostname = new URL(baseUrl).hostname;
|
|
181
|
-
const baseDomain = baseHostname.replace(/^www\./, "");
|
|
182
|
-
let links = extractLinksWithMetadata(rawHtml, baseUrl);
|
|
183
|
-
// Filtrar por mesmo domínio
|
|
184
|
-
links = links.filter((link) => {
|
|
185
|
-
try {
|
|
186
|
-
const linkHost = new URL(link.url).hostname.replace(/^www\./, "");
|
|
187
|
-
if (includeSubdomains) {
|
|
188
|
-
return linkHost === baseDomain || linkHost.endsWith(`.${baseDomain}`);
|
|
189
|
-
}
|
|
190
|
-
return linkHost === baseDomain;
|
|
191
|
-
}
|
|
192
|
-
catch {
|
|
193
|
-
return false;
|
|
194
|
-
}
|
|
195
|
-
});
|
|
196
|
-
// Normalizar URL (remover query string) e deduplicar
|
|
197
|
-
const normalizeUrl = (href) => {
|
|
198
|
-
if (!ignoreQueryParameters)
|
|
199
|
-
return href;
|
|
200
|
-
try {
|
|
201
|
-
const u = new URL(href);
|
|
202
|
-
u.search = "";
|
|
203
|
-
return u.href;
|
|
204
|
-
}
|
|
205
|
-
catch {
|
|
206
|
-
return href;
|
|
207
|
-
}
|
|
208
|
-
};
|
|
209
|
-
const seen = new Set();
|
|
210
|
-
const deduped = [];
|
|
211
|
-
for (const link of links) {
|
|
212
|
-
const key = ignoreQueryParameters ? normalizeUrl(link.url) : link.url;
|
|
213
|
-
if (seen.has(key))
|
|
214
|
-
continue;
|
|
215
|
-
seen.add(key);
|
|
216
|
-
deduped.push({
|
|
217
|
-
url: link.url,
|
|
218
|
-
title: link.title || undefined,
|
|
219
|
-
});
|
|
220
|
-
}
|
|
221
|
-
links = deduped;
|
|
222
|
-
// Filtrar e ordenar por search (relevância simples)
|
|
223
|
-
if (searchTerm) {
|
|
224
|
-
const escaped = searchTerm.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
225
|
-
const regex = new RegExp(escaped, "gi");
|
|
226
|
-
const scored = links
|
|
227
|
-
.map((link) => {
|
|
228
|
-
const urlLower = link.url.toLowerCase();
|
|
229
|
-
const titleLower = (link.title ?? "").toLowerCase();
|
|
230
|
-
const urlMatches = (urlLower.match(regex) ?? []).length;
|
|
231
|
-
const titleMatches = (titleLower.match(regex) ?? []).length;
|
|
232
|
-
const score = urlMatches * 2 + titleMatches * 3; // title tem mais peso
|
|
233
|
-
return { link, score };
|
|
234
|
-
})
|
|
235
|
-
.filter(({ score }) => score > 0)
|
|
236
|
-
.sort((a, b) => b.score - a.score)
|
|
237
|
-
.map(({ link }) => link);
|
|
238
|
-
links = scored;
|
|
239
|
-
}
|
|
240
|
-
const result = {
|
|
241
|
-
url: baseUrl,
|
|
242
|
-
links: links.slice(0, limit),
|
|
243
|
-
tier: scrapeResult.tier,
|
|
244
|
-
durationMs: Date.now() - startTime,
|
|
245
|
-
};
|
|
246
|
-
this.log(`✓ Map: ${result.links.length} links (${result.tier})`);
|
|
247
|
-
return result;
|
|
248
|
-
}
|
|
249
|
-
// ── Scrape em lote com concorrência controlada ─────────────────────────
|
|
250
|
-
/**
|
|
251
|
-
* Scrapia múltiplas URLs em paralelo com concorrência limitada.
|
|
252
|
-
* Erros em URLs individuais não derrubam o lote inteiro.
|
|
253
|
-
*
|
|
254
|
-
* @param urls - Lista de URLs a scrapeiar
|
|
255
|
-
* @param options - Opções aplicadas a todas as URLs
|
|
256
|
-
* @param concurrency - Máximo de scrapes simultâneos. Default: 3
|
|
257
|
-
*/
|
|
258
|
-
async scrapeMany(urls, options = {}, concurrency = 3) {
|
|
259
|
-
const results = [];
|
|
260
|
-
const queue = [...urls];
|
|
261
|
-
while (queue.length > 0) {
|
|
262
|
-
const batch = queue.splice(0, concurrency);
|
|
263
|
-
const settled = await Promise.allSettled(batch.map((u) => this.scrape(u, options)));
|
|
264
|
-
for (const outcome of settled) {
|
|
265
|
-
if (outcome.status === "fulfilled") {
|
|
266
|
-
results.push(outcome.value);
|
|
267
|
-
}
|
|
268
|
-
else {
|
|
269
|
-
results.push({
|
|
270
|
-
url: "unknown",
|
|
271
|
-
statusCode: 0,
|
|
272
|
-
title: "",
|
|
273
|
-
tier: "http",
|
|
274
|
-
durationMs: 0,
|
|
275
|
-
error: outcome.reason instanceof Error
|
|
276
|
-
? outcome.reason.message
|
|
277
|
-
: String(outcome.reason),
|
|
278
|
-
});
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
return results;
|
|
283
|
-
}
|
|
284
|
-
// ── Encerrar recursos ──────────────────────────────────────────────────
|
|
285
|
-
/**
|
|
286
|
-
* Fecha o browser Playwright (Tier 3).
|
|
287
|
-
* Sempre chamar ao terminar para evitar processos Chromium órfãos.
|
|
288
|
-
*/
|
|
289
|
-
async close() {
|
|
290
|
-
await this.tier3.close();
|
|
291
|
-
}
|
|
292
|
-
// ── Helpers ────────────────────────────────────────────────────────────
|
|
293
|
-
log(msg) {
|
|
294
|
-
if (this.config.verbose) {
|
|
295
|
-
console.log(`[Scraper] ${msg}`);
|
|
296
|
-
}
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
//# sourceMappingURL=index.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,8BAA8B,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAUnE,8EAA8E;AAC9E,EAAE;AACF,gEAAgE;AAChE,EAAE;AACF,wEAAwE;AACxE,2EAA2E;AAC3E,4EAA4E;AAC5E,gDAAgD;AAChD,yEAAyE;AACzE,EAAE;AACF,oEAAoE;AACpE,6EAA6E;AAE7E,MAAM,OAAO,OAAO;IAYW;IAXZ,KAAK,CAAY;IACjB,KAAK,CAAe;IACpB,KAAK,CAAe;IACpB,MAAM,CAMrB;IAEF,YAA6B,aAA4B,EAAE;QAA9B,eAAU,GAAV,UAAU,CAAoB;QACzD,IAAI,CAAC,KAAK,GAAG,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,EAAE,CAAC;QAChC,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QACxD,IAAI,CAAC,MAAM,GAAG;YACZ,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM;YACrC,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,KAAK;YACpC,SAAS,EAAE,UAAU,CAAC,SAAS;YAC/B,cAAc,EAAE,UAAU,CAAC,cAAc;YACzC,cAAc,EAAE,UAAU,CAAC,cAAc;SAC1C,CAAC;IACJ,CAAC;IAED,0EAA0E;IAE1E,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,mDAAmD;QACnD,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE;YACtC,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;YAC1C,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;SAC3C,CAAC,CAAC;QAEH,MAAM,aAAa,GAAkB;YACnC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,GAAG,OAAO;SACX,CAAC;QAEF,sEAAsE;QACtE,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;QAE1D,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,IAAI,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,IAAI,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;YAC7C,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YACtB,IAAI,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;YACrC,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,oEAAoE;QAEpE,qEAAqE;QACrE,IAAI,UAAU,GAAkB,IAAI,CAAC;QACrC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YA
ChE,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAErD,mEAAmE;YACnE,iEAAiE;YACjE,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC5C,UAAU,GAAG,4EAA4E,CAAC;gBAC1F,IAAI,CAAC,GAAG,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,qBAAqB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;gBACrD,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC;QAC9C,CAAC;QAED,qEAAqE;QACrE,IAAI,UAAU,GAAkB,IAAI,CAAC;QACrC,IAAI,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;QAC5D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAErD,6DAA6D;YAC7D,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC5C,UAAU,GAAG,iEAAiE,CAAC;gBAC/E,IAAI,CAAC,GAAG,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;gBACxD,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,+BAA+B,UAAU,EAAE,CAAC,CAAC;QACxD,CAAC;QAED,qEAAqE;QACrE,IAAI,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;QAChE,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,IAAI,CAAC,GAAG,CAAC,2BAA2B,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;YAC3D,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACpE,IAAI,CAAC,GAAG,CAAC,iCAAiC,UAAU,EAAE,CAAC,CAAC;YAExD,mEAAmE;YACnE,OAAO;gBACL,GAAG,EAAE,QAAQ;gBACb,UAAU,EAAE,CAAC;gBACb,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,SAAS;gBACf,UAAU,EAAE,CAAC;gBACb,KAAK,EAAE;oBACL,0BAA0B;oBAC1B,uBAAuB,UAAU,IAAI,aAAa,EAAE;oBACpD,uBAAuB,UAAU,IAAI,aAAa,EAAE;oBACpD,uBAAuB,UAAU,EAAE;iBACpC,CAAC,IAAI,CAAC
,IAAI,CAAC;aACb,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAE3E;;;;;;OAMG;IACH,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,UAAsB,EAAE;QAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,GAAG,CAAC;QACnC,MAAM,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC;QAC5D,MAAM,qBAAqB,GAAG,OAAO,CAAC,qBAAqB,IAAI,IAAI,CAAC;QACpE,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAExD,IAAI,YAA0B,CAAC;QAE/B,IAAI,CAAC;YACH,YAAY,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE;gBACpC,UAAU,EAAE,IAAI;gBAChB,SAAS,EAAE,OAAO,CAAC,SAAS;aAC7B,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAClE,OAAO;gBACL,GAAG;gBACH,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,MAAM;gBACZ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAClC,KAAK,EAAE,+BAA+B,QAAQ,EAAE;aACjD,CAAC;QACJ,CAAC;QAED,IAAI,YAAY,CAAC,KAAK,EAAE,CAAC;YACvB,OAAO;gBACL,GAAG,EAAE,YAAY,CAAC,GAAG;gBACrB,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,YAAY,CAAC,IAAI;gBACvB,UAAU,EAAE,YAAY,CAAC,UAAU;gBACnC,KAAK,EAAE,YAAY,CAAC,KAAK;aAC1B,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,IAAI,YAAY,CAAC,IAAI,IAAI,EAAE,CAAC;QAChE,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO;gBACL,GAAG,EAAE,YAAY,CAAC,GAAG;gBACrB,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,YAAY,CAAC,IAAI;gBACvB,UAAU,EAAE,YAAY,CAAC,UAAU;gBACnC,KAAK,EAAE,4CAA4C;aACpD,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,GAAG,CAAC;QACjC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC;QAC/C,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAEtD,IAAI,KAAK,GAAG,wBAAwB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAEvD,4BAA4B;QAC5B,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YAC5B,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;gBAClE,IAAI,iBAAiB,EAAE,CAAC;oBACtB,OAAO,QAAQ,KAAK,UAAU,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,UAAU,EAAE,CAAC,CAAC;gBACxE,CAAC;gBACD,OAAO,QAAQ,KAAK,UAAU,CAAC;YACjC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,qDAAqD;QAC
rD,MAAM,YAAY,GAAG,CAAC,IAAY,EAAU,EAAE;YAC5C,IAAI,CAAC,qBAAqB;gBAAE,OAAO,IAAI,CAAC;YACxC,IAAI,CAAC;gBACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC,CAAC,MAAM,GAAG,EAAE,CAAC;gBACd,OAAO,CAAC,CAAC,IAAI,CAAC;YAChB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,MAAM,OAAO,GAAc,EAAE,CAAC;QAC9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,qBAAqB,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YACtE,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YAC5B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,OAAO,CAAC,IAAI,CAAC;gBACX,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;aAC/B,CAAC,CAAC;QACL,CAAC;QACD,KAAK,GAAG,OAAO,CAAC;QAEhB,oDAAoD;QACpD,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;YAClE,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YACxC,MAAM,MAAM,GAAG,KAAK;iBACjB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;gBACxC,MAAM,UAAU,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACpD,MAAM,UAAU,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBACxD,MAAM,YAAY,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBAC5D,MAAM,KAAK,GAAG,UAAU,GAAG,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,sBAAsB;gBACvE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;YACzB,CAAC,CAAC;iBACD,MAAM,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,KAAK,GAAG,CAAC,CAAC;iBAChC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;iBACjC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;YAC3B,KAAK,GAAG,MAAM,CAAC;QACjB,CAAC;QAED,MAAM,MAAM,GAAc;YACxB,GAAG,EAAE,OAAO;YACZ,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;YAC5B,IAAI,EAAE,YAAY,CAAC,IAAI;YACvB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;SACnC,CAAC;QAEF,IAAI,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,KAAK,CAAC,MAAM,WAAW,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;QACjE,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,0EA
A0E;IAE1E;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CACd,IAAc,EACd,UAAyB,EAAE,EAC3B,WAAW,GAAG,CAAC;QAEf,MAAM,OAAO,GAAmB,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;QAExB,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;YAC3C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAC1C,CAAC;YAEF,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;gBAC9B,IAAI,OAAO,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;oBACnC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAC9B,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,IAAI,CAAC;wBACX,GAAG,EAAE,SAAS;wBACd,UAAU,EAAE,CAAC;wBACb,KAAK,EAAE,EAAE;wBACT,IAAI,EAAE,MAAM;wBACZ,UAAU,EAAE,CAAC;wBACb,KAAK,EACH,OAAO,CAAC,MAAM,YAAY,KAAK;4BAC7B,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO;4BACxB,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;qBAC7B,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,0EAA0E;IAE1E;;;OAGG;IACH,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED,0EAA0E;IAElE,GAAG,CAAC,GAAW;QACrB,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,aAAa,GAAG,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;CACF"}
|