auspex 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/scraper/extractors/content.d.ts +10 -0
- package/dist/scraper/extractors/content.d.ts.map +1 -1
- package/dist/scraper/extractors/content.js +40 -1
- package/dist/scraper/extractors/content.js.map +1 -1
- package/dist/scraper/index.d.ts +12 -4
- package/dist/scraper/index.d.ts.map +1 -1
- package/dist/scraper/index.js +125 -4
- package/dist/scraper/index.js.map +1 -1
- package/dist/scraper/tiers/tier1-http.d.ts.map +1 -1
- package/dist/scraper/tiers/tier1-http.js +2 -0
- package/dist/scraper/tiers/tier1-http.js.map +1 -1
- package/dist/scraper/tiers/tier2-stealth.d.ts.map +1 -1
- package/dist/scraper/tiers/tier2-stealth.js +2 -0
- package/dist/scraper/tiers/tier2-stealth.js.map +1 -1
- package/dist/scraper/tiers/tier3-browser.d.ts +2 -2
- package/dist/scraper/tiers/tier3-browser.d.ts.map +1 -1
- package/dist/scraper/tiers/tier3-browser.js +2 -0
- package/dist/scraper/tiers/tier3-browser.js.map +1 -1
- package/dist/scraper/types.d.ts +33 -2
- package/dist/scraper/types.d.ts.map +1 -1
- package/dist/scraper/types.js +1 -1
- package/package.json +1 -1
- package/readme.md +40 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export { Auspex } from "./agent/agent.js";
|
|
2
2
|
export type { AgentConfig, AgentResult, AgentAction, AgentStatus, ActionRecord, LLMUsage, MemoryUsage, RunOptions, PageSnapshot, SnapshotLink, SnapshotForm, SnapshotInput, } from "./types.js";
|
|
3
|
-
export {
|
|
4
|
-
export type {
|
|
3
|
+
export { Scraper } from "./scraper/index.js";
|
|
4
|
+
export type { ScraperConfig, ScrapeOptions, ScrapeResult, ScrapeTier, ContentFormat, SSRData, InterceptedAPI, TierRawResult, MapLink, MapOptions, MapResult, } from "./scraper/index.js";
|
|
5
5
|
export { UrlValidationError } from "./security/url-validator.js";
|
|
6
6
|
export { ActionValidationError } from "./security/action-validator.js";
|
|
7
7
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAE1C,YAAY,EACV,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,YAAY,EACZ,QAAQ,EACR,WAAW,EACX,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,YAAY,EACZ,aAAa,GACd,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAE1C,YAAY,EACV,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,YAAY,EACZ,QAAQ,EACR,WAAW,EACX,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,YAAY,EACZ,aAAa,GACd,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAE7C,YAAY,EACV,aAAa,EACb,aAAa,EACb,YAAY,EACZ,UAAU,EACV,aAAa,EACb,OAAO,EACP,cAAc,EACd,aAAa,EACb,OAAO,EACP,UAAU,EACV,SAAS,GACV,MAAM,oBAAoB,CAAC;AAG5B,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,gCAAgC,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// ── Agente LLM (automação via Playwright) ─────────────────────────────────
|
|
2
2
|
export { Auspex } from "./agent/agent.js";
|
|
3
|
-
// ──
|
|
4
|
-
export {
|
|
3
|
+
// ── Scraper (fallback automático HTTP → Stealth → Browser) ─────────────────
|
|
4
|
+
export { Scraper } from "./scraper/index.js";
|
|
5
5
|
// ── Segurança ─────────────────────────────────────────────────────────────
|
|
6
6
|
export { UrlValidationError } from "./security/url-validator.js";
|
|
7
7
|
export { ActionValidationError } from "./security/action-validator.js";
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAC7E,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAiB1C,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAC7E,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAiB1C,8EAA8E;AAC9E,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAgB7C,6EAA6E;AAC7E,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,gCAAgC,CAAC"}
|
|
@@ -5,6 +5,16 @@ export interface ExtractedContent {
|
|
|
5
5
|
description: string;
|
|
6
6
|
links: string[];
|
|
7
7
|
}
|
|
8
|
+
/**
 * A link paired with optional label metadata, as produced for the map feature.
 */
export interface LinkWithMetadata {
    /** Target of the link. */
    url: string;
    /** Label for the link (anchor text); omitted when none is available. */
    title?: string;
}
|
|
13
|
+
/**
 * Extracts the links of a page together with their anchor text (title).
 * Used by map() to discover URLs with context.
 *
 * @param html - HTML markup to scan for links.
 * @param baseUrl - URL of the page the markup came from.
 * @returns The links found, each with its URL and optional title.
 */
export declare function extractLinksWithMetadata(
    html: string,
    baseUrl: string,
): LinkWithMetadata[];
|
|
8
18
|
/**
|
|
9
19
|
* Extrai o conteúdo significativo de um HTML.
|
|
10
20
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"content.d.ts","sourceRoot":"","sources":["../../../src/scraper/extractors/content.ts"],"names":[],"mappings":"AAuFA,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB;
|
|
1
|
+
{"version":3,"file":"content.d.ts","sourceRoot":"","sources":["../../../src/scraper/extractors/content.ts"],"names":[],"mappings":"AAuFA,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB;AAmCD,kCAAkC;AAClC,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;GAGG;AACH,wBAAgB,wBAAwB,CACtC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd,gBAAgB,EAAE,CAgCpB;AA6GD;;;;;;;;;;;;GAYG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,UAAO,EACf,OAAO,CAAC,EAAE,MAAM,GACf,gBAAgB,CA+BlB"}
|
|
@@ -111,6 +111,45 @@ function extractLinks($, baseUrl) {
|
|
|
111
111
|
});
|
|
112
112
|
return links;
|
|
113
113
|
}
|
|
114
|
+
/**
|
|
115
|
+
* Extrai links da página com texto do âncora (title).
|
|
116
|
+
* Usado pelo map() para descobrir URLs com contexto.
|
|
117
|
+
*/
|
|
118
|
+
export function extractLinksWithMetadata(html, baseUrl) {
|
|
119
|
+
const $ = load(html);
|
|
120
|
+
const links = [];
|
|
121
|
+
const seen = new Set();
|
|
122
|
+
$("a[href]").each((_, el) => {
|
|
123
|
+
const href = $(el).attr("href");
|
|
124
|
+
if (!href)
|
|
125
|
+
return;
|
|
126
|
+
if (href.startsWith("#"))
|
|
127
|
+
return;
|
|
128
|
+
if (href.startsWith("javascript:"))
|
|
129
|
+
return;
|
|
130
|
+
if (href.startsWith("mailto:"))
|
|
131
|
+
return;
|
|
132
|
+
if (href.startsWith("tel:"))
|
|
133
|
+
return;
|
|
134
|
+
let resolved = href;
|
|
135
|
+
if (baseUrl && (href.startsWith("/") || href.startsWith("."))) {
|
|
136
|
+
try {
|
|
137
|
+
resolved = new URL(href, baseUrl).href;
|
|
138
|
+
}
|
|
139
|
+
catch {
|
|
140
|
+
return;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
if (!seen.has(resolved)) {
|
|
144
|
+
seen.add(resolved);
|
|
145
|
+
const title = ($(el).text().trim() || $(el).attr("title") || "")
|
|
146
|
+
.replace(/\s+/g, " ")
|
|
147
|
+
.slice(0, 200);
|
|
148
|
+
links.push({ url: resolved, title: title || undefined });
|
|
149
|
+
}
|
|
150
|
+
});
|
|
151
|
+
return links;
|
|
152
|
+
}
|
|
114
153
|
// ─── Extração de metadados ─────────────────────────────────────────────────
|
|
115
154
|
function extractMeta($) {
|
|
116
155
|
const title = $("title").first().text().trim() ||
|
|
@@ -125,7 +164,7 @@ function extractMeta($) {
|
|
|
125
164
|
}
|
|
126
165
|
// ─── Mozilla Readability (caminho principal) ───────────────────────────────────
|
|
127
166
|
//
|
|
128
|
-
// Mesmo algoritmo que o Firefox usa no Reader Mode
|
|
167
|
+
// Mesmo algoritmo que o Firefox usa no Reader Mode.
|
|
129
168
|
// Produz conteúdo semanticamente limpo, muito superior a heurísticas manuais.
|
|
130
169
|
function extractWithReadability(html, baseUrl) {
|
|
131
170
|
try {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"content.js","sourceRoot":"","sources":["../../../src/scraper/extractors/content.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAmB,MAAM,SAAS,CAAC;AAChD,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,iFAAiF;AAEjF,MAAM,eAAe,GAAG;IACtB,aAAa;IACb,QAAQ;IACR,OAAO;IACP,UAAU;IACV,QAAQ;IACR,KAAK;IACL,YAAY;IACZ,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,qBAAqB;IACrB,iBAAiB;IACjB,sBAAsB;IACtB,MAAM;IACN,SAAS;IACT,aAAa;IACb,OAAO;IACP,SAAS;IACT,SAAS;IACT,cAAc;IACd,cAAc;IACd,UAAU;IACV,OAAO;IACP,UAAU;IACV,WAAW;IACX,UAAU;IACV,wBAAwB;IACxB,uBAAuB;IACvB,KAAK;IACL,MAAM;IACN,cAAc;IACd,gBAAgB;IAChB,QAAQ;IACR,SAAS;IACT,oBAAoB;IACpB,sBAAsB;IACtB,iBAAiB;IACjB,gBAAgB;IAChB,gBAAgB;IAChB,iBAAiB;IACjB,OAAO;IACP,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;IACX,gBAAgB;IAChB,eAAe;IACf,gBAAgB;IAChB,gBAAgB;IAChB,WAAW;IACX,WAAW;IACX,kBAAkB;IAClB,aAAa;IACb,YAAY;CACJ,CAAC;AAEX,iFAAiF;AAEjF,MAAM,sBAAsB,GAAG;IAC7B,MAAM;IACN,SAAS;IACT,eAAe;IACf,eAAe;IACf,UAAU;IACV,OAAO;IACP,eAAe;IACf,UAAU;IACV,eAAe;IACf,kBAAkB;IAClB,gBAAgB;IAChB,eAAe;IACf,YAAY;IACZ,eAAe;IACf,YAAY;IACZ,eAAe;CACP,CAAC;AAYX,8EAA8E;AAE9E,SAAS,YAAY,CAAC,CAAa,EAAE,OAAgB;IACnD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,OAAO;QACjC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YAAE,OAAO;QAC3C,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,OAAO;QACvC,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAAE,OAAO;QAEpC,8BAA8B;QAC9B,IAAI,QAAQ,GAAG,IAAI,CAAC;QACpB,IAAI,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC9D,IAAI,CAAC;gBACH,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YACzC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;QACH,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACxB,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnB,KA
AK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC;AAED,8EAA8E;AAE9E,SAAS,WAAW,CAAC,CAAa;IAChC,MAAM,KAAK,GACT,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;QAChC,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE;QACtD,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;QAC7B,EAAE,CAAC;IAEL,MAAM,WAAW,GACf,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE;QACrD,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE;QAC5D,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE;QAC7D,EAAE,CAAC;IAEL,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,CAAC;AAChC,CAAC;AAED,kFAAkF;AAClF,EAAE;AACF
|
|
1
|
+
{"version":3,"file":"content.js","sourceRoot":"","sources":["../../../src/scraper/extractors/content.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAmB,MAAM,SAAS,CAAC;AAChD,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAC9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,iFAAiF;AAEjF,MAAM,eAAe,GAAG;IACtB,aAAa;IACb,QAAQ;IACR,OAAO;IACP,UAAU;IACV,QAAQ;IACR,KAAK;IACL,YAAY;IACZ,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,qBAAqB;IACrB,iBAAiB;IACjB,sBAAsB;IACtB,MAAM;IACN,SAAS;IACT,aAAa;IACb,OAAO;IACP,SAAS;IACT,SAAS;IACT,cAAc;IACd,cAAc;IACd,UAAU;IACV,OAAO;IACP,UAAU;IACV,WAAW;IACX,UAAU;IACV,wBAAwB;IACxB,uBAAuB;IACvB,KAAK;IACL,MAAM;IACN,cAAc;IACd,gBAAgB;IAChB,QAAQ;IACR,SAAS;IACT,oBAAoB;IACpB,sBAAsB;IACtB,iBAAiB;IACjB,gBAAgB;IAChB,gBAAgB;IAChB,iBAAiB;IACjB,OAAO;IACP,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;IACX,gBAAgB;IAChB,eAAe;IACf,gBAAgB;IAChB,gBAAgB;IAChB,WAAW;IACX,WAAW;IACX,kBAAkB;IAClB,aAAa;IACb,YAAY;CACJ,CAAC;AAEX,iFAAiF;AAEjF,MAAM,sBAAsB,GAAG;IAC7B,MAAM;IACN,SAAS;IACT,eAAe;IACf,eAAe;IACf,UAAU;IACV,OAAO;IACP,eAAe;IACf,UAAU;IACV,eAAe;IACf,kBAAkB;IAClB,gBAAgB;IAChB,eAAe;IACf,YAAY;IACZ,eAAe;IACf,YAAY;IACZ,eAAe;CACP,CAAC;AAYX,8EAA8E;AAE9E,SAAS,YAAY,CAAC,CAAa,EAAE,OAAgB;IACnD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,OAAO;QACjC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YAAE,OAAO;QAC3C,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,OAAO;QACvC,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAAE,OAAO;QAEpC,8BAA8B;QAC9B,IAAI,QAAQ,GAAG,IAAI,CAAC;QACpB,IAAI,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC9D,IAAI,CAAC;gBACH,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YACzC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;QACH,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACxB,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnB,KA
AK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC;AAQD;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CACtC,IAAY,EACZ,OAAe;IAEf,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,KAAK,GAAuB,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO;QAClB,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,OAAO;QACjC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;YAAE,OAAO;QAC3C,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YAAE,OAAO;QACvC,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAAE,OAAO;QAEpC,IAAI,QAAQ,GAAG,IAAI,CAAC;QACpB,IAAI,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC9D,IAAI,CAAC;gBACH,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YACzC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;QACH,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACxB,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACnB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;iBAC7D,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;iBACpB,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,KAAK,IAAI,SAAS,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC;AAED,8EAA8E;AAE9E,SAAS,WAAW,CAAC,CAAa;IAChC,MAAM,KAAK,GACT,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;QAChC,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE;QACtD,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;QAC7B,EAAE,CAAC;IAEL,MAAM,WAAW,GACf,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE;QACrD,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE;QAC5D,CAAC,CAAC,kCAAkC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE;QAC7D,EAAE,CAAC;IAEL,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,CAAC;AAChC,CAAC;AAED,kFAAkF;AAClF,EAAE;AACF,oDAAoD;AACpD,8EAA8E;
AAE9E,SAAS,sBAAsB,CAC7B,IAAY,EACZ,OAAgB;IAEhB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE;YAC1B,wEAAwE;YACxE,GAAG,EAAE,OAAO,IAAI,qBAAqB;SACtC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE;YAClD,6DAA6D;YAC7D,aAAa,EAAE,EAAE;SAClB,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAE/B,8CAA8C;QAC9C,IACE,CAAC,OAAO;YACR,CAAC,OAAO,CAAC,OAAO;YAChB,CAAC,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,GAAG,EAChD,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO;YACL,IAAI,EAAE,OAAO,CAAC,OAAO;YACrB,IAAI,EAAE,CAAC,OAAO,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE;YAC7D,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;SAC3B,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,0DAA0D;QAC1D,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,iFAAiF;AAEjF,SAAS,kBAAkB,CACzB,CAAa,EACb,QAAiB;IAEjB,eAAe;IACf,eAAe,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QACnC,IAAI,CAAC;YACH,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;QACvB,CAAC;QAAC,MAAM,CAAC;YACP,wCAAwC;QAC1C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,qCAAqC;IACrC,IAAI,SAAS,GAAyB,CAAC,CAAC,MAAM,CAAC,CAAC;IAEhD,IAAI,QAAQ,EAAE,CAAC;QACb,6CAA6C;QAC7C,KAAK,MAAM,QAAQ,IAAI,sBAAsB,EAAE,CAAC;YAC9C,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;YACvB,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClB,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC3D,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;oBACtB,SAAS,GAAG,EAAE,CAAC,KAAK,EAAE,CAAC;oBACvB,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IAC9C,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IAClD,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACvC,CAAC,CAAC,EAAE,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IAC3C,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AA
CrC,CAAC;AAED,kFAAkF;AAElF;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,QAAQ,GAAG,IAAI,EACf,OAAgB;IAEhB,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IAErB,mEAAmE;IACnE,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IAC9C,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IAEvC,6EAA6E;IAC7E,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,sBAAsB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACvD,IAAI,QAAQ,EAAE,CAAC;YACb,OAAO;gBACL,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,gEAAgE;gBAChE,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,KAAK;gBAC9B,WAAW;gBACX,KAAK;aACN,CAAC;QACJ,CAAC;IACH,CAAC;IAED,6EAA6E;IAC7E,MAAM,OAAO,GAAG,kBAAkB,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IAChD,OAAO;QACL,IAAI,EAAE,OAAO,CAAC,IAAI;QAClB,IAAI,EAAE,OAAO,CAAC,IAAI;QAClB,KAAK;QACL,WAAW;QACX,KAAK;KACN,CAAC;AACJ,CAAC"}
|
package/dist/scraper/index.d.ts
CHANGED
|
@@ -1,12 +1,20 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
export declare class
|
|
1
|
+
import type { MapOptions, MapResult, ScrapeOptions, ScrapeResult, ScraperConfig } from "./types.js";
|
|
2
|
+
export declare class Scraper {
|
|
3
3
|
private readonly fullConfig;
|
|
4
4
|
private readonly tier1;
|
|
5
5
|
private readonly tier2;
|
|
6
6
|
private readonly tier3;
|
|
7
7
|
private readonly config;
|
|
8
|
-
constructor(fullConfig?:
|
|
8
|
+
constructor(fullConfig?: ScraperConfig);
|
|
9
9
|
scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
10
|
+
/**
|
|
11
|
+
* Mapeia links de uma página (URL + texto do âncora).
|
|
12
|
+
* Reutiliza a cascata de tiers (HTTP → Stealth → Browser).
|
|
13
|
+
*
|
|
14
|
+
* @param url - URL base para extrair links
|
|
15
|
+
* @param options - Filtros e limites
|
|
16
|
+
*/
|
|
17
|
+
map(url: string, options?: MapOptions): Promise<MapResult>;
|
|
10
18
|
/**
|
|
11
19
|
* Scrapia múltiplas URLs em paralelo com concorrência limitada.
|
|
12
20
|
* Erros em URLs individuais não derrubam o lote inteiro.
|
|
@@ -23,5 +31,5 @@ export declare class Firecrawl {
|
|
|
23
31
|
close(): Promise<void>;
|
|
24
32
|
private log;
|
|
25
33
|
}
|
|
26
|
-
export type { ScrapeOptions, ScrapeResult, ScrapeTier, ContentFormat, SSRData, InterceptedAPI,
|
|
34
|
+
export type { ScrapeOptions, ScrapeResult, ScrapeTier, ContentFormat, SSRData, InterceptedAPI, ScraperConfig, TierRawResult, MapLink, MapOptions, MapResult, } from "./types.js";
|
|
27
35
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAEV,UAAU,EACV,SAAS,EACT,aAAa,EACb,YAAY,EACZ,aAAa,EACd,MAAM,YAAY,CAAC;AAepB,qBAAa,OAAO;IAYN,OAAO,CAAC,QAAQ,CAAC,UAAU;IAXvC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAY;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAe;IACrC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAe;IACrC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAMrB;gBAE2B,UAAU,GAAE,aAAkB;IAerD,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;IAqG7E;;;;;;OAMG;IACG,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,UAAe,GAAG,OAAO,CAAC,SAAS,CAAC;IA0HpE;;;;;;;OAOG;IACG,UAAU,CACd,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,aAAkB,EAC3B,WAAW,SAAI,GACd,OAAO,CAAC,YAAY,EAAE,CAAC;IAkC1B;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,OAAO,CAAC,GAAG;CAKZ;AAGD,YAAY,EACV,aAAa,EACb,YAAY,EACZ,UAAU,EACV,aAAa,EACb,OAAO,EACP,cAAc,EACd,aAAa,EACb,aAAa,EACb,OAAO,EACP,UAAU,EACV,SAAS,GACV,MAAM,YAAY,CAAC"}
|
package/dist/scraper/index.js
CHANGED
|
@@ -2,11 +2,12 @@ import { validateUrl } from "../security/url-validator.js";
|
|
|
2
2
|
import { Tier1HTTP } from "./tiers/tier1-http.js";
|
|
3
3
|
import { Tier2Stealth } from "./tiers/tier2-stealth.js";
|
|
4
4
|
import { Tier3Browser } from "./tiers/tier3-browser.js";
|
|
5
|
-
|
|
5
|
+
import { extractLinksWithMetadata } from "./extractors/content.js";
|
|
6
|
+
// ─── Scraper ───────────────────────────────────────────────────────────────
|
|
6
7
|
//
|
|
7
8
|
// Scraper de alta qualidade com fallback automático em 3 tiers:
|
|
8
9
|
//
|
|
9
|
-
// Tier 1 → HTTP puro (
|
|
10
|
+
// Tier 1 → HTTP puro (got-scraping) (~100-500ms, sem browser)
|
|
10
11
|
// ↓ bloqueado ou conteúdo insuficiente (SPA, anti-bot básico)
|
|
11
12
|
// Tier 2 → HTTP Stealth (got-scraping) (~200-800ms, TLS fingerprint)
|
|
12
13
|
// ↓ ainda bloqueado ou SPA sem SSR
|
|
@@ -14,7 +15,7 @@ import { Tier3Browser } from "./tiers/tier3-browser.js";
|
|
|
14
15
|
//
|
|
15
16
|
// Anti-SSRF integrado: todas as URLs são validadas antes do scrape.
|
|
16
17
|
// ──────────────────────────────────────────────────────────────────────────
|
|
17
|
-
export class
|
|
18
|
+
export class Scraper {
|
|
18
19
|
fullConfig;
|
|
19
20
|
tier1;
|
|
20
21
|
tier2;
|
|
@@ -125,6 +126,126 @@ export class Firecrawl {
|
|
|
125
126
|
};
|
|
126
127
|
}
|
|
127
128
|
}
|
|
129
|
+
// ── Map: descobrir URLs de um site ──────────────────────────────────────
|
|
130
|
+
/**
|
|
131
|
+
* Mapeia links de uma página (URL + texto do âncora).
|
|
132
|
+
* Reutiliza a cascata de tiers (HTTP → Stealth → Browser).
|
|
133
|
+
*
|
|
134
|
+
* @param url - URL base para extrair links
|
|
135
|
+
* @param options - Filtros e limites
|
|
136
|
+
*/
|
|
137
|
+
async map(url, options = {}) {
|
|
138
|
+
const startTime = Date.now();
|
|
139
|
+
const limit = options.limit ?? 500;
|
|
140
|
+
const includeSubdomains = options.includeSubdomains ?? true;
|
|
141
|
+
const ignoreQueryParameters = options.ignoreQueryParameters ?? true;
|
|
142
|
+
const searchTerm = options.search?.toLowerCase().trim();
|
|
143
|
+
let scrapeResult;
|
|
144
|
+
try {
|
|
145
|
+
scrapeResult = await this.scrape(url, {
|
|
146
|
+
getRawHtml: true,
|
|
147
|
+
forceTier: options.forceTier,
|
|
148
|
+
});
|
|
149
|
+
}
|
|
150
|
+
catch (err) {
|
|
151
|
+
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
152
|
+
return {
|
|
153
|
+
url,
|
|
154
|
+
links: [],
|
|
155
|
+
tier: "http",
|
|
156
|
+
durationMs: Date.now() - startTime,
|
|
157
|
+
error: `Falha ao carregar a página: ${errorMsg}`,
|
|
158
|
+
};
|
|
159
|
+
}
|
|
160
|
+
if (scrapeResult.error) {
|
|
161
|
+
return {
|
|
162
|
+
url: scrapeResult.url,
|
|
163
|
+
links: [],
|
|
164
|
+
tier: scrapeResult.tier,
|
|
165
|
+
durationMs: scrapeResult.durationMs,
|
|
166
|
+
error: scrapeResult.error,
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
const rawHtml = scrapeResult.rawHtml ?? scrapeResult.html ?? "";
|
|
170
|
+
if (!rawHtml) {
|
|
171
|
+
return {
|
|
172
|
+
url: scrapeResult.url,
|
|
173
|
+
links: [],
|
|
174
|
+
tier: scrapeResult.tier,
|
|
175
|
+
durationMs: scrapeResult.durationMs,
|
|
176
|
+
error: "HTML não disponível para extração de links",
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
const baseUrl = scrapeResult.url;
|
|
180
|
+
const baseHostname = new URL(baseUrl).hostname;
|
|
181
|
+
const baseDomain = baseHostname.replace(/^www\./, "");
|
|
182
|
+
let links = extractLinksWithMetadata(rawHtml, baseUrl);
|
|
183
|
+
// Filtrar por mesmo domínio
|
|
184
|
+
links = links.filter((link) => {
|
|
185
|
+
try {
|
|
186
|
+
const linkHost = new URL(link.url).hostname.replace(/^www\./, "");
|
|
187
|
+
if (includeSubdomains) {
|
|
188
|
+
return linkHost === baseDomain || linkHost.endsWith(`.${baseDomain}`);
|
|
189
|
+
}
|
|
190
|
+
return linkHost === baseDomain;
|
|
191
|
+
}
|
|
192
|
+
catch {
|
|
193
|
+
return false;
|
|
194
|
+
}
|
|
195
|
+
});
|
|
196
|
+
// Normalizar URL (remover query string) e deduplicar
|
|
197
|
+
const normalizeUrl = (href) => {
|
|
198
|
+
if (!ignoreQueryParameters)
|
|
199
|
+
return href;
|
|
200
|
+
try {
|
|
201
|
+
const u = new URL(href);
|
|
202
|
+
u.search = "";
|
|
203
|
+
return u.href;
|
|
204
|
+
}
|
|
205
|
+
catch {
|
|
206
|
+
return href;
|
|
207
|
+
}
|
|
208
|
+
};
|
|
209
|
+
const seen = new Set();
|
|
210
|
+
const deduped = [];
|
|
211
|
+
for (const link of links) {
|
|
212
|
+
const key = ignoreQueryParameters ? normalizeUrl(link.url) : link.url;
|
|
213
|
+
if (seen.has(key))
|
|
214
|
+
continue;
|
|
215
|
+
seen.add(key);
|
|
216
|
+
deduped.push({
|
|
217
|
+
url: link.url,
|
|
218
|
+
title: link.title || undefined,
|
|
219
|
+
});
|
|
220
|
+
}
|
|
221
|
+
links = deduped;
|
|
222
|
+
// Filtrar e ordenar por search (relevância simples)
|
|
223
|
+
if (searchTerm) {
|
|
224
|
+
const escaped = searchTerm.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
225
|
+
const regex = new RegExp(escaped, "gi");
|
|
226
|
+
const scored = links
|
|
227
|
+
.map((link) => {
|
|
228
|
+
const urlLower = link.url.toLowerCase();
|
|
229
|
+
const titleLower = (link.title ?? "").toLowerCase();
|
|
230
|
+
const urlMatches = (urlLower.match(regex) ?? []).length;
|
|
231
|
+
const titleMatches = (titleLower.match(regex) ?? []).length;
|
|
232
|
+
const score = urlMatches * 2 + titleMatches * 3; // title tem mais peso
|
|
233
|
+
return { link, score };
|
|
234
|
+
})
|
|
235
|
+
.filter(({ score }) => score > 0)
|
|
236
|
+
.sort((a, b) => b.score - a.score)
|
|
237
|
+
.map(({ link }) => link);
|
|
238
|
+
links = scored;
|
|
239
|
+
}
|
|
240
|
+
const result = {
|
|
241
|
+
url: baseUrl,
|
|
242
|
+
links: links.slice(0, limit),
|
|
243
|
+
tier: scrapeResult.tier,
|
|
244
|
+
durationMs: Date.now() - startTime,
|
|
245
|
+
};
|
|
246
|
+
this.log(`✓ Map: ${result.links.length} links (${result.tier})`);
|
|
247
|
+
return result;
|
|
248
|
+
}
|
|
128
249
|
// ── Scrape em lote com concorrência controlada ─────────────────────────
|
|
129
250
|
/**
|
|
130
251
|
* Scrapia múltiplas URLs em paralelo com concorrência limitada.
|
|
@@ -171,7 +292,7 @@ export class Firecrawl {
|
|
|
171
292
|
// ── Helpers ────────────────────────────────────────────────────────────
|
|
172
293
|
log(msg) {
|
|
173
294
|
if (this.config.verbose) {
|
|
174
|
-
console.log(`[
|
|
295
|
+
console.log(`[Scraper] ${msg}`);
|
|
175
296
|
}
|
|
176
297
|
}
|
|
177
298
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,8BAA8B,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scraper/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,8BAA8B,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AAUnE,8EAA8E;AAC9E,EAAE;AACF,gEAAgE;AAChE,EAAE;AACF,wEAAwE;AACxE,2EAA2E;AAC3E,4EAA4E;AAC5E,gDAAgD;AAChD,yEAAyE;AACzE,EAAE;AACF,oEAAoE;AACpE,6EAA6E;AAE7E,MAAM,OAAO,OAAO;IAYW;IAXZ,KAAK,CAAY;IACjB,KAAK,CAAe;IACpB,KAAK,CAAe;IACpB,MAAM,CAMrB;IAEF,YAA6B,aAA4B,EAAE;QAA9B,eAAU,GAAV,UAAU,CAAoB;QACzD,IAAI,CAAC,KAAK,GAAG,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,EAAE,CAAC;QAChC,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QACxD,IAAI,CAAC,MAAM,GAAG;YACZ,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,MAAM;YACrC,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,KAAK;YACpC,SAAS,EAAE,UAAU,CAAC,SAAS;YAC/B,cAAc,EAAE,UAAU,CAAC,cAAc;YACzC,cAAc,EAAE,UAAU,CAAC,cAAc;SAC1C,CAAC;IACJ,CAAC;IAED,0EAA0E;IAE1E,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,mDAAmD;QACnD,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE;YACtC,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;YAC1C,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;SAC3C,CAAC,CAAC;QAEH,MAAM,aAAa,GAAkB;YACnC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,GAAG,OAAO;SACX,CAAC;QAEF,sEAAsE;QACtE,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;QAE1D,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,IAAI,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,IAAI,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;YAC7C,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YACtB,IAAI,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;YACrC,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAED,oEAAoE;QAEpE,qEAAqE;QACrE,IAAI,UAAU,GAAkB,IAAI,CAAC;QACrC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YA
ChE,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAErD,mEAAmE;YACnE,iEAAiE;YACjE,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC5C,UAAU,GAAG,4EAA4E,CAAC;gBAC1F,IAAI,CAAC,GAAG,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,qBAAqB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;gBACrD,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC;QAC9C,CAAC;QAED,qEAAqE;QACrE,IAAI,UAAU,GAAkB,IAAI,CAAC;QACrC,IAAI,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;QAC5D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAErD,6DAA6D;YAC7D,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBAC5C,UAAU,GAAG,iEAAiE,CAAC;gBAC/E,IAAI,CAAC,GAAG,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;gBACxD,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,GAAG,CAAC,+BAA+B,UAAU,EAAE,CAAC,CAAC;QACxD,CAAC;QAED,qEAAqE;QACrE,IAAI,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;QAChE,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;YAChE,IAAI,CAAC,GAAG,CAAC,2BAA2B,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;YAC3D,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,UAAU,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACpE,IAAI,CAAC,GAAG,CAAC,iCAAiC,UAAU,EAAE,CAAC,CAAC;YAExD,mEAAmE;YACnE,OAAO;gBACL,GAAG,EAAE,QAAQ;gBACb,UAAU,EAAE,CAAC;gBACb,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,SAAS;gBACf,UAAU,EAAE,CAAC;gBACb,KAAK,EAAE;oBACL,0BAA0B;oBAC1B,uBAAuB,UAAU,IAAI,aAAa,EAAE;oBACpD,uBAAuB,UAAU,IAAI,aAAa,EAAE;oBACpD,uBAAuB,UAAU,EAAE;iBACpC,CAAC,IAAI,CAAC
,IAAI,CAAC;aACb,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAE3E;;;;;;OAMG;IACH,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,UAAsB,EAAE;QAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,GAAG,CAAC;QACnC,MAAM,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC;QAC5D,MAAM,qBAAqB,GAAG,OAAO,CAAC,qBAAqB,IAAI,IAAI,CAAC;QACpE,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAExD,IAAI,YAA0B,CAAC;QAE/B,IAAI,CAAC;YACH,YAAY,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE;gBACpC,UAAU,EAAE,IAAI;gBAChB,SAAS,EAAE,OAAO,CAAC,SAAS;aAC7B,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAClE,OAAO;gBACL,GAAG;gBACH,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,MAAM;gBACZ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAClC,KAAK,EAAE,+BAA+B,QAAQ,EAAE;aACjD,CAAC;QACJ,CAAC;QAED,IAAI,YAAY,CAAC,KAAK,EAAE,CAAC;YACvB,OAAO;gBACL,GAAG,EAAE,YAAY,CAAC,GAAG;gBACrB,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,YAAY,CAAC,IAAI;gBACvB,UAAU,EAAE,YAAY,CAAC,UAAU;gBACnC,KAAK,EAAE,YAAY,CAAC,KAAK;aAC1B,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,IAAI,YAAY,CAAC,IAAI,IAAI,EAAE,CAAC;QAChE,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO;gBACL,GAAG,EAAE,YAAY,CAAC,GAAG;gBACrB,KAAK,EAAE,EAAE;gBACT,IAAI,EAAE,YAAY,CAAC,IAAI;gBACvB,UAAU,EAAE,YAAY,CAAC,UAAU;gBACnC,KAAK,EAAE,4CAA4C;aACpD,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,GAAG,CAAC;QACjC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC;QAC/C,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAEtD,IAAI,KAAK,GAAG,wBAAwB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAEvD,4BAA4B;QAC5B,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YAC5B,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;gBAClE,IAAI,iBAAiB,EAAE,CAAC;oBACtB,OAAO,QAAQ,KAAK,UAAU,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,UAAU,EAAE,CAAC,CAAC;gBACxE,CAAC;gBACD,OAAO,QAAQ,KAAK,UAAU,CAAC;YACjC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,qDAAqD;QAC
rD,MAAM,YAAY,GAAG,CAAC,IAAY,EAAU,EAAE;YAC5C,IAAI,CAAC,qBAAqB;gBAAE,OAAO,IAAI,CAAC;YACxC,IAAI,CAAC;gBACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC,CAAC,MAAM,GAAG,EAAE,CAAC;gBACd,OAAO,CAAC,CAAC,IAAI,CAAC;YAChB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,MAAM,OAAO,GAAc,EAAE,CAAC;QAC9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,qBAAqB,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YACtE,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YAC5B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,OAAO,CAAC,IAAI,CAAC;gBACX,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;aAC/B,CAAC,CAAC;QACL,CAAC;QACD,KAAK,GAAG,OAAO,CAAC;QAEhB,oDAAoD;QACpD,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;YAClE,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YACxC,MAAM,MAAM,GAAG,KAAK;iBACjB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;gBACZ,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;gBACxC,MAAM,UAAU,GAAG,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACpD,MAAM,UAAU,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBACxD,MAAM,YAAY,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBAC5D,MAAM,KAAK,GAAG,UAAU,GAAG,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,sBAAsB;gBACvE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;YACzB,CAAC,CAAC;iBACD,MAAM,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,KAAK,GAAG,CAAC,CAAC;iBAChC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;iBACjC,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;YAC3B,KAAK,GAAG,MAAM,CAAC;QACjB,CAAC;QAED,MAAM,MAAM,GAAc;YACxB,GAAG,EAAE,OAAO;YACZ,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;YAC5B,IAAI,EAAE,YAAY,CAAC,IAAI;YACvB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;SACnC,CAAC;QAEF,IAAI,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,KAAK,CAAC,MAAM,WAAW,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;QACjE,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,0EA
A0E;IAE1E;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CACd,IAAc,EACd,UAAyB,EAAE,EAC3B,WAAW,GAAG,CAAC;QAEf,MAAM,OAAO,GAAmB,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;QAExB,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;YAC3C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAC1C,CAAC;YAEF,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;gBAC9B,IAAI,OAAO,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;oBACnC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAC9B,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,IAAI,CAAC;wBACX,GAAG,EAAE,SAAS;wBACd,UAAU,EAAE,CAAC;wBACb,KAAK,EAAE,EAAE;wBACT,IAAI,EAAE,MAAM;wBACZ,UAAU,EAAE,CAAC;wBACb,KAAK,EACH,OAAO,CAAC,MAAM,YAAY,KAAK;4BAC7B,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO;4BACxB,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;qBAC7B,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,0EAA0E;IAE1E;;;OAGG;IACH,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED,0EAA0E;IAElE,GAAG,CAAC,GAAW;QACrB,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,aAAa,GAAG,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tier1-http.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier1-http.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAmD/D,qBAAa,SAAS;IACd,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"tier1-http.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier1-http.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAmD/D,qBAAa,SAAS;IACd,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;CAqG9E"}
|
|
@@ -106,6 +106,8 @@ export class Tier1HTTP {
|
|
|
106
106
|
durationMs: Date.now() - startTime,
|
|
107
107
|
links: extracted.links.length > 0 ? extracted.links : undefined,
|
|
108
108
|
};
|
|
109
|
+
if (options.getRawHtml)
|
|
110
|
+
result.rawHtml = html;
|
|
109
111
|
if (formats.includes("markdown"))
|
|
110
112
|
result.markdown = htmlToMarkdown(extracted.html);
|
|
111
113
|
if (formats.includes("html"))
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tier1-http.js","sourceRoot":"","sources":["../../../src/scraper/tiers/tier1-http.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAW3C,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAE9D,8EAA8E;AAC9E,EAAE;AACF,YAAY;AACZ,0EAA0E;AAC1E,kEAAkE;AAClE,8CAA8C;AAC9C,0DAA0D;AAC1D,EAAE;AACF,iDAAiD;AACjD,gFAAgF;AAChF,qEAAqE;AACrE,iFAAiF;AACjF,2DAA2D;AAC3D,EAAE;AACF,cAAc;AACd,uDAAuD;AACvD,4EAA4E;AAC5E,6EAA6E;AAE7E,iEAAiE;AACjE,6DAA6D;AAC7D,mEAAmE;AACnE,gEAAgE;AAChE,EAAE;AACF,sEAAsE;AACtE,0CAA0C;AAC1C,sDAAsD;AACtD,qEAAqE;AACrE,gCAAgC;AAChC,MAAM,aAAa,GAA2B;IAC5C,iBAAiB,EAAE,qCAAqC;IACxD,eAAe,EAAE,UAAU;IAC3B,QAAQ,EAAE,UAAU;CACrB,CAAC;AAEF,8EAA8E;AAC9E,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAEhD,MAAM,OAAO,SAAS;IACpB,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,uEAAuE;QACvE,4EAA4E;QAC5E,yEAAyE;QACzE,sEAAsE;QACtE,2DAA2D;QAC3D,IAAI,QAAqB,CAAC;QAE1B,IAAI,CAAC;YACH,QAAQ,GAAG,CAAC,MAAM,WAAW,CAAC;gBAC5B,GAAG;gBACH,qEAAqE;gBACrE,iEAAiE;gBACjE,OAAO,EAAE,EAAE,GAAG,aAAa,EAAE,GAAG,OAAO,CAAC,OAAO,EAAE;gBAEjD,6DAA6D;gBAC7D,eAAe,EAAE,KAAK;gBAEtB,kFAAkF;gBAClF,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,MAAM,EAAE;gBAE/C,qCAAqC;gBACrC,YAAY,EAAE,MAAM;gBACpB,8DAA8D;aAC/D,CAAC,CAA2B,CAAC;QAChC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,uDAAuD;YACvD,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,MAAM,IAAI,KAAK,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;QAC9D,CAAC;QAED,uEAAuE;QACvE,IAAI,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,KAAK,CACb,sBAAsB,QAAQ,CAAC,UAAU,2BAA2B,CACrE,CAAC;QACJ,CAAC;QAED,IAAI,QAAQ,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,2BAA2B,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACpE,CAAC;QAED,wEAAwE;QACxE,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QAC/C,MAAM,WAAW,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAA
C,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAE5E,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC9E,MAAM,IAAI,KAAK,CACb,6BAA6B,WAAW,mCAAmC,CAC5E,CAAC;QACJ,CAAC;QAED,6DAA6D;QAC7D,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAc,CAAC;QACrC,oDAAoD;QACpD,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC;QAE9B,uEAAuE;QACvE,4DAA4D;QAC5D,+DAA+D;QAC/D,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAErC,uEAAuE;QACvE,gFAAgF;QAChF,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,2EAA2E,CAC5E,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,kEAAkE;QAClE,+DAA+D;QAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,cAAc,CAC9B,IAAI,EACJ,OAAO,CAAC,eAAe,IAAI,IAAI,EAC/B,QAAQ,CACT,CAAC;QAEF,MAAM,MAAM,GAAiB;YAC3B,GAAG,EAAE,QAAQ;YACb,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,SAAS;YAC/C,IAAI,EAAE,MAAM;YACZ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;SAChE,CAAC;QAEF,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACnF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAO,SAAS,CAAC,IAAI,CAAC;QACnE,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAO,SAAS,CAAC,IAAI,CAAC;QACnE,IAAI,OAAO;YAAuB,MAAM,CAAC,OAAO,GAAI,OAAO,CAAC;QAE5D,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
1
|
+
{"version":3,"file":"tier1-http.js","sourceRoot":"","sources":["../../../src/scraper/tiers/tier1-http.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAW3C,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAE9D,8EAA8E;AAC9E,EAAE;AACF,YAAY;AACZ,0EAA0E;AAC1E,kEAAkE;AAClE,8CAA8C;AAC9C,0DAA0D;AAC1D,EAAE;AACF,iDAAiD;AACjD,gFAAgF;AAChF,qEAAqE;AACrE,iFAAiF;AACjF,2DAA2D;AAC3D,EAAE;AACF,cAAc;AACd,uDAAuD;AACvD,4EAA4E;AAC5E,6EAA6E;AAE7E,iEAAiE;AACjE,6DAA6D;AAC7D,mEAAmE;AACnE,gEAAgE;AAChE,EAAE;AACF,sEAAsE;AACtE,0CAA0C;AAC1C,sDAAsD;AACtD,qEAAqE;AACrE,gCAAgC;AAChC,MAAM,aAAa,GAA2B;IAC5C,iBAAiB,EAAE,qCAAqC;IACxD,eAAe,EAAE,UAAU;IAC3B,QAAQ,EAAE,UAAU;CACrB,CAAC;AAEF,8EAA8E;AAC9E,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAEhD,MAAM,OAAO,SAAS;IACpB,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,uEAAuE;QACvE,4EAA4E;QAC5E,yEAAyE;QACzE,sEAAsE;QACtE,2DAA2D;QAC3D,IAAI,QAAqB,CAAC;QAE1B,IAAI,CAAC;YACH,QAAQ,GAAG,CAAC,MAAM,WAAW,CAAC;gBAC5B,GAAG;gBACH,qEAAqE;gBACrE,iEAAiE;gBACjE,OAAO,EAAE,EAAE,GAAG,aAAa,EAAE,GAAG,OAAO,CAAC,OAAO,EAAE;gBAEjD,6DAA6D;gBAC7D,eAAe,EAAE,KAAK;gBAEtB,kFAAkF;gBAClF,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,MAAM,EAAE;gBAE/C,qCAAqC;gBACrC,YAAY,EAAE,MAAM;gBACpB,8DAA8D;aAC/D,CAAC,CAA2B,CAAC;QAChC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,uDAAuD;YACvD,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,MAAM,IAAI,KAAK,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;QAC9D,CAAC;QAED,uEAAuE;QACvE,IAAI,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,KAAK,CACb,sBAAsB,QAAQ,CAAC,UAAU,2BAA2B,CACrE,CAAC;QACJ,CAAC;QAED,IAAI,QAAQ,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,2BAA2B,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACpE,CAAC;QAED,wEAAwE;QACxE,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;QAC/C,MAAM,WAAW,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAA
C,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QAE5E,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC9E,MAAM,IAAI,KAAK,CACb,6BAA6B,WAAW,mCAAmC,CAC5E,CAAC;QACJ,CAAC;QAED,6DAA6D;QAC7D,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAc,CAAC;QACrC,oDAAoD;QACpD,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC;QAE9B,uEAAuE;QACvE,4DAA4D;QAC5D,+DAA+D;QAC/D,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAErC,uEAAuE;QACvE,gFAAgF;QAChF,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,2EAA2E,CAC5E,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,kEAAkE;QAClE,+DAA+D;QAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,cAAc,CAC9B,IAAI,EACJ,OAAO,CAAC,eAAe,IAAI,IAAI,EAC/B,QAAQ,CACT,CAAC;QAEF,MAAM,MAAM,GAAiB;YAC3B,GAAG,EAAE,QAAQ;YACb,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,SAAS;YAC/C,IAAI,EAAE,MAAM;YACZ,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;SAChE,CAAC;QAEF,IAAI,OAAO,CAAC,UAAU;YAAE,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC;QAE9C,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACnF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAO,SAAS,CAAC,IAAI,CAAC;QACnE,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAO,SAAS,CAAC,IAAI,CAAC;QACnE,IAAI,OAAO;YAAuB,MAAM,CAAC,OAAO,GAAI,OAAO,CAAC;QAE5D,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tier2-stealth.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier2-stealth.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAgC/D,qBAAa,YAAY;IACjB,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"tier2-stealth.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier2-stealth.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAgC/D,qBAAa,YAAY;IACjB,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;CAyG9E"}
|
|
@@ -92,6 +92,8 @@ export class Tier2Stealth {
|
|
|
92
92
|
durationMs: Date.now() - startTime,
|
|
93
93
|
links: extracted.links.length > 0 ? extracted.links : undefined,
|
|
94
94
|
};
|
|
95
|
+
if (options.getRawHtml)
|
|
96
|
+
result.rawHtml = html;
|
|
95
97
|
if (formats.includes("markdown"))
|
|
96
98
|
result.markdown = htmlToMarkdown(extracted.html);
|
|
97
99
|
if (formats.includes("html"))
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tier2-stealth.js","sourceRoot":"","sources":["../../../src/scraper/tiers/tier2-stealth.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE3C,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAU9D,8EAA8E;AAC9E,EAAE;AACF,YAAY;AACZ,yEAAyE;AACzE,kFAAkF;AAClF,kEAAkE;AAClE,8CAA8C;AAC9C,2DAA2D;AAC3D,EAAE;AACF,4EAA4E;AAC5E,yDAAyD;AACzD,EAAE;AACF,wEAAwE;AACxE,uDAAuD;AACvD,6EAA6E;AAE7E,iDAAiD;AACjD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAEhD,MAAM,OAAO,YAAY;IACvB,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,uEAAuE;QACvE,IAAI,QAAqB,CAAC;QAC1B,IAAI,CAAC;YACH,QAAQ,GAAG,CAAC,MAAM,WAAW,CAAC;gBAC5B,GAAG;gBACH,MAAM,EAAE,KAAK;gBACb,mDAAmD;gBACnD,sBAAsB,EAAE;oBACtB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;oBAC/C,gBAAgB,EAAE,CAAC,OAAO,EAAE,SAAS,CAAC;oBACtC,OAAO,EAAE,CAAC,SAAS,CAAC;oBACpB,OAAO,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC;iBAClC;gBACD,0CAA0C;gBAC1C,OAAO,EAAE;oBACP,MAAM,EACJ,kGAAkG;oBACpG,iBAAiB,EAAE,qCAAqC;oBACxD,eAAe,EAAE,UAAU;oBAC3B,MAAM,EAAE,UAAU;oBAClB,gBAAgB,EAAE,UAAU;oBAC5B,gBAAgB,EAAE,UAAU;oBAC5B,gBAAgB,EAAE,MAAM;oBACxB,gBAAgB,EAAE,IAAI;oBACtB,2BAA2B,EAAE,GAAG;oBAChC,GAAG,OAAO,CAAC,OAAO;iBACnB;gBACD,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,MAAM,EAAE;gBAC/C,cAAc,EAAE,IAAI;gBACpB,YAAY,EAAE,EAAE;gBAChB,KAAK,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,KAAK,CAAC,EAAE;gBACrC,eAAe,EAAE,KAAK;gBACtB,UAAU,EAAE,IAAI;aACjB,CAAC,CAA2B,CAAC;QAChC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,wCAAwC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC3F,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QAC3B,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC;QACvC,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,IAAI,GAAG,CAAC;QAErC,uEAAuE;QACvE,IAAI,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CACb,yBAAyB,UAAU,2BAA2B
,CAC/D,CAAC;QACJ,CAAC;QAED,IAAI,UAAU,IAAI,GAAG,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,yBAAyB,UAAU,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC3D,IACE,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC;YAC1C,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EAC3C,CAAC;YACD,MAAM,IAAI,KAAK,CACb,2CAA2C,WAAW,wBAAwB,CAC/E,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAErC,uEAAuE;QACvE,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,qFAAqF,CACtF,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,cAAc,CAC9B,IAAI,EACJ,OAAO,CAAC,eAAe,IAAI,IAAI,EAC/B,QAAQ,CACT,CAAC;QAEF,MAAM,MAAM,GAAiB;YAC3B,GAAG,EAAE,QAAQ;YACb,UAAU;YACV,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,SAAS;YAC/C,IAAI,EAAE,SAAS;YACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;SAChE,CAAC;QAEF,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACnF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;QAC/D,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;QAC/D,IAAI,OAAO;YAAuB,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;QAE3D,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
1
|
+
{"version":3,"file":"tier2-stealth.js","sourceRoot":"","sources":["../../../src/scraper/tiers/tier2-stealth.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE3C,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAU9D,8EAA8E;AAC9E,EAAE;AACF,YAAY;AACZ,yEAAyE;AACzE,kFAAkF;AAClF,kEAAkE;AAClE,8CAA8C;AAC9C,2DAA2D;AAC3D,EAAE;AACF,4EAA4E;AAC5E,yDAAyD;AACzD,EAAE;AACF,wEAAwE;AACxE,uDAAuD;AACvD,6EAA6E;AAE7E,iDAAiD;AACjD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAEhD,MAAM,OAAO,YAAY;IACvB,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,uEAAuE;QACvE,IAAI,QAAqB,CAAC;QAC1B,IAAI,CAAC;YACH,QAAQ,GAAG,CAAC,MAAM,WAAW,CAAC;gBAC5B,GAAG;gBACH,MAAM,EAAE,KAAK;gBACb,mDAAmD;gBACnD,sBAAsB,EAAE;oBACtB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;oBAC/C,gBAAgB,EAAE,CAAC,OAAO,EAAE,SAAS,CAAC;oBACtC,OAAO,EAAE,CAAC,SAAS,CAAC;oBACpB,OAAO,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC;iBAClC;gBACD,0CAA0C;gBAC1C,OAAO,EAAE;oBACP,MAAM,EACJ,kGAAkG;oBACpG,iBAAiB,EAAE,qCAAqC;oBACxD,eAAe,EAAE,UAAU;oBAC3B,MAAM,EAAE,UAAU;oBAClB,gBAAgB,EAAE,UAAU;oBAC5B,gBAAgB,EAAE,UAAU;oBAC5B,gBAAgB,EAAE,MAAM;oBACxB,gBAAgB,EAAE,IAAI;oBACtB,2BAA2B,EAAE,GAAG;oBAChC,GAAG,OAAO,CAAC,OAAO;iBACnB;gBACD,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,MAAM,EAAE;gBAC/C,cAAc,EAAE,IAAI;gBACpB,YAAY,EAAE,EAAE;gBAChB,KAAK,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,KAAK,CAAC,EAAE;gBACrC,eAAe,EAAE,KAAK;gBACtB,UAAU,EAAE,IAAI;aACjB,CAAC,CAA2B,CAAC;QAChC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,wCAAwC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC3F,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QAC3B,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC;QACvC,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,IAAI,GAAG,CAAC;QAErC,uEAAuE;QACvE,IAAI,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CACb,yBAAyB,UAAU,2BAA2B
,CAC/D,CAAC;QACJ,CAAC;QAED,IAAI,UAAU,IAAI,GAAG,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,yBAAyB,UAAU,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC3D,IACE,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC;YAC1C,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EAC3C,CAAC;YACD,MAAM,IAAI,KAAK,CACb,2CAA2C,WAAW,wBAAwB,CAC/E,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAErC,uEAAuE;QACvE,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,qFAAqF,CACtF,CAAC;QACJ,CAAC;QAED,uEAAuE;QACvE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,cAAc,CAC9B,IAAI,EACJ,OAAO,CAAC,eAAe,IAAI,IAAI,EAC/B,QAAQ,CACT,CAAC;QAEF,MAAM,MAAM,GAAiB;YAC3B,GAAG,EAAE,QAAQ;YACb,UAAU;YACV,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,SAAS;YAC/C,IAAI,EAAE,SAAS;YACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;SAChE,CAAC;QAEF,IAAI,OAAO,CAAC,UAAU;YAAE,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC;QAE9C,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;YAAE,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACnF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;QAC/D,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAM,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;QAC/D,IAAI,OAAO;YAAuB,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;QAE3D,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import type { ScrapeOptions, ScrapeResult,
|
|
1
|
+
import type { ScrapeOptions, ScrapeResult, ScraperConfig } from "../types.js";
|
|
2
2
|
export declare class Tier3Browser {
|
|
3
3
|
private browser;
|
|
4
4
|
private readonly browserConfig;
|
|
5
|
-
constructor(browserConfig?:
|
|
5
|
+
constructor(browserConfig?: ScraperConfig["browserConfig"]);
|
|
6
6
|
private getBrowser;
|
|
7
7
|
scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
8
8
|
close(): Promise<void>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tier3-browser.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier3-browser.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,aAAa,EACb,YAAY,EAEZ,
|
|
1
|
+
{"version":3,"file":"tier3-browser.d.ts","sourceRoot":"","sources":["../../../src/scraper/tiers/tier3-browser.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,aAAa,EACb,YAAY,EAEZ,aAAa,EACd,MAAM,aAAa,CAAC;AA4RrB,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA8C;gBAEhE,aAAa,GAAE,aAAa,CAAC,eAAe,CAAM;YAMhD,UAAU;IAgClB,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC;IAoNvE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAM7B"}
|
|
@@ -481,6 +481,8 @@ export class Tier3Browser {
|
|
|
481
481
|
links: extracted.links.length > 0 ? extracted.links : undefined,
|
|
482
482
|
interceptedAPIs: interceptedAPIs.length > 0 ? interceptedAPIs : undefined,
|
|
483
483
|
};
|
|
484
|
+
if (options.getRawHtml)
|
|
485
|
+
result.rawHtml = html;
|
|
484
486
|
if (formats.includes("markdown"))
|
|
485
487
|
result.markdown = htmlToMarkdown(extracted.html);
|
|
486
488
|
if (formats.includes("html"))
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tier3-browser.js","sourceRoot":"","sources":["../../../src/scraper/tiers/tier3-browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAqC,MAAM,YAAY,CAAC;AAOzE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAE9D,8EAA8E;AAC9E,EAAE;AACF,gEAAgE;AAChE,6EAA6E;AAC7E,EAAE;AACF,yBAAyB;AACzB,oEAAoE;AACpE,2EAA2E;AAC3E,kEAAkE;AAClE,kDAAkD;AAClD,sDAAsD;AACtD,6EAA6E;AAE7E,2EAA2E;AAC3E,gDAAgD;AAChD,MAAM,SAAS,GACb,iHAAiH,CAAC;AAEpH,mDAAmD;AACnD,MAAM,YAAY,GAAG;IACnB,+CAA+C;IAC/C,oDAAoD;IACpD,oBAAoB;IACpB,gBAAgB;IAChB,cAAc;IACd,0BAA0B;IAC1B,yBAAyB;IACzB,iCAAiC;IACjC,aAAa;IACb,eAAe;IACf,yBAAyB;IACzB,iCAAiC;IACjC,0CAA0C;IAC1C,4BAA4B;IAC5B,wBAAwB;IACxB,8BAA8B;IAC9B,sBAAsB;IACtB,wBAAwB;IACxB,0BAA0B;IAC1B,4BAA4B;IAC5B,gBAAgB;IAChB,0BAA0B;IAC1B,oCAAoC;CACrC,CAAC;AAEF,iFAAiF;AACjF,EAAE;AACF,6DAA6D;AAC7D,8DAA8D;AAC9D,EAAE;AACF,8DAA8D;AAC9D,yEAAyE;AACzE,2FAA2F;AAC3F,oFAAoF;AACpF,2EAA2E;AAC3E,iEAAiE;AACjE,yFAAyF;AACzF,2FAA2F;AAC3F,6CAA6C;AAC7C,6FAA6F;AAC7F,iFAAiF;AACjF,MAAM,mBAAmB,GAAG,yBAAyB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiMrD,CAAC;AAEF,uDAAuD;AACvD,4EAA4E;AAC5E,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;AAEnE,+CAA+C;AAC/C,MAAM,oBAAoB,GAAG;IAC3B,sBAAsB;IACtB,sBAAsB;IACtB,gCAAgC;IAChC,sBAAsB;IACtB,YAAY;IACZ,eAAe;IACf,aAAa;IACb,cAAc;IACd,eAAe;IACf,WAAW;IACX,YAAY;IACZ,iBAAiB;IACjB,WAAW;IACX,YAAY;IACZ,aAAa;IACb,cAAc;CACf,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,OAAO,GAAmB,IAAI,CAAC;IACtB,aAAa,
|
|
1
|
+
{"version":3,"file":"tier3-browser.js","sourceRoot":"","sources":["../../../src/scraper/tiers/tier3-browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAqC,MAAM,YAAY,CAAC;AAOzE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAE9D,8EAA8E;AAC9E,EAAE;AACF,gEAAgE;AAChE,6EAA6E;AAC7E,EAAE;AACF,yBAAyB;AACzB,oEAAoE;AACpE,2EAA2E;AAC3E,kEAAkE;AAClE,kDAAkD;AAClD,sDAAsD;AACtD,6EAA6E;AAE7E,2EAA2E;AAC3E,gDAAgD;AAChD,MAAM,SAAS,GACb,iHAAiH,CAAC;AAEpH,mDAAmD;AACnD,MAAM,YAAY,GAAG;IACnB,+CAA+C;IAC/C,oDAAoD;IACpD,oBAAoB;IACpB,gBAAgB;IAChB,cAAc;IACd,0BAA0B;IAC1B,yBAAyB;IACzB,iCAAiC;IACjC,aAAa;IACb,eAAe;IACf,yBAAyB;IACzB,iCAAiC;IACjC,0CAA0C;IAC1C,4BAA4B;IAC5B,wBAAwB;IACxB,8BAA8B;IAC9B,sBAAsB;IACtB,wBAAwB;IACxB,0BAA0B;IAC1B,4BAA4B;IAC5B,gBAAgB;IAChB,0BAA0B;IAC1B,oCAAoC;CACrC,CAAC;AAEF,iFAAiF;AACjF,EAAE;AACF,6DAA6D;AAC7D,8DAA8D;AAC9D,EAAE;AACF,8DAA8D;AAC9D,yEAAyE;AACzE,2FAA2F;AAC3F,oFAAoF;AACpF,2EAA2E;AAC3E,iEAAiE;AACjE,yFAAyF;AACzF,2FAA2F;AAC3F,6CAA6C;AAC7C,6FAA6F;AAC7F,iFAAiF;AACjF,MAAM,mBAAmB,GAAG,yBAAyB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiMrD,CAAC;AAEF,uDAAuD;AACvD,4EAA4E;AAC5E,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;AAEnE,+CAA+C;AAC/C,MAAM,oBAAoB,GAAG;IAC3B,sBAAsB;IACtB,sBAAsB;IACtB,gCAAgC;IAChC,sBAAsB;IACtB,YAAY;IACZ,eAAe;IACf,aAAa;IACb,cAAc;IACd,eAAe;IACf,WAAW;IACX,YAAY;IACZ,iBAAiB;IACjB,WAAW;IACX,YAAY;IACZ,aAAa;IACb,cAAc;CACf,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,OAAO,GAAmB,IAAI,CAAC;IACtB,aAAa,CAA8C;IAE5E,YAAY,gBAAgD,EAAE;QAC5D,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED,0EAA0E;IAElE,KAAK,CAAC,UAAU;QACtB,IAAI,IAAI,CAAC,OAAO,EAAE,WAAW,EAAE;YAAE,OAAO,IAAI,CAAC,OAAO,CAAC;QAErD,MAAM,aAAa,GAA0C;YAC3D,QAAQ,EAAE,IAAI,CAAC,aAAa,CAAC,QAAQ,IAAI,IAAI;YAC7C,IAAI,EAAE,YAAY;SACnB,CAAC;QAEF,yCAAyC;QACzC,uDAAuD;QACvD,sDAAsD;QACtD,oEAAoE;QACpE,IAAI,IAAI,CAAC,aAAa,CAAC
,cAAc,EAAE,CAAC;YACtC,aAAa,CAAC,cAAc,GAAG,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC;QACnE,CAAC;aAAM,IAAI,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC;YACtC,aAAa,CAAC,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,0EAA0E;YAC1E,IAAI,CAAC;gBACH,IAAI,CAAC,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,EAAE,GAAG,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;gBAC9E,OAAO,IAAI,CAAC,OAAO,CAAC;YACtB,CAAC;YAAC,MAAM,CAAC;gBACP,kEAAkE;YACpE,CAAC;QACH,CAAC;QAED,IAAI,CAAC,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QACpD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,0EAA0E;IAE1E,KAAK,CAAC,MAAM,CAAC,GAAW,EAAE,UAAyB,EAAE;QACnD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAExC,IAAI,OAAO,GAA0B,IAAI,CAAC;QAE1C,IAAI,CAAC;YACH,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;gBACjC,SAAS,EAAE,SAAS;gBACpB,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACvC,MAAM,EAAE,OAAO;gBACf,UAAU,EAAE,mBAAmB;gBAC/B,gBAAgB,EAAE;oBAChB,iBAAiB,EAAE,qCAAqC;oBACxD,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;iBAC3B;gBACD,iBAAiB,EAAE,IAAI;gBACvB,4EAA4E;gBAC5E,6DAA6D;aAC9D,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;YAErC,qEAAqE;YACrE,MAAM,IAAI,CAAC,aAAa,CAAC,mBAAmB,CAAC,CAAC;YAE9C,qEAAqE;YACrE,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE;gBACjC,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC;gBAC5B,MAAM,IAAI,GAAG,GAAG,CAAC,YAAY,EAAE,CAAC;gBAChC,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,EAAE,CAAC;gBAEzB,IAAI,sBAAsB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACrC,OAAO,KAAK,CAAC,KAAK,EAAE,CAAC;gBACvB,CAAC;gBACD,IACE,IAAI,KAAK,QAAQ;oBACjB,oBAAoB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EACpD,CAAC;oBACD,OAAO,KAAK,CAAC,KAAK,EAAE,CAAC;gBACvB,CAAC;gBAED,OAAO,KAAK,CAAC,QAAQ,EAAE,CAAC;YAC1B,CAAC,CAAC,CAAC;YAEH,qEAAqE;YACrE,MAAM,eAAe,GAAqB,EAAE,CAAC;YAC7C,MAAM,eAAe,GAAG,OAAO,CAAC,aAAa,KAAK,KAAK,CAAC;YAExD,IAAI,eAAe,EAAE,CAAC;gBACpB,IAAI,CAAC,EAAE,CAAC,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE;oBACrC,IAAI,CAAC;wBACH,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAC,cAAc,CAAC,IAAI,EAAE,CA
AC;wBAC7D,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,kBAAkB,CAAC;4BAAE,OAAO;wBAEtD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;wBAC9B,qCAAqC;wBACrC,IAAI,oBAAoB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;4BAAE,OAAO;wBACjE,IAAI,iCAAiC,CAAC,IAAI,CAAC,MAAM,CAAC;4BAAE,OAAO;wBAE3D,uEAAuE;wBACvE,MAAM,aAAa,GAAG,QAAQ,CAC5B,QAAQ,CAAC,OAAO,EAAE,CAAC,gBAAgB,CAAC,IAAI,GAAG,EAC3C,EAAE,CACH,CAAC;wBACF,IAAI,aAAa,GAAG,OAAO;4BAAE,OAAO;wBAEpC,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;wBACrD,IAAI,CAAC,IAAI;4BAAE,OAAO;wBAElB,eAAe,CAAC,IAAI,CAAC;4BACnB,GAAG,EAAE,MAAM;4BACX,MAAM,EAAE,QAAQ,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE;4BACnC,UAAU,EAAE,QAAQ,CAAC,MAAM,EAAE;4BAC7B,WAAW;4BACX,IAAI;yBACL,CAAC,CAAC;oBACL,CAAC;oBAAC,MAAM,CAAC;wBACP,mEAAmE;oBACrE,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;YAED,0EAA0E;YAC1E,IAAI,CAAC,EAAE,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC;YAEhE,sEAAsE;YACtE,6EAA6E;YAC7E,sEAAsE;YACtE,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,MAAM,CAAC;YAC1C,IAAI,UAAU,GAAG,GAAG,CAAC;YACrB,IAAI,YAAY,GAAiB,IAAI,CAAC;YAEtC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;gBAC9C,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;wBACvC,SAAS,EAAE,kBAAkB;wBAC7B,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC;qBACnC,CAAC,CAAC;oBACH,UAAU,GAAG,WAAW,EAAE,MAAM,EAAE,IAAI,GAAG,CAAC;oBAC1C,YAAY,GAAG,IAAI,CAAC;oBACpB,MAAM,CAAC,wBAAwB;gBACjC,CAAC;gBAAC,OAAO,MAAM,EAAE,CAAC;oBAChB,YAAY,GAAG,MAAM,YAAY,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;oBAC5E,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;wBAChB,MAAM,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;oBACnD,CAAC;gBACH,CAAC;YACH,CAAC;YAED,IAAI,YAAY,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,uCAAuC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC;YACjF,CAAC;YAED,qEAAqE;YACrE,sDAAsD;YACtD,MAAM,IAAI;iBACP,gBAAgB,CAAC,aAAa,EAAE;gBAC/B,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,GAAG,EAAE,MA
AM,CAAC;aACzC,CAAC;iBACD,KAAK,CAAC,GAAG,EAAE;gBACV,qDAAqD;YACvD,CAAC,CAAC,CAAC;YAEL,8DAA8D;YAC9D,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;gBAC5B,MAAM,IAAI;qBACP,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE;oBACxC,KAAK,EAAE,SAAS;oBAChB,OAAO,EAAE,MAAM;iBAChB,CAAC;qBACD,KAAK,CAAC,GAAG,EAAE;oBACV,kDAAkD;gBACpD,CAAC,CAAC,CAAC;YACP,CAAC;YAED,qEAAqE;YACrE,uEAAuE;YACvE,2EAA2E;YAC3E,kEAAkE;YAClE,MAAM,IAAI;iBACP,QAAQ,CAAC,GAAG,EAAE;gBACb,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;oBACnC,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC;oBAC/C,IAAI,WAAW,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;wBACtC,OAAO,EAAE,CAAC;wBACV,OAAO;oBACT,CAAC;oBAED,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;oBACxD,IAAI,QAAQ,GAAG,CAAC,CAAC;oBAEjB,MAAM,IAAI,GAAG,GAAG,EAAE;wBAChB,QAAQ,IAAI,IAAI,CAAC;wBACjB,MAAM,CAAC,QAAQ,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,CAAC;wBACvD,IAAI,QAAQ,GAAG,WAAW,EAAE,CAAC;4BAC3B,8DAA8D;4BAC9D,uDAAuD;4BACvD,UAAU,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC;wBAC1D,CAAC;6BAAM,CAAC;4BACN,MAAM,CAAC,QAAQ,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,CAAC;4BACjD,OAAO,EAAE,CAAC;wBACZ,CAAC;oBACH,CAAC,CAAC;oBAEF,UAAU,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBACxB,CAAC,CAAC,CAAC;YACL,CAAC,CAAC;iBACD,KAAK,CAAC,GAAG,EAAE;gBACV,2DAA2D;YAC7D,CAAC,CAAC,CAAC;YAEL,qEAAqE;YACrE,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;gBAC1C,IAAI,CAAC,OAAO,EAAE;gBACd,IAAI,CAAC,KAAK,EAAE;aACb,CAAC,CAAC;YAEH,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YACxD,MAAM,SAAS,GAAG,cAAc,CAC9B,IAAI,EACJ,OAAO,CAAC,eAAe,IAAI,IAAI,EAC/B,QAAQ,CACT,CAAC;YAEF,MAAM,MAAM,GAAiB;gBAC3B,GAAG,EAAE,QAAQ;gBACb,UAAU;gBACV,KAAK,EAAE,SAAS,IAAI,SAAS,CAAC,KAAK;gBACnC,WAAW,EAAE,SAAS,CAAC,WAAW,IAAI,SAAS;gBAC/C,IAAI,EAAE,SAAS;gBACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAClC,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBAC/D,eAAe,EACb,eAAe,
CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,SAAS;aAC3D,CAAC;YAEF,IAAI,OAAO,CAAC,UAAU;gBAAE,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC;YAE9C,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAAE,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACnF,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAM,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;YAC/D,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAM,MAAM,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC;YAE/D,OAAO,MAAM,CAAC;QAChB,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,EAAE,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QACzC,CAAC;IACH,CAAC;IAED,0EAA0E;IAE1E,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAC3C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF"}
|
package/dist/scraper/types.d.ts
CHANGED
|
@@ -19,8 +19,8 @@ export interface TierRawResult {
|
|
|
19
19
|
*/
|
|
20
20
|
sufficient: boolean;
|
|
21
21
|
}
|
|
22
|
-
/** Configuração global do
|
|
23
|
-
export interface
|
|
22
|
+
/** Configuração global do Scraper */
|
|
23
|
+
export interface ScraperConfig {
|
|
24
24
|
/** Timeout padrão em ms. Default: 30_000 */
|
|
25
25
|
timeout?: number;
|
|
26
26
|
/**
|
|
@@ -67,6 +67,8 @@ export interface ScrapeOptions {
|
|
|
67
67
|
* - "browser" → vai direto ao Playwright Chromium
|
|
68
68
|
*/
|
|
69
69
|
forceTier?: ScrapeTier;
|
|
70
|
+
/** Incluir HTML bruto (antes da extração de conteúdo). Útil para map(). Default: false */
|
|
71
|
+
getRawHtml?: boolean;
|
|
70
72
|
/** Aguardar esse seletor CSS aparecer e estar visível antes de extrair */
|
|
71
73
|
waitForSelector?: string;
|
|
72
74
|
/** Interceptar respostas JSON das APIs chamadas pela SPA */
|
|
@@ -98,6 +100,33 @@ export interface InterceptedAPI {
|
|
|
98
100
|
contentType: string;
|
|
99
101
|
data: unknown;
|
|
100
102
|
}
|
|
103
|
+
/** Link descoberto no mapeamento */
|
|
104
|
+
export interface MapLink {
|
|
105
|
+
url: string;
|
|
106
|
+
/** Texto do âncora (innerText do <a>) */
|
|
107
|
+
title?: string;
|
|
108
|
+
}
|
|
109
|
+
/** Opções do mapeamento */
|
|
110
|
+
export interface MapOptions {
|
|
111
|
+
/** Filtrar/ordenar links por relevância a este termo */
|
|
112
|
+
search?: string;
|
|
113
|
+
/** Incluir links de subdomínios. Default: true */
|
|
114
|
+
includeSubdomains?: boolean;
|
|
115
|
+
/** Ignorar query strings (?foo=bar). Default: true */
|
|
116
|
+
ignoreQueryParameters?: boolean;
|
|
117
|
+
/** Máximo de links a retornar. Default: 500 */
|
|
118
|
+
limit?: number;
|
|
119
|
+
/** Tier forçado (igual ao scrape) */
|
|
120
|
+
forceTier?: ScrapeTier;
|
|
121
|
+
}
|
|
122
|
+
/** Resultado do mapeamento */
|
|
123
|
+
export interface MapResult {
|
|
124
|
+
url: string;
|
|
125
|
+
links: MapLink[];
|
|
126
|
+
tier: ScrapeTier;
|
|
127
|
+
durationMs: number;
|
|
128
|
+
error?: string;
|
|
129
|
+
}
|
|
101
130
|
/** Resultado completo do scrape */
|
|
102
131
|
export interface ScrapeResult {
|
|
103
132
|
/** URL final (após redirecionamentos) */
|
|
@@ -120,6 +149,8 @@ export interface ScrapeResult {
|
|
|
120
149
|
durationMs: number;
|
|
121
150
|
/** Links encontrados na página */
|
|
122
151
|
links?: string[];
|
|
152
|
+
/** HTML bruto (quando getRawHtml: true) */
|
|
153
|
+
rawHtml?: string;
|
|
123
154
|
/** Dados SSR extraídos (Next.js, Nuxt, etc.) */
|
|
124
155
|
ssrData?: SSRData;
|
|
125
156
|
/** Chamadas JSON interceptadas durante renderização (só Tier 3) */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/scraper/types.ts"],"names":[],"mappings":"AAEA;;;;;GAKG;AACH,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,CAAC;AAExD,+EAA+E;AAC/E,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,UAAU,EAAE,OAAO,CAAC;CACrB;AAED,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/scraper/types.ts"],"names":[],"mappings":"AAEA;;;;;GAKG;AACH,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,CAAC;AAExD,+EAA+E;AAC/E,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,UAAU,EAAE,OAAO,CAAC;CACrB;AAED,qCAAqC;AACrC,MAAM,WAAW,aAAa;IAC5B,4CAA4C;IAC5C,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,SAAS,CAAC,EAAE,UAAU,CAAC;IAEvB,yFAAyF;IACzF,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAE1B,iDAAiD;IACjD,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAE1B,4EAA4E;IAC5E,OAAO,CAAC,EAAE,OAAO,CAAC;IAElB,iDAAiD;IACjD,aAAa,CAAC,EAAE;QACd,4CAA4C;QAC5C,QAAQ,CAAC,EAAE,OAAO,CAAC;QACnB;;;;WAIG;QACH,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,qDAAqD;QACrD,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;CACH;AAED,oCAAoC;AACpC,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;AAEzD,+CAA+C;AAC/C,MAAM,WAAW,aAAa;IAC5B,iEAAiE;IACjE,OAAO,CAAC,EAAE,aAAa,EAAE,CAAC;IAC1B,qCAAqC;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,sFAAsF;IACtF,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,0BAA0B;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC;;;;;OAKG;IACH,SAAS,CAAC,EAAE,UAAU,CAAC;IAEvB,0FAA0F;IAC1F,UAAU,CAAC,EAAE,OAAO,CAAC;IAGrB,0EAA0E;IAC1E,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,4DAA4D;IAC5D,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,0DAA0D;AAC1D,MAAM,WAAW,OAAO;IACtB;;;;;;;;;;;OAWG;IACH,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,WAAW,GAAG,KAAK,GAAG,SAAS,GAAG,UAAU,GAAG,SAAS,CAAC;IACtG,2BAA2B;IAC3B,IAAI,EAAE,OAAO,CAAC;CACf;AAED,kEAAkE;AAClE,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,OAAO,CAAC;CACf;AAID,oCAAoC;AACpC,MAAM,WAAW,OAAO;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,yCAAyC;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,2BAA2B;AAC3B,MAAM,WAAW,UAAU;IACzB,wDAAwD;IACxD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,kDAAkD;IAClD,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B,sDAAsD;IACtD,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,+CAA+C;IAC/C,KAAK,CAAC,EAAE,MAAM,CAAC;
IACf,qCAAqC;IACrC,SAAS,CAAC,EAAE,UAAU,CAAC;CACxB;AAED,8BAA8B;AAC9B,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,OAAO,EAAE,CAAC;IACjB,IAAI,EAAE,UAAU,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,mCAAmC;AACnC,MAAM,WAAW,YAAY;IAC3B,yCAAyC;IACzC,GAAG,EAAE,MAAM,CAAC;IACZ,uBAAuB;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,uBAAuB;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IAGrB,2BAA2B;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,6BAA6B;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IAGd,qCAAqC;IACrC,IAAI,EAAE,UAAU,CAAC;IACjB,wBAAwB;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IAEjB,2CAA2C;IAC3C,OAAO,CAAC,EAAE,MAAM,CAAC;IAGjB,gDAAgD;IAChD,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mEAAmE;IACnE,eAAe,CAAC,EAAE,cAAc,EAAE,CAAC;IAEnC,iCAAiC;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
|
package/dist/scraper/types.js
CHANGED
package/package.json
CHANGED
package/readme.md
CHANGED
|
@@ -23,6 +23,7 @@ Framework de browser automation alimentado por LLM. Voce fornece uma **URL** e u
|
|
|
23
23
|
- [Monitoramento — Tokens e Memoria](#monitoramento--tokens-e-memoria)
|
|
24
24
|
- [Dicas de uso](#dicas-de-uso)
|
|
25
25
|
- [Limitacoes](#limitacoes)
|
|
26
|
+
- [Scraper e Map](#scraper-e-map)
|
|
26
27
|
- [Arquitetura](#arquitetura)
|
|
27
28
|
- [Variaveis de ambiente](#variaveis-de-ambiente)
|
|
28
29
|
- [Tipos exportados](#tipos-exportados)
|
|
@@ -635,6 +636,44 @@ Criar um `Auspex` uma vez e chamar `run()` multiplas vezes eh mais eficiente do
|
|
|
635
636
|
|
|
636
637
|
---
|
|
637
638
|
|
|
639
|
+
## Scraper e Map
|
|
640
|
+
|
|
641
|
+
O pacote inclui a classe `Scraper` para scraping com fallback em 3 tiers (HTTP → Stealth → Playwright). Além de `scrape()` e `scrapeMany()`, há o método `map()` — descoberta rápida das URLs de um site.
|
|
642
|
+
|
|
643
|
+
### Map
|
|
644
|
+
|
|
645
|
+
Extrai os links de uma página, cada um com seu título (texto da âncora), filtrando por domínio e permitindo busca por relevância. Útil para descobrir páginas antes de navegar ou de chamar o Agent.
|
|
646
|
+
|
|
647
|
+
```typescript
|
|
648
|
+
import { Scraper } from "auspex";
|
|
649
|
+
|
|
650
|
+
const crawler = new Scraper({ verbose: true });
|
|
651
|
+
|
|
652
|
+
const result = await crawler.map("https://nodejs.org", {
|
|
653
|
+
search: "pricing", // filtrar/ordenar por relevância
|
|
654
|
+
limit: 20,
|
|
655
|
+
includeSubdomains: true,
|
|
656
|
+
ignoreQueryParameters: true,
|
|
657
|
+
});
|
|
658
|
+
|
|
659
|
+
for (const link of result.links) {
|
|
660
|
+
console.log(link.url, link.title);
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
await crawler.close();
|
|
664
|
+
```
|
|
665
|
+
|
|
666
|
+
| Opção | Tipo | Default | Descrição |
|
|
667
|
+
|-------|------|---------|-----------|
|
|
668
|
+
| `search` | `string` | — | Filtrar/ordenar links por relevância ao termo |
|
|
669
|
+
| `includeSubdomains` | `boolean` | `true` | Incluir links de subdomínios |
|
|
670
|
+
| `ignoreQueryParameters` | `boolean` | `true` | Deduplicar URLs removendo `?foo=bar` |
|
|
671
|
+
| `limit` | `number` | `500` | Máximo de links retornados |
|
|
672
|
+
|
|
673
|
+
Exemplo: `npx tsx examples/map.ts`
|
|
674
|
+
|
|
675
|
+
---
|
|
676
|
+
|
|
638
677
|
## Arquitetura
|
|
639
678
|
|
|
640
679
|
```
|
|
@@ -656,7 +695,7 @@ src/
|
|
|
656
695
|
actions.ts # Parser e validador de acoes do LLM
|
|
657
696
|
report.ts # Gerador de relatorio de execucao
|
|
658
697
|
scraper/
|
|
659
|
-
index.ts #
|
|
698
|
+
index.ts # Scraper — fallback HTTP -> Stealth -> Browser
|
|
660
699
|
tiers/
|
|
661
700
|
tier1-http.ts # Tier 1: got-scraping (HTTP puro)
|
|
662
701
|
tier2-stealth.ts # Tier 2: Playwright stealth
|