recker 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache/memory-storage.d.ts.map +1 -1
- package/dist/cache/memory-storage.js +0 -5
- package/dist/plugins/cache.d.ts.map +1 -1
- package/dist/plugins/cache.js +0 -7
- package/dist/plugins/scrape.d.ts.map +1 -1
- package/dist/plugins/scrape.js +9 -14
- package/dist/runner/request-runner.d.ts.map +1 -1
- package/dist/runner/request-runner.js +0 -1
- package/dist/scrape/document.d.ts +3 -2
- package/dist/scrape/document.d.ts.map +1 -1
- package/dist/scrape/document.js +20 -3
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"memory-storage.d.ts","sourceRoot":"","sources":["../../src/cache/memory-storage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAE7D,qBAAa,aAAc,YAAW,YAAY;IAChD,OAAO,CAAC,OAAO,CAAiC;IAChD,OAAO,CAAC,IAAI,CAA6B;IAEnC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"memory-storage.d.ts","sourceRoot":"","sources":["../../src/cache/memory-storage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAE7D,qBAAa,aAAc,YAAW,YAAY;IAChD,OAAO,CAAC,OAAO,CAAiC;IAChD,OAAO,CAAC,IAAI,CAA6B;IAEnC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,SAAS,CAAC;IAejD,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAOhE,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAMxC,KAAK;CAIN"}
|
|
@@ -2,23 +2,18 @@ export class MemoryStorage {
|
|
|
2
2
|
storage = new Map();
|
|
3
3
|
ttls = new Map();
|
|
4
4
|
async get(key) {
|
|
5
|
-
console.log(`[DEBUG MemoryStorage] Getting key: ${key}`);
|
|
6
5
|
const entry = this.storage.get(key);
|
|
7
6
|
if (!entry) {
|
|
8
|
-
console.log(`[DEBUG MemoryStorage] Key not found: ${key}`);
|
|
9
7
|
return undefined;
|
|
10
8
|
}
|
|
11
9
|
const expiry = this.ttls.get(key);
|
|
12
10
|
if (expiry && Date.now() > expiry) {
|
|
13
|
-
console.log(`[DEBUG MemoryStorage] Key expired: ${key} (expiry: ${expiry}, now: ${Date.now()})`);
|
|
14
11
|
this.delete(key);
|
|
15
12
|
return undefined;
|
|
16
13
|
}
|
|
17
|
-
console.log(`[DEBUG MemoryStorage] Key found: ${key}`);
|
|
18
14
|
return entry;
|
|
19
15
|
}
|
|
20
16
|
async set(key, entry, ttl) {
|
|
21
|
-
console.log(`[DEBUG MemoryStorage] Setting key: ${key}, ttl: ${ttl}`);
|
|
22
17
|
this.storage.set(key, entry);
|
|
23
18
|
if (ttl) {
|
|
24
19
|
this.ttls.set(key, Date.now() + ttl);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/plugins/cache.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,aAAa,EAAc,MAAM,EAAE,aAAa,EAAkB,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAM/H,MAAM,WAAW,YAAY;IAC3B,OAAO,CAAC,EAAE,YAAY,CAAC;IACvB,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,aAAa,KAAK,MAAM,CAAC;IAU9C,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAM9B,WAAW,CAAC,EAAE,OAAO,CAAC;IAOtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAOlB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAmDD,iBAAS,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,CA6BrE;
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/plugins/cache.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,aAAa,EAAc,MAAM,EAAE,aAAa,EAAkB,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAM/H,MAAM,WAAW,YAAY;IAC3B,OAAO,CAAC,EAAE,YAAY,CAAC;IACvB,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,aAAa,KAAK,MAAM,CAAC;IAU9C,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAM9B,WAAW,CAAC,EAAE,OAAO,CAAC;IAOtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAOlB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAmDD,iBAAS,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,CA6BrE;AAyOD,wBAAgB,KAAK,CAAC,OAAO,GAAE,YAAiB,GAAG,MAAM,CAiYxD;AAGD,OAAO,EAAE,iBAAiB,EAAE,CAAC"}
|
package/dist/plugins/cache.js
CHANGED
|
@@ -358,14 +358,10 @@ export function cache(options = {}) {
|
|
|
358
358
|
}
|
|
359
359
|
}
|
|
360
360
|
if (cachedEntry) {
|
|
361
|
-
console.log('[DEBUG EXTREME] Revalidating. Entry:', !!cachedEntry);
|
|
362
361
|
const conditionalReq = createConditionalRequest(req, cachedEntry);
|
|
363
362
|
try {
|
|
364
|
-
console.log('[DEBUG EXTREME] Calling next(conditionalReq)');
|
|
365
363
|
const response = await next(conditionalReq);
|
|
366
|
-
console.log('[DEBUG EXTREME] Returned from next. Status:', response.status);
|
|
367
364
|
if (response.status === 304) {
|
|
368
|
-
console.log('[DEBUG EXTREME] Status is 304. Updating cache.');
|
|
369
365
|
const updatedEntry = {
|
|
370
366
|
...cachedEntry,
|
|
371
367
|
timestamp: now,
|
|
@@ -377,9 +373,6 @@ export function cache(options = {}) {
|
|
|
377
373
|
await storage.set(key, updatedEntry, storageTtl);
|
|
378
374
|
return createCachedResponse(updatedEntry, 'revalidated');
|
|
379
375
|
}
|
|
380
|
-
else {
|
|
381
|
-
console.log('[DEBUG EXTREME] Status is NOT 304. Status:', response.status);
|
|
382
|
-
}
|
|
383
376
|
if (response.ok) {
|
|
384
377
|
const cacheControl = parseCacheControl(response.headers.get('Cache-Control'));
|
|
385
378
|
if (!cacheControl.noStore) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../src/plugins/scrape.ts"],"names":[],"mappings":"AAuBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AACjE,OAAO,KAAK,EACV,aAAa,EACb,gBAAgB,EAChB,aAAa,EACb,cAAc,EACd,aAAa,EACb,aAAa,EACb,eAAe,EACf,UAAU,EACV,aAAa,EACb,cAAc,EACd,eAAe,EACf,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACvB,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../src/plugins/scrape.ts"],"names":[],"mappings":"AAuBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AACjE,OAAO,KAAK,EACV,aAAa,EACb,gBAAgB,EAChB,aAAa,EACb,cAAc,EACd,aAAa,EACb,aAAa,EACb,eAAe,EACf,UAAU,EACV,aAAa,EACb,cAAc,EACd,eAAe,EACf,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACvB,MAAM,oBAAoB,CAAC;AAgB5B,MAAM,WAAW,aAAa,CAAC,CAAC,CAAE,SAAQ,OAAO,CAAC,CAAC,CAAC;IAIlD,MAAM,CAAC,OAAO,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,OAAO,uBAAuB,EAAE,cAAc,CAAC,CAAC;IAKzF,KAAK,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC,CAAC;IAKjE,MAAM,CAAC,OAAO,CAAC,EAAE,sBAAsB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;IAKpE,IAAI,IAAI,OAAO,CAAC,aAAa,CAAC,CAAC;IAK/B,SAAS,IAAI,OAAO,CAAC,aAAa,CAAC,CAAC;IAKpC,WAAW,IAAI,OAAO,CAAC,eAAe,CAAC,CAAC;IAKxC,MAAM,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC;IAKhC,KAAK,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC,CAAC;IAKnD,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;IAKrD,OAAO,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC;IAKtC,MAAM,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;IAKpC,OAAO,CAAC,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;CAClF;AA2BD,wBAAgB,MAAM,CAAC,CAAC,SAAS,cAAc,EAC7C,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,GACtC,aAAa,CAAC,CAAC,CAAC,CA8ElB;AAcD,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,OAAO,uBAAuB,EAAE,cAAc,CAAC,CAGzD;AAaD,wBAAsB,cAAc,CAClC,OAAO,EAAE,OAAO,CAAC,cAAc,CAAC,EAChC,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,OAAO,uBAAuB,EAAE,cAAc,CAAC,CAQzD;AAGD,YAAY,EACV,aAAa,EACb,gBAAgB,EAChB,aAAa,EACb,cAAc,EACd,aAAa,EACb,aAAa,EACb,eAAe,EACf,UAAU,EACV,aAAa,EACb,kBAAkB,EAClB,cAAc,EACd,eAAe,EACf,cAAc,EACd,qBAAqB,EACrB,sBAAsB,GACvB,MAAM,oBAAoB,CAAC"}
|
package/dist/plugins/scrape.js
CHANGED
|
@@ -1,23 +1,18 @@
|
|
|
1
1
|
let ScrapeDocumentClass = null;
|
|
2
|
-
async function
|
|
2
|
+
async function getScrapeDocumentClass() {
|
|
3
3
|
if (!ScrapeDocumentClass) {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
ScrapeDocumentClass = module.ScrapeDocument;
|
|
7
|
-
}
|
|
8
|
-
catch (error) {
|
|
9
|
-
throw new Error('Failed to load scrape module. Make sure cheerio is installed: pnpm add cheerio');
|
|
10
|
-
}
|
|
4
|
+
const module = await import('../scrape/document.js');
|
|
5
|
+
ScrapeDocumentClass = module.ScrapeDocument;
|
|
11
6
|
}
|
|
12
7
|
return ScrapeDocumentClass;
|
|
13
8
|
}
|
|
14
9
|
export function scrape(promise) {
|
|
15
10
|
const basePromise = Promise.resolve(promise);
|
|
16
11
|
const getDocument = async (options) => {
|
|
17
|
-
const ScrapeDoc = await
|
|
12
|
+
const ScrapeDoc = await getScrapeDocumentClass();
|
|
18
13
|
const response = await basePromise;
|
|
19
14
|
const html = await response.text();
|
|
20
|
-
return
|
|
15
|
+
return ScrapeDoc.create(html, {
|
|
21
16
|
baseUrl: options?.baseUrl || response.url,
|
|
22
17
|
...options,
|
|
23
18
|
});
|
|
@@ -73,14 +68,14 @@ export function scrape(promise) {
|
|
|
73
68
|
return enhanced;
|
|
74
69
|
}
|
|
75
70
|
export async function parseHtml(html, options) {
|
|
76
|
-
const ScrapeDoc = await
|
|
77
|
-
return
|
|
71
|
+
const ScrapeDoc = await getScrapeDocumentClass();
|
|
72
|
+
return ScrapeDoc.create(html, options);
|
|
78
73
|
}
|
|
79
74
|
export async function scrapeResponse(promise, options) {
|
|
80
|
-
const ScrapeDoc = await
|
|
75
|
+
const ScrapeDoc = await getScrapeDocumentClass();
|
|
81
76
|
const response = await promise;
|
|
82
77
|
const html = await response.text();
|
|
83
|
-
return
|
|
78
|
+
return ScrapeDoc.create(html, {
|
|
84
79
|
baseUrl: options?.baseUrl || response.url,
|
|
85
80
|
...options,
|
|
86
81
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"request-runner.d.ts","sourceRoot":"","sources":["../../src/runner/request-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAItC,MAAM,WAAW,aAAa;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,WAAW,CAAC,CAAC,GAAG,GAAG;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY,CAAC,CAAC,GAAG,GAAG;IACnC,OAAO,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,EAAE,CAAC;IACvB,KAAK,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;CACH;AAED,qBAAa,aAAc,SAAQ,YAAY;IAC7C,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,KAAK,CAAqB;IAClC,OAAO,CAAC,WAAW,CAAa;IAChC,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,KAAK,CAA0C;IACvD,OAAO,CAAC,SAAS,CAAa;gBAElB,OAAO,GAAE,aAAkB;IAKhC,GAAG,CAAC,CAAC,EACV,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACpB,OAAO,GAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,EAAE,CAAC,EAAE,MAAM,CAAA;KAAO,GAC/C,IAAI;IAWM,GAAG,CAAC,CAAC,EAChB,KAAK,EAAE,GAAG,EAAE,EACZ,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC,CAAC,EACnD,OAAO,GAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAO,GAClC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YA+Bb,WAAW;
|
|
1
|
+
{"version":3,"file":"request-runner.d.ts","sourceRoot":"","sources":["../../src/runner/request-runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAItC,MAAM,WAAW,aAAa;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,WAAW,CAAC,CAAC,GAAG,GAAG;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY,CAAC,CAAC,GAAG,GAAG;IACnC,OAAO,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,EAAE,CAAC;IACvB,KAAK,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;CACH;AAED,qBAAa,aAAc,SAAQ,YAAY;IAC7C,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,KAAK,CAAqB;IAClC,OAAO,CAAC,WAAW,CAAa;IAChC,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,KAAK,CAA0C;IACvD,OAAO,CAAC,SAAS,CAAa;gBAElB,OAAO,GAAE,aAAkB;IAKhC,GAAG,CAAC,CAAC,EACV,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACpB,OAAO,GAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,EAAE,CAAC,EAAE,MAAM,CAAA;KAAO,GAC/C,IAAI;IAWM,GAAG,CAAC,CAAC,EAChB,KAAK,EAAE,GAAG,EAAE,EACZ,SAAS,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC,CAAC,EACnD,OAAO,GAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAO,GAClC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YA+Bb,WAAW;IA8BlB,WAAW;;;;;;;CAUnB"}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { CheerioAPI } from 'cheerio';
|
|
2
2
|
import { ScrapeElement } from './element.js';
|
|
3
3
|
import type { ScrapeOptions, ExtractionSchema, ExtractedLink, ExtractedImage, ExtractedMeta, OpenGraphData, TwitterCardData, JsonLdData, ExtractedForm, ExtractedTable, ExtractedScript, ExtractedStyle, LinkExtractionOptions, ImageExtractionOptions } from './types.js';
|
|
4
4
|
export declare class ScrapeDocument {
|
|
5
5
|
private $;
|
|
6
6
|
private options;
|
|
7
|
-
constructor(
|
|
7
|
+
constructor($: CheerioAPI, options?: ScrapeOptions);
|
|
8
|
+
static create(html: string, options?: ScrapeOptions): Promise<ScrapeDocument>;
|
|
8
9
|
select(selector: string): ScrapeElement;
|
|
9
10
|
selectFirst(selector: string): ScrapeElement;
|
|
10
11
|
selectAll(selector: string): ScrapeElement[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/scrape/document.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/scrape/document.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,UAAU,EAAW,MAAM,SAAS,CAAC;AAEnD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAa7C,OAAO,KAAK,EACV,aAAa,EACb,gBAAgB,EAEhB,aAAa,EACb,cAAc,EACd,aAAa,EACb,aAAa,EACb,eAAe,EACf,UAAU,EACV,aAAa,EACb,cAAc,EACd,eAAe,EACf,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACvB,MAAM,YAAY,CAAC;AAyBpB,qBAAa,cAAc;IACzB,OAAO,CAAC,CAAC,CAAa;IACtB,OAAO,CAAC,OAAO,CAAgB;gBAKnB,CAAC,EAAE,UAAU,EAAE,OAAO,CAAC,EAAE,aAAa;WASrC,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,cAAc,CAAC;IAWnF,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAOvC,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAO5C,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,EAAE;IAW5C,KAAK,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAOtC,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,EAAE;IAS3C,IAAI,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM;IAO9B,KAAK,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE;IAcjC,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS;IAO7D,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE;IAcpD,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAO1C,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM;IAUnC,KAAK,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,aAAa,EAAE;IAUvD,MAAM,CAAC,OAAO,CAAC,EAAE,sBAAsB,GAAG,cAAc,EAAE;IAU1D,IAAI,IAAI,aAAa;IAOrB,SAAS,IAAI,aAAa;IAO1B,WAAW,IAAI,eAAe;IAO9B,MAAM,IAAI,UAAU,EAAE;IAOtB,KAAK,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa,EAAE;IAOzC,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,cAAc,EAAE;IAO3C,OAAO,IAAI,eAAe,EAAE;IAO5B,MAAM,IAAI,cAAc,EAAE;IAkB1B,OAAO,CAAC,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,gBAAgB,GAAG,CAAC;IAUvE,OAAO,CAAC,YAAY;IAgDpB,KAAK,IAAI,MAAM,GAAG,SAAS;IAQ3B,IAAI,IAAI,aAAa;IAOrB,IAAI,IAAI,aAAa;IAOrB,IAAI,IAAI,MAAM;IAOd,IAAI,IAAI,aAAa;IAOrB,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAOjC,KAAK,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM;IAS/B,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa,EAAE;IAiB5D,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa,EAAE;IAiBjE,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,aAAa,EAAE;IAazD,IAAI,GAAG,IAAI,UAAU,CAEpB;IAKD,IAAI,OAAO,IAAI,MAAM,GAAG,SAAS,CAEhC;CACF"}
|
package/dist/scrape/document.js
CHANGED
|
@@ -1,13 +1,30 @@
|
|
|
1
|
-
import { load } from 'cheerio';
|
|
2
1
|
import { ScrapeElement } from './element.js';
|
|
3
2
|
import { extractLinks, extractImages, extractMeta, extractOpenGraph, extractTwitterCard, extractJsonLd, extractForms, extractTables, extractScripts, extractStyles, } from './extractors.js';
|
|
3
|
+
let cheerioLoad = null;
|
|
4
|
+
async function loadCheerio() {
|
|
5
|
+
if (cheerioLoad) {
|
|
6
|
+
return cheerioLoad;
|
|
7
|
+
}
|
|
8
|
+
try {
|
|
9
|
+
const cheerio = await import('cheerio');
|
|
10
|
+
cheerioLoad = cheerio.load;
|
|
11
|
+
return cheerioLoad;
|
|
12
|
+
}
|
|
13
|
+
catch {
|
|
14
|
+
throw new Error('cheerio is required for scraping but not installed. Install it with: pnpm add cheerio');
|
|
15
|
+
}
|
|
16
|
+
}
|
|
4
17
|
export class ScrapeDocument {
|
|
5
18
|
$;
|
|
6
19
|
options;
|
|
7
|
-
constructor(
|
|
8
|
-
this.$ =
|
|
20
|
+
constructor($, options) {
|
|
21
|
+
this.$ = $;
|
|
9
22
|
this.options = options || {};
|
|
10
23
|
}
|
|
24
|
+
static async create(html, options) {
|
|
25
|
+
const load = await loadCheerio();
|
|
26
|
+
return new ScrapeDocument(load(html), options);
|
|
27
|
+
}
|
|
11
28
|
select(selector) {
|
|
12
29
|
return new ScrapeElement(this.$(selector), this.$);
|
|
13
30
|
}
|