@crawlee/linkedom 4.0.0-beta.64 → 4.0.0-beta.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import type { BasicCrawlingContext, EnqueueLinksOptions, ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, RequestHandler, RequestProvider, RouterRoutes, SkippedRequestCallback } from '@crawlee/http';
1
+ import type { BasicCrawlingContext, EnqueueLinksOptions, ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, IRequestManager, RequestHandler, RouterRoutes, SkippedRequestCallback } from '@crawlee/http';
2
2
  import { HttpCrawler } from '@crawlee/http';
3
3
  import type { Dictionary } from '@crawlee/types';
4
4
  import { type CheerioRoot, type RobotsTxtFile } from '@crawlee/utils';
@@ -8,7 +8,7 @@ JSONData extends Dictionary = any> = ErrorHandler<LinkeDOMCrawlingContext<UserDa
8
8
  export interface LinkeDOMCrawlerOptions<ContextExtension = Dictionary<never>, ExtendedContext extends LinkeDOMCrawlingContext = LinkeDOMCrawlingContext & ContextExtension, UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
9
9
  JSONData extends Dictionary = any> extends HttpCrawlerOptions<LinkeDOMCrawlingContext<UserData, JSONData>, ContextExtension, ExtendedContext> {
10
10
  }
11
- export interface LinkeDOMCrawlerEnqueueLinksOptions extends Omit<EnqueueLinksOptions, 'urls' | 'requestQueue'> {
11
+ export interface LinkeDOMCrawlerEnqueueLinksOptions extends Omit<EnqueueLinksOptions, 'urls' | 'requestManager'> {
12
12
  }
13
13
  export type LinkeDOMHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
14
14
  JSONData extends Dictionary = any> = InternalHttpHook<LinkeDOMCrawlingContext<UserData, JSONData>>;
@@ -65,13 +65,15 @@ JSONData extends Dictionary = any> = RequestHandler<LinkeDOMCrawlingContext<User
65
65
  * and then invokes the user-provided {@link LinkeDOMCrawlerOptions.requestHandler} to extract page data
66
66
  * using the `window` object.
67
67
  *
68
- * The source URLs are represented using {@link Request} objects that are fed from
69
- * {@link RequestList} or {@link RequestQueue} instances provided by the {@link LinkeDOMCrawlerOptions.requestList}
70
- * or {@link LinkeDOMCrawlerOptions.requestQueue} constructor options, respectively.
68
+ * The source URLs are represented using {@link Request} objects that are fed from the
69
+ * {@link IRequestManager|request manager} provided via the {@link LinkeDOMCrawlerOptions.requestManager|`requestManager`}
70
+ * constructor option (a {@link RequestQueue} is itself a request manager). To read from a read-only source such
71
+ * as a {@link RequestList} while still being able to enqueue new requests, combine it with a queue into a
72
+ * {@link RequestManagerTandem} via {@link IRequestLoader.toTandem|`requestLoader.toTandem()`} and pass the
73
+ * result as `requestManager`.
71
74
  *
72
- * If both {@link LinkeDOMCrawlerOptions.requestList} and {@link LinkeDOMCrawlerOptions.requestQueue} are used,
73
- * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
74
- * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
75
+ * > The {@link LinkeDOMCrawlerOptions.requestList|`requestList`} and {@link LinkeDOMCrawlerOptions.requestQueue|`requestQueue`}
76
+ * > options are deprecated; they are still accepted and folded into a single `requestManager` for back-compat.
75
77
  *
76
78
  * The crawler finishes when there are no more {@link Request} objects to crawl.
77
79
  *
@@ -135,7 +137,7 @@ export declare class LinkeDOMCrawler<ContextExtension = Dictionary<never>, Exten
135
137
  interface EnqueueLinksInternalOptions {
136
138
  options?: EnqueueLinksOptions;
137
139
  window: Window | null;
138
- requestQueue: RequestProvider;
140
+ requestManager: IRequestManager;
139
141
  robotsTxtFile?: RobotsTxtFile;
140
142
  onSkippedRequest?: SkippedRequestCallback;
141
143
  originalRequestUrl: string;
@@ -1 +1 @@
1
- {"version":3,"file":"linkedom-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,oBAAoB,EACpB,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAEH,WAAW,EAKd,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAS,MAAM,gBAAgB,CAAC;AAC7E,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,oBAAoB,CAC5B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE9D,MAAM,WAAW,sBAAsB,CACnC,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,uBAAuB,GAAG,uBAAuB,GAAG,gBAAgB,EAC5F,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE,gBAAgB,EAAE,eAAe,CAAC;CAAG;AAE/G,MAAM,WAAW,kCAAmC,SAAQ,IAAI,CAAC,mBAAmB,EAAE,MAAM,GAAG,cAAc,CAAC;CAAG;AAEjH,MAAM,MAAM,YAAY,CACpB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAElE,MAAM,WAAW,uBAAuB,CACpC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACrD,MAAM,EAAE,MAAM,CAAC;IAMf,QAAQ,EAAE,QAAQ,CAAC;IAEnB;;;;;;;;;;;;OAYG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,MAAM,sBAAsB,CAC9B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEhE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqEG;AAEH,qBAAa,eAAe,CACxB,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,uBAAuB,GAAG,uBAAuB,GAAG,gBAAgB,CAC9F,SAAQ,WAAW,CAAC,uBAAuB,EAAE,gBAAgB,EAAE,eAAe,CAAC;IAC7E,OAAO,CAAC,MAAM,CAAC,MAAM,CAAmB;gBAE5B,OAAO,EAAE,sBAAsB,CAAC,gBAAgB,EAAE,eAAe,CAAC;cAS3D,oBAAoB;yBA8BT,MAAM;uBAMR,MAAM;2BAMF,QAAQ;;wCAeM,kCAAkC;kCAWxC,MAAM;oCAaJ,MAAM;;YAxElC,YAAY;YA8CZ,UAAU;CAqC3B;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,YAAY,EAAE,eAAe,CAAC;IAC9B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,UAAU,gCAAgC;IACtC,YAAY,EAAE,oBAAoB,CAAC,cAAc,CAAC,CAAC;IACnD,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AASD,gBAAgB;AAChB,wBAAsB,2BAA2B,CAC7C,OAAO,EAAE,2BAA2B,GAAG,gCAAgC,oBAqC1E;AAmBD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,oBAAoB,CAChC,OAAO,SAAS,uBAAuB,GAAG,uBAAuB,EACjE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
1
+ {"version":3,"file":"linkedom-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,oBAAoB,EACpB,mBAAmB,EACnB,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,eAAe,EACf,cAAc,EACd,YAAY,EACZ,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAEH,WAAW,EAKd,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAS,MAAM,gBAAgB,CAAC;AAC7E,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,MAAM,oBAAoB,CAC5B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE9D,MAAM,WAAW,sBAAsB,CACnC,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,uBAAuB,GAAG,uBAAuB,GAAG,gBAAgB,EAC5F,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,kBAAkB,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE,gBAAgB,EAAE,eAAe,CAAC;CAAG;AAE/G,MAAM,WAAW,kCAAmC,SAAQ,IAAI,CAAC,mBAAmB,EAAE,MAAM,GAAG,gBAAgB,CAAC;CAAG;AAEnH,MAAM,MAAM,YAAY,CACpB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAElE,MAAM,WAAW,uBAAuB,CACpC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACrD,MAAM,EAAE,MAAM,CAAC;IAMf,QAAQ,EAAE,QAAQ,CAAC;IAEnB;;;;;;;;;;;;OAYG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,MAAM,sBAAsB,CAC9B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,uBAAuB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEhE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuEG;AAEH,qBAAa,eAAe,CACxB,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,uBAAuB,GAAG,uBAAuB,GAAG,gBAAgB,CAC9F,SAAQ,WAAW,CAAC,uBAAuB,EAAE,gBAAgB,EAAE,eAAe,CAAC;IAC7E,OAAO,CAAC,MAAM,CAAC,MAAM,CAAmB;gBAE5B,OAAO,EAAE,sBAAsB,CAAC,gBAAgB,EAAE,eAAe,CAAC;cAS3D,oBAAoB;yBA8BT,MAAM;uBAMR,MAAM;2BAMF,QAAQ;;wCAeM,kCAAkC;kCAcxC,MAAM;oCAaJ,MAAM;;YA3ElC,YAAY;YA8CZ,UAAU;CAwC3B;AAED,UAAU,2BAA2B;IACjC,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,cAAc,EAAE,eAAe,CAAC;IAChC,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAC1C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,UAAU,gCAAgC;IACtC,YAAY,EAAE,oBAAoB,CAAC,cAAc,CAAC,CAAC;IACnD,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B;AASD,gBAAgB;AAChB,wBAAsB,2BAA2B,CAC7C,OAAO,EAAE,2BAA2B,GAAG,gCAAgC,oBAqC1E;AAmBD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,oBAAoB,CAChC,OAAO,SAAS,uBAAuB,GAAG,uBAAuB,EACjE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,kDAEzC"}
@@ -21,13 +21,15 @@ import { DOMParser } from 'linkedom/cached';
21
21
  * and then invokes the user-provided {@link LinkeDOMCrawlerOptions.requestHandler} to extract page data
22
22
  * using the `window` object.
23
23
  *
24
- * The source URLs are represented using {@link Request} objects that are fed from
25
- * {@link RequestList} or {@link RequestQueue} instances provided by the {@link LinkeDOMCrawlerOptions.requestList}
26
- * or {@link LinkeDOMCrawlerOptions.requestQueue} constructor options, respectively.
24
+ * The source URLs are represented using {@link Request} objects that are fed from the
25
+ * {@link IRequestManager|request manager} provided via the {@link LinkeDOMCrawlerOptions.requestManager|`requestManager`}
26
+ * constructor option (a {@link RequestQueue} is itself a request manager). To read from a read-only source such
27
+ * as a {@link RequestList} while still being able to enqueue new requests, combine it with a queue into a
28
+ * {@link RequestManagerTandem} via {@link IRequestLoader.toTandem|`requestLoader.toTandem()`} and pass the
29
+ * result as `requestManager`.
27
30
  *
28
- * If both {@link LinkeDOMCrawlerOptions.requestList} and {@link LinkeDOMCrawlerOptions.requestQueue} are used,
29
- * the instance first processes URLs from the {@link RequestList} and automatically enqueues all of them
30
- * to {@link RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
31
+ * > The {@link LinkeDOMCrawlerOptions.requestList|`requestList`} and {@link LinkeDOMCrawlerOptions.requestQueue|`requestQueue`}
32
+ * > options are deprecated; they are still accepted and folded into a single `requestManager` for back-compat.
31
33
  *
32
34
  * The crawler finishes when there are no more {@link Request} objects to crawl.
33
35
  *
@@ -125,9 +127,12 @@ export class LinkeDOMCrawler extends HttpCrawler {
125
127
  return {
126
128
  enqueueLinks: async (enqueueOptions) => {
127
129
  return linkedomCrawlerEnqueueLinks({
128
- options: { ...enqueueOptions, limit: this.calculateEnqueuedRequestLimit(enqueueOptions?.limit) },
130
+ options: {
131
+ ...enqueueOptions,
132
+ limit: await this.calculateEnqueuedRequestLimit(enqueueOptions?.limit),
133
+ },
129
134
  window: document.defaultView,
130
- requestQueue: await this.getRequestQueue(),
135
+ requestManager: await this.getRequestManager(),
131
136
  robotsTxtFile: await this.getRobotsTxtFileForUrl(crawlingContext.request.url),
132
137
  onSkippedRequest: this.handleSkippedRequest,
133
138
  originalRequestUrl: crawlingContext.request.url,
@@ -180,7 +185,7 @@ export async function linkedomCrawlerEnqueueLinks(options) {
180
185
  });
181
186
  }
182
187
  return enqueueLinks({
183
- requestQueue: options.requestQueue,
188
+ requestManager: options.requestManager,
184
189
  robotsTxtFile: options.robotsTxtFile,
185
190
  onSkippedRequest: options.onSkippedRequest,
186
191
  urls,
@@ -1 +1 @@
1
- {"version":3,"file":"linkedom-crawler.js","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":"AAaA,OAAO,EACH,YAAY,EACZ,WAAW,EACX,sBAAsB,EACtB,sCAAsC,EACtC,MAAM,EACN,cAAc,GACjB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAwC,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAC7E,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAoE5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqEG;AAEH,MAAM,OAAO,eAGX,SAAQ,WAAuE;IACrE,MAAM,CAAC,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;IAExC,YAAY,OAAkE;QAC1E,MAAM,EAAE,sBAAsB,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;QAEpD,KAAK,CAAC;YACF,GAAG,IAAI;YACP,sBAAsB,EAAE,sBAAsB,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;SACxF,CAAC,CAAC;IACP,CAAC;IAEkB,oBAAoB;QACnC,OAAO,KAAK;aACP,oBAAoB,EAAE;aACtB,OAAO,CAAC;YACL,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;SACxD,CAAC;aACD,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC1E,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,eAA4C;QACnE,IAAI,CAAC;YACD,MAAM,KAAK,GAAG,eAAe,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAC/D,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,eAAe,CACnD,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,EAC/B,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CACnC,CAAC;YAEF,OAAO;gBACH,MAAM,EAAE,QAAQ,CAAC,WAAW;gBAC5B,IAAI,IAAI;oBACJ,OAAO,QAAQ,CAAC,eAAe,CAAC,SAAS,CAAC;gBAC9C,CAAC;gBACD,IAAI,QAAQ;oBACR,iEAAiE;oBACjE,OAAO,QAA+B,CAAC;gBAC3C,CAAC;aACJ,CAAC;QACN,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,GAAG,YAAY,sBAAsB,EAAE,CAAC;gBACxC,OAAO;oBACH,IAAI,MAAM;wBACN,MAAM,IAAI,sBAAsB,CAC5B,oEAAoE,EACpE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;oBACD,IAAI,IAAI;wBACJ,MAAM,IAAI,sBAAsB,CAC5B,kEAAkE,EAClE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;oBACD,IAAI,QAAQ;wBACR,MAAM,IAAI,sBAAsB,CAC5B,sEAAsE,EACtE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;iBACJ,CAAC;YACN,CAAC;YAED,MAAM,GAAG,CAAC;QACd,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,eAA+D;QACpF,OAAO;YACH,YAAY,EAAE,KAAK,EAAE,cAAmD,EAAE,EAAE;gBACxE,OAAO,2BAA2B,CAAC;oBAC/B,OAAO,EAAE,EAAE,GAAG,cAAc,EAAE,KAAK,EAAE,IAAI,CAAC,6BAA6B,CAAC,cAAc,EAAE,KAAK,CAAC,EAAE;oBAChG,MAAM,EAAE,QAAQ,CAAC,WAAW;oBAC5B,YAAY,EAAE,MAAM,IAAI,CAAC,eAAe,EAAE;oBAC1C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,oBAAoB;oBAC3C,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;YACD,KAAK,CAAC,eAAe,CAAC,QAAgB,EAAE,SAAS,GAAG,KAAK;gBACrD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;gBAE7C,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACjC,IAAI,SAAS,EAAE,CAAC;wBACZ,MAAM,KAAK,CAAC,EAAE,CAAC,CAAC;wBAChB,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;wBAClE,OAAO;oBACX,CAAC;oBAED,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC;YACD,KAAK,CAAC,gBAAgB,CAAC,QAAiB,EAAE,UAAU,GAAG,KAAK;gBACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;gBAE7C,IAAI,QAAQ,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAC7C,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;gBACzD,CAAC;gBAED,OAAO,CAAC,CAAC;YACb,CAAC;SACJ,CAAC;IACN,CAAC;;AAqBL,gBAAgB;AAChB,SAAS,oBAAoB,CACzB,OAAuE;IAEvE,OAAO,CAAC,CAAE,OAA4C,CAAC,YAAY,CAAC;AACxE,CAAC;AAED,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAC7C,OAAuE;IAEvE,MAAM,EAAE,OAAO,EAAE,mBAAmB,EAAE,MAAM,EAAE,kBAAkB,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;IAE9F,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,mBAAmB,EAAE,QAAQ;QAC9C,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,mBAAmB,EAAE,OAAO;KACpD,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,qBAAqB,CAC9B,MAAM,EACN,mBAAmB,EAAE,QAAQ,IAAI,GAAG,EACpC,mBAAmB,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CACxE,CAAC;IAEF,IAAI,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC,YAAY,CAAC;YACxB,IAAI;YACJ,OAAO;YACP,GAAG,mBAAmB;SACzB,CAAC,CAAC;IACP,CAAC;IAED,OAAO,YAAY,CAAC;QAChB,YAAY,EAAE,OAAO,CAAC,YAAY;QAClC,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;QAC1C,IAAI;QACJ,OAAO;QACP,GAAG,mBAAmB;KACzB,CAAC,CAAC;AACP,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAAC,MAAc,EAAE,QAAgB,EAAE,OAAe;IAC5E,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;SACxD,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SACvB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,CAAC;SACnD,GAAG,CAAC,CAAC,IAAwB,EAAE,EAAE;QAC9B,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,OAAO,SAAS,CAAC;QACrB,CAAC;QACD,OAAO,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,CAAa,CAAC;AACzE,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,oBAAoB,CAGlC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
1
+ {"version":3,"file":"linkedom-crawler.js","sourceRoot":"","sources":["../../src/internals/linkedom-crawler.ts"],"names":[],"mappings":"AAaA,OAAO,EACH,YAAY,EACZ,WAAW,EACX,sBAAsB,EACtB,sCAAsC,EACtC,MAAM,EACN,cAAc,GACjB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAwC,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAC7E,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAoE5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuEG;AAEH,MAAM,OAAO,eAGX,SAAQ,WAAuE;IACrE,MAAM,CAAC,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;IAExC,YAAY,OAAkE;QAC1E,MAAM,EAAE,sBAAsB,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;QAEpD,KAAK,CAAC;YACF,GAAG,IAAI;YACP,sBAAsB,EAAE,sBAAsB,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;SACxF,CAAC,CAAC;IACP,CAAC;IAEkB,oBAAoB;QACnC,OAAO,KAAK;aACP,oBAAoB,EAAE;aACtB,OAAO,CAAC;YACL,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;SACxD,CAAC;aACD,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC1E,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,eAA4C;QACnE,IAAI,CAAC;YACD,MAAM,KAAK,GAAG,eAAe,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAC/D,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,eAAe,CACnD,eAAe,CAAC,IAAI,CAAC,QAAQ,EAAE,EAC/B,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,CACnC,CAAC;YAEF,OAAO;gBACH,MAAM,EAAE,QAAQ,CAAC,WAAW;gBAC5B,IAAI,IAAI;oBACJ,OAAO,QAAQ,CAAC,eAAe,CAAC,SAAS,CAAC;gBAC9C,CAAC;gBACD,IAAI,QAAQ;oBACR,iEAAiE;oBACjE,OAAO,QAA+B,CAAC;gBAC3C,CAAC;aACJ,CAAC;QACN,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,GAAG,YAAY,sBAAsB,EAAE,CAAC;gBACxC,OAAO;oBACH,IAAI,MAAM;wBACN,MAAM,IAAI,sBAAsB,CAC5B,oEAAoE,EACpE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;oBACD,IAAI,IAAI;wBACJ,MAAM,IAAI,sBAAsB,CAC5B,kEAAkE,EAClE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;oBACD,IAAI,QAAQ;wBACR,MAAM,IAAI,sBAAsB,CAC5B,sEAAsE,EACtE,EAAE,KAAK,EAAE,GAAG,EAAE,CACjB,CAAC;oBACN,CAAC;iBACJ,CAAC;YACN,CAAC;YAED,MAAM,GAAG,CAAC;QACd,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,eAA+D;QACpF,OAAO;YACH,YAAY,EAAE,KAAK,EAAE,cAAmD,EAAE,EAAE;gBACxE,OAAO,2BAA2B,CAAC;oBAC/B,OAAO,EAAE;wBACL,GAAG,cAAc;wBACjB,KAAK,EAAE,MAAM,IAAI,CAAC,6BAA6B,CAAC,cAAc,EAAE,KAAK,CAAC;qBACzE;oBACD,MAAM,EAAE,QAAQ,CAAC,WAAW;oBAC5B,cAAc,EAAE,MAAM,IAAI,CAAC,iBAAiB,EAAE;oBAC9C,aAAa,EAAE,MAAM,IAAI,CAAC,sBAAsB,CAAC,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC;oBAC7E,gBAAgB,EAAE,IAAI,CAAC,oBAAoB;oBAC3C,kBAAkB,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG;oBAC/C,eAAe,EAAE,eAAe,CAAC,OAAO,CAAC,SAAS;iBACrD,CAAC,CAAC;YACP,CAAC;YACD,KAAK,CAAC,eAAe,CAAC,QAAgB,EAAE,SAAS,GAAG,KAAK;gBACrD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;gBAE7C,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACjC,IAAI,SAAS,EAAE,CAAC;wBACZ,MAAM,KAAK,CAAC,EAAE,CAAC,CAAC;wBAChB,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;wBAClE,OAAO;oBACX,CAAC;oBAED,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;gBACzD,CAAC;YACL,CAAC;YACD,KAAK,CAAC,gBAAgB,CAAC,QAAiB,EAAE,UAAU,GAAG,KAAK;gBACxD,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;gBAE7C,IAAI,QAAQ,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAC7C,MAAM,IAAI,KAAK,CAAC,aAAa,QAAQ,cAAc,CAAC,CAAC;gBACzD,CAAC;gBAED,OAAO,CAAC,CAAC;YACb,CAAC;SACJ,CAAC;IACN,CAAC;;AAqBL,gBAAgB;AAChB,SAAS,oBAAoB,CACzB,OAAuE;IAEvE,OAAO,CAAC,CAAE,OAA4C,CAAC,YAAY,CAAC;AACxE,CAAC;AAED,gBAAgB;AAChB,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAC7C,OAAuE;IAEvE,MAAM,EAAE,OAAO,EAAE,mBAAmB,EAAE,MAAM,EAAE,kBAAkB,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;IAE9F,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,OAAO,GAAG,sCAAsC,CAAC;QACnD,eAAe,EAAE,mBAAmB,EAAE,QAAQ;QAC9C,eAAe;QACf,kBAAkB;QAClB,mBAAmB,EAAE,mBAAmB,EAAE,OAAO;KACpD,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,qBAAqB,CAC9B,MAAM,EACN,mBAAmB,EAAE,QAAQ,IAAI,GAAG,EACpC,mBAAmB,EAAE,OAAO,IAAI,eAAe,IAAI,kBAAkB,CACxE,CAAC;IAEF,IAAI,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC,YAAY,CAAC;YACxB,IAAI;YACJ,OAAO;YACP,GAAG,mBAAmB;SACzB,CAAC,CAAC;IACP,CAAC;IAED,OAAO,YAAY,CAAC;QAChB,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,gBAAgB,EAAE,OAAO,CAAC,gBAAgB;QAC1C,IAAI;QACJ,OAAO;QACP,GAAG,mBAAmB;KACzB,CAAC,CAAC;AACP,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAAC,MAAc,EAAE,QAAgB,EAAE,OAAe;IAC5E,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;SACxD,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SACvB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,CAAC;SACnD,GAAG,CAAC,CAAC,IAAwB,EAAE,EAAE;QAC9B,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,OAAO,SAAS,CAAC;QACrB,CAAC;QACD,OAAO,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE,CAAa,CAAC;AACzE,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,oBAAoB,CAGlC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@crawlee/linkedom",
3
- "version": "4.0.0-beta.64",
3
+ "version": "4.0.0-beta.66",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
6
  "node": ">=22.0.0"
@@ -49,9 +49,9 @@
49
49
  "dependencies": {
50
50
  "@apify/timeout": "^0.3.2",
51
51
  "@apify/utilities": "^2.15.5",
52
- "@crawlee/http": "4.0.0-beta.64",
53
- "@crawlee/types": "4.0.0-beta.64",
54
- "@crawlee/utils": "4.0.0-beta.64",
52
+ "@crawlee/http": "4.0.0-beta.66",
53
+ "@crawlee/types": "4.0.0-beta.66",
54
+ "@crawlee/utils": "4.0.0-beta.66",
55
55
  "cheerio": "^1.0.0",
56
56
  "linkedom": "^0.18.10",
57
57
  "ow": "^2.0.0",
@@ -64,5 +64,5 @@
64
64
  }
65
65
  }
66
66
  },
67
- "gitHead": "76ba0fe1101b1e14fb5e1962540107775ad4e513"
67
+ "gitHead": "0890633b51caf16d93940be6f7da7881ad839992"
68
68
  }