@crawlee/utils 4.0.0-beta.2 → 4.0.0-beta.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/README.md +5 -1
  2. package/index.d.ts +5 -3
  3. package/index.d.ts.map +1 -1
  4. package/index.js +5 -3
  5. package/index.js.map +1 -1
  6. package/internals/cheerio.d.ts +2 -0
  7. package/internals/cheerio.d.ts.map +1 -1
  8. package/internals/cheerio.js.map +1 -1
  9. package/internals/debug.d.ts +1 -0
  10. package/internals/debug.d.ts.map +1 -1
  11. package/internals/debug.js +12 -0
  12. package/internals/debug.js.map +1 -1
  13. package/internals/extract-urls.d.ts +5 -0
  14. package/internals/extract-urls.d.ts.map +1 -1
  15. package/internals/extract-urls.js +7 -3
  16. package/internals/extract-urls.js.map +1 -1
  17. package/internals/iterables.d.ts +108 -0
  18. package/internals/iterables.d.ts.map +1 -0
  19. package/internals/iterables.js +167 -0
  20. package/internals/iterables.js.map +1 -0
  21. package/internals/robots.d.ts +9 -2
  22. package/internals/robots.d.ts.map +1 -1
  23. package/internals/robots.js +21 -29
  24. package/internals/robots.js.map +1 -1
  25. package/internals/sitemap.d.ts +14 -6
  26. package/internals/sitemap.d.ts.map +1 -1
  27. package/internals/sitemap.js +37 -24
  28. package/internals/sitemap.js.map +1 -1
  29. package/internals/system-info/cpu-info.d.ts.map +1 -0
  30. package/internals/system-info/cpu-info.js.map +1 -0
  31. package/internals/{systemInfoV2 → system-info}/memory-info.d.ts +1 -1
  32. package/internals/system-info/memory-info.d.ts.map +1 -0
  33. package/internals/{systemInfoV2 → system-info}/memory-info.js +1 -1
  34. package/internals/system-info/memory-info.js.map +1 -0
  35. package/internals/system-info/ps-tree.d.ts.map +1 -0
  36. package/internals/system-info/ps-tree.js.map +1 -0
  37. package/internals/url.d.ts +1 -1
  38. package/internals/url.d.ts.map +1 -1
  39. package/package.json +6 -5
  40. package/internals/memory-info.d.ts +0 -26
  41. package/internals/memory-info.d.ts.map +0 -1
  42. package/internals/memory-info.js +0 -131
  43. package/internals/memory-info.js.map +0 -1
  44. package/internals/systemInfoV2/cpu-info.d.ts.map +0 -1
  45. package/internals/systemInfoV2/cpu-info.js.map +0 -1
  46. package/internals/systemInfoV2/memory-info.d.ts.map +0 -1
  47. package/internals/systemInfoV2/memory-info.js.map +0 -1
  48. package/internals/systemInfoV2/ps-tree.d.ts.map +0 -1
  49. package/internals/systemInfoV2/ps-tree.js.map +0 -1
  50. package/tsconfig.build.tsbuildinfo +0 -1
  51. /package/internals/{systemInfoV2 → system-info}/cpu-info.d.ts +0 -0
  52. /package/internals/{systemInfoV2 → system-info}/cpu-info.js +0 -0
  53. /package/internals/{systemInfoV2 → system-info}/ps-tree.d.ts +0 -0
  54. /package/internals/{systemInfoV2 → system-info}/ps-tree.js +0 -0
package/README.md CHANGED
@@ -9,6 +9,10 @@
9
9
  <small>A web scraping and browser automation library</small>
10
10
  </h1>
11
11
 
12
+ <p align=center>
13
+ <a href="https://trendshift.io/repositories/5179" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5179" alt="apify%2Fcrawlee | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
14
+ </p>
15
+
12
16
  <p align=center>
13
17
  <a href="https://www.npmjs.com/package/@crawlee/core" rel="nofollow"><img src="https://img.shields.io/npm/v/@crawlee/core.svg" alt="NPM latest version" data-canonical-src="https://img.shields.io/npm/v/@crawlee/core/next.svg" style="max-width: 100%;"></a>
14
18
  <a href="https://www.npmjs.com/package/@crawlee/core" rel="nofollow"><img src="https://img.shields.io/npm/dm/@crawlee/core.svg" alt="Downloads" data-canonical-src="https://img.shields.io/npm/dm/@crawlee/core.svg" style="max-width: 100%;"></a>
@@ -24,7 +28,7 @@ Crawlee is available as the [`crawlee`](https://www.npmjs.com/package/crawlee) N
24
28
 
25
29
  > 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev)** 👈
26
30
 
27
- > Crawlee for Python is open for early adopters. 🐍 [👉 Checkout the source code 👈](https://github.com/apify/crawlee-python).
31
+ > Do you prefer 🐍 Python instead of JavaScript? [👉 Checkout Crawlee for Python 👈](https://github.com/apify/crawlee-python).
28
32
 
29
33
  ## Installation
30
34
 
package/index.d.ts CHANGED
@@ -3,15 +3,17 @@ export * from './internals/cheerio.js';
3
3
  export * from './internals/chunk.js';
4
4
  export * from './internals/extract-urls.js';
5
5
  export * from './internals/general.js';
6
- export * from './internals/memory-info.js';
7
6
  export * from './internals/debug.js';
8
7
  export * as social from './internals/social.js';
9
8
  export * from './internals/typedefs.js';
10
9
  export * from './internals/open_graph_parser.js';
11
10
  export * from './internals/robots.js';
12
11
  export * from './internals/sitemap.js';
12
+ export * from './internals/iterables.js';
13
+ export * from './internals/robots.js';
14
+ export * from './internals/sitemap.js';
13
15
  export * from './internals/url.js';
14
- export { getCurrentCpuTicksV2 } from './internals/systemInfoV2/cpu-info.js';
15
- export { getMemoryInfoV2 } from './internals/systemInfoV2/memory-info.js';
16
+ export { getCurrentCpuTicksV2, CpuSample } from './internals/system-info/cpu-info.js';
17
+ export { getMemoryInfo, MemoryInfo } from './internals/system-info/memory-info.js';
16
18
  export { Dictionary, Awaitable, Constructor } from '@crawlee/types';
17
19
  //# sourceMappingURL=index.d.ts.map
package/index.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,sBAAsB,CAAC;AACrC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,wBAAwB,CAAC;AACvC,cAAc,4BAA4B,CAAC;AAC3C,cAAc,sBAAsB,CAAC;AACrC,OAAO,KAAK,MAAM,MAAM,uBAAuB,CAAC;AAChD,cAAc,yBAAyB,CAAC;AACxC,cAAc,kCAAkC,CAAC;AACjD,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AAEnC,OAAO,EAAE,oBAAoB,EAAE,MAAM,sCAAsC,CAAC;AAC5E,OAAO,EAAE,eAAe,EAAE,MAAM,yCAAyC,CAAC;AAE1E,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,sBAAsB,CAAC;AACrC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,wBAAwB,CAAC;AACvC,cAAc,sBAAsB,CAAC;AACrC,OAAO,KAAK,MAAM,MAAM,uBAAuB,CAAC;AAChD,cAAc,yBAAyB,CAAC;AACxC,cAAc,kCAAkC,CAAC;AACjD,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,0BAA0B,CAAC;AACzC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AAEnC,OAAO,EAAE,oBAAoB,EAAE,SAAS,EAAE,MAAM,qCAAqC,CAAC;AACtF,OAAO,EAAE,aAAa,EAAE,UAAU,EAAE,MAAM,wCAAwC,CAAC;AAEnF,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC"}
package/index.js CHANGED
@@ -3,14 +3,16 @@ export * from './internals/cheerio.js';
3
3
  export * from './internals/chunk.js';
4
4
  export * from './internals/extract-urls.js';
5
5
  export * from './internals/general.js';
6
- export * from './internals/memory-info.js';
7
6
  export * from './internals/debug.js';
8
7
  export * as social from './internals/social.js';
9
8
  export * from './internals/typedefs.js';
10
9
  export * from './internals/open_graph_parser.js';
11
10
  export * from './internals/robots.js';
12
11
  export * from './internals/sitemap.js';
12
+ export * from './internals/iterables.js';
13
+ export * from './internals/robots.js';
14
+ export * from './internals/sitemap.js';
13
15
  export * from './internals/url.js';
14
- export { getCurrentCpuTicksV2 } from './internals/systemInfoV2/cpu-info.js';
15
- export { getMemoryInfoV2 } from './internals/systemInfoV2/memory-info.js';
16
+ export { getCurrentCpuTicksV2 } from './internals/system-info/cpu-info.js';
17
+ export { getMemoryInfo } from './internals/system-info/memory-info.js';
16
18
  //# sourceMappingURL=index.js.map
package/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,sBAAsB,CAAC;AACrC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,wBAAwB,CAAC;AACvC,cAAc,4BAA4B,CAAC;AAC3C,cAAc,sBAAsB,CAAC;AACrC,OAAO,KAAK,MAAM,MAAM,uBAAuB,CAAC;AAChD,cAAc,yBAAyB,CAAC;AACxC,cAAc,kCAAkC,CAAC;AACjD,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AAEnC,OAAO,EAAE,oBAAoB,EAAE,MAAM,sCAAsC,CAAC;AAC5E,OAAO,EAAE,eAAe,EAAE,MAAM,yCAAyC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,sBAAsB,CAAC;AACrC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,wBAAwB,CAAC;AACvC,cAAc,sBAAsB,CAAC;AACrC,OAAO,KAAK,MAAM,MAAM,uBAAuB,CAAC;AAChD,cAAc,yBAAyB,CAAC;AACxC,cAAc,kCAAkC,CAAC;AACjD,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,0BAA0B,CAAC;AACzC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,oBAAoB,CAAC;AAEnC,OAAO,EAAE,oBAAoB,EAAa,MAAM,qCAAqC,CAAC;AACtF,OAAO,EAAE,aAAa,EAAc,MAAM,wCAAwC,CAAC"}
@@ -1,5 +1,7 @@
1
1
  import type { CheerioAPI } from 'cheerio';
2
2
  export type CheerioRoot = CheerioAPI;
3
+ export type { CheerioAPI, Cheerio } from 'cheerio';
4
+ export type { Element } from 'domhandler';
3
5
  /**
4
6
  * The function converts a HTML document to a plain text.
5
7
  *
@@ -1 +1 @@
1
- {"version":3,"file":"cheerio.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAK1C,MAAM,MAAM,WAAW,GAAG,UAAU,CAAC;AAOrC;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,wBAAgB,UAAU,CAAC,oBAAoB,EAAE,MAAM,GAAG,WAAW,GAAG,MAAM,CAwC7E;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAAC,CAAC,EAAE,UAAU,EAAE,QAAQ,SAAM,EAAE,OAAO,SAAK,GAAG,MAAM,EAAE,CAwB5F"}
1
+ {"version":3,"file":"cheerio.d.ts","sourceRoot":"","sources":["../../src/internals/cheerio.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAK1C,MAAM,MAAM,WAAW,GAAG,UAAU,CAAC;AACrC,YAAY,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AACnD,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAO1C;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,wBAAgB,UAAU,CAAC,oBAAoB,EAAE,MAAM,GAAG,WAAW,GAAG,MAAM,CAwC7E;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAAC,CAAC,EAAE,UAAU,EAAE,QAAQ,SAAM,EAAE,OAAO,SAAK,GAAG,MAAM,EAAE,CAwB5F"}
@@ -1 +1 @@
1
- {"version":3,"file":"cheerio.js","sourceRoot":"","sources":["../../src/internals/cheerio.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAInD,gIAAgI;AAChI,MAAM,eAAe,GAAG,uCAAuC,CAAC;AAChE,MAAM,gBAAgB,GAClB,sGAAsG,CAAC;AAE3G;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,UAAU,UAAU,CAAC,oBAA0C;IACjE,IAAI,CAAC,oBAAoB;QAAE,OAAO,EAAE,CAAC;IAErC,MAAM,CAAC,GAAG,OAAO,oBAAoB,KAAK,UAAU,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IACjH,IAAI,IAAI,GAAG,EAAE,CAAC;IAEd,MAAM,OAAO,GAAG,CAAC,KAAiB,EAAE,EAAE;QAClC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACvB,qDAAqD;gBACrD,IAAI,KAAK,CAAC;gBACV,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,KAAK;oBAAE,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC;;oBAC/D,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBAC5C,+EAA+E;gBAC/E,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;oBAAE,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;gBAC9E,IAAI,IAAI,KAAK,CAAC;YAClB,CAAC;iBAAM,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBACvE,qCAAqC;YACzC,CAAC;iBAAM,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC/B,IAAI,IAAI,IAAI,CAAC;YACjB,CAAC;iBAAM,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACvB,IAAI,IAAI,IAAI,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACJ,2EAA2E;gBAC3E,MAAM,UAAU,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACvD,IAAI,UAAU,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;oBAAE,IAAI,IAAI,IAAI,CAAC;gBACtD,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACvB,IAAI,UAAU,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;oBAAE,IAAI,IAAI,IAAI,CAAC;YACzD,CAAC;QACL,CAAC;IACL,CAAC,CAAC;IAEF,kFAAkF;IAClF,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;IACxB,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAA
K,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAE7C,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACvB,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,sBAAsB,CAAC,CAAa,EAAE,QAAQ,GAAG,GAAG,EAAE,OAAO,GAAG,EAAE;IAC9E,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACpC,MAAM,eAAe,GAAG,IAAI,IAAI,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAE9D,IAAI,eAAe,EAAE,CAAC;QAClB,OAAO,GAAG,eAAe,CAAC;IAC9B,CAAC;IAED,OAAO,CAAC,CAAC,QAAQ,CAAC;SACb,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;SACnC,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC;SACf,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACV,yHAAyH;QACzH,MAAM,cAAc,GAAG,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,6CAA6C;QACtG,IAAI,CAAC,cAAc,IAAI,CAAC,OAAO,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CACX,qBAAqB,IAAI,uCAAuC;gBAC5D,2DAA2D,CAClE,CAAC;QACN,CAAC;QACD,OAAO,OAAO,CAAC,CAAC,CAAC,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC1D,CAAC,CAAC;SACD,MAAM,CAAC,OAAO,CAAa,CAAC;AACrC,CAAC"}
1
+ {"version":3,"file":"cheerio.js","sourceRoot":"","sources":["../../src/internals/cheerio.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAMnD,gIAAgI;AAChI,MAAM,eAAe,GAAG,uCAAuC,CAAC;AAChE,MAAM,gBAAgB,GAClB,sGAAsG,CAAC;AAE3G;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,UAAU,UAAU,CAAC,oBAA0C;IACjE,IAAI,CAAC,oBAAoB;QAAE,OAAO,EAAE,CAAC;IAErC,MAAM,CAAC,GAAG,OAAO,oBAAoB,KAAK,UAAU,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IACjH,IAAI,IAAI,GAAG,EAAE,CAAC;IAEd,MAAM,OAAO,GAAG,CAAC,KAAiB,EAAE,EAAE;QAClC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACvB,qDAAqD;gBACrD,IAAI,KAAK,CAAC;gBACV,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,KAAK;oBAAE,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC;;oBAC/D,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBAC5C,+EAA+E;gBAC/E,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;oBAAE,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;gBAC9E,IAAI,IAAI,KAAK,CAAC;YAClB,CAAC;iBAAM,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBACvE,qCAAqC;YACzC,CAAC;iBAAM,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC/B,IAAI,IAAI,IAAI,CAAC;YACjB,CAAC;iBAAM,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACvB,IAAI,IAAI,IAAI,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACJ,2EAA2E;gBAC3E,MAAM,UAAU,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACvD,IAAI,UAAU,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;oBAAE,IAAI,IAAI,IAAI,CAAC;gBACtD,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACvB,IAAI,UAAU,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;oBAAE,IAAI,IAAI,IAAI,CAAC;YACzD,CAAC;QACL,CAAC;IACL,CAAC,CAAC;IAEF,kFAAkF;IAClF,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;IACxB,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAA
K,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAE7C,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACvB,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,sBAAsB,CAAC,CAAa,EAAE,QAAQ,GAAG,GAAG,EAAE,OAAO,GAAG,EAAE;IAC9E,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACpC,MAAM,eAAe,GAAG,IAAI,IAAI,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAE9D,IAAI,eAAe,EAAE,CAAC;QAClB,OAAO,GAAG,eAAe,CAAC;IAC9B,CAAC;IAED,OAAO,CAAC,CAAC,QAAQ,CAAC;SACb,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;SACnC,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC;SACf,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACV,yHAAyH;QACzH,MAAM,cAAc,GAAG,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,6CAA6C;QACtG,IAAI,CAAC,cAAc,IAAI,CAAC,OAAO,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CACX,qBAAqB,IAAI,uCAAuC;gBAC5D,2DAA2D,CAClE,CAAC;QACN,CAAC;QACD,OAAO,OAAO,CAAC,CAAC,CAAC,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC1D,CAAC,CAAC;SACD,MAAM,CAAC,OAAO,CAAa,CAAC;AACrC,CAAC"}
@@ -27,5 +27,6 @@ interface Request<UserData extends Dictionary = Dictionary> {
27
27
  * @param [additionalFields] Object containing additional fields to be added.
28
28
  */
29
29
  export declare function createRequestDebugInfo(request: Request, response?: IncomingMessage | Partial<BrowserResponseLike>, additionalFields?: Dictionary): Dictionary;
30
+ export declare function getObjectType(value: unknown): string;
30
31
  export {};
31
32
  //# sourceMappingURL=debug.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"debug.d.ts","sourceRoot":"","sources":["../../src/internals/debug.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGrE,UAAU,mBAAmB;IACzB,MAAM,IAAI,MAAM,CAAC;CACpB;AAED,UAAU,OAAO,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU;IACtD,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,OAAO,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAClC,OAAO,EAAE,OAAO,EAChB,QAAQ,GAAE,eAAe,GAAG,OAAO,CAAC,mBAAmB,CAAM,EAC7D,gBAAgB,GAAE,UAAe,GAClC,UAAU,CAmBZ"}
1
+ {"version":3,"file":"debug.d.ts","sourceRoot":"","sources":["../../src/internals/debug.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,OAAO,KAAK,EAAE,kBAAkB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGrE,UAAU,mBAAmB;IACzB,MAAM,IAAI,MAAM,CAAC;CACpB;AAED,UAAU,OAAO,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU;IACtD,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,OAAO,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAClC,OAAO,EAAE,OAAO,EAChB,QAAQ,GAAE,eAAe,GAAG,OAAO,CAAC,mBAAmB,CAAM,EAC7D,gBAAgB,GAAE,UAAe,GAClC,UAAU,CAmBZ;AAED,wBAAgB,aAAa,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAepD"}
@@ -26,4 +26,16 @@ export function createRequestDebugInfo(request, response = {}, additionalFields
26
26
  ...additionalFields,
27
27
  };
28
28
  }
29
/**
 * Returns a short, human-readable type name for an arbitrary value.
 *
 * - Strings, numbers, booleans and bigints yield their `typeof` result.
 * - Everything else is classified by its `Object.prototype.toString` tag:
 *   `Uint8Array` is reported as `'Buffer'`; `'Date'`, `'Buffer'` and `'RegExp'`
 *   keep their casing; any other tag is lower-cased (`'null'`, `'array'`, `'object'`, ...).
 *
 * @param {unknown} value - The value to classify.
 * @returns {string} The type name.
 */
export function getObjectType(value) {
    const simple = typeof value;
    if (['string', 'number', 'boolean', 'bigint'].includes(simple)) {
        return simple;
    }

    // `Object.prototype.toString` always produces "[object <tag>]". Slice the fixed
    // wrapper off instead of matching `\w+`: a custom `Symbol.toStringTag` may be empty
    // or contain non-word characters, in which case the regex match would be `null`
    // and indexing it would throw.
    const type = Object.prototype.toString.call(value).slice('[object '.length, -1);

    if (type === 'Uint8Array') {
        return 'Buffer';
    }

    return ['Date', 'Buffer', 'RegExp'].includes(type) ? type : type.toLowerCase();
}
29
41
  //# sourceMappingURL=debug.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"debug.js","sourceRoot":"","sources":["../../src/internals/debug.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,MAAM,IAAI,CAAC;AAqBpB;;;;;;;;GAQG;AACH,MAAM,UAAU,sBAAsB,CAClC,OAAgB,EAChB,WAA2D,EAAE,EAC7D,mBAA+B,EAAE;IAEjC,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC;IACvB,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC;IACxB,EAAE,CAAC,gBAAgB,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC;IAEhC,OAAO;QACH,SAAS,EAAE,OAAO,CAAC,EAAE;QACrB,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,SAAS,EAAE,OAAO,CAAC,SAAS;QAC5B,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,UAAU,EAAE,OAAO,CAAC,UAAU;QAC9B,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,sFAAsF;QACtF,UAAU,EACN,QAAQ,IAAI,QAAQ,IAAI,QAAQ,CAAC,MAAM,YAAY,QAAQ;YACvD,CAAC,CAAC,QAAQ,CAAC,MAAM,EAAE;YACnB,CAAC,CAAE,QAA4B,CAAC,UAAU;QAClD,GAAG,gBAAgB;KACtB,CAAC;AACN,CAAC"}
1
+ {"version":3,"file":"debug.js","sourceRoot":"","sources":["../../src/internals/debug.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,MAAM,IAAI,CAAC;AAqBpB;;;;;;;;GAQG;AACH,MAAM,UAAU,sBAAsB,CAClC,OAAgB,EAChB,WAA2D,EAAE,EAC7D,mBAA+B,EAAE;IAEjC,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC;IACvB,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC;IACxB,EAAE,CAAC,gBAAgB,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC;IAEhC,OAAO;QACH,SAAS,EAAE,OAAO,CAAC,EAAE;QACrB,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,SAAS,EAAE,OAAO,CAAC,SAAS;QAC5B,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,UAAU,EAAE,OAAO,CAAC,UAAU;QAC9B,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,sFAAsF;QACtF,UAAU,EACN,QAAQ,IAAI,QAAQ,IAAI,QAAQ,CAAC,MAAM,YAAY,QAAQ;YACvD,CAAC,CAAC,QAAQ,CAAC,MAAM,EAAE;YACnB,CAAC,CAAE,QAA4B,CAAC,UAAU;QAClD,GAAG,gBAAgB;KACtB,CAAC;AACN,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAc;IACxC,MAAM,MAAM,GAAG,OAAO,KAAK,CAAC;IAE5B,IAAI,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7D,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzD,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,iBAAiB,CAAE,CAAC,CAAC,CAAC,CAAC;IAErD,IAAI,IAAI,KAAK,YAAY,EAAE,CAAC;QACxB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,OAAO,CAAC,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;AACnF,CAAC"}
@@ -1,3 +1,4 @@
1
+ import type { BaseHttpClient } from '@crawlee/types';
1
2
  export interface DownloadListOfUrlsOptions {
2
3
  /**
3
4
  * URL to the file
@@ -16,6 +17,10 @@ export interface DownloadListOfUrlsOptions {
16
17
  urlRegExp?: RegExp;
17
18
  /** Allows to use a proxy for the download request. */
18
19
  proxyUrl?: string;
20
+ /**
21
+ * Custom HTTP client to use for downloading the file.
22
+ */
23
+ httpClient?: BaseHttpClient;
19
24
  }
20
25
  /**
21
26
  * Returns a promise that resolves to an array of urls parsed from the resource available at the provided url.
@@ -1 +1 @@
1
- {"version":3,"file":"extract-urls.d.ts","sourceRoot":"","sources":["../../src/internals/extract-urls.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,yBAAyB;IACtC;;OAEG;IACH,GAAG,EAAE,MAAM,CAAC;IAEZ;;;OAGG;IACH,QAAQ,CAAC,EAAE,cAAc,CAAC;IAE1B;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,sDAAsD;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,yBAAyB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAuB9F;AAED,MAAM,WAAW,kBAAkB;IAC/B;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,EAAE,CAiBjE;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAMhF"}
1
+ {"version":3,"file":"extract-urls.d.ts","sourceRoot":"","sources":["../../src/internals/extract-urls.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAKrD,MAAM,WAAW,yBAAyB;IACtC;;OAEG;IACH,GAAG,EAAE,MAAM,CAAC;IAEZ;;;OAGG;IACH,QAAQ,CAAC,EAAE,cAAc,CAAC;IAE1B;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,sDAAsD;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;OAEG;IACH,UAAU,CAAC,EAAE,cAAc,CAAC;CAC/B;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,yBAAyB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAkC9F;AAED,MAAM,WAAW,kBAAkB;IAC/B;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,kBAAkB,GAAG,MAAM,EAAE,CAiBjE;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAMhF"}
@@ -1,4 +1,4 @@
1
- import { gotScraping } from 'got-scraping';
1
+ import { ImpitHttpClient } from '@crawlee/impit-client';
2
2
  import ow from 'ow';
3
3
  import { URL_NO_COMMAS_REGEX } from './general.js';
4
4
  /**
@@ -11,15 +11,19 @@ export async function downloadListOfUrls(options) {
11
11
  encoding: ow.optional.string,
12
12
  urlRegExp: ow.optional.regExp,
13
13
  proxyUrl: ow.optional.string,
14
+ httpClient: ow.optional.object,
14
15
  }));
15
- const { url, encoding = 'utf8', urlRegExp = URL_NO_COMMAS_REGEX, proxyUrl } = options;
16
+ const { url, encoding = 'utf8', urlRegExp = URL_NO_COMMAS_REGEX, proxyUrl, httpClient = new ImpitHttpClient(), } = options;
16
17
  // Try to detect wrong urls and fix them. Currently, detects only sharing url instead of csv download one.
17
18
  const match = url.match(/^(https:\/\/docs\.google\.com\/spreadsheets\/d\/(?:\w|-)+)\/?/);
18
19
  let fixedUrl = url;
19
20
  if (match) {
20
21
  fixedUrl = `${match[1]}/gviz/tq?tqx=out:csv`;
21
22
  }
22
- const { body: string } = await gotScraping({ url: fixedUrl, encoding, proxyUrl });
23
+ const response = await httpClient.sendRequest(new Request(fixedUrl, { method: 'GET' }), {
24
+ proxyUrl,
25
+ });
26
+ const string = new TextDecoder(encoding).decode(new Uint8Array(await response.arrayBuffer()));
23
27
  return extractUrls({ string, urlRegExp });
24
28
  }
25
29
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"extract-urls.js","sourceRoot":"","sources":["../../src/internals/extract-urls.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC3C,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAyBnD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,OAAkC;IACvE,EAAE,CACE,OAAc,EACd,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QACjB,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG;QAClB,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC7B,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;KAC/B,CAAC,CACL,CAAC;IACF,MAAM,EAAE,GAAG,EAAE,QAAQ,GAAG,MAAM,EAAE,SAAS,GAAG,mBAAmB,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAEtF,0GAA0G;IAC1G,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,+DAA+D,CAAC,CAAC;IACzF,IAAI,QAAQ,GAAG,GAAG,CAAC;IAEnB,IAAI,KAAK,EAAE,CAAC;QACR,QAAQ,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,sBAAsB,CAAC;IACjD,CAAC;IAED,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,WAAW,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,OAAO,WAAW,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;AAC9C,CAAC;AAeD;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAA2B;IACnD,EAAE,CACE,OAAc,EACd,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QACjB,MAAM,EAAE,EAAE,CAAC,MAAM;QACjB,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;KAChC,CAAC,CACL,CAAC;IACF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,mBAAmB,CAAC;IAE3D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IACxD,IAAI,CAAC;QACD,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACL,OAAO,SAAS,CAAC;IACrB,CAAC;AACL,CAAC"}
1
+ {"version":3,"file":"extract-urls.js","sourceRoot":"","sources":["../../src/internals/extract-urls.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AA8BnD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,OAAkC;IACvE,EAAE,CACE,OAAc,EACd,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QACjB,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG;QAClB,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC7B,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;QAC5B,UAAU,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;KACjC,CAAC,CACL,CAAC;IACF,MAAM,EACF,GAAG,EACH,QAAQ,GAAG,MAAM,EACjB,SAAS,GAAG,mBAAmB,EAC/B,QAAQ,EACR,UAAU,GAAG,IAAI,eAAe,EAAE,GACrC,GAAG,OAAO,CAAC;IAEZ,0GAA0G;IAC1G,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,+DAA+D,CAAC,CAAC;IACzF,IAAI,QAAQ,GAAG,GAAG,CAAC;IAEnB,IAAI,KAAK,EAAE,CAAC;QACR,QAAQ,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,sBAAsB,CAAC;IACjD,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,WAAW,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE;QACpF,QAAQ;KACX,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAE9F,OAAO,WAAW,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;AAC9C,CAAC;AAeD;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAA2B;IACnD,EAAE,CACE,OAAc,EACd,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QACjB,MAAM,EAAE,EAAE,CAAC,MAAM;QACjB,SAAS,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;KAChC,CAAC,CACL,CAAC;IACF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,mBAAmB,CAAC;IAE3D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IACxD,IAAI,CAAC;QACD,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACL,OAAO,SAAS,CAAC;IACrB,CAAC;AACL,CAAC"}
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Type guard that checks if a value is iterable (has Symbol.iterator).
3
+ * @internal
4
+ *
5
+ * **Example usage:**
6
+ * ```ts
7
+ * if (isIterable(someValue)) {
8
+ * for (const item of someValue) {
9
+ * console.log(item);
10
+ * }
11
+ * }
12
+ * ```
13
+ */
14
+ export declare function isIterable<T>(value: unknown): value is Iterable<T>;
15
+ /**
16
+ * Type guard that checks if a value is async iterable (has Symbol.asyncIterator).
17
+ * @internal
18
+ *
19
+ * **Example usage:**
20
+ * ```ts
21
+ * if (isAsyncIterable(someValue)) {
22
+ * for await (const item of someValue) {
23
+ * console.log(item);
24
+ * }
25
+ * }
26
+ * ```
27
+ */
28
+ export declare function isAsyncIterable<T>(value: unknown): value is AsyncIterable<T>;
29
+ /**
30
+ * Converts any iterable or async iterable to an async iterable.
31
+ * @internal
32
+ *
33
+ * @yields Each item from the input iterable
34
+ *
35
+ * **Example usage:**
36
+ * ```ts
37
+ * const syncArray = [1, 2, 3];
38
+ * for await (const item of asyncifyIterable(syncArray)) {
39
+ * console.log(item); // 1, 2, 3
40
+ * }
41
+ * ```
42
+ */
43
+ export declare function asyncifyIterable<T>(iterable: Iterable<T> | AsyncIterable<T>): AsyncIterable<T>;
44
+ /**
45
+ * Lazily splits the input async iterable into chunks of specified size.
46
+ * The last chunk may contain fewer items if the total number of items
47
+ * is not evenly divisible by the chunk size.
48
+ * @internal
49
+ *
50
+ * @yields Arrays of items, each containing up to chunkSize items
51
+ *
52
+ * **Example usage:**
53
+ * ```ts
54
+ * const numbers = async function* () {
55
+ * for (let i = 1; i <= 10; i++) yield i;
56
+ * };
57
+ *
58
+ * for await (const chunk of chunkedAsyncIterable(numbers(), 3)) {
59
+ * console.log(chunk); // [1, 2, 3], [4, 5, 6], [7, 8, 9], [10]
60
+ * }
61
+ * ```
62
+ */
63
+ export declare function chunkedAsyncIterable<T>(iterable: AsyncIterable<T> | Iterable<T>, chunkSize: number): AsyncIterable<T[]>;
64
+ /**
65
+ * An async iterator that also supports peeking at the next value without consuming it.
66
+ * Extends both AsyncIterator and AsyncIterable interfaces.
67
+ * @internal
68
+ */
69
+ export interface PeekableAsyncIterator<T> extends AsyncIterator<T>, AsyncIterable<T> {
70
+ /**
71
+ * Peeks at the next value without consuming it from the iterator.
72
+ * Subsequent calls to peek() will return the same value until next() is called.
73
+ *
74
+ * @returns Promise that resolves to the next value, or undefined if the iterator is exhausted
75
+ */
76
+ peek(): Promise<T | undefined>;
77
+ }
78
+ /**
79
+ * An async iterable that yields peekable async iterators.
80
+ * @internal
81
+ */
82
+ export interface PeekableAsyncIterable<T> extends AsyncIterable<T> {
83
+ [Symbol.asyncIterator](): PeekableAsyncIterator<T>;
84
+ }
85
+ /**
86
+ * Wraps an async iterable to provide peek functionality, allowing you to look at
87
+ * the next value without consuming it from the iterator.
88
+ * @internal
89
+ *
90
+ * @param iterable - The async iterable to make peekable
91
+ *
92
+ * **Example usage:**
93
+ * ```ts
94
+ * const numbers = async function* () {
95
+ * yield 1; yield 2; yield 3;
96
+ * };
97
+ *
98
+ * const peekable = peekableAsyncIterable(numbers());
99
+ * const iterator = peekable[Symbol.asyncIterator]();
100
+ *
101
+ * console.log(await iterator.peek()); // 1 (doesn't consume)
102
+ * console.log(await iterator.peek()); // 1 (still doesn't consume)
103
+ * console.log(await iterator.next()); // { value: 1, done: false } (now consumed)
104
+ * console.log(await iterator.peek()); // 2 (next value)
105
+ * ```
106
+ */
107
+ export declare function peekableAsyncIterable<T>(iterable: AsyncIterable<T> | Iterable<T>): PeekableAsyncIterable<T>;
108
+ //# sourceMappingURL=iterables.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"iterables.d.ts","sourceRoot":"","sources":["../../src/internals/iterables.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;GAYG;AACH,wBAAgB,UAAU,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,QAAQ,CAAC,CAAC,CAAC,CAUlE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,eAAe,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,aAAa,CAAC,CAAC,CAAC,CAM5E;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAuB,gBAAgB,CAAC,CAAC,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC,CAErG;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAuB,oBAAoB,CAAC,CAAC,EACzC,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,EACxC,SAAS,EAAE,MAAM,GAClB,aAAa,CAAC,CAAC,EAAE,CAAC,CAmBpB;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB,CAAC,CAAC,CAAE,SAAQ,aAAa,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC;IAChF;;;;;OAKG;IACH,IAAI,IAAI,OAAO,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC;CAClC;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB,CAAC,CAAC,CAAE,SAAQ,aAAa,CAAC,CAAC,CAAC;IAC9D,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,qBAAqB,CAAC,CAAC,CAAC,CAAC;CACtD;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,qBAAqB,CAAC,CAAC,EAAE,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,GAAG,qBAAqB,CAAC,CAAC,CAAC,CA+D3G"}
@@ -0,0 +1,167 @@
1
+ import { inspect } from 'node:util';
2
/**
 * Type guard reporting whether `value` should be treated as a synchronous iterable.
 *
 * `null`/`undefined`, strings and ArrayBuffer views (typed arrays, `DataView`) are always
 * rejected. Arrays are accepted directly; any other value qualifies when it exposes a
 * callable `Symbol.iterator`.
 * @internal
 *
 * @param value - The value to test
 * @returns `true` when `value` passes the rules above, `false` otherwise
 *
 * **Example usage:**
 * ```ts
 * if (isIterable(someValue)) {
 *     for (const item of someValue) {
 *         console.log(item);
 *     }
 * }
 * ```
 */
export function isIterable(value) {
    // Strings and binary views are deliberately not treated as collections of items.
    const isExcluded = value == null || typeof value === 'string' || ArrayBuffer.isView(value);
    if (isExcluded) {
        return false;
    }

    return Array.isArray(value) || typeof Object(value)[Symbol.iterator] === 'function';
}
24
/**
 * Type guard reporting whether `value` is an async iterable, i.e. exposes a callable
 * `Symbol.asyncIterator`.
 *
 * `null`/`undefined`, strings and ArrayBuffer views are always rejected.
 * @internal
 *
 * @param value - The value to test
 * @returns `true` when `value` passes the rules above, `false` otherwise
 *
 * **Example usage:**
 * ```ts
 * if (isAsyncIterable(someValue)) {
 *     for await (const item of someValue) {
 *         console.log(item);
 *     }
 * }
 * ```
 */
export function isAsyncIterable(value) {
    const isExcluded = value == null || typeof value === 'string' || ArrayBuffer.isView(value);

    return isExcluded ? false : typeof Object(value)[Symbol.asyncIterator] === 'function';
}
43
/**
 * Adapts a sync or async iterable into an async iterable by delegating to it
 * from inside an async generator.
 * @internal
 *
 * @param iterable - The sync or async iterable to delegate to
 * @yields Each item from the input iterable, in order
 *
 * **Example usage:**
 * ```ts
 * const syncArray = [1, 2, 3];
 * for await (const item of asyncifyIterable(syncArray)) {
 *     console.log(item); // 1, 2, 3
 * }
 * ```
 */
export async function* asyncifyIterable(iterable) {
    // `yield*` is used (rather than a manual loop) so that delegation semantics stay intact.
    yield* iterable;
}
60
/**
 * Lazily splits the input iterable into chunks of the specified size.
 * The last chunk may contain fewer items if the total number of items
 * is not evenly divisible by the chunk size.
 *
 * Because this is an async generator, the `chunkSize` validation runs when the
 * first item is requested, so an invalid size surfaces as a rejection of the
 * first `next()` call rather than as a synchronous throw.
 * @internal
 *
 * @param iterable - The sync or async iterable to split
 * @param chunkSize - Maximum number of items per chunk; must be a number >= 1 (`NaN` is rejected)
 * @yields Arrays of items, each containing up to chunkSize items; every chunk is a fresh array
 * @throws Error when `chunkSize` is not a number, is `NaN`, or is smaller than 1
 *
 * **Example usage:**
 * ```ts
 * const numbers = async function* () {
 *     for (let i = 1; i <= 10; i++) yield i;
 * };
 *
 * for await (const chunk of chunkedAsyncIterable(numbers(), 3)) {
 *     console.log(chunk); // [1, 2, 3], [4, 5, 6], [7, 8, 9], [10]
 * }
 * ```
 */
export async function* chunkedAsyncIterable(iterable, chunkSize) {
    // NaN must be rejected explicitly: `NaN < 1` is false, and `length >= NaN` is never true,
    // which would silently buffer the whole input into a single chunk.
    if (typeof chunkSize !== 'number' || Number.isNaN(chunkSize) || chunkSize < 1) {
        throw new Error(`Chunk size must be a positive number (${inspect(chunkSize)}) received`);
    }

    let chunk = [];

    for await (const item of iterable) {
        chunk.push(item);

        if (chunk.length >= chunkSize) {
            yield chunk;
            // Start a new array so that already-yielded chunks are never mutated.
            chunk = [];
        }
    }

    // Flush the trailing, partially filled chunk (if any).
    if (chunk.length) {
        yield chunk;
    }
}
95
/**
 * Wraps a sync or async iterable so that the upcoming value can be inspected
 * without consuming it.
 *
 * Every call to `[Symbol.asyncIterator]()` on the returned object hands back the
 * same shared iterator, so iteration state (including a peeked value) carries over
 * between consumers.
 * @internal
 *
 * @param iterable - The sync or async iterable to make peekable
 * @returns An async iterable whose iterator additionally exposes `peek()`
 *
 * **Example usage:**
 * ```ts
 * const numbers = async function* () {
 *     yield 1; yield 2; yield 3;
 * };
 *
 * const peekable = peekableAsyncIterable(numbers());
 * const iterator = peekable[Symbol.asyncIterator]();
 *
 * console.log(await iterator.peek()); // 1 (doesn't consume)
 * console.log(await iterator.peek()); // 1 (still doesn't consume)
 * console.log(await iterator.next()); // { value: 1, done: false } (now consumed)
 * console.log(await iterator.peek()); // 2 (next value)
 * ```
 */
export function peekableAsyncIterable(iterable) {
    // Async generator delegating to the input; this accepts sync and async iterables alike.
    const source = (async function* () {
        yield* iterable;
    })();

    // Result stashed by peek() and not yet handed out by next(); undefined when nothing is stashed.
    let buffered;
    let finished = false;

    const exhaustedResult = () => ({ done: true, value: undefined });

    const peekableIterator = {
        async next() {
            if (buffered === undefined) {
                if (finished) {
                    return exhaustedResult();
                }

                const fresh = await source.next();
                if (fresh.done) {
                    finished = true;
                }
                return fresh;
            }

            // A previously peeked result exists: hand it out and clear the stash.
            const stashed = buffered;
            buffered = undefined;

            if (!stashed.done) {
                return { done: false, value: stashed.value };
            }

            finished = true;
            return exhaustedResult();
        },

        async peek() {
            if (buffered !== undefined) {
                return buffered.done ? undefined : buffered.value;
            }

            if (finished) {
                return undefined;
            }

            const fresh = await source.next();
            buffered = { done: fresh.done ?? false, value: fresh.value };

            if (!fresh.done) {
                return fresh.value;
            }

            finished = true;
            return undefined;
        },

        [Symbol.asyncIterator]() {
            return this;
        },
    };

    return {
        [Symbol.asyncIterator]() {
            return peekableIterator;
        },
    };
}
167
+ //# sourceMappingURL=iterables.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"iterables.js","sourceRoot":"","sources":["../../src/internals/iterables.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,UAAU,CAAI,KAAc;IACxC,IAAI,KAAK,IAAI,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAC1E,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,OAAO,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,UAAU,CAAC;AAChE,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,eAAe,CAAI,KAAc;IAC7C,IAAI,KAAK,IAAI,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;QAC1E,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,OAAO,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,KAAK,UAAU,CAAC;AACrE,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,gBAAgB,CAAI,QAAwC;IAC/E,KAAK,CAAC,CAAC,QAAQ,CAAC;AACpB,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,oBAAoB,CACvC,QAAwC,EACxC,SAAiB;IAEjB,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QACjD,MAAM,IAAI,KAAK,CAAC,yCAAyC,OAAO,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IAC7F,CAAC;IAED,IAAI,KAAK,GAAQ,EAAE,CAAC;IAEpB,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAChC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEjB,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YAC5B,MAAM,KAAK,CAAC;YACZ,KAAK,GAAG,EAAE,CAAC;QACf,CAAC;IACL,CAAC;IAED,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QACf,MAAM,KAAK,CAAC;IAChB,CAAC;AACL,CAAC;AAyBD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,UAAU,qBAAqB,CAAI,QAAwC;IAC7E,MAAM,QAAQ,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;IACpE,IAAI,WAAoD,CAAC;IACzD,IAAI,WAAW,GAAG,KAAK,CAAC;IAExB,MAAM,gBAAgB,GAA6B;QAC/C,KAAK,CAAC,IAAI;YACN,0DAA0D;YAC1D,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;gBAC5B,MAAM,MAAM,GAAG,WAAW,CAAC;gBAC3B,WAAW,GAAG,SAAS,CAAC;gBAExB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;oBACd,WAAW,GAAG,IAAI,CAAC;oBACnB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;gBAC5C,CAAC;gBAED,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC;YAChD,C
AAC;YAED,IAAI,WAAW,EAAE,CAAC;gBACd,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;YAC5C,CAAC;YAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAErC,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;gBACd,WAAW,GAAG,IAAI,CAAC;YACvB,CAAC;YAED,OAAO,MAAM,CAAC;QAClB,CAAC;QAED,KAAK,CAAC,IAAI;YACN,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;gBAC5B,OAAO,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC;YAC5D,CAAC;YAED,IAAI,WAAW,EAAE,CAAC;gBACd,OAAO,SAAS,CAAC;YACrB,CAAC;YAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrC,WAAW,GAAG,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,KAAK,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC;YAElE,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;gBACd,WAAW,GAAG,IAAI,CAAC;gBACnB,OAAO,SAAS,CAAC;YACrB,CAAC;YAED,OAAO,MAAM,CAAC,KAAK,CAAC;QACxB,CAAC;QAED,CAAC,MAAM,CAAC,aAAa,CAAC;YAClB,OAAO,IAAI,CAAC;QAChB,CAAC;KACJ,CAAC;IAEF,OAAO;QACH,CAAC,MAAM,CAAC,aAAa,CAAC;YAClB,OAAO,gBAAgB,CAAC;QAC5B,CAAC;KACJ,CAAC;AACN,CAAC"}
@@ -1,3 +1,4 @@
1
+ import type { BaseHttpClient } from '@crawlee/types';
1
2
  import { Sitemap } from './sitemap.js';
2
3
  /**
3
4
  * Loads and queries information from a [robots.txt file](https://en.wikipedia.org/wiki/Robots.txt).
@@ -26,7 +27,10 @@ export declare class RobotsTxtFile {
26
27
  * @param url the URL to fetch robots.txt for
27
28
  * @param [proxyUrl] a proxy to be used for fetching the robots.txt file
28
29
  */
29
- static find(url: string, proxyUrl?: string): Promise<RobotsTxtFile>;
30
+ static find(url: string, options?: {
31
+ proxyUrl?: string;
32
+ httpClient?: BaseHttpClient;
33
+ }): Promise<RobotsTxtFile>;
30
34
  /**
31
35
  * Allows providing the URL and robots.txt content explicitly instead of loading it from the target site.
32
36
  * @param url the URL for robots.txt file
@@ -34,7 +38,10 @@ export declare class RobotsTxtFile {
34
38
  * @param [proxyUrl] a proxy to be used for fetching the robots.txt file
35
39
  */
36
40
  static from(url: string, content: string, proxyUrl?: string): RobotsTxtFile;
37
- protected static load(url: string, proxyUrl?: string): Promise<RobotsTxtFile>;
41
+ protected static load(url: string, options?: {
42
+ proxyUrl?: string;
43
+ httpClient?: BaseHttpClient;
44
+ }): Promise<RobotsTxtFile>;
38
45
  /**
39
46
  * Check if a URL should be crawled by robots.
40
47
  * @param url the URL to check against the rules in robots.txt
@@ -1 +1 @@
1
- {"version":3,"file":"robots.d.ts","sourceRoot":"","sources":["../../src/internals/robots.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAIvC;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,aAAa;IAElB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,QAAQ,CAAC;IAFrB,OAAO;IAKP;;;;OAIG;WACU,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAQzE;;;;;OAKG;IACH,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;qBAKpD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAiCnF;;;;OAIG;IACH,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,SAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,WAAW,IAAI,MAAM,EAAE;IAIvB;;OAEG;IACG,aAAa,IAAI,OAAO,CAAC,OAAO,CAAC;IAIvC;;OAEG;IACG,qBAAqB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;CAGnD;AAGD,OAAO,EAAE,aAAa,IAAI,UAAU,EAAE,CAAC"}
1
+ {"version":3,"file":"robots.d.ts","sourceRoot":"","sources":["../../src/internals/robots.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAIrD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAEvC;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,aAAa;IAElB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,QAAQ,CAAC;IAFrB,OAAO;IAKP;;;;OAIG;WACU,IAAI,CACb,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,cAAc,CAAA;KAAE,GAC7D,OAAO,CAAC,aAAa,CAAC;IAQzB;;;;;OAKG;IACH,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;qBAKpD,IAAI,CACvB,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,cAAc,CAAA;KAAE,GAC7D,OAAO,CAAC,aAAa,CAAC;IA6BzB;;;;OAIG;IACH,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,SAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,WAAW,IAAI,MAAM,EAAE;IAIvB;;OAEG;IACG,aAAa,IAAI,OAAO,CAAC,OAAO,CAAC;IAIvC;;OAEG;IACG,qBAAqB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;CAGnD;AAGD,OAAO,EAAE,aAAa,IAAI,UAAU,EAAE,CAAC"}
@@ -1,7 +1,6 @@
1
- import { gotScraping } from 'got-scraping';
1
+ import { ImpitHttpClient } from '@crawlee/impit-client';
2
2
  import robotsParser from 'robots-parser';
3
3
  import { Sitemap } from './sitemap.js';
4
- let HTTPError;
5
4
  /**
6
5
  * Loads and queries information from a [robots.txt file](https://en.wikipedia.org/wiki/Robots.txt).
7
6
  *
@@ -32,11 +31,11 @@ export class RobotsTxtFile {
32
31
  * @param url the URL to fetch robots.txt for
33
32
  * @param [proxyUrl] a proxy to be used for fetching the robots.txt file
34
33
  */
35
- static async find(url, proxyUrl) {
34
+ static async find(url, options) {
36
35
  const robotsTxtFileUrl = new URL(url);
37
36
  robotsTxtFileUrl.pathname = '/robots.txt';
38
37
  robotsTxtFileUrl.search = '';
39
- return RobotsTxtFile.load(robotsTxtFileUrl.toString(), proxyUrl);
38
+ return RobotsTxtFile.load(robotsTxtFileUrl.toString(), options);
40
39
  }
41
40
  /**
42
41
  * Allows providing the URL and robots.txt content explicitly instead of loading it from the target site.
@@ -48,33 +47,26 @@ export class RobotsTxtFile {
48
47
  // @ts-ignore
49
48
  return new RobotsTxtFile(robotsParser(url, content), proxyUrl);
50
49
  }
51
- static async load(url, proxyUrl) {
52
- if (!HTTPError) {
53
- HTTPError = (await import('got-scraping')).HTTPError;
50
+ static async load(url, options) {
51
+ const { proxyUrl, httpClient = new ImpitHttpClient({ followRedirects: true }) } = options || {};
52
+ const response = await httpClient.sendRequest(new Request(url, { method: 'GET' }), {
53
+ proxyUrl,
54
+ });
55
+ if (response.status < 200 || response.status >= 300) {
56
+ throw new Error(`Failed to load robots.txt from ${url}: HTTP ${response.status}`);
54
57
  }
55
- try {
56
- const response = await gotScraping({
57
- url,
58
- proxyUrl,
59
- method: 'GET',
60
- responseType: 'text',
61
- });
62
- // @ts-ignore
63
- return new RobotsTxtFile(robotsParser(url.toString(), response.body), proxyUrl);
64
- }
65
- catch (e) {
66
- if (e instanceof HTTPError && e.response.statusCode === 404) {
67
- return new RobotsTxtFile({
68
- isAllowed() {
69
- return true;
70
- },
71
- getSitemaps() {
72
- return [];
73
- },
74
- }, proxyUrl);
75
- }
76
- throw e;
58
+ if (response.status === 404) {
59
+ return new RobotsTxtFile({
60
+ isAllowed() {
61
+ return true;
62
+ },
63
+ getSitemaps() {
64
+ return [];
65
+ },
66
+ }, proxyUrl);
77
67
  }
68
+ // @ts-ignore
69
+ return new RobotsTxtFile(robotsParser(url.toString(), await response.text()), proxyUrl);
78
70
  }
79
71
  /**
80
72
  * Check if a URL should be crawled by robots.
@@ -1 +1 @@
1
- {"version":3,"file":"robots.js","sourceRoot":"","sources":["../../src/internals/robots.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE3C,OAAO,YAAY,MAAM,eAAe,CAAC;AAEzC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAEvC,IAAI,SAAgC,CAAC;AAErC;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,OAAO,aAAa;IAEV;IACA;IAFZ,YACY,MAAgD,EAChD,QAAiB;QADjB,WAAM,GAAN,MAAM,CAA0C;QAChD,aAAQ,GAAR,QAAQ,CAAS;IAC1B,CAAC;IAEJ;;;;OAIG;IACH,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,GAAW,EAAE,QAAiB;QAC5C,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACtC,gBAAgB,CAAC,QAAQ,GAAG,aAAa,CAAC;QAC1C,gBAAgB,CAAC,MAAM,GAAG,EAAE,CAAC;QAE7B,OAAO,aAAa,CAAC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,EAAE,QAAQ,CAAC,CAAC;IACrE,CAAC;IAED;;;;;OAKG;IACH,MAAM,CAAC,IAAI,CAAC,GAAW,EAAE,OAAe,EAAE,QAAiB;QACvD,aAAa;QACb,OAAO,IAAI,aAAa,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,QAAQ,CAAC,CAAC;IACnE,CAAC;IAES,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,GAAW,EAAE,QAAiB;QACtD,IAAI,CAAC,SAAS,EAAE,CAAC;YACb,SAAS,GAAG,CAAC,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS,CAAC;QACzD,CAAC;QAED,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC;gBAC/B,GAAG;gBACH,QAAQ;gBACR,MAAM,EAAE,KAAK;gBACb,YAAY,EAAE,MAAM;aACvB,CAAC,CAAC;YAEH,aAAa;YACb,OAAO,IAAI,aAAa,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,QAAQ,CAAC,CAAC;QACpF,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACT,IAAI,CAAC,YAAY,SAAS,IAAI,CAAC,CAAC,QAAQ,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;gBAC1D,OAAO,IAAI,aAAa,CACpB;oBACI,SAAS;wBACL,OAAO,IAAI,CAAC;oBAChB,CAAC;oBACD,WAAW;wBACP,OAAO,EAAE,CAAC;oBACd,CAAC;iBACJ,EACD,QAAQ,CACX,CAAC;YACN,CAAC;YACD,MAAM,CAAC,CAAC;QACZ,CAAC;IACL,CAAC;IAED;;;;OAIG;IACH,SAAS,CAAC,GAAW,EAAE,SAAS,GAAG,GAAG;QAClC,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,GAAG,EAAE,SAAS,CAAC,IAAI,IAAI,CAAC,CAAC,+FAA+F;IACzJ,CAAC;IAED;;OAEG;IACH,WAAW;QACP,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa;QACf,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAClE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,qBAAqB;QACvB,OAAO,CAAC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC,IAAI,CAAC;IAC7
C,CAAC;CACJ;AAED,+BAA+B;AAC/B,OAAO,EAAE,aAAa,IAAI,UAAU,EAAE,CAAC"}
1
+ {"version":3,"file":"robots.js","sourceRoot":"","sources":["../../src/internals/robots.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAGxD,OAAO,YAAY,MAAM,eAAe,CAAC;AAEzC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAEvC;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,OAAO,aAAa;IAEV;IACA;IAFZ,YACY,MAAgD,EAChD,QAAiB;QADjB,WAAM,GAAN,MAAM,CAA0C;QAChD,aAAQ,GAAR,QAAQ,CAAS;IAC1B,CAAC;IAEJ;;;;OAIG;IACH,MAAM,CAAC,KAAK,CAAC,IAAI,CACb,GAAW,EACX,OAA4D;QAE5D,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACtC,gBAAgB,CAAC,QAAQ,GAAG,aAAa,CAAC;QAC1C,gBAAgB,CAAC,MAAM,GAAG,EAAE,CAAC;QAE7B,OAAO,aAAa,CAAC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,EAAE,OAAO,CAAC,CAAC;IACpE,CAAC;IAED;;;;;OAKG;IACH,MAAM,CAAC,IAAI,CAAC,GAAW,EAAE,OAAe,EAAE,QAAiB;QACvD,aAAa;QACb,OAAO,IAAI,aAAa,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,QAAQ,CAAC,CAAC;IACnE,CAAC;IAES,MAAM,CAAC,KAAK,CAAC,IAAI,CACvB,GAAW,EACX,OAA4D;QAE5D,MAAM,EAAE,QAAQ,EAAE,UAAU,GAAG,IAAI,eAAe,CAAC,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QAEhG,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,WAAW,CAAC,IAAI,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE;YAC/E,QAAQ;SACX,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;YAClD,MAAM,IAAI,KAAK,CAAC,kCAAkC,GAAG,UAAU,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACtF,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC1B,OAAO,IAAI,aAAa,CACpB;gBACI,SAAS;oBACL,OAAO,IAAI,CAAC;gBAChB,CAAC;gBACD,WAAW;oBACP,OAAO,EAAE,CAAC;gBACd,CAAC;aACJ,EACD,QAAQ,CACX,CAAC;QACN,CAAC;QAED,aAAa;QACb,OAAO,IAAI,aAAa,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;IAC5F,CAAC;IAED;;;;OAIG;IACH,SAAS,CAAC,GAAW,EAAE,SAAS,GAAG,GAAG;QAClC,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,GAAG,EAAE,SAAS,CAAC,IAAI,IAAI,CAAC,CAAC,+FAA+F;IACzJ,CAAC;IAED;;OAEG;IACH,WAAW;QACP,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa;QACf,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAClE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,qBAAqB;QACvB,O
AAO,CAAC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC,IAAI,CAAC;IAC7C,CAAC;CACJ;AAED,+BAA+B;AAC/B,OAAO,EAAE,aAAa,IAAI,UAAU,EAAE,CAAC"}