website-scrap-engine 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. package/lib/downloader/adjust-concurrency.d.ts +2 -1
  2. package/lib/downloader/adjust-concurrency.d.ts.map +1 -0
  3. package/lib/downloader/adjust-concurrency.js +4 -8
  4. package/lib/downloader/adjust-concurrency.js.map +1 -1
  5. package/lib/downloader/index.d.ts +9 -8
  6. package/lib/downloader/index.d.ts.map +1 -0
  7. package/lib/downloader/index.js +8 -40
  8. package/lib/downloader/index.js.map +1 -1
  9. package/lib/downloader/main.d.ts +15 -6
  10. package/lib/downloader/main.d.ts.map +1 -0
  11. package/lib/downloader/main.js +49 -32
  12. package/lib/downloader/main.js.map +1 -1
  13. package/lib/downloader/multi.d.ts +7 -5
  14. package/lib/downloader/multi.d.ts.map +1 -0
  15. package/lib/downloader/multi.js +10 -17
  16. package/lib/downloader/multi.js.map +1 -1
  17. package/lib/downloader/pipeline-executor-impl.d.ts +8 -7
  18. package/lib/downloader/pipeline-executor-impl.d.ts.map +1 -0
  19. package/lib/downloader/pipeline-executor-impl.js +1 -5
  20. package/lib/downloader/pipeline-executor-impl.js.map +1 -1
  21. package/lib/downloader/single.d.ts +4 -3
  22. package/lib/downloader/single.d.ts.map +1 -0
  23. package/lib/downloader/single.js +7 -11
  24. package/lib/downloader/single.js.map +1 -1
  25. package/lib/downloader/types.d.ts +4 -4
  26. package/lib/downloader/types.d.ts.map +1 -0
  27. package/lib/downloader/types.js +2 -5
  28. package/lib/downloader/types.js.map +1 -1
  29. package/lib/downloader/worker-pool.d.ts +6 -7
  30. package/lib/downloader/worker-pool.d.ts.map +1 -0
  31. package/lib/downloader/worker-pool.js +7 -35
  32. package/lib/downloader/worker-pool.js.map +1 -1
  33. package/lib/downloader/worker-type.d.ts +4 -3
  34. package/lib/downloader/worker-type.d.ts.map +1 -0
  35. package/lib/downloader/worker-type.js +1 -2
  36. package/lib/downloader/worker.d.ts +1 -0
  37. package/lib/downloader/worker.d.ts.map +1 -0
  38. package/lib/downloader/worker.js +52 -27
  39. package/lib/downloader/worker.js.map +1 -1
  40. package/lib/index.d.ts +9 -8
  41. package/lib/index.d.ts.map +1 -0
  42. package/lib/index.js +7 -33
  43. package/lib/index.js.map +1 -1
  44. package/lib/io.d.ts +2 -1
  45. package/lib/io.d.ts.map +1 -0
  46. package/lib/io.js +17 -25
  47. package/lib/io.js.map +1 -1
  48. package/lib/life-cycle/adapters.d.ts +7 -5
  49. package/lib/life-cycle/adapters.d.ts.map +1 -0
  50. package/lib/life-cycle/adapters.js +18 -30
  51. package/lib/life-cycle/adapters.js.map +1 -1
  52. package/lib/life-cycle/default-life-cycle.d.ts +2 -1
  53. package/lib/life-cycle/default-life-cycle.d.ts.map +1 -0
  54. package/lib/life-cycle/default-life-cycle.js +28 -32
  55. package/lib/life-cycle/default-life-cycle.js.map +1 -1
  56. package/lib/life-cycle/detect-resource-type.d.ts +2 -1
  57. package/lib/life-cycle/detect-resource-type.d.ts.map +1 -0
  58. package/lib/life-cycle/detect-resource-type.js +12 -17
  59. package/lib/life-cycle/detect-resource-type.js.map +1 -1
  60. package/lib/life-cycle/download-resource.d.ts +6 -7
  61. package/lib/life-cycle/download-resource.d.ts.map +1 -0
  62. package/lib/life-cycle/download-resource.js +23 -52
  63. package/lib/life-cycle/download-resource.js.map +1 -1
  64. package/lib/life-cycle/download-streaming-resource.d.ts +6 -5
  65. package/lib/life-cycle/download-streaming-resource.d.ts.map +1 -0
  66. package/lib/life-cycle/download-streaming-resource.js +39 -74
  67. package/lib/life-cycle/download-streaming-resource.js.map +1 -1
  68. package/lib/life-cycle/index.d.ts +16 -15
  69. package/lib/life-cycle/index.d.ts.map +1 -0
  70. package/lib/life-cycle/index.js +14 -59
  71. package/lib/life-cycle/index.js.map +1 -1
  72. package/lib/life-cycle/pipeline-executor.d.ts +7 -6
  73. package/lib/life-cycle/pipeline-executor.d.ts.map +1 -0
  74. package/lib/life-cycle/pipeline-executor.js +1 -2
  75. package/lib/life-cycle/process-css.d.ts +5 -4
  76. package/lib/life-cycle/process-css.d.ts.map +1 -0
  77. package/lib/life-cycle/process-css.js +10 -18
  78. package/lib/life-cycle/process-css.js.map +1 -1
  79. package/lib/life-cycle/process-html-meta.d.ts +4 -3
  80. package/lib/life-cycle/process-html-meta.d.ts.map +1 -0
  81. package/lib/life-cycle/process-html-meta.js +11 -15
  82. package/lib/life-cycle/process-html-meta.js.map +1 -1
  83. package/lib/life-cycle/process-html.d.ts +4 -3
  84. package/lib/life-cycle/process-html.d.ts.map +1 -0
  85. package/lib/life-cycle/process-html.js +27 -31
  86. package/lib/life-cycle/process-html.js.map +1 -1
  87. package/lib/life-cycle/process-site-map.d.ts +4 -3
  88. package/lib/life-cycle/process-site-map.d.ts.map +1 -0
  89. package/lib/life-cycle/process-site-map.js +7 -11
  90. package/lib/life-cycle/process-site-map.js.map +1 -1
  91. package/lib/life-cycle/process-source-map.d.ts +4 -4
  92. package/lib/life-cycle/process-source-map.d.ts.map +1 -0
  93. package/lib/life-cycle/process-source-map.js +16 -21
  94. package/lib/life-cycle/process-source-map.js.map +1 -1
  95. package/lib/life-cycle/process-svg.d.ts +4 -3
  96. package/lib/life-cycle/process-svg.d.ts.map +1 -0
  97. package/lib/life-cycle/process-svg.js +17 -21
  98. package/lib/life-cycle/process-svg.js.map +1 -1
  99. package/lib/life-cycle/read-or-copy-local-resource.d.ts +4 -3
  100. package/lib/life-cycle/read-or-copy-local-resource.d.ts.map +1 -0
  101. package/lib/life-cycle/read-or-copy-local-resource.js +15 -42
  102. package/lib/life-cycle/read-or-copy-local-resource.js.map +1 -1
  103. package/lib/life-cycle/save-html-to-disk.d.ts +6 -4
  104. package/lib/life-cycle/save-html-to-disk.d.ts.map +1 -0
  105. package/lib/life-cycle/save-html-to-disk.js +24 -33
  106. package/lib/life-cycle/save-html-to-disk.js.map +1 -1
  107. package/lib/life-cycle/save-resource-to-disk.d.ts +4 -3
  108. package/lib/life-cycle/save-resource-to-disk.d.ts.map +1 -0
  109. package/lib/life-cycle/save-resource-to-disk.js +10 -17
  110. package/lib/life-cycle/save-resource-to-disk.js.map +1 -1
  111. package/lib/life-cycle/skip-links.d.ts +1 -0
  112. package/lib/life-cycle/skip-links.d.ts.map +1 -0
  113. package/lib/life-cycle/skip-links.js +6 -10
  114. package/lib/life-cycle/skip-links.js.map +1 -1
  115. package/lib/life-cycle/types.d.ts +8 -7
  116. package/lib/life-cycle/types.d.ts.map +1 -0
  117. package/lib/life-cycle/types.js +1 -2
  118. package/lib/logger/config-logger.d.ts +2 -1
  119. package/lib/logger/config-logger.d.ts.map +1 -0
  120. package/lib/logger/config-logger.js +4 -30
  121. package/lib/logger/config-logger.js.map +1 -1
  122. package/lib/logger/logger-worker.d.ts +3 -2
  123. package/lib/logger/logger-worker.d.ts.map +1 -0
  124. package/lib/logger/logger-worker.js +11 -13
  125. package/lib/logger/logger-worker.js.map +1 -1
  126. package/lib/logger/logger.d.ts +2 -1
  127. package/lib/logger/logger.d.ts.map +1 -0
  128. package/lib/logger/logger.js +15 -17
  129. package/lib/logger/logger.js.map +1 -1
  130. package/lib/options.d.ts +8 -8
  131. package/lib/options.d.ts.map +1 -0
  132. package/lib/options.js +22 -32
  133. package/lib/options.js.map +1 -1
  134. package/lib/resource.d.ts +3 -4
  135. package/lib/resource.d.ts.map +1 -0
  136. package/lib/resource.js +34 -70
  137. package/lib/resource.js.map +1 -1
  138. package/lib/sources.d.ts +2 -1
  139. package/lib/sources.d.ts.map +1 -0
  140. package/lib/sources.js +9 -12
  141. package/lib/sources.js.map +1 -1
  142. package/lib/types.d.ts +1 -0
  143. package/lib/types.d.ts.map +1 -0
  144. package/lib/types.js +1 -2
  145. package/lib/util.d.ts +4 -3
  146. package/lib/util.d.ts.map +1 -0
  147. package/lib/util.js +17 -34
  148. package/lib/util.js.map +1 -1
  149. package/package.json +19 -21
  150. package/src/downloader/adjust-concurrency.ts +2 -2
  151. package/src/downloader/index.ts +8 -8
  152. package/src/downloader/main.ts +50 -28
  153. package/src/downloader/multi.ts +11 -10
  154. package/src/downloader/pipeline-executor-impl.ts +7 -7
  155. package/src/downloader/single.ts +9 -6
  156. package/src/downloader/types.ts +3 -3
  157. package/src/downloader/worker-pool.ts +9 -9
  158. package/src/downloader/worker-type.ts +3 -3
  159. package/src/downloader/worker.ts +51 -29
  160. package/src/index.ts +8 -8
  161. package/src/io.ts +6 -6
  162. package/src/life-cycle/adapters.ts +7 -6
  163. package/src/life-cycle/css-url-parser.d.ts +1 -1
  164. package/src/life-cycle/default-life-cycle.ts +15 -15
  165. package/src/life-cycle/detect-resource-type.ts +2 -2
  166. package/src/life-cycle/download-resource.ts +18 -20
  167. package/src/life-cycle/download-streaming-resource.ts +20 -18
  168. package/src/life-cycle/index.ts +15 -15
  169. package/src/life-cycle/pipeline-executor.ts +6 -6
  170. package/src/life-cycle/process-css.ts +6 -5
  171. package/src/life-cycle/process-html-meta.ts +7 -6
  172. package/src/life-cycle/process-html.ts +21 -13
  173. package/src/life-cycle/process-site-map.ts +7 -6
  174. package/src/life-cycle/process-source-map.ts +5 -4
  175. package/src/life-cycle/process-svg.ts +10 -9
  176. package/src/life-cycle/read-or-copy-local-resource.ts +9 -7
  177. package/src/life-cycle/save-html-to-disk.ts +9 -13
  178. package/src/life-cycle/save-resource-to-disk.ts +6 -6
  179. package/src/life-cycle/types.ts +7 -7
  180. package/src/logger/config-logger.ts +5 -3
  181. package/src/logger/logger-worker.ts +8 -4
  182. package/src/logger/logger.ts +6 -4
  183. package/src/options.ts +15 -19
  184. package/src/resource.ts +10 -5
  185. package/src/sources.ts +1 -1
  186. package/src/util.ts +6 -10
  187. package/tsconfig.json +6 -2
package/lib/sources.js CHANGED
@@ -1,12 +1,9 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.sources = void 0;
4
- const resource_1 = require("./resource");
1
+ import { ResourceType } from './resource.js';
5
2
  // https://github.com/website-scraper/node-website-scraper
6
3
  // /blob/66f5113475843ae86f12ea9e5d2ebcfade9f056e/lib/config/defaults.js
7
- exports.sources = [
8
- { selector: 'style', type: resource_1.ResourceType.CssInline },
9
- { selector: '[style]', attr: 'style', type: resource_1.ResourceType.CssInline },
4
+ export const sources = [
5
+ { selector: 'style', type: ResourceType.CssInline },
6
+ { selector: '[style]', attr: 'style', type: ResourceType.CssInline },
10
7
  { selector: 'img', attr: 'src' },
11
8
  { selector: 'img', attr: 'srcset' },
12
9
  { selector: 'input', attr: 'src' },
@@ -14,7 +11,7 @@ exports.sources = [
14
11
  { selector: 'embed', attr: 'src' },
15
12
  { selector: 'param[name="movie"]', attr: 'value' },
16
13
  { selector: 'script', attr: 'src' },
17
- { selector: 'link[rel="stylesheet"]', attr: 'href', type: resource_1.ResourceType.Css },
14
+ { selector: 'link[rel="stylesheet"]', attr: 'href', type: ResourceType.Css },
18
15
  { selector: 'link[rel*="icon"]', attr: 'href' },
19
16
  { selector: 'link[rel*="preload"]', attr: 'href' },
20
17
  // prefetch links not included by default
@@ -38,9 +35,9 @@ exports.sources = [
38
35
  { selector: 'audio', attr: 'src' },
39
36
  { selector: 'audio source', attr: 'src' },
40
37
  { selector: 'audio track', attr: 'src' },
41
- { selector: 'frame', attr: 'src', type: resource_1.ResourceType.Html },
42
- { selector: 'iframe', attr: 'src', type: resource_1.ResourceType.Html },
43
- { selector: 'a', attr: 'href', type: resource_1.ResourceType.Html },
38
+ { selector: 'frame', attr: 'src', type: ResourceType.Html },
39
+ { selector: 'iframe', attr: 'src', type: ResourceType.Html },
40
+ { selector: 'a', attr: 'href', type: ResourceType.Html },
44
41
  // https://github.com/website-scraper/node-website-scraper/pull/408
45
42
  { selector: '[background]', attr: 'background' },
46
43
  ].map((obj) => {
@@ -48,7 +45,7 @@ exports.sources = [
48
45
  obj.selector += `[${obj.attr}]`;
49
46
  }
50
47
  if (!obj.type) {
51
- obj.type = resource_1.ResourceType.Binary;
48
+ obj.type = ResourceType.Binary;
52
49
  }
53
50
  return obj;
54
51
  });
@@ -1 +1 @@
1
- {"version":3,"file":"sources.js","sourceRoot":"","sources":["../src/sources.ts"],"names":[],"mappings":";;;AAAA,yCAAwC;AAQxC,0DAA0D;AAC1D,wEAAwE;AAC3D,QAAA,OAAO,GAAuB;IACzC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,uBAAY,CAAC,SAAS,EAAC;IACjD,EAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,uBAAY,CAAC,SAAS,EAAC;IAClE,EAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAC;IAC9B,EAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAC;IACjC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC;IAChC,EAAC,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAC;IAClC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC;IAChC,EAAC,QAAQ,EAAE,qBAAqB,EAAE,IAAI,EAAE,OAAO,EAAC;IAChD,EAAC,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAC;IACjC,EAAC,QAAQ,EAAE,wBAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,uBAAY,CAAC,GAAG,EAAC;IAC1E,EAAC,QAAQ,EAAE,mBAAmB,EAAE,IAAI,EAAE,MAAM,EAAC;IAC7C,EAAC,QAAQ,EAAE,sBAAsB,EAAE,IAAI,EAAE,MAAM,EAAC;IAChD,yCAAyC;IACzC,qDAAqD;IACrD,EAAC,QAAQ,EAAE,qBAAqB,EAAE,IAAI,EAAE,YAAY,EAAC;IACrD,EAAC,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,EAAC;IACvC,EAAC,QAAQ,EAAE,gBAAgB,EAAE,IAAI,EAAE,QAAQ,EAAC;IAC5C,EAAC,QAAQ,EAAE,6BAA6B,EAAE,IAAI,EAAE,SAAS,EAAC;IAC1D,EAAC,QAAQ,EAAE,mCAAmC,EAAE,IAAI,EAAE,SAAS,EAAC;IAChE,EAAC,QAAQ,EAAE,0CAA0C,EAAE,IAAI,EAAE,SAAS,EAAC;IACvE,EAAC,QAAQ,EAAE,6BAA6B,EAAE,IAAI,EAAE,SAAS,EAAC;IAC1D,EAAC,QAAQ,EAAE,mCAAmC,EAAE,IAAI,EAAE,SAAS,EAAC;IAChE,EAAC,QAAQ,EAAE,0CAA0C,EAAE,IAAI,EAAE,SAAS,EAAC;IACvE,EAAC,QAAQ,EAAE,6BAA6B,EAAE,IAAI,EAAE,SAAS,EAAC;IAC1D,EAAC,QAAQ,EAAE,mCAAmC,EAAE,IAAI,EAAE,SAAS,EAAC;IAChE,EAAC,QAAQ,EAAE,0CAA0C,EAAE,IAAI,EAAE,SAAS,EAAC;IACvE,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC;IAChC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAC;IACnC,EAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,KAAK,EAAC;IACvC,EAAC,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,KAAK,EAAC;IACtC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC;IAChC,EAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,KAAK,EAAC;IACvC,EAAC,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,KAAK,EAAC;IACtC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,uBAAY,CAAC,IAAI,EAAC;IACzD,EAAC,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAA
E,KAAK,EAAE,IAAI,EAAE,uBAAY,CAAC,IAAI,EAAC;IAC1D,EAAC,QAAQ,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,uBAAY,CAAC,IAAI,EAAC;IACtD,mEAAmE;IACnE,EAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,YAAY,EAAC;CAC/C,CAAC,GAAG,CAAC,CAAC,GAA8B,EAAE,EAAE;IACvC,IAAI,GAAG,CAAC,QAAQ,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,IAAI,EAAE;QAC/D,GAAG,CAAC,QAAQ,IAAI,IAAI,GAAG,CAAC,IAAI,GAAG,CAAC;KACjC;IACD,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE;QACb,GAAG,CAAC,IAAI,GAAG,uBAAY,CAAC,MAAM,CAAC;KAChC;IACD,OAAO,GAAuB,CAAC;AACjC,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"sources.js","sourceRoot":"","sources":["../src/sources.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAC,MAAM,eAAe,CAAC;AAQ3C,0DAA0D;AAC1D,wEAAwE;AACxE,MAAM,CAAC,MAAM,OAAO,GAAuB;IACzC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,CAAC,SAAS,EAAC;IACjD,EAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,CAAC,SAAS,EAAC;IAClE,EAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAC;IAC9B,EAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAC;IACjC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC;IAChC,EAAC,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAC;IAClC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC;IAChC,EAAC,QAAQ,EAAE,qBAAqB,EAAE,IAAI,EAAE,OAAO,EAAC;IAChD,EAAC,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAC;IACjC,EAAC,QAAQ,EAAE,wBAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,CAAC,GAAG,EAAC;IAC1E,EAAC,QAAQ,EAAE,mBAAmB,EAAE,IAAI,EAAE,MAAM,EAAC;IAC7C,EAAC,QAAQ,EAAE,sBAAsB,EAAE,IAAI,EAAE,MAAM,EAAC;IAChD,yCAAyC;IACzC,qDAAqD;IACrD,EAAC,QAAQ,EAAE,qBAAqB,EAAE,IAAI,EAAE,YAAY,EAAC;IACrD,EAAC,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,EAAC;IACvC,EAAC,QAAQ,EAAE,gBAAgB,EAAE,IAAI,EAAE,QAAQ,EAAC;IAC5C,EAAC,QAAQ,EAAE,6BAA6B,EAAE,IAAI,EAAE,SAAS,EAAC;IAC1D,EAAC,QAAQ,EAAE,mCAAmC,EAAE,IAAI,EAAE,SAAS,EAAC;IAChE,EAAC,QAAQ,EAAE,0CAA0C,EAAE,IAAI,EAAE,SAAS,EAAC;IACvE,EAAC,QAAQ,EAAE,6BAA6B,EAAE,IAAI,EAAE,SAAS,EAAC;IAC1D,EAAC,QAAQ,EAAE,mCAAmC,EAAE,IAAI,EAAE,SAAS,EAAC;IAChE,EAAC,QAAQ,EAAE,0CAA0C,EAAE,IAAI,EAAE,SAAS,EAAC;IACvE,EAAC,QAAQ,EAAE,6BAA6B,EAAE,IAAI,EAAE,SAAS,EAAC;IAC1D,EAAC,QAAQ,EAAE,mCAAmC,EAAE,IAAI,EAAE,SAAS,EAAC;IAChE,EAAC,QAAQ,EAAE,0CAA0C,EAAE,IAAI,EAAE,SAAS,EAAC;IACvE,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC;IAChC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAC;IACnC,EAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,KAAK,EAAC;IACvC,EAAC,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,KAAK,EAAC;IACtC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAC;IAChC,EAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,KAAK,EAAC;IACvC,EAAC,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,KAAK,EAAC;IACtC,EAAC,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,YAAY,CAAC,IAAI,EAAC;IACzD,EA
AC,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,YAAY,CAAC,IAAI,EAAC;IAC1D,EAAC,QAAQ,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,CAAC,IAAI,EAAC;IACtD,mEAAmE;IACnE,EAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,EAAE,YAAY,EAAC;CAC/C,CAAC,GAAG,CAAC,CAAC,GAA8B,EAAE,EAAE;IACvC,IAAI,GAAG,CAAC,QAAQ,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;QAChE,GAAG,CAAC,QAAQ,IAAI,IAAI,GAAG,CAAC,IAAI,GAAG,CAAC;IAClC,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACd,GAAG,CAAC,IAAI,GAAG,YAAY,CAAC,MAAM,CAAC;IACjC,CAAC;IACD,OAAO,GAAuB,CAAC;AACjC,CAAC,CAAC,CAAC"}
package/lib/types.d.ts CHANGED
@@ -3,3 +3,4 @@ export type CheerioStatic = ReturnType<typeof load>;
3
3
  export type Cheerio = ReturnType<CheerioStatic>;
4
4
  export type CheerioOptionsInterface = NonNullable<Parameters<typeof load>[1]>;
5
5
  export type CheerioElement = Cheerio[number];
6
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,IAAI,EAAC,MAAM,SAAS,CAAC;AAGlC,MAAM,MAAM,aAAa,GAAG,UAAU,CAAC,OAAO,IAAI,CAAC,CAAC;AACpD,MAAM,MAAM,OAAO,GAAG,UAAU,CAAC,aAAa,CAAC,CAAC;AAChD,MAAM,MAAM,uBAAuB,GAAG,WAAW,CAAC,UAAU,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC9E,MAAM,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC"}
package/lib/types.js CHANGED
@@ -1,3 +1,2 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
1
+ export {};
3
2
  //# sourceMappingURL=types.js.map
package/lib/util.d.ts CHANGED
@@ -1,10 +1,10 @@
1
- import type { ResourceBody, ResourceEncoding } from './resource';
1
+ import type { ResourceBody, ResourceEncoding } from './resource.js';
2
2
  export declare const sleep: (ms: number) => Promise<void>;
3
3
  export declare const escapePath: (str: string) => string;
4
- export declare const isSiteMap: (url?: string) => boolean | '' | void;
4
+ export declare const isSiteMap: (url?: string) => boolean | "" | void;
5
5
  export declare const arrayToMap: (array: (string | number)[], freeze?: boolean) => Record<string | number, number>;
6
6
  export declare const toString: (body: ResourceBody, encoding: ResourceEncoding) => string;
7
- export declare const importDefaultFromPath: (path: string) => any;
7
+ export declare const importDefaultFromPath: <T>(path: string) => Promise<T>;
8
8
  export declare const orderUrlSearch: (search: string) => string;
9
9
  export declare const simpleHashString: (str: string) => string;
10
10
  export declare const hasOwnProperty: (v: PropertyKey) => boolean;
@@ -19,3 +19,4 @@ export declare const weakAssign: <T, U>(target: T, source: U) => T & U;
19
19
  * @param url
20
20
  */
21
21
  export declare const isUrlHttp: (url: string) => boolean;
22
+ //# sourceMappingURL=util.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"util.d.ts","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAE,gBAAgB,EAAC,MAAM,eAAe,CAAC;AAIlE,eAAO,MAAM,KAAK,OAAQ,MAAM,KAAG,OAAO,CAAC,IAAI,CACN,CAAC;AAE1C,eAAO,MAAM,UAAU,QAAS,MAAM,KAAG,MACD,CAAC;AAEzC,eAAO,MAAM,SAAS,SAAU,MAAM,KAAG,OAAO,GAAG,EAAE,GAAG,IAEY,CAAC;AAErE,eAAO,MAAM,UAAU,UAAW,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,WAAW,OAAO,KACrE,MAAM,CAAC,MAAM,GAAG,MAAM,EAAE,MAAM,CAM/B,CAAC;AAEF,eAAO,MAAM,QAAQ,SAAU,YAAY,YAAY,gBAAgB,KAAG,MAezE,CAAC;AAEF,eAAO,MAAM,qBAAqB,GAAI,CAAC,QAAQ,MAAM,KAAG,OAAO,CAAC,CAAC,CAIhE,CAAC;AAEF,eAAO,MAAM,cAAc,WAAY,MAAM,KAAG,MAmB/C,CAAC;AAEF,eAAO,MAAM,gBAAgB,QAAS,MAAM,KAAG,MAQzB,CAAC;AAEvB,eAAO,MAAM,cAAc,6BAAkC,CAAC;AAE9D;;;;GAIG;AACH,eAAO,MAAM,UAAU,GAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,KAAG,CAAC,GAAG,CAW3D,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,SAAS,QAAS,MAAM,KAAG,OACiB,CAAC"}
package/lib/util.js CHANGED
@@ -1,25 +1,18 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.isUrlHttp = exports.weakAssign = exports.hasOwnProperty = exports.simpleHashString = exports.orderUrlSearch = exports.importDefaultFromPath = exports.toString = exports.arrayToMap = exports.isSiteMap = exports.escapePath = exports.sleep = void 0;
4
- const crypto_1 = require("crypto");
1
+ import { createHash } from 'node:crypto';
5
2
  const forbiddenChar = /[:*?"<>|&]|%3A|%2A|%3F|%22|%3C|%3E|%7C|%26/ig;
6
- const sleep = (ms) => new Promise(r => setTimeout(r, ms | 0));
7
- exports.sleep = sleep;
8
- const escapePath = (str) => str && str.replace(forbiddenChar, '_');
9
- exports.escapePath = escapePath;
10
- const isSiteMap = (url) => url &&
3
+ export const sleep = (ms) => new Promise(r => setTimeout(r, ms | 0));
4
+ export const escapePath = (str) => str && str.replace(forbiddenChar, '_');
5
+ export const isSiteMap = (url) => url &&
11
6
  url.includes('/sitemaps/') &&
12
7
  (url.endsWith('sitemap.xml') || url.endsWith('sitemap_other.xml'));
13
- exports.isSiteMap = isSiteMap;
14
- const arrayToMap = (array, freeze) => {
8
+ export const arrayToMap = (array, freeze) => {
15
9
  const obj = {};
16
10
  for (const item of array) {
17
11
  obj[item] = 1;
18
12
  }
19
13
  return freeze ? Object.freeze(obj) : obj;
20
14
  };
21
- exports.arrayToMap = arrayToMap;
22
- const toString = (body, encoding) => {
15
+ export const toString = (body, encoding) => {
23
16
  let stringValue;
24
17
  if (Buffer.isBuffer(body)) {
25
18
  stringValue = body.toString(encoding || 'utf8');
@@ -38,18 +31,12 @@ const toString = (body, encoding) => {
38
31
  }
39
32
  return stringValue;
40
33
  };
41
- exports.toString = toString;
42
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
43
- const importDefaultFromPath = (path) => {
44
- // eslint-disable-next-line @typescript-eslint/no-var-requires,@typescript-eslint/no-explicit-any
45
- const mod = require(path);
46
- if (mod && mod.__esModule && mod.default) {
47
- return mod.default;
48
- }
49
- return mod;
34
+ export const importDefaultFromPath = (path) => {
35
+ return import(path).then(mod => {
36
+ return mod.default || mod;
37
+ });
50
38
  };
51
- exports.importDefaultFromPath = importDefaultFromPath;
52
- const orderUrlSearch = (search) => {
39
+ export const orderUrlSearch = (search) => {
53
40
  const parts = (search[0] === '?' ? search.slice(1) : search)
54
41
  .split('&');
55
42
  const searchKeys = [], searchMap = {};
@@ -69,8 +56,7 @@ const orderUrlSearch = (search) => {
69
56
  .map(k => { var _a; return (_a = searchMap[k]) === null || _a === void 0 ? void 0 : _a.map(v => k + '=' + v).join('&'); })
70
57
  .join('&');
71
58
  };
72
- exports.orderUrlSearch = orderUrlSearch;
73
- const simpleHashString = (str) => (0, crypto_1.createHash)('sha256')
59
+ export const simpleHashString = (str) => createHash('sha256')
74
60
  .update(str)
75
61
  .digest()
76
62
  .toString('base64')
@@ -78,32 +64,29 @@ const simpleHashString = (str) => (0, crypto_1.createHash)('sha256')
78
64
  .replace(/\+/g, '-')
79
65
  .replace(/\//g, '_')
80
66
  .replace(/=/g, '');
81
- exports.simpleHashString = simpleHashString;
82
- exports.hasOwnProperty = Object.prototype.hasOwnProperty;
67
+ export const hasOwnProperty = Object.prototype.hasOwnProperty;
83
68
  /**
84
69
  * Merge values from source to target only if key not exists in target
85
70
  * Note that using this function against incompatible type or null | undefined
86
71
  * may lead to typescript parser errors.
87
72
  */
88
- const weakAssign = (target, source) => {
73
+ export const weakAssign = (target, source) => {
89
74
  if (!target)
90
75
  return Object.assign({}, source);
91
76
  if (!source)
92
77
  return target;
93
78
  for (const key in source) {
94
- if (exports.hasOwnProperty.call(source, key) &&
95
- !exports.hasOwnProperty.call(target, key)) {
79
+ if (hasOwnProperty.call(source, key) &&
80
+ !hasOwnProperty.call(target, key)) {
96
81
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
97
82
  Reflect.set(target, key, source[key]);
98
83
  }
99
84
  }
100
85
  return target;
101
86
  };
102
- exports.weakAssign = weakAssign;
103
87
  /**
104
88
  * Test if the given url is http url
105
89
  * @param url
106
90
  */
107
- const isUrlHttp = (url) => url.startsWith('http://') || url.startsWith('https://');
108
- exports.isUrlHttp = isUrlHttp;
91
+ export const isUrlHttp = (url) => url.startsWith('http://') || url.startsWith('https://');
109
92
  //# sourceMappingURL=util.js.map
package/lib/util.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":";;;AAAA,mCAAkC;AAGlC,MAAM,aAAa,GAAG,8CAA8C,CAAC;AAE9D,MAAM,KAAK,GAAG,CAAC,EAAU,EAAiB,EAAE,CACjD,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;AAD7B,QAAA,KAAK,SACwB;AAEnC,MAAM,UAAU,GAAG,CAAC,GAAW,EAAU,EAAE,CAChD,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;AAD5B,QAAA,UAAU,cACkB;AAElC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAuB,EAAE,CAAC,GAAG;IACjE,GAAG,CAAC,QAAQ,CAAC,YAAY,CAAC;IAC1B,CAAC,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAC,CAAC;AAFxD,QAAA,SAAS,aAE+C;AAE9D,MAAM,UAAU,GAAG,CAAC,KAA0B,EAAE,MAAgB,EACrC,EAAE;IAClC,MAAM,GAAG,GAAoC,EAAE,CAAC;IAChD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE;QACxB,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;KACf;IACD,OAAO,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC3C,CAAC,CAAC;AAPW,QAAA,UAAU,cAOrB;AAEK,MAAM,QAAQ,GAAG,CAAC,IAAkB,EAAE,QAA0B,EAAU,EAAE;IACjF,IAAI,WAAmB,CAAC;IACxB,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;QACzB,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,IAAI,MAAM,CAAC,CAAC;KACjD;SAAM,IAAI,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;QACnC,uCAAuC;QACvC,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC;aACrE,QAAQ,CAAC,QAAQ,IAAI,MAAM,CAAC,CAAC;KACjC;SAAM,IAAI,IAAI,YAAY,WAAW,EAAE;QACtC,uCAAuC;QACvC,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,QAAQ,IAAI,MAAM,CAAC,CAAC;KAC9D;SAAM;QACL,WAAW,GAAG,IAAI,CAAC;KACpB;IACD,OAAO,WAAW,CAAC;AACrB,CAAC,CAAC;AAfW,QAAA,QAAQ,YAenB;AAEF,8DAA8D;AACvD,MAAM,qBAAqB,GAAG,CAAC,IAAY,EAAO,EAAE;IACzD,iGAAiG;IACjG,MAAM,GAAG,GAAQ,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,GAAG,IAAI,GAAG,CAAC,UAAU,IAAI,GAAG,CAAC,OAAO,EAAE;QACxC,OAAO,GAAG,CAAC,OAAO,CAAC;KACpB;IACD,OAAO,GAAG,CAAC;AACb,CAAC,CAAC;AAPW,QAAA,qBAAqB,yBAOhC;AAEK,MAAM,cAAc,GAAG,CAAC,MAAc,EAAU,EAAE;IACvD,MAAM,KAAK,GAAa,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;SACnE,KAAK,CAAC,GAAG,CAAC,CAAC;I
ACd,MAAM,UAAU,GAAa,EAAE,EAC7B,SAAS,GAA6B,EAAE,CAAC;IAC3C,IAAI,WAAqB,EAAE,SAAiB,CAAC;IAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QACrC,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,SAAS,CAAC,SAAS,GAAG,WAAW,CAAC,KAAK,EAAE,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;YAC1D,SAAS,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;SAClD;aAAM;YACL,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC3B,SAAS,CAAC,SAAS,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;SAChD;KACF;IACD,OAAO,GAAG,GAAG,UAAU;SACpB,IAAI,EAAE;SACN,GAAG,CAAC,CAAC,CAAC,EAAE,WAAC,OAAA,MAAA,SAAS,CAAC,CAAC,CAAC,0CAAE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAA,EAAA,CAAC;SACvD,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC,CAAC;AAnBW,QAAA,cAAc,kBAmBzB;AAEK,MAAM,gBAAgB,GAAG,CAAC,GAAW,EAAU,EAAE,CACtD,IAAA,mBAAU,EAAC,QAAQ,CAAC;KACjB,MAAM,CAAC,GAAG,CAAC;KACX,MAAM,EAAE;KACR,QAAQ,CAAC,QAAQ,CAAC;IACnB,qBAAqB;KACpB,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;KACnB,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;KACnB,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;AARV,QAAA,gBAAgB,oBAQN;AAEV,QAAA,cAAc,GAAG,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC;AAE9D;;;;GAIG;AACI,MAAM,UAAU,GAAG,CAAO,MAAS,EAAE,MAAS,EAAS,EAAE;IAC9D,IAAI,CAAC,MAAM;QAAE,OAAO,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAU,CAAC;IACvD,IAAI,CAAC,MAAM;QAAE,OAAO,MAAe,CAAC;IACpC,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE;QACxB,IAAI,sBAAc,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC;YAClC,CAAC,sBAAc,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;YACnC,8DAA8D;YAC9D,OAAO,CAAC,GAAG,CAAC,MAAa,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;SAC9C;KACF;IACD,OAAO,MAAe,CAAC;AACzB,CAAC,CAAC;AAXW,QAAA,UAAU,cAWrB;AAEF;;;GAGG;AACI,MAAM,SAAS,GAAG,CAAC,GAAW,EAAW,EAAE,CAChD,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;AAD7C,QAAA,SAAS,aACoC"}
1
+ {"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,UAAU,EAAC,MAAM,aAAa,CAAC;AAGvC,MAAM,aAAa,GAAG,8CAA8C,CAAC;AAErE,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,EAAU,EAAiB,EAAE,CACjD,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;AAE1C,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,GAAW,EAAU,EAAE,CAChD,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;AAEzC,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAuB,EAAE,CAAC,GAAG;IACjE,GAAG,CAAC,QAAQ,CAAC,YAAY,CAAC;IAC1B,CAAC,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAC,CAAC;AAErE,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAA0B,EAAE,MAAgB,EACrC,EAAE;IAClC,MAAM,GAAG,GAAoC,EAAE,CAAC;IAChD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAChB,CAAC;IACD,OAAO,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC3C,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,QAAQ,GAAG,CAAC,IAAkB,EAAE,QAA0B,EAAU,EAAE;IACjF,IAAI,WAAmB,CAAC;IACxB,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1B,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,IAAI,MAAM,CAAC,CAAC;IAClD,CAAC;SAAM,IAAI,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;QACpC,uCAAuC;QACvC,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC;aACrE,QAAQ,CAAC,QAAQ,IAAI,MAAM,CAAC,CAAC;IAClC,CAAC;SAAM,IAAI,IAAI,YAAY,WAAW,EAAE,CAAC;QACvC,uCAAuC;QACvC,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,QAAQ,IAAI,MAAM,CAAC,CAAC;IAC/D,CAAC;SAAM,CAAC;QACN,WAAW,GAAG,IAAI,CAAC;IACrB,CAAC;IACD,OAAO,WAAW,CAAC;AACrB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAI,IAAY,EAAc,EAAE;IACnE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE;QAC7B,OAAO,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC;IAC5B,CAAC,CAAC,CAAC;AACL,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,MAAc,EAAU,EAAE;IACvD,MAAM,KAAK,GAAa,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;SACnE,KAAK,CAAC,GAAG,CAAC,CAAC;IACd,MAAM,UAAU,GAAa,EAAE,EAC7B,SAAS,GAA6B,EAAE,CAAC;IA
C3C,IAAI,WAAqB,EAAE,SAAiB,CAAC;IAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,SAAS,CAAC,SAAS,GAAG,WAAW,CAAC,KAAK,EAAE,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3D,SAAS,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QACnD,CAAC;aAAM,CAAC;YACN,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC3B,SAAS,CAAC,SAAS,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IACD,OAAO,GAAG,GAAG,UAAU;SACpB,IAAI,EAAE;SACN,GAAG,CAAC,CAAC,CAAC,EAAE,WAAC,OAAA,MAAA,SAAS,CAAC,CAAC,CAAC,0CAAE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAA,EAAA,CAAC;SACvD,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,GAAW,EAAU,EAAE,CACtD,UAAU,CAAC,QAAQ,CAAC;KACjB,MAAM,CAAC,GAAG,CAAC;KACX,MAAM,EAAE;KACR,QAAQ,CAAC,QAAQ,CAAC;IACnB,qBAAqB;KACpB,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;KACnB,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;KACnB,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;AAEvB,MAAM,CAAC,MAAM,cAAc,GAAG,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC;AAE9D;;;;GAIG;AACH,MAAM,CAAC,MAAM,UAAU,GAAG,CAAO,MAAS,EAAE,MAAS,EAAS,EAAE;IAC9D,IAAI,CAAC,MAAM;QAAE,OAAO,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAU,CAAC;IACvD,IAAI,CAAC,MAAM;QAAE,OAAO,MAAe,CAAC;IACpC,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC;YAClC,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC;YACpC,8DAA8D;YAC9D,OAAO,CAAC,GAAG,CAAC,MAAa,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IACD,OAAO,MAAe,CAAC;AACzB,CAAC,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,GAAW,EAAW,EAAE,CAChD,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,45 +1,43 @@
1
1
  {
2
2
  "name": "website-scrap-engine",
3
- "version": "0.7.1",
3
+ "version": "0.8.0",
4
4
  "description": "Configurable website scraper in typescript",
5
5
  "main": "lib",
6
6
  "types": "lib",
7
+ "type": "module",
7
8
  "engines": {
8
- "node": ">=12.16.0"
9
+ "node": ">=18.17.0"
9
10
  },
10
11
  "scripts": {
11
- "clean": "rimraf lib/*",
12
+ "clean": "node -e \"require('fs').rmSync('lib',{force:true,recursive:true})\"",
12
13
  "tsc": "tsc",
13
14
  "lint": "eslint --fix src test",
14
- "test": "npm run lint && jest",
15
+ "test": "npm run lint && node --experimental-vm-modules node_modules/jest/bin/jest.js",
15
16
  "build": "npm run lint && npm run tsc && npm run copy",
16
17
  "copy": "node copy-src.js",
17
18
  "prepack": "npm run clean && npm run build",
18
19
  "postshrinkwrap": "node package-lock-resolved.js"
19
20
  },
20
- "jest": {
21
- "preset": "ts-jest"
22
- },
23
21
  "dependencies": {
24
- "cheerio": "^1.0.0-rc.12",
25
- "css-url-parser": "^1.1.3",
26
- "got": "^11.8.6",
22
+ "cheerio": "^1.0.0",
23
+ "css-url-parser": "^1.1.4",
24
+ "got": "^13.0.0",
27
25
  "log4js": "^6.9.1",
28
26
  "mkdirp": "^3.0.1",
29
- "p-queue": "^6.6.2",
30
- "srcset": "^4.0.0",
27
+ "p-queue": "^8.1.0",
28
+ "srcset": "^5.0.1",
31
29
  "urijs": "^1.19.11"
32
30
  },
33
31
  "devDependencies": {
34
- "@types/jest": "^28.1.1",
35
- "@types/node": "^20.8.7",
36
- "@types/urijs": "^1.19.20",
37
- "@typescript-eslint/eslint-plugin": "^5.62.0",
38
- "@typescript-eslint/parser": "^5.62.0",
39
- "eslint": "^8.52.0",
40
- "jest": "^28.1.3",
41
- "ts-jest": "^28.0.8",
42
- "typescript": "^5.0.4"
32
+ "@jest/globals": "^29.7.0",
33
+ "@types/node": "^22.12.0",
34
+ "@types/urijs": "^1.19.25",
35
+ "@typescript-eslint/eslint-plugin": "^8.22.0",
36
+ "@typescript-eslint/parser": "^8.22.0",
37
+ "eslint": "^9.19.0",
38
+ "jest": "^29.7.0",
39
+ "ts-jest": "^29.2.5",
40
+ "typescript": "^5.7.3"
43
41
  },
44
42
  "files": [
45
43
  ".editorconfig",
@@ -1,5 +1,5 @@
1
- import {adjustConcurrency as logger} from '../logger/logger';
2
- import type {DownloaderWithMeta} from './types';
1
+ import {adjustConcurrency as logger} from '../logger/logger.js';
2
+ import type {DownloaderWithMeta} from './types.js';
3
3
 
4
4
  export function adjust(downloader: DownloaderWithMeta): void {
5
5
  const {meta} = downloader;
@@ -1,8 +1,8 @@
1
- export {adjust} from './adjust-concurrency';
2
- export {AbstractDownloader} from './main';
3
- export {MultiThreadDownloader} from './multi';
4
- export {PipelineExecutorImpl} from './pipeline-executor-impl';
5
- export {SingleThreadDownloader} from './single';
6
- export * as types from './types';
7
- export {WorkerPool} from './worker-pool';
8
- export * as workerType from './worker-type';
1
+ export {adjust} from './adjust-concurrency.js';
2
+ export {AbstractDownloader} from './main.js';
3
+ export {MultiThreadDownloader} from './multi.js';
4
+ export {PipelineExecutorImpl} from './pipeline-executor-impl.js';
5
+ export {SingleThreadDownloader} from './single.js';
6
+ export * as types from './types.js';
7
+ export {WorkerPool} from './worker-pool.js';
8
+ export * as workerType from './worker-type.js';
@@ -1,26 +1,23 @@
1
1
  import PQueue from 'p-queue';
2
2
  import type {HTTPError} from 'got';
3
3
  import URI from 'urijs';
4
- import {
5
- DownloadOptions,
6
- mergeOverrideOptions,
7
- StaticDownloadOptions
8
- } from '../options';
9
- import {
10
- normalizeResource,
11
- RawResource,
12
- Resource,
13
- ResourceType
14
- } from '../resource';
15
- import {error, notFound, skip} from '../logger/logger';
16
- import {importDefaultFromPath} from '../util';
17
- import type {DownloaderStats, DownloaderWithMeta} from './types';
18
- import {PipelineExecutorImpl} from './pipeline-executor-impl';
4
+ import type {DownloadOptions, StaticDownloadOptions} from '../options.js';
5
+ import {mergeOverrideOptions} from '../options.js';
6
+ import type {RawResource, Resource} from '../resource.js';
7
+ import {normalizeResource, ResourceType} from '../resource.js';
8
+ import {error, notFound, skip} from '../logger/logger.js';
9
+ import {importDefaultFromPath} from '../util.js';
10
+ import type {DownloaderStats, DownloaderWithMeta} from './types.js';
11
+ import {PipelineExecutorImpl} from './pipeline-executor-impl.js';
19
12
 
20
13
  export abstract class AbstractDownloader implements DownloaderWithMeta {
21
14
  readonly queue: PQueue;
22
- readonly pipeline: PipelineExecutorImpl;
23
- readonly options: DownloadOptions;
15
+ readonly _asyncOptions: Promise<DownloadOptions>;
16
+ readonly _overrideOptions?: Partial<StaticDownloadOptions> & { pathToWorker?: string };
17
+ _options?: DownloadOptions;
18
+ _isInit: boolean;
19
+ _pipeline?: PipelineExecutorImpl;
20
+ _initOptions: Promise<void>;
24
21
  readonly downloadedUrl: Set<string> = new Set<string>();
25
22
  readonly queuedUrl: Set<string> = new Set<string>();
26
23
  readonly meta: DownloaderStats = {
@@ -33,10 +30,31 @@ export abstract class AbstractDownloader implements DownloaderWithMeta {
33
30
 
34
31
  protected constructor(public pathToOptions: string,
35
32
  overrideOptions?: Partial<StaticDownloadOptions> & { pathToWorker?: string }) {
36
- this.options = mergeOverrideOptions(importDefaultFromPath(pathToOptions), overrideOptions);
37
- this.queue = new PQueue({concurrency: this.options.concurrency});
38
- this.pipeline = new PipelineExecutorImpl(this.options, this.options.req, this.options);
39
- this.options.configureLogger(this.options.localRoot, this.options.logSubDir || '');
33
+ this._asyncOptions = importDefaultFromPath(pathToOptions);
34
+ this._overrideOptions = overrideOptions;
35
+ this.queue = new PQueue();
36
+ this._isInit = false;
37
+ this._initOptions = this._asyncOptions.then(options => {
38
+ options = mergeOverrideOptions(options, this._overrideOptions);
39
+ this._options = options;
40
+ this._pipeline = new PipelineExecutorImpl(options, options.req, options);
41
+ options.configureLogger(options.localRoot, options.logSubDir || '');
42
+ this._isInit = true;
43
+ });
44
+ }
45
+
46
+ get options(): DownloadOptions {
47
+ if (this._options) {
48
+ return this._options;
49
+ }
50
+ throw new TypeError('AbstractDownloader: not initialized');
51
+ }
52
+
53
+ get pipeline(): PipelineExecutorImpl {
54
+ if (this._pipeline) {
55
+ return this._pipeline;
56
+ }
57
+ throw new TypeError('AbstractDownloader: not initialized');
40
58
  }
41
59
 
42
60
  get concurrency(): number {
@@ -56,20 +74,22 @@ export abstract class AbstractDownloader implements DownloaderWithMeta {
56
74
  }
57
75
 
58
76
  async addInitialResource(urlArr: string[]): Promise<void> {
59
- await this.pipeline.init(this.pipeline, this);
77
+ await this._initOptions;
78
+ const pipeline = this.pipeline;
79
+ await pipeline.init(pipeline, this);
60
80
  // noinspection DuplicatedCode
61
81
  for (let i = 0, l = urlArr.length; i < l; i++) {
62
82
  let url: string | void = urlArr[i];
63
- url = await this.pipeline.linkRedirect(url, null, null);
83
+ url = await pipeline.linkRedirect(url, null, null);
64
84
  if (!url) continue;
65
- const type: ResourceType | void = await this.pipeline.detectResourceType(
85
+ const type: ResourceType | void = await pipeline.detectResourceType(
66
86
  url, ResourceType.Html, null, null);
67
87
  if (!type) continue;
68
- let r: Resource | void = await this.pipeline.createResource(
88
+ let r: Resource | void = await pipeline.createResource(
69
89
  type, 0, url, url,
70
90
  undefined, undefined, undefined, type);
71
91
  if (!r) continue;
72
- r = await this.pipeline.processBeforeDownload(r, null, null);
92
+ r = await pipeline.processBeforeDownload(r, null, null);
73
93
  if (!r) continue;
74
94
  if (!r.shouldBeDiscardedFromDownload) {
75
95
  this.addProcessedResource(r);
@@ -138,7 +158,9 @@ export abstract class AbstractDownloader implements DownloaderWithMeta {
138
158
  () => this.options.adjustConcurrencyFunc?.(this),
139
159
  this.options.adjustConcurrencyPeriod || 60000);
140
160
  }
141
- this.queue.start();
161
+ this._initOptions.then(() => {
162
+ this.queue.start();
163
+ });
142
164
  }
143
165
 
144
166
  stop(): void {
@@ -155,7 +177,7 @@ export abstract class AbstractDownloader implements DownloaderWithMeta {
155
177
  async dispose(): Promise<void> {
156
178
  this.stop();
157
179
  this.queue.clear();
158
- await this.pipeline.dispose(this.pipeline, this);
180
+ await this.pipeline?.dispose(this.pipeline, this);
159
181
  }
160
182
 
161
183
  }
@@ -1,11 +1,12 @@
1
- import path from 'path';
2
- import {WorkerPool, WorkerFactory} from './worker-pool';
3
- import type {RawResource, Resource} from '../resource';
4
- import type {DownloadWorkerMessage} from './types';
5
- import type {StaticDownloadOptions} from '../options';
6
- import type {DownloadResource} from '../life-cycle/types';
7
- import {skip} from '../logger/logger';
8
- import {AbstractDownloader} from './main';
1
+ import path from 'node:path';
2
+ import type {WorkerFactory} from './worker-pool.js';
3
+ import {WorkerPool} from './worker-pool.js';
4
+ import type {RawResource, Resource} from '../resource.js';
5
+ import type {DownloadWorkerMessage} from './types.js';
6
+ import type {StaticDownloadOptions} from '../options.js';
7
+ import type {DownloadResource} from '../life-cycle/types.js';
8
+ import {skip} from '../logger/logger.js';
9
+ import {AbstractDownloader} from './main.js';
9
10
 
10
11
  export interface MultiThreadDownloaderOptions extends StaticDownloadOptions {
11
12
  pathToWorker?: string;
@@ -46,14 +47,14 @@ export class MultiThreadDownloader extends AbstractDownloader {
46
47
  if (this.options.initialUrl) {
47
48
  this.init = this.addInitialResource(this.options.initialUrl);
48
49
  } else {
49
- this.init = this.pipeline.init(this.pipeline, this);
50
+ this.init = this._initOptions.then(() => this.pipeline.init(this.pipeline, this));
50
51
  }
51
52
  }
52
53
 
53
54
  async downloadAndProcessResource(res: Resource): Promise<boolean | void> {
54
55
  let r: DownloadResource | void;
55
56
  try {
56
- r = await this.pipeline.download(res);
57
+ r = await this.pipeline!.download(res);
57
58
  if (!r) {
58
59
  skip.debug('discarded after download', res.url, res.rawUrl, res.refUrl);
59
60
  return;
@@ -1,21 +1,21 @@
1
- import type {StaticDownloadOptions} from '../options';
1
+ import type {StaticDownloadOptions} from '../options.js';
2
2
  import type {
3
3
  CreateResourceArgument,
4
4
  Resource,
5
5
  ResourceEncoding,
6
6
  ResourceType
7
- } from '../resource';
7
+ } from '../resource.js';
8
8
  import type {
9
9
  DownloadResource,
10
10
  ProcessingLifeCycle,
11
11
  RequestOptions,
12
12
  SubmitResourceFunc
13
- } from '../life-cycle/types';
13
+ } from '../life-cycle/types.js';
14
14
  // noinspection ES6PreferShortImport
15
- import type {PipelineExecutor} from '../life-cycle/pipeline-executor';
16
- import type {Cheerio} from '../types';
17
- import type {DownloaderWithMeta} from './types';
18
- import type {WorkerInfo} from './worker-pool';
15
+ import type {PipelineExecutor} from '../life-cycle/pipeline-executor.js';
16
+ import type {Cheerio} from '../types.js';
17
+ import type {DownloaderWithMeta} from './types.js';
18
+ import type {WorkerInfo} from './worker-pool.js';
19
19
 
20
20
  /**
21
21
  * Pipeline executor
@@ -1,8 +1,11 @@
1
- import {AbstractDownloader} from './main';
2
- import type {Resource} from '../resource';
3
- import type {StaticDownloadOptions} from '../options';
4
- import {skip} from '../logger/logger';
5
- import type {DownloadResource, SubmitResourceFunc} from '../life-cycle/types';
1
+ import {AbstractDownloader} from './main.js';
2
+ import type {Resource} from '../resource.js';
3
+ import type {StaticDownloadOptions} from '../options.js';
4
+ import {skip} from '../logger/logger.js';
5
+ import type {
6
+ DownloadResource,
7
+ SubmitResourceFunc
8
+ } from '../life-cycle/types.js';
6
9
 
7
10
  export class SingleThreadDownloader extends AbstractDownloader {
8
11
  readonly init: Promise<void>;
@@ -13,7 +16,7 @@ export class SingleThreadDownloader extends AbstractDownloader {
13
16
  if (this.options.initialUrl) {
14
17
  this.init = this.addInitialResource(this.options.initialUrl);
15
18
  } else {
16
- this.init = this.pipeline.init(this.pipeline, this);
19
+ this.init = this._initOptions.then(() => this.pipeline.init(this.pipeline, this));
17
20
  }
18
21
  }
19
22
 
@@ -1,6 +1,6 @@
1
- import type {MessagePort} from 'worker_threads';
2
- import type {DownloadOptions} from '../options';
3
- import type {RawResource} from '../resource';
1
+ import type {MessagePort} from 'node:worker_threads';
2
+ import type {DownloadOptions} from '../options.js';
3
+ import type {RawResource} from '../resource.js';
4
4
 
5
5
  export interface DownloaderStats {
6
6
  firstPeriodCount: number;
@@ -1,14 +1,14 @@
1
- import type {MessagePort, WorkerOptions} from 'worker_threads';
2
- import type {URL} from 'url';
3
- import {Worker} from 'worker_threads';
4
- import * as logger from '../logger/logger';
5
- import type {LogWorkerMessage} from './worker-type';
6
- import {
1
+ import type {MessagePort, WorkerOptions} from 'node:worker_threads';
2
+ import {Worker} from 'node:worker_threads';
3
+ import type {URL} from 'node:url';
4
+ import * as logger from '../logger/logger.js';
5
+ import type {LogWorkerMessage} from './worker-type.js';
6
+ import type {
7
7
  PendingPromise,
8
8
  PendingPromiseWithBody,
9
- WorkerMessage,
10
- WorkerMessageType
11
- } from './types';
9
+ WorkerMessage
10
+ } from './types.js';
11
+ import {WorkerMessageType} from './types.js';
12
12
 
13
13
  export interface WorkerInfo {
14
14
  readonly id: number;
@@ -1,6 +1,6 @@
1
- import type {logLevels} from '../logger/logger-worker';
2
- import type * as logger from '../logger/logger';
3
- import {WorkerMessage, WorkerMessageType} from './types';
1
+ import type {logLevels} from '../logger/logger-worker.js';
2
+ import type * as logger from '../logger/logger.js';
3
+ import type {WorkerMessage, WorkerMessageType} from './types.js';
4
4
 
5
5
  export interface WorkerLog<T = unknown> {
6
6
  logger: keyof typeof logger;