hydra-crawler 1.4.6 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. package/dist/apis/autocomplete.api.d.ts +7 -0
  2. package/dist/apis/autocomplete.api.js +15 -9
  3. package/dist/apis/autocomplete.api.js.map +1 -0
  4. package/dist/apis/bugs.api.d.ts +7 -0
  5. package/dist/apis/bugs.api.js +21 -15
  6. package/dist/apis/bugs.api.js.map +1 -0
  7. package/dist/apis/crawl.api.d.ts +7 -0
  8. package/dist/apis/crawl.api.js +15 -9
  9. package/dist/apis/crawl.api.js.map +1 -0
  10. package/dist/apis/domains.api.d.ts +7 -0
  11. package/dist/apis/domains.api.js +24 -19
  12. package/dist/apis/domains.api.js.map +1 -0
  13. package/dist/apis/images.api.d.ts +7 -0
  14. package/dist/apis/images.api.js +20 -14
  15. package/dist/apis/images.api.js.map +1 -0
  16. package/dist/apis/statistics.api.d.ts +8 -0
  17. package/dist/apis/statistics.api.js +27 -20
  18. package/dist/apis/statistics.api.js.map +1 -0
  19. package/dist/apis/test.api.d.ts +5 -0
  20. package/dist/apis/test.api.js +15 -9
  21. package/dist/apis/test.api.js.map +1 -0
  22. package/dist/apis/urls.api.d.ts +7 -0
  23. package/dist/apis/urls.api.js +21 -15
  24. package/dist/apis/urls.api.js.map +1 -0
  25. package/dist/apps/cleanup.app.d.ts +19 -0
  26. package/dist/apps/cleanup.app.js +118 -100
  27. package/dist/apps/cleanup.app.js.map +1 -0
  28. package/dist/apps/cross-populate-export.app.d.ts +12 -0
  29. package/dist/apps/cross-populate-export.app.js +60 -47
  30. package/dist/apps/cross-populate-export.app.js.map +1 -0
  31. package/dist/apps/cross-populate-import.app.d.ts +12 -0
  32. package/dist/apps/cross-populate-import.app.js +64 -51
  33. package/dist/apps/cross-populate-import.app.js.map +1 -0
  34. package/dist/apps/denylist.app.d.ts +17 -0
  35. package/dist/apps/denylist.app.js +115 -98
  36. package/dist/apps/denylist.app.js.map +1 -0
  37. package/dist/apps/expire.app.d.ts +19 -0
  38. package/dist/apps/expire.app.js +44 -31
  39. package/dist/apps/expire.app.js.map +1 -0
  40. package/dist/apps/extract-text.app.d.ts +8 -0
  41. package/dist/apps/extract-text.app.js +43 -35
  42. package/dist/apps/extract-text.app.js.map +1 -0
  43. package/dist/apps/hydra.app.d.ts +34 -0
  44. package/dist/apps/hydra.app.js +150 -137
  45. package/dist/apps/hydra.app.js.map +1 -0
  46. package/dist/apps/import.app.d.ts +11 -0
  47. package/dist/apps/import.app.js +44 -32
  48. package/dist/apps/import.app.js.map +1 -0
  49. package/dist/apps/internal-hydra-common.app.d.ts +28 -0
  50. package/dist/apps/internal-hydra-common.app.js +5 -11
  51. package/dist/apps/internal-hydra-common.app.js.map +1 -0
  52. package/dist/apps/query.app.d.ts +20 -0
  53. package/dist/apps/query.app.js +63 -49
  54. package/dist/apps/query.app.js.map +1 -0
  55. package/dist/apps/reattempt.app.d.ts +17 -0
  56. package/dist/apps/reattempt.app.js +66 -53
  57. package/dist/apps/reattempt.app.js.map +1 -0
  58. package/dist/apps/requeue-domain.app.d.ts +13 -0
  59. package/dist/apps/requeue-domain.app.js +50 -37
  60. package/dist/apps/requeue-domain.app.js.map +1 -0
  61. package/dist/apps/seed.app.d.ts +15 -0
  62. package/dist/apps/seed.app.js +53 -40
  63. package/dist/apps/seed.app.js.map +1 -0
  64. package/dist/apps/startup.app.d.ts +11 -0
  65. package/dist/apps/startup.app.js +51 -38
  66. package/dist/apps/startup.app.js.map +1 -0
  67. package/dist/apps/unarchive.app.d.ts +15 -0
  68. package/dist/apps/unarchive.app.js +67 -54
  69. package/dist/apps/unarchive.app.js.map +1 -0
  70. package/dist/classes/cleaner.d.ts +12 -0
  71. package/dist/classes/cleaner.js +227 -207
  72. package/dist/classes/cleaner.js.map +1 -0
  73. package/dist/classes/crawler.d.ts +34 -0
  74. package/dist/classes/crawler.js +248 -241
  75. package/dist/classes/crawler.js.map +1 -0
  76. package/dist/classes/dns.d.ts +3 -0
  77. package/dist/classes/dns.js +10 -13
  78. package/dist/classes/dns.js.map +1 -0
  79. package/dist/classes/expirer.d.ts +10 -0
  80. package/dist/classes/expirer.js +107 -94
  81. package/dist/classes/expirer.js.map +1 -0
  82. package/dist/classes/expiry.d.ts +8 -0
  83. package/dist/classes/expiry.js +16 -19
  84. package/dist/classes/expiry.js.map +1 -0
  85. package/dist/classes/lists.d.ts +9 -0
  86. package/dist/classes/lists.js +13 -18
  87. package/dist/classes/lists.js.map +1 -0
  88. package/dist/classes/robot.d.ts +15 -0
  89. package/dist/classes/robot.js +40 -30
  90. package/dist/classes/robot.js.map +1 -0
  91. package/dist/classes/tracker.d.ts +25 -0
  92. package/dist/classes/tracker.js +82 -64
  93. package/dist/classes/tracker.js.map +1 -0
  94. package/dist/cli.d.ts +1 -0
  95. package/dist/cli.js +72 -65
  96. package/dist/cli.js.map +1 -0
  97. package/dist/enums/eavailable-strategy.d.ts +4 -0
  98. package/dist/enums/eavailable-strategy.js +3 -5
  99. package/dist/enums/eavailable-strategy.js.map +1 -0
  100. package/dist/enums/elist.d.ts +7 -0
  101. package/dist/enums/elist.js +7 -11
  102. package/dist/enums/elist.js.map +1 -0
  103. package/dist/enums/eserver.d.ts +8 -0
  104. package/dist/enums/eserver.js +3 -5
  105. package/dist/enums/eserver.js.map +1 -0
  106. package/dist/enums/ex-powered-by.d.ts +6 -0
  107. package/dist/enums/ex-powered-by.js +3 -5
  108. package/dist/enums/ex-powered-by.js.map +1 -0
  109. package/dist/helpers/matcher.d.ts +5 -0
  110. package/dist/helpers/matcher.js +2 -5
  111. package/dist/helpers/matcher.js.map +1 -0
  112. package/dist/helpers/random.d.ts +4 -0
  113. package/dist/helpers/random.js +2 -5
  114. package/dist/helpers/random.js.map +1 -0
  115. package/dist/helpers/utf-decoder.d.ts +4 -0
  116. package/dist/helpers/utf-decoder.js +3 -6
  117. package/dist/helpers/utf-decoder.js.map +1 -0
  118. package/dist/interfaces/iexpiry.d.ts +7 -0
  119. package/dist/interfaces/iexpiry.js +9 -13
  120. package/dist/interfaces/iexpiry.js.map +1 -0
  121. package/dist/interfaces/imatch.d.ts +6 -0
  122. package/dist/interfaces/imatch.js +6 -9
  123. package/dist/interfaces/imatch.js.map +1 -0
  124. package/dist/interfaces/iparser-config.d.ts +4 -0
  125. package/dist/interfaces/iparser-config.js +4 -7
  126. package/dist/interfaces/iparser-config.js.map +1 -0
  127. package/dist/interfaces/iparser.d.ts +8 -0
  128. package/dist/interfaces/iparser.js +2 -2
  129. package/dist/interfaces/iparser.js.map +1 -0
  130. package/dist/interfaces/irequest-outcome.d.ts +11 -0
  131. package/dist/interfaces/irequest-outcome.js +2 -2
  132. package/dist/interfaces/irequest-outcome.js.map +1 -0
  133. package/dist/interfaces/iserver.d.ts +4 -0
  134. package/dist/interfaces/iserver.js +2 -2
  135. package/dist/interfaces/iserver.js.map +1 -0
  136. package/dist/parsers/accessibility-metrics.parser.d.ts +11 -0
  137. package/dist/parsers/accessibility-metrics.parser.js +34 -26
  138. package/dist/parsers/accessibility-metrics.parser.js.map +1 -0
  139. package/dist/parsers/asp-error.parser.d.ts +12 -0
  140. package/dist/parsers/asp-error.parser.js +36 -28
  141. package/dist/parsers/asp-error.parser.js.map +1 -0
  142. package/dist/parsers/bad-words.parser.d.ts +10 -0
  143. package/dist/parsers/bad-words.parser.js +21 -13
  144. package/dist/parsers/bad-words.parser.js.map +1 -0
  145. package/dist/parsers/complex-english.parser.d.ts +15 -0
  146. package/dist/parsers/complex-english.parser.js +33 -25
  147. package/dist/parsers/complex-english.parser.js.map +1 -0
  148. package/dist/parsers/data.parser.d.ts +14 -0
  149. package/dist/parsers/data.parser.js +12 -16
  150. package/dist/parsers/data.parser.js.map +1 -0
  151. package/dist/parsers/dictionary.parser.d.ts +19 -0
  152. package/dist/parsers/dictionary.parser.js +47 -39
  153. package/dist/parsers/dictionary.parser.js.map +1 -0
  154. package/dist/parsers/html.parser.d.ts +13 -0
  155. package/dist/parsers/html.parser.js +4 -8
  156. package/dist/parsers/html.parser.js.map +1 -0
  157. package/dist/parsers/hyperlinks.parser.d.ts +20 -0
  158. package/dist/parsers/hyperlinks.parser.js +82 -77
  159. package/dist/parsers/hyperlinks.parser.js.map +1 -0
  160. package/dist/parsers/image-tags.parser.d.ts +20 -0
  161. package/dist/parsers/image-tags.parser.js +38 -34
  162. package/dist/parsers/image-tags.parser.js.map +1 -0
  163. package/dist/parsers/jpeg.parser.d.ts +11 -0
  164. package/dist/parsers/jpeg.parser.js +28 -20
  165. package/dist/parsers/jpeg.parser.js.map +1 -0
  166. package/dist/parsers/paragraphs.parser.d.ts +13 -0
  167. package/dist/parsers/paragraphs.parser.js +33 -40
  168. package/dist/parsers/paragraphs.parser.js.map +1 -0
  169. package/dist/parsers/parser.d.ts +19 -0
  170. package/dist/parsers/parser.js +30 -17
  171. package/dist/parsers/parser.js.map +1 -0
  172. package/dist/parsers/php-error.parser.d.ts +12 -0
  173. package/dist/parsers/php-error.parser.js +42 -34
  174. package/dist/parsers/php-error.parser.js.map +1 -0
  175. package/dist/parsers/phrase.parser.d.ts +8 -0
  176. package/dist/parsers/phrase.parser.js +16 -11
  177. package/dist/parsers/phrase.parser.js.map +1 -0
  178. package/dist/parsers/regex.parser.d.ts +10 -0
  179. package/dist/parsers/regex.parser.js +30 -22
  180. package/dist/parsers/regex.parser.js.map +1 -0
  181. package/dist/parsers/server.parser.d.ts +12 -0
  182. package/dist/parsers/server.parser.js +66 -56
  183. package/dist/parsers/server.parser.js.map +1 -0
  184. package/dist/parsers/spelling.parser.d.ts +10 -0
  185. package/dist/parsers/spelling.parser.js +21 -13
  186. package/dist/parsers/spelling.parser.js.map +1 -0
  187. package/dist/parsers/string.parser.d.ts +8 -0
  188. package/dist/parsers/string.parser.js +5 -8
  189. package/dist/parsers/string.parser.js.map +1 -0
  190. package/dist/parsers/text.parser.d.ts +8 -0
  191. package/dist/parsers/text.parser.js +24 -18
  192. package/dist/parsers/text.parser.js.map +1 -0
  193. package/dist/parsers/words.parser.d.ts +11 -0
  194. package/dist/parsers/words.parser.js +32 -28
  195. package/dist/parsers/words.parser.js.map +1 -0
  196. package/dist/queries/complex-english.query.d.ts +2 -0
  197. package/dist/queries/complex-english.query.js +37 -38
  198. package/dist/queries/complex-english.query.js.map +1 -0
  199. package/dist/queries/flash-content.query.d.ts +2 -0
  200. package/dist/queries/flash-content.query.js +39 -30
  201. package/dist/queries/flash-content.query.js.map +1 -0
  202. package/dist/queries/linking-to-domains.query.d.ts +2 -0
  203. package/dist/queries/linking-to-domains.query.js +35 -27
  204. package/dist/queries/linking-to-domains.query.js.map +1 -0
  205. package/dist/queries/readability-score.query.d.ts +2 -0
  206. package/dist/queries/readability-score.query.js +21 -13
  207. package/dist/queries/readability-score.query.js.map +1 -0
  208. package/dist/servers/crawl.server.d.ts +35 -0
  209. package/dist/servers/crawl.server.js +133 -121
  210. package/dist/servers/crawl.server.js.map +1 -0
  211. package/dist/servers/express.server.d.ts +8 -0
  212. package/dist/servers/express.server.js +7 -10
  213. package/dist/servers/express.server.js.map +1 -0
  214. package/dist/servers/maintenance.server.d.ts +22 -0
  215. package/dist/servers/maintenance.server.js +42 -36
  216. package/dist/servers/maintenance.server.js.map +1 -0
  217. package/dist/servers/rest.server.d.ts +7 -0
  218. package/dist/servers/rest.server.js +40 -51
  219. package/dist/servers/rest.server.js.map +1 -0
  220. package/dist/servers/socket-io.server.d.ts +12 -0
  221. package/dist/servers/socket-io.server.js +48 -15
  222. package/dist/servers/socket-io.server.js.map +1 -0
  223. package/dist/services/database.service.d.ts +68 -0
  224. package/dist/services/database.service.js +528 -462
  225. package/dist/services/database.service.js.map +1 -0
  226. package/dist/types/tcrawl-config.d.ts +14 -0
  227. package/dist/types/tcrawl-config.js +14 -17
  228. package/dist/types/tcrawl-config.js.map +1 -0
  229. package/dist/types/thydra-config.d.ts +4 -0
  230. package/dist/types/thydra-config.js +4 -7
  231. package/dist/types/thydra-config.js.map +1 -0
  232. package/dist/types/tparser-ctor.d.ts +7 -0
  233. package/dist/types/tparser-ctor.js +2 -2
  234. package/dist/types/tparser-ctor.js.map +1 -0
  235. package/dist/types/tquery.d.ts +7 -0
  236. package/dist/types/tquery.js +2 -2
  237. package/dist/types/tquery.js.map +1 -0
  238. package/dist/types/trobots-config.d.ts +4 -0
  239. package/dist/types/trobots-config.js +4 -7
  240. package/dist/types/trobots-config.js.map +1 -0
  241. package/package.json +41 -29
  242. package/angular/10-es2015.bacd4ae5dd7913ce55f0.js +0 -1
  243. package/angular/10-es5.bacd4ae5dd7913ce55f0.js +0 -1
  244. package/angular/11-es2015.0f031dcf752d1e8eda6b.js +0 -1
  245. package/angular/11-es5.0f031dcf752d1e8eda6b.js +0 -1
  246. package/angular/3rdpartylicenses.txt +0 -1127
  247. package/angular/5-es2015.951498ca9c1bc74e57bf.js +0 -1
  248. package/angular/5-es5.951498ca9c1bc74e57bf.js +0 -1
  249. package/angular/6-es2015.65f680261a3506b88381.js +0 -1
  250. package/angular/6-es5.65f680261a3506b88381.js +0 -1
  251. package/angular/7-es2015.625197f3af1dbf3e805d.js +0 -1
  252. package/angular/7-es5.625197f3af1dbf3e805d.js +0 -1
  253. package/angular/8-es2015.55518901987a5b834309.js +0 -1
  254. package/angular/8-es5.55518901987a5b834309.js +0 -1
  255. package/angular/9-es2015.6cc9bde262564e7836f2.js +0 -1
  256. package/angular/9-es5.6cc9bde262564e7836f2.js +0 -1
  257. package/angular/Roboto-Black.41ed1105a6ebb8ffe34e.woff2 +0 -0
  258. package/angular/Roboto-Black.937491dfcbe64ca9a9f1.woff +0 -0
  259. package/angular/Roboto-BlackItalic.2e1ee657996854c6f427.woff +0 -0
  260. package/angular/Roboto-BlackItalic.50ca4c51ebc27e7e7d2f.woff2 +0 -0
  261. package/angular/Roboto-Bold.73288d91c325e82a5b92.woff +0 -0
  262. package/angular/Roboto-Bold.92fbd4e93cf0a5dbebaa.woff2 +0 -0
  263. package/angular/Roboto-BoldItalic.5f600d98a73d800ae575.woff2 +0 -0
  264. package/angular/Roboto-BoldItalic.6d89acbd21d7e3fbecb2.woff +0 -0
  265. package/angular/Roboto-Light.c27d89ac77468ae18f28.woff2 +0 -0
  266. package/angular/Roboto-Light.d923dfafc0c5183b59aa.woff +0 -0
  267. package/angular/Roboto-LightItalic.506274c7228cf81cae4d.woff2 +0 -0
  268. package/angular/Roboto-LightItalic.d4b8c137518d9d92bb28.woff +0 -0
  269. package/angular/Roboto-Medium.092c6130df8fd2199888.woff +0 -0
  270. package/angular/Roboto-Medium.1d3bced88509b0838984.woff2 +0 -0
  271. package/angular/Roboto-MediumItalic.18ff1628c628080166c1.woff +0 -0
  272. package/angular/Roboto-MediumItalic.d620b8f53f75966fe42e.woff2 +0 -0
  273. package/angular/Roboto-Regular.64cfb66c866ea50cad47.woff2 +0 -0
  274. package/angular/Roboto-Regular.e02e9d6ff5547f7e9962.woff +0 -0
  275. package/angular/Roboto-RegularItalic.4dd2af1e8df532f41db8.woff2 +0 -0
  276. package/angular/Roboto-RegularItalic.5ea38fff9eebef99c5df.woff +0 -0
  277. package/angular/Roboto-Thin.dbd56bd3357dc3617fe5.woff2 +0 -0
  278. package/angular/Roboto-Thin.e7f7c82374bd0ebef14b.woff +0 -0
  279. package/angular/Roboto-ThinItalic.5dd9349c940073834e9a.woff +0 -0
  280. package/angular/Roboto-ThinItalic.a8cef84f735ef887abdc.woff2 +0 -0
  281. package/angular/assets/config/app-config.json +0 -16
  282. package/angular/assets/images/splashbg.jpg +0 -0
  283. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff +0 -0
  284. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff2 +0 -0
  285. package/angular/assets/web-app-commons/fonts/material-icons/material-design-icons-community.css +0 -11293
  286. package/angular/favicon.ico +0 -0
  287. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNa.f2a0933406f783065152.woff +0 -0
  288. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.6467d9a24f234e8e8e07.woff2 +0 -0
  289. package/angular/index.html +0 -16
  290. package/angular/main-es2015.3a582572476c7f292e52.js +0 -1
  291. package/angular/main-es5.3a582572476c7f292e52.js +0 -1
  292. package/angular/polyfills-es2015.7df68534018bc2f6cb09.js +0 -1
  293. package/angular/polyfills-es5.e79468f406fae2989221.js +0 -1
  294. package/angular/runtime-es2015.6d2cff76cdb2790d3308.js +0 -1
  295. package/angular/runtime-es5.6d2cff76cdb2790d3308.js +0 -1
  296. package/angular/styles.c5c6c2534225b85c4ff0.css +0 -1
  297. package/config/bad-words.json +0 -1
  298. package/config/complex-english.json +0 -400
  299. package/config/hydra-auth.json +0 -8
  300. package/config/hydra-crawler.json +0 -84
  301. package/config/list-allow.json +0 -171
  302. package/config/list-deny.json +0 -248
  303. package/config/list-expiry.json +0 -7
  304. package/config/schedule.json +0 -25
  305. package/config/spelling.json +0 -1
@@ -1,16 +1,13 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Dns = void 0;
4
- const dns = require("dns");
5
- const tscommons_async_1 = require("tscommons-async");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- class Dns {
1
+ import * as dns from 'dns';
2
+ import { commonsAsyncAbortTimeout, commonsAsyncTimeout } from 'tscommons-es-async';
3
+ import { commonsOutputDebug } from 'nodecommons-es-cli';
4
+ export class Dns {
8
5
  static resolve(domain, connectTimeout) {
9
6
  // try IP4 first, then IP6
10
7
  return new Promise((resolve, reject) => {
11
8
  let timedout = false;
12
9
  const timeoutId = `dns_${domain}_timeout`;
13
- tscommons_async_1.CommonsAsync.timeout(connectTimeout, timeoutId)
10
+ commonsAsyncTimeout(connectTimeout, timeoutId)
14
11
  .then(() => {
15
12
  timedout = true;
16
13
  reject(new Error('DNS timeout'));
@@ -18,7 +15,7 @@ class Dns {
18
15
  .catch((e) => {
19
16
  if (e.message === 'abortTimeout called')
20
17
  return;
21
- nodecommons_cli_1.CommonsOutput.debug('debug position 4');
18
+ commonsOutputDebug('debug position 4');
22
19
  console.log(e);
23
20
  reject(e);
24
21
  });
@@ -27,12 +24,12 @@ class Dns {
27
24
  dns.resolve6(domain, (err2, address2) => {
28
25
  if (err2) {
29
26
  if (!timedout)
30
- tscommons_async_1.CommonsAsync.abortTimeout(timeoutId);
27
+ commonsAsyncAbortTimeout(timeoutId);
31
28
  reject(err2);
32
29
  return;
33
30
  }
34
31
  if (!timedout) {
35
- tscommons_async_1.CommonsAsync.abortTimeout(timeoutId);
32
+ commonsAsyncAbortTimeout(timeoutId);
36
33
  resolve(address2);
37
34
  }
38
35
  return;
@@ -40,11 +37,11 @@ class Dns {
40
37
  return;
41
38
  }
42
39
  if (!timedout) {
43
- tscommons_async_1.CommonsAsync.abortTimeout(timeoutId);
40
+ commonsAsyncAbortTimeout(timeoutId);
44
41
  resolve(address);
45
42
  }
46
43
  });
47
44
  });
48
45
  }
49
46
  }
50
- exports.Dns = Dns;
47
+ //# sourceMappingURL=dns.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dns.js","sourceRoot":"","sources":["../../src/classes/dns.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,GAAG,MAAM,KAAK,CAAC;AAE3B,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAEnF,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAExD,MAAM,OAAgB,GAAG;IACjB,MAAM,CAAC,OAAO,CACnB,MAAc,EACd,cAAsB;QAEvB,0BAA0B;QAE1B,OAAO,IAAI,OAAO,CAAW,CAAC,OAA8B,EAAE,MAA0B,EAAQ,EAAE;YACjG,IAAI,QAAQ,GAAY,KAAK,CAAC;YAC9B,MAAM,SAAS,GAAW,OAAO,MAAM,UAAU,CAAC;YAElD,mBAAmB,CACjB,cAAc,EACd,SAAS,CACV;iBACE,IAAI,CAAC,GAAS,EAAE;gBAChB,QAAQ,GAAG,IAAI,CAAC;gBAChB,MAAM,CAAC,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC;YAClC,CAAC,CAAC;iBACD,KAAK,CAAC,CAAC,CAAQ,EAAQ,EAAE;gBACzB,IAAI,CAAC,CAAC,OAAO,KAAK,qBAAqB;oBAAE,OAAO;gBAChD,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;gBACvC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBACf,MAAM,CAAC,CAAC,CAAC,CAAC;YACX,CAAC,CAAC,CAAC;YAEL,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,GAAe,EAAE,OAAiB,EAAQ,EAAE;gBACjE,IAAI,GAAG,EAAE;oBACR,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAgB,EAAE,QAAkB,EAAQ,EAAE;wBACnE,IAAI,IAAI,EAAE;4BACT,IAAI,CAAC,QAAQ;gCAAE,wBAAwB,CAAC,SAAS,CAAC,CAAC;4BACnD,MAAM,CAAC,IAAI,CAAC,CAAC;4BACb,OAAO;yBACP;wBACD,IAAI,CAAC,QAAQ,EAAE;4BACd,wBAAwB,CAAC,SAAS,CAAC,CAAC;4BACpC,OAAO,CAAC,QAAQ,CAAC,CAAC;yBAClB;wBACD,OAAO;oBACR,CAAC,CAAC,CAAC;oBACH,OAAO;iBACP;gBACD,IAAI,CAAC,QAAQ,EAAE;oBACd,wBAAwB,CAAC,SAAS,CAAC,CAAC;oBACpC,OAAO,CAAC,OAAO,CAAC,CAAC;iBACjB;YACF,CAAC,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;IACJ,CAAC;CACD"}
@@ -0,0 +1,10 @@
1
+ import { Expiry } from '../classes/expiry';
2
+ import { DatabaseService } from '../services/database.service';
3
+ export declare class Expirer {
4
+ private expiry;
5
+ private database;
6
+ constructor(expiry: Expiry, database: DatabaseService);
7
+ private listQueuedAndActive;
8
+ expire(limit?: number): Promise<boolean>;
9
+ expireFixed(threshold: Date): Promise<boolean>;
10
+ }
@@ -1,105 +1,118 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Expirer = void 0;
4
- const tscommons_core_1 = require("tscommons-core");
5
- const tscommons_core_2 = require("tscommons-core");
6
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
7
- const nodecommons_cli_1 = require("nodecommons-cli");
8
- const database_service_1 = require("../services/database.service");
9
- class Expirer {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { commonsArrayChunk, commonsDateDateToYmdHis, commonsTypeHasPropertyDate } from 'tscommons-es-core';
11
+ import { EStatus } from 'hydra-crawler-ts-assets';
12
+ import { commonsOutputAlert, commonsOutputDoing, commonsOutputProgress, commonsOutputSuccess } from 'nodecommons-es-cli';
13
+ import { isTMongoIdRow } from '../services/database.service';
14
+ export class Expirer {
10
15
  constructor(expiry, database) {
11
16
  this.expiry = expiry;
12
17
  this.database = database;
13
18
  }
14
- async listQueuedAndActive() {
15
- nodecommons_cli_1.CommonsOutput.doing(`Enumering queued and active domains`);
16
- const results = this.database.getUrls().aggregate([
17
- { $match: { status: { $in: [hydra_crawler_ts_assets_1.EStatus.QUEUED, hydra_crawler_ts_assets_1.EStatus.ACTIVE] } } },
18
- { $group: { _id: '$domain' } }
19
- ], { allowDiskUse: true });
20
- const queued = await this.database.listQueryResults(results, database_service_1.isTMongoIdRow);
21
- nodecommons_cli_1.CommonsOutput.success(queued.length);
22
- return queued
23
- .map((q) => q._id);
19
+ listQueuedAndActive() {
20
+ return __awaiter(this, void 0, void 0, function* () {
21
+ commonsOutputDoing('Enumering queued and active domains');
22
+ const results = this.database.getUrls().aggregate([
23
+ { $match: { status: { $in: [EStatus.QUEUED, EStatus.ACTIVE] } } },
24
+ { $group: { _id: '$domain' } }
25
+ ], { allowDiskUse: true });
26
+ const queued = yield this.database.listQueryResults(results, isTMongoIdRow);
27
+ commonsOutputSuccess(queued.length);
28
+ return queued
29
+ // eslint-disable-next-line no-underscore-dangle
30
+ .map((q) => q._id);
31
+ });
24
32
  }
25
- async expire(limit) {
26
- nodecommons_cli_1.CommonsOutput.alert(`Running expiry thread`);
27
- const queued = await this.listQueuedAndActive();
28
- nodecommons_cli_1.CommonsOutput.doing(`Searching for expired URLs`);
29
- const results = this.database.getUrls().find({ $and: [
30
- { status: { $nin: [
31
- hydra_crawler_ts_assets_1.EStatus.ARCHIVED,
32
- hydra_crawler_ts_assets_1.EStatus.QUEUED,
33
- hydra_crawler_ts_assets_1.EStatus.ACTIVE
34
- ] } },
35
- { domain: { $nin: queued } },
36
- { attempted: { $exists: true } }
37
- ] });
38
- const now = new Date().getTime() / 1000;
39
- const expired = [];
40
- let tally = 0;
41
- let found = 0;
42
- while (true) {
43
- tally++;
44
- if ((tally % 1000) === 0)
45
- nodecommons_cli_1.CommonsOutput.progress(`${tally}, ${found}`);
46
- if (limit !== undefined && tally > limit)
47
- break;
48
- const row = await results.next();
49
- if (row === null)
50
- break;
51
- const interval = this.expiry.getBestExpiry(row.url);
52
- if (!tscommons_core_1.CommonsType.hasPropertyDate(row, 'attempted'))
53
- continue;
54
- const attempted = row['attempted'].getTime() / 1000;
55
- if ((attempted + interval) < now) {
56
- expired.push(row['_id']);
57
- found++;
33
+ expire(limit) {
34
+ return __awaiter(this, void 0, void 0, function* () {
35
+ commonsOutputAlert('Running expiry thread');
36
+ const queued = yield this.listQueuedAndActive();
37
+ commonsOutputDoing('Searching for expired URLs');
38
+ const results = this.database.getUrls().find({ $and: [
39
+ { status: { $nin: [
40
+ EStatus.ARCHIVED,
41
+ EStatus.QUEUED,
42
+ EStatus.ACTIVE
43
+ ] } },
44
+ { domain: { $nin: queued } },
45
+ { attempted: { $exists: true } }
46
+ ] }, {});
47
+ const now = new Date().getTime() / 1000;
48
+ const expired = [];
49
+ let tally = 0;
50
+ let found = 0;
51
+ while (true) {
52
+ tally++;
53
+ if ((tally % 1000) === 0)
54
+ commonsOutputProgress(`${tally}, ${found}`);
55
+ if (limit !== undefined && tally > limit)
56
+ break;
57
+ const row = yield results.next();
58
+ if (row === null)
59
+ break;
60
+ const interval = this.expiry.getBestExpiry(row.url);
61
+ if (!commonsTypeHasPropertyDate(row, 'attempted'))
62
+ continue;
63
+ const attempted = row['attempted'].getTime() / 1000;
64
+ if ((attempted + interval) < now) {
65
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
66
+ expired.push(row['_id']);
67
+ found++;
68
+ }
69
+ }
70
+ commonsOutputSuccess(found);
71
+ if (found === 0)
72
+ return false;
73
+ const batches = commonsArrayChunk(expired, 100);
74
+ commonsOutputDoing('Re-queuing expired');
75
+ tally = 0;
76
+ for (const batch of batches) {
77
+ yield this.database.getUrls().updateMany({ _id: { $in: batch } }, {
78
+ $set: { status: EStatus.QUEUED },
79
+ $unset: { ttl: true }
80
+ });
81
+ tally += 100;
82
+ commonsOutputProgress(tally);
58
83
  }
59
- }
60
- nodecommons_cli_1.CommonsOutput.success(found);
61
- if (found === 0)
62
- return false;
63
- const batches = tscommons_core_2.CommonsArray.chunk(expired, 100);
64
- nodecommons_cli_1.CommonsOutput.doing(`Re-queuing expired`);
65
- tally = 0;
66
- for (const batch of batches) {
67
- await this.database.getUrls().updateMany({ _id: { $in: batch } }, {
68
- $set: { status: hydra_crawler_ts_assets_1.EStatus.QUEUED },
84
+ commonsOutputSuccess();
85
+ return true;
86
+ });
87
+ }
88
+ expireFixed(threshold) {
89
+ return __awaiter(this, void 0, void 0, function* () {
90
+ commonsOutputAlert('Running fixed date expiry thread');
91
+ const queued = yield this.listQueuedAndActive();
92
+ commonsOutputDoing(`Searching attempted before date ${commonsDateDateToYmdHis(threshold)}`);
93
+ const count = yield this.database.getUrls().find({ $and: [
94
+ { status: { $ne: EStatus.ARCHIVED } },
95
+ { domain: { $nin: queued } },
96
+ { attempted: { $exists: true } },
97
+ { attempted: { $lt: threshold } }
98
+ ] }, {}).count();
99
+ commonsOutputSuccess(count);
100
+ if (count === 0)
101
+ return false;
102
+ commonsOutputDoing('Re-queuing expired');
103
+ yield this.database.getUrls().updateMany({ $and: [
104
+ { status: { $ne: EStatus.ARCHIVED } },
105
+ { domain: { $nin: queued } },
106
+ { attempted: { $exists: true } },
107
+ { attempted: { $lt: threshold } }
108
+ ] }, {
109
+ $set: { status: EStatus.QUEUED },
69
110
  $unset: { ttl: true }
70
111
  });
71
- tally += 100;
72
- nodecommons_cli_1.CommonsOutput.progress(tally);
73
- }
74
- nodecommons_cli_1.CommonsOutput.success();
75
- return true;
76
- }
77
- async expireFixed(threshold) {
78
- nodecommons_cli_1.CommonsOutput.alert(`Running fixed date expiry thread`);
79
- const queued = await this.listQueuedAndActive();
80
- nodecommons_cli_1.CommonsOutput.doing(`Searching attempted before date ${threshold}`);
81
- const count = await this.database.getUrls().find({ $and: [
82
- { status: { $ne: hydra_crawler_ts_assets_1.EStatus.ARCHIVED } },
83
- { domain: { $nin: queued } },
84
- { attempted: { $exists: true } },
85
- { attempted: { $lt: threshold } }
86
- ] }).count();
87
- nodecommons_cli_1.CommonsOutput.success(count);
88
- if (count === 0)
89
- return false;
90
- nodecommons_cli_1.CommonsOutput.doing(`Re-queuing expired`);
91
- await this.database.getUrls().updateMany({ $and: [
92
- { status: { $ne: hydra_crawler_ts_assets_1.EStatus.ARCHIVED } },
93
- { domain: { $nin: queued } },
94
- { attempted: { $exists: true } },
95
- { attempted: { $lt: threshold } }
96
- ] }, {
97
- $set: { status: hydra_crawler_ts_assets_1.EStatus.QUEUED },
98
- $unset: { ttl: true }
112
+ commonsOutputSuccess();
113
+ commonsOutputDoing('Searching for expired URLs');
114
+ return true;
99
115
  });
100
- nodecommons_cli_1.CommonsOutput.success();
101
- nodecommons_cli_1.CommonsOutput.doing(`Searching for expired URLs`);
102
- return true;
103
116
  }
104
117
  }
105
- exports.Expirer = Expirer;
118
+ //# sourceMappingURL=expirer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"expirer.js","sourceRoot":"","sources":["../../src/classes/expirer.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,iBAAiB,EAAE,uBAAuB,EAAE,0BAA0B,EAAE,MAAM,mBAAmB,CAAC;AAE3G,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAGlD,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAIzH,OAAO,EAAgC,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAE3F,MAAM,OAAO,OAAO;IACnB,YACU,MAAc,EACd,QAAyB;QADzB,WAAM,GAAN,MAAM,CAAQ;QACd,aAAQ,GAAR,QAAQ,CAAiB;IAChC,CAAC;IAEU,mBAAmB;;YAChC,kBAAkB,CAAC,qCAAqC,CAAC,CAAC;YAC1D,MAAM,OAAO,GAAmC,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,SAAS,CAAc;gBAC7F,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,CAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAE,EAAE,EAAE,EAAE;gBACnE,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE;aAC/B,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;YAE3B,MAAM,MAAM,GAAkB,MAAM,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;YAE3F,oBAAoB,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAEpC,OAAO,MAAM;gBACX,gDAAgD;iBAC/C,GAAG,CAAC,CAAC,CAAc,EAAU,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3C,CAAC;KAAA;IAEY,MAAM,CAAC,KAAc;;YACjC,kBAAkB,CAAC,uBAAuB,CAAC,CAAC;YAE5C,MAAM,MAAM,GAAa,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAE1D,kBAAkB,CAAC,4BAA4B,CAAC,CAAC;YAEjD,MAAM,OAAO,GAAiB,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,IAAI,CACxD,EAAE,IAAI,EAAE;oBACN,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE;gCAChB,OAAO,CAAC,QAAQ;gCAChB,OAAO,CAAC,MAAM;gCACd,OAAO,CAAC,MAAM;6BACf,EAAE,EAAE;oBACL,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE;oBAC5B,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE;iBACjC,EAAC,EACF,EAAE,CACH,CAAC;YAEF,MAAM,GAAG,GAAW,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC;YAEhD,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,OAAO,IAAI,EAAE;gBACZ,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;oBAAE,qBAAqB,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBACtE,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,KAAK;oBAAE,MAAM;gBAEhD,MAAM,GAAG,GAAc,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,MAAM,QAAQ,GAAW,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAE5D,IAAI,CAAC,0BAA0B,CAAC,GAAG,EAAE,WAAW,CAAC;oBAAE,SAAS;gBAE5D,MAAM,SAAS,GAAY,GAAG,CAAC,WAAW,CAAU,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC;gBACtE,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAC,GAAG,GAAG,EAAE;oBACjC,iEAAiE;oBACjE,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;oBACzB,KAAK,EAAE,CAAC;iBACR;aACD;YACD,oBAAoB,CAAC,KAAK,CAAC,CAAC;YAE5B,IAAI,KAAK,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YAE9B,MAAM,OAAO,GAAe,iBAAiB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAE5D,kBAAkB,CAAC,oBAAoB,CAAC,CAAC;YACzC,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;gBAC5B,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,UAAU,CACtC,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB;oBACE,IAAI,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;oBAChC,MAAM,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE;iBACtB,CACF,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,qBAAqB,CAAC,KAAK,CAAC,CAAC;aAC7B;YACD,oBAAoB,EAAE,CAAC;YAEvB,OAAO,IAAI,CAAC;QACb,CAAC;KAAA;IAEY,WAAW,CAAC,SAAe;;YACvC,kBAAkB,CAAC,kCAAkC,CAAC,CAAC;YAEvD,MAAM,MAAM,GAAa,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAE1D,kBAAkB,CAAC,mCAAmC,uBAAuB,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;YAC5F,MAAM,KAAK,GAAW,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,IAAI,CACtD,EAAE,IAAI,EAAE;oBACN,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,QAAQ,EAAE,EAAE;oBACrC,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE;oBAC5B,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE;oBAChC,EAAE,SAAS,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE;iBAClC,EAAC,EACF,EAAE,CACH,CAAC,KAAK,EAAE,CAAC;YACV,oBAAoB,CAAC,KAAK,CAAC,CAAC;YAE5B,IAAI,KAAK,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YAE9B,kBAAkB,CAAC,oBAAoB,CAAC,CAAC;YACzC,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,UAAU,CACtC,EAAE,IAAI,EAAE;oBACN,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,QAAQ,EAAE,EAAE;oBACrC,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE;oBAC5B,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE;oBAChC,EAAE,SAAS,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE;iBAClC,EAAC,EACF;gBACE,IAAI,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;gBAChC,MAAM,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE;aACtB,CACF,CAAC;YACF,oBAAoB,EAAE,CAAC;YAEvB,kBAAkB,CAAC,4BAA4B,CAAC,CAAC;YAEjD,OAAO,IAAI,CAAC;QACb,CAAC;KAAA;CACD"}
@@ -0,0 +1,8 @@
1
+ import { IExpiry } from '../interfaces/iexpiry';
2
+ export declare class Expiry {
3
+ static loadFromFile(file: string): IExpiry[];
4
+ private static getScore;
5
+ private expiries;
6
+ add(expiries: IExpiry[]): void;
7
+ getBestExpiry(url: string): number;
8
+ }
@@ -1,41 +1,38 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Expiry = void 0;
4
- const url_1 = require("url");
5
- const tscommons_core_1 = require("tscommons-core");
6
- const nodecommons_file_1 = require("nodecommons-file");
7
- const matcher_1 = require("../helpers/matcher");
8
- const iexpiry_1 = require("../interfaces/iexpiry");
9
- class Expiry {
1
+ import { URL } from 'url';
2
+ import { commonsTypeHasPropertyString, commonsTypeIsArray, commonsTypeIsTArray } from 'tscommons-es-core';
3
+ import { commonsFileReadJsonFile } from 'nodecommons-es-file';
4
+ import { Matcher } from '../helpers/matcher';
5
+ import { isIExpiry, toIExpiry } from '../interfaces/iexpiry';
6
+ export class Expiry {
10
7
  constructor() {
11
8
  this.expiries = [];
12
9
  }
13
10
  static loadFromFile(file) {
14
- const json = nodecommons_file_1.CommonsFile.readJsonFile(file);
15
- if (!tscommons_core_1.CommonsType.isArray(json))
11
+ const json = commonsFileReadJsonFile(file);
12
+ if (!commonsTypeIsArray(json))
16
13
  throw new Error('Invalid expiry list');
17
14
  const expiries = json
18
15
  .map((entry) => {
19
16
  try {
20
- return iexpiry_1.toIExpiry(entry);
17
+ return toIExpiry(entry);
21
18
  }
22
19
  catch (e) {
23
20
  return undefined;
24
21
  }
25
22
  });
26
- if (!tscommons_core_1.CommonsType.isTArray(expiries, iexpiry_1.isIExpiry))
23
+ if (!commonsTypeIsTArray(expiries, isIExpiry))
27
24
  throw new Error(`Invalid JSON list in ${file}`);
28
25
  return expiries;
29
26
  }
30
27
  static getScore(expiry) {
31
28
  let total = 0;
32
- if (tscommons_core_1.CommonsType.hasPropertyString(expiry.match, 'search')) {
29
+ if (commonsTypeHasPropertyString(expiry.match, 'search')) {
33
30
  total += 1000 + expiry.match.search.length;
34
31
  }
35
- if (tscommons_core_1.CommonsType.hasPropertyString(expiry.match, 'pathname')) {
32
+ if (commonsTypeHasPropertyString(expiry.match, 'pathname')) {
36
33
  total += 1000 + expiry.match.pathname.length;
37
34
  }
38
- if (tscommons_core_1.CommonsType.hasPropertyString(expiry.match, 'hostname')) {
35
+ if (commonsTypeHasPropertyString(expiry.match, 'hostname')) {
39
36
  total += 1000 + expiry.match.hostname.length;
40
37
  }
41
38
  return total;
@@ -46,11 +43,11 @@ class Expiry {
46
43
  }
47
44
  }
48
45
  getBestExpiry(url) {
49
- const whatwg = new url_1.URL(url);
46
+ const whatwg = new URL(url);
50
47
  let best;
51
48
  let bestScore;
52
49
  for (const expiry of this.expiries) {
53
- if (!matcher_1.Matcher.matches(whatwg, expiry.match))
50
+ if (!Matcher.matches(whatwg, expiry.match))
54
51
  continue;
55
52
  const score = Expiry.getScore(expiry);
56
53
  if (bestScore === undefined || score > bestScore) {
@@ -63,4 +60,4 @@ class Expiry {
63
60
  return best.expiry;
64
61
  }
65
62
  }
66
- exports.Expiry = Expiry;
63
+ //# sourceMappingURL=expiry.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"expiry.js","sourceRoot":"","sources":["../../src/classes/expiry.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAE1B,OAAO,EAAE,4BAA4B,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAE1G,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAE9D,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAE7C,OAAO,EAAW,SAAS,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAEtE,MAAM,OAAO,MAAM;IAAnB;QAmCS,aAAQ,GAAc,EAAE,CAAC;IA6BlC,CAAC;IA/DO,MAAM,CAAC,YAAY,CAAC,IAAY;QACtC,MAAM,IAAI,GAAY,uBAAuB,CAAC,IAAI,CAAC,CAAC;QACpD,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAEtE,MAAM,QAAQ,GAA0B,IAAI;aACzC,GAAG,CAAC,CAAC,KAAc,EAAqB,EAAE;YAC1C,IAAI;gBACH,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;aACxB;YAAC,OAAO,CAAC,EAAE;gBACX,OAAO,SAAS,CAAC;aACjB;QACF,CAAC,CAAC,CAAC;QAEL,IAAI,CAAC,mBAAmB,CAAU,QAAQ,EAAE,SAAS,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,EAAE,CAAC,CAAC;QAExG,OAAO,QAAQ,CAAC;IACjB,CAAC;IAEO,MAAM,CAAC,QAAQ,CAAC,MAAe;QACtC,IAAI,KAAK,GAAW,CAAC,CAAC;QAEtB,IAAI,4BAA4B,CAAC,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE;YACzD,KAAK,IAAI,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,MAAO,CAAC,MAAM,CAAC;SAC5C;QACD,IAAI,4BAA4B,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,EAAE;YAC3D,KAAK,IAAI,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,QAAS,CAAC,MAAM,CAAC;SAC9C;QACD,IAAI,4BAA4B,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,EAAE;YAC3D,KAAK,IAAI,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,QAAS,CAAC,MAAM,CAAC;SAC9C;QAED,OAAO,KAAK,CAAC;IACd,CAAC;IAIM,GAAG,CAAC,QAAmB;QAC7B,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE;YAC9B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;SAC3B;IACF,CAAC;IAEM,aAAa,CAAC,GAAW;QAC/B,MAAM,MAAM,GAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjC,IAAI,IAAuB,CAAC;QAC5B,IAAI,SAA2B,CAAC;QAEhC,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE;YACnC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC;gBAAE,SAAS;YAErD,MAAM,KAAK,GAAW,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAE9C,IAAI,SAAS,KAAK,SAAS,IAAI,KAAK,GAAG,SAAS,EAAE;gBACjD,IAAI,GAAG,MAAM,CAAC;gBACd,SAAS,GAAG,KAAK,CAAC;aAClB;SACD;QAED,IAAI,IAAI,KAAK,SAAS;YAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAEtE,OAAO,IAAI,CAAC,MAAM,CAAC;IACpB,CAAC;CACD"}
@@ -0,0 +1,9 @@
1
+ import { IMatch } from '../interfaces/imatch';
2
+ import { EList } from '../enums/elist';
3
+ export declare class Lists {
4
+ private lists;
5
+ constructor();
6
+ add(list: EList, entries: IMatch[]): void;
7
+ match(list: EList, url: string): boolean;
8
+ listHostnames(list: EList): string[];
9
+ }
@@ -1,12 +1,9 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Lists = void 0;
4
- const matcher_1 = require("../helpers/matcher");
5
- const elist_1 = require("../enums/elist");
6
- class Lists {
1
+ import { Matcher } from '../helpers/matcher';
2
+ import { ELISTS } from '../enums/elist';
3
+ export class Lists {
7
4
  constructor() {
8
5
  this.lists = new Map();
9
- for (const list of elist_1.ELISTS) {
6
+ for (const list of ELISTS) {
10
7
  this.lists.set(list, []);
11
8
  }
12
9
  }
@@ -19,7 +16,7 @@ class Lists {
19
16
  const entries = this.lists.get(list);
20
17
  if (!entries)
21
18
  throw new Error('Unknown list');
22
- return matcher_1.Matcher.isAnyMatch(url, entries);
19
+ return Matcher.isAnyMatch(url, entries);
23
20
  }
24
21
  listHostnames(list) {
25
22
  const entries = this.lists.get(list);
@@ -28,16 +25,14 @@ class Lists {
28
25
  return entries
29
26
  .filter((item) => item.hostname !== undefined)
30
27
  .map((item) => item.hostname)
31
- .map((hostname) => {
32
- return hostname
33
- .replace(/\(\^\|\\.\)/g, '')
34
- .replace(/\\./g, '.')
35
- .replace(/[$^]/g, '')
36
- .replace(/[^-a-z0-9.]/gi, '')
37
- .replace('www.', '')
38
- .trim();
39
- })
28
+ .map((hostname) => hostname
29
+ .replace(/\(\^\|\\.\)/g, '')
30
+ .replace(/\\./g, '.')
31
+ .replace(/[$^]/g, '')
32
+ .replace(/[^-a-z0-9.]/gi, '')
33
+ .replace('www.', '')
34
+ .trim())
40
35
  .filter((hostname) => hostname !== '');
41
36
  }
42
37
  }
43
- exports.Lists = Lists;
38
+ //# sourceMappingURL=lists.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"lists.js","sourceRoot":"","sources":["../../src/classes/lists.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAI7C,OAAO,EAAS,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAE/C,MAAM,OAAO,KAAK;IAGjB;QAFQ,UAAK,GAAyB,IAAI,GAAG,EAAmB,CAAC;QAGhE,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE;YAC1B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;SACzB;IACF,CAAC;IAEM,GAAG,CAAC,IAAW,EAAE,OAAiB;QACxC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;YAC5B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SAClC;IACF,CAAC;IAEM,KAAK,CAAC,IAAW,EAAE,GAAW;QACpC,MAAM,OAAO,GAAuB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACzD,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,cAAc,CAAC,CAAC;QAE9C,OAAO,OAAO,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IAEM,aAAa,CAAC,IAAW;QAC/B,MAAM,OAAO,GAAuB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACzD,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,cAAc,CAAC,CAAC;QAE9C,OAAO,OAAO;aACX,MAAM,CAAC,CAAC,IAAY,EAAW,EAAE,CAAC,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC;aAC9D,GAAG,CAAC,CAAC,IAAY,EAAU,EAAE,CAAC,IAAI,CAAC,QAAS,CAAC;aAC7C,GAAG,CAAC,CAAC,QAAgB,EAAU,EAAE,CAAC,QAAQ;aACxC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;aAC3B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;aACpB,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;aAC5B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;aACnB,IAAI,EAAE,CACR;aACA,MAAM,CAAC,CAAC,QAAgB,EAAW,EAAE,CAAC,QAAQ,KAAK,EAAE,CAAC,CAAC;IAC3D,CAAC;CACD"}
@@ -0,0 +1,15 @@
1
+ import { TCrawlConfig } from '../types/tcrawl-config';
2
+ import { TRobotsConfig } from '../types/trobots-config';
3
+ import { Tracker } from './tracker';
4
+ export declare class Robot {
5
+ private domain;
6
+ private crawlConfig;
7
+ private robotsConfig;
8
+ private tracker?;
9
+ static parse(robotstxt: string, robotsConfig: TRobotsConfig): string[];
10
+ private paths;
11
+ constructor(domain: string, crawlConfig: TCrawlConfig, robotsConfig: TRobotsConfig, tracker?: Tracker | undefined);
12
+ private checkDomain;
13
+ load(): Promise<void>;
14
+ isDisallowed(url: string): boolean;
15
+ }
@@ -1,11 +1,17 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Robot = void 0;
4
- const http = require("http");
5
- const https = require("https");
6
- const url_1 = require("url");
7
- const crawler_1 = require("./crawler");
8
- class Robot {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import * as http from 'http';
11
+ import * as https from 'https';
12
+ import { URL } from 'url';
13
+ import { Crawler } from './crawler';
14
+ export class Robot {
9
15
  constructor(domain, crawlConfig, robotsConfig, tracker) {
10
16
  this.domain = domain;
11
17
  this.crawlConfig = crawlConfig;
@@ -32,31 +38,35 @@ class Robot {
32
38
  }
33
39
  return [...new Set(paths)];
34
40
  }
35
- async checkDomain(protocol) {
36
- let outcome;
37
- try {
38
- switch (protocol) {
39
- case 'http':
40
- outcome = await crawler_1.Crawler.request(http, `http://${this.domain}/robots.txt`, this.crawlConfig.connectTimeout, this.crawlConfig.maxFileSize, this.tracker);
41
- break;
42
- case 'https':
43
- outcome = await crawler_1.Crawler.request(https, `https://${this.domain}/robots.txt`, this.crawlConfig.connectTimeout, this.crawlConfig.maxFileSize, this.tracker);
44
- break;
45
- default: throw new Error('Unknown protocol');
41
+ checkDomain(protocol) {
42
+ return __awaiter(this, void 0, void 0, function* () {
43
+ let outcome;
44
+ try {
45
+ switch (protocol) {
46
+ case 'http':
47
+ outcome = yield Crawler.request(http, `http://${this.domain}/robots.txt`, this.crawlConfig.connectTimeout, this.crawlConfig.maxFileSize, this.tracker);
48
+ break;
49
+ case 'https':
50
+ outcome = yield Crawler.request(https, `https://${this.domain}/robots.txt`, this.crawlConfig.connectTimeout, this.crawlConfig.maxFileSize, this.tracker);
51
+ break;
52
+ default: throw new Error('Unknown protocol');
53
+ }
46
54
  }
47
- }
48
- catch (ex) { /* ignore */ }
49
- if (!outcome || !outcome.data)
50
- return [];
51
- return Robot.parse(outcome.data.toString(), this.robotsConfig);
55
+ catch (ex) { /* ignore */ }
56
+ if (!outcome || !outcome.data)
57
+ return [];
58
+ return Robot.parse(outcome.data.toString(), this.robotsConfig);
59
+ });
52
60
  }
53
- async load() {
54
- for (const protocol of ['http', 'https']) {
55
- this.paths.set(protocol, await this.checkDomain(protocol));
56
- }
61
+ load() {
62
+ return __awaiter(this, void 0, void 0, function* () {
63
+ for (const protocol of ['http', 'https']) {
64
+ this.paths.set(protocol, yield this.checkDomain(protocol));
65
+ }
66
+ });
57
67
  }
58
68
  isDisallowed(url) {
59
- const whatwg = new url_1.URL(url);
69
+ const whatwg = new URL(url);
60
70
  const protocol = whatwg.protocol.match(/^(http(?:s?)):$/);
61
71
  if (!protocol)
62
72
  return false;
@@ -69,4 +79,4 @@ class Robot {
69
79
  return false;
70
80
  }
71
81
  }
72
- exports.Robot = Robot;
82
+ //# sourceMappingURL=robot.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"robot.js","sourceRoot":"","sources":["../../src/classes/robot.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAC/B,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAO1B,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,MAAM,OAAO,KAAK;IAyBjB,YACU,MAAc,EACd,WAAyB,EACzB,YAA2B,EAC3B,OAAiB;QAHjB,WAAM,GAAN,MAAM,CAAQ;QACd,gBAAW,GAAX,WAAW,CAAc;QACzB,iBAAY,GAAZ,YAAY,CAAe;QAC3B,YAAO,GAAP,OAAO,CAAU;QAE1B,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC1C,CAAC;IA/BM,MAAM,CAAC,KAAK,CAAC,SAAiB,EAAE,YAA2B;QACjE,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,MAAM,QAAQ,GAAa,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACtE,KAAK,IAAI,OAAO,IAAI,QAAQ,EAAE;YAC7B,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;YAEzB,MAAM,KAAK,GAA0B,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;YAClF,IAAI,CAAC,KAAK;gBAAE,SAAS;YAErB,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE;gBAC/C,MAAM,KAAK,GAA0B,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;gBAC9E,IAAI,CAAC,KAAK;oBAAE,SAAS;gBAErB,IAAI,YAAY,CAAC,yBAAyB,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG;oBAAE,SAAS;gBACzE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;aACrB;SACD;QAED,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IAC5B,CAAC;IAaa,WAAW,CAAC,QAAgB;;YACzC,IAAI,OAAkC,CAAC;YACvC,IAAI;gBACH,QAAQ,QAAQ,EAAE;oBACjB,KAAK,MAAM;wBACV,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,CAC7B,IAAI,EACJ,UAAU,IAAI,CAAC,MAAM,aAAa,EAClC,IAAI,CAAC,WAAW,CAAC,cAAc,EAC/B,IAAI,CAAC,WAAW,CAAC,WAAW,EAC5B,IAAI,CAAC,OAAO,CACb,CAAC;wBACF,MAAM;oBACP,KAAK,OAAO;wBACX,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,CAC7B,KAAK,EACL,WAAW,IAAI,CAAC,MAAM,aAAa,EACnC,IAAI,CAAC,WAAW,CAAC,cAAc,EAC/B,IAAI,CAAC,WAAW,CAAC,WAAW,EAC5B,IAAI,CAAC,OAAO,CACb,CAAC;wBACF,MAAM;oBACP,OAAO,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;iBAC7C;aACD;YAAC,OAAO,EAAE,EAAE,EAAE,YAAY,EAAE;YAE7B,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI;gBAAE,OAAO,EAAE,CAAC;YAEzC,OAAO,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;QAChE,CAAC;KAAA;IAEY,IAAI;;YAChB,KAAK,MAAM,QAAQ,IAAI,CAAE,MAAM,EAAE,OAAO,CAAE,EAAE;gBAC3C,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;aAC3D;QACF,CAAC;KAAA;IAEM,YAAY,CAAC,GAAW;QAC9B,MAAM,MAAM,GAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjC,MAAM,QAAQ,GAA0B,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACjF,IAAI,CAAC,QAAQ;YAAE,OAAO,KAAK,CAAC;QAE5B,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAE/C,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAE,EAAE;YAChD,IAAI,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI;gBAAE,OAAO,IAAI,CAAC;SACpE;QAED,OAAO,KAAK,CAAC;IACd,CAAC;CACD"}
@@ -0,0 +1,25 @@
1
+ import { EStatus } from 'hydra-crawler-ts-assets';
2
+ import { SocketIoServer } from '../servers/socket-io.server';
3
+ import { DatabaseService } from '../services/database.service';
4
+ export declare class Tracker {
5
+ private databaseService;
6
+ private socketIoServer;
7
+ private map;
8
+ private current;
9
+ private index;
10
+ private fetches;
11
+ private linkTallies;
12
+ private domainTallies;
13
+ constructor(databaseService: DatabaseService, socketIoServer: SocketIoServer);
14
+ fetching(url: string): Promise<void>;
15
+ private outcome;
16
+ done(url: string, statusCode: number): Promise<void>;
17
+ failed(url: string): Promise<void>;
18
+ bandwidth(size: number): void;
19
+ delta(status: EStatus, delta: number): void;
20
+ abort(): void;
21
+ link(count?: number): void;
22
+ domain(): void;
23
+ private sync;
24
+ start(): Promise<void>;
25
+ }