hydra-crawler 1.4.5 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. package/dist/apis/autocomplete.api.d.ts +7 -0
  2. package/dist/apis/autocomplete.api.js +15 -9
  3. package/dist/apis/autocomplete.api.js.map +1 -0
  4. package/dist/apis/bugs.api.d.ts +7 -0
  5. package/dist/apis/bugs.api.js +21 -15
  6. package/dist/apis/bugs.api.js.map +1 -0
  7. package/dist/apis/crawl.api.d.ts +7 -0
  8. package/dist/apis/crawl.api.js +15 -9
  9. package/dist/apis/crawl.api.js.map +1 -0
  10. package/dist/apis/domains.api.d.ts +7 -0
  11. package/dist/apis/domains.api.js +24 -19
  12. package/dist/apis/domains.api.js.map +1 -0
  13. package/dist/apis/images.api.d.ts +7 -0
  14. package/dist/apis/images.api.js +20 -14
  15. package/dist/apis/images.api.js.map +1 -0
  16. package/dist/apis/statistics.api.d.ts +8 -0
  17. package/dist/apis/statistics.api.js +27 -20
  18. package/dist/apis/statistics.api.js.map +1 -0
  19. package/dist/apis/test.api.d.ts +5 -0
  20. package/dist/apis/test.api.js +15 -9
  21. package/dist/apis/test.api.js.map +1 -0
  22. package/dist/apis/urls.api.d.ts +7 -0
  23. package/dist/apis/urls.api.js +21 -15
  24. package/dist/apis/urls.api.js.map +1 -0
  25. package/dist/apps/cleanup.app.d.ts +19 -0
  26. package/dist/apps/cleanup.app.js +118 -100
  27. package/dist/apps/cleanup.app.js.map +1 -0
  28. package/dist/apps/cross-populate-export.app.d.ts +12 -0
  29. package/dist/apps/cross-populate-export.app.js +60 -47
  30. package/dist/apps/cross-populate-export.app.js.map +1 -0
  31. package/dist/apps/cross-populate-import.app.d.ts +12 -0
  32. package/dist/apps/cross-populate-import.app.js +64 -51
  33. package/dist/apps/cross-populate-import.app.js.map +1 -0
  34. package/dist/apps/denylist.app.d.ts +17 -0
  35. package/dist/apps/denylist.app.js +115 -98
  36. package/dist/apps/denylist.app.js.map +1 -0
  37. package/dist/apps/expire.app.d.ts +19 -0
  38. package/dist/apps/expire.app.js +44 -31
  39. package/dist/apps/expire.app.js.map +1 -0
  40. package/dist/apps/extract-text.app.d.ts +8 -0
  41. package/dist/apps/extract-text.app.js +43 -35
  42. package/dist/apps/extract-text.app.js.map +1 -0
  43. package/dist/apps/hydra.app.d.ts +34 -0
  44. package/dist/apps/hydra.app.js +150 -137
  45. package/dist/apps/hydra.app.js.map +1 -0
  46. package/dist/apps/import.app.d.ts +11 -0
  47. package/dist/apps/import.app.js +44 -32
  48. package/dist/apps/import.app.js.map +1 -0
  49. package/dist/apps/internal-hydra-common.app.d.ts +28 -0
  50. package/dist/apps/internal-hydra-common.app.js +5 -11
  51. package/dist/apps/internal-hydra-common.app.js.map +1 -0
  52. package/dist/apps/query.app.d.ts +20 -0
  53. package/dist/apps/query.app.js +63 -49
  54. package/dist/apps/query.app.js.map +1 -0
  55. package/dist/apps/reattempt.app.d.ts +17 -0
  56. package/dist/apps/reattempt.app.js +66 -53
  57. package/dist/apps/reattempt.app.js.map +1 -0
  58. package/dist/apps/requeue-domain.app.d.ts +13 -0
  59. package/dist/apps/requeue-domain.app.js +50 -37
  60. package/dist/apps/requeue-domain.app.js.map +1 -0
  61. package/dist/apps/seed.app.d.ts +15 -0
  62. package/dist/apps/seed.app.js +53 -40
  63. package/dist/apps/seed.app.js.map +1 -0
  64. package/dist/apps/startup.app.d.ts +11 -0
  65. package/dist/apps/startup.app.js +51 -38
  66. package/dist/apps/startup.app.js.map +1 -0
  67. package/dist/apps/unarchive.app.d.ts +15 -0
  68. package/dist/apps/unarchive.app.js +67 -54
  69. package/dist/apps/unarchive.app.js.map +1 -0
  70. package/dist/classes/cleaner.d.ts +12 -0
  71. package/dist/classes/cleaner.js +227 -207
  72. package/dist/classes/cleaner.js.map +1 -0
  73. package/dist/classes/crawler.d.ts +34 -0
  74. package/dist/classes/crawler.js +248 -241
  75. package/dist/classes/crawler.js.map +1 -0
  76. package/dist/classes/dns.d.ts +3 -0
  77. package/dist/classes/dns.js +10 -13
  78. package/dist/classes/dns.js.map +1 -0
  79. package/dist/classes/expirer.d.ts +10 -0
  80. package/dist/classes/expirer.js +107 -94
  81. package/dist/classes/expirer.js.map +1 -0
  82. package/dist/classes/expiry.d.ts +8 -0
  83. package/dist/classes/expiry.js +16 -19
  84. package/dist/classes/expiry.js.map +1 -0
  85. package/dist/classes/lists.d.ts +9 -0
  86. package/dist/classes/lists.js +13 -18
  87. package/dist/classes/lists.js.map +1 -0
  88. package/dist/classes/robot.d.ts +15 -0
  89. package/dist/classes/robot.js +40 -30
  90. package/dist/classes/robot.js.map +1 -0
  91. package/dist/classes/tracker.d.ts +25 -0
  92. package/dist/classes/tracker.js +82 -64
  93. package/dist/classes/tracker.js.map +1 -0
  94. package/dist/cli.d.ts +1 -0
  95. package/dist/cli.js +72 -65
  96. package/dist/cli.js.map +1 -0
  97. package/dist/enums/eavailable-strategy.d.ts +4 -0
  98. package/dist/enums/eavailable-strategy.js +3 -5
  99. package/dist/enums/eavailable-strategy.js.map +1 -0
  100. package/dist/enums/elist.d.ts +7 -0
  101. package/dist/enums/elist.js +7 -11
  102. package/dist/enums/elist.js.map +1 -0
  103. package/dist/enums/eserver.d.ts +8 -0
  104. package/dist/enums/eserver.js +3 -5
  105. package/dist/enums/eserver.js.map +1 -0
  106. package/dist/enums/ex-powered-by.d.ts +6 -0
  107. package/dist/enums/ex-powered-by.js +3 -5
  108. package/dist/enums/ex-powered-by.js.map +1 -0
  109. package/dist/helpers/matcher.d.ts +5 -0
  110. package/dist/helpers/matcher.js +2 -5
  111. package/dist/helpers/matcher.js.map +1 -0
  112. package/dist/helpers/random.d.ts +4 -0
  113. package/dist/helpers/random.js +2 -5
  114. package/dist/helpers/random.js.map +1 -0
  115. package/dist/helpers/utf-decoder.d.ts +4 -0
  116. package/dist/helpers/utf-decoder.js +3 -6
  117. package/dist/helpers/utf-decoder.js.map +1 -0
  118. package/dist/interfaces/iexpiry.d.ts +7 -0
  119. package/dist/interfaces/iexpiry.js +9 -13
  120. package/dist/interfaces/iexpiry.js.map +1 -0
  121. package/dist/interfaces/imatch.d.ts +6 -0
  122. package/dist/interfaces/imatch.js +6 -9
  123. package/dist/interfaces/imatch.js.map +1 -0
  124. package/dist/interfaces/iparser-config.d.ts +4 -0
  125. package/dist/interfaces/iparser-config.js +4 -7
  126. package/dist/interfaces/iparser-config.js.map +1 -0
  127. package/dist/interfaces/iparser.d.ts +8 -0
  128. package/dist/interfaces/iparser.js +2 -2
  129. package/dist/interfaces/iparser.js.map +1 -0
  130. package/dist/interfaces/irequest-outcome.d.ts +11 -0
  131. package/dist/interfaces/irequest-outcome.js +2 -2
  132. package/dist/interfaces/irequest-outcome.js.map +1 -0
  133. package/dist/interfaces/iserver.d.ts +4 -0
  134. package/dist/interfaces/iserver.js +2 -2
  135. package/dist/interfaces/iserver.js.map +1 -0
  136. package/dist/parsers/accessibility-metrics.parser.d.ts +11 -0
  137. package/dist/parsers/accessibility-metrics.parser.js +34 -26
  138. package/dist/parsers/accessibility-metrics.parser.js.map +1 -0
  139. package/dist/parsers/asp-error.parser.d.ts +12 -0
  140. package/dist/parsers/asp-error.parser.js +36 -28
  141. package/dist/parsers/asp-error.parser.js.map +1 -0
  142. package/dist/parsers/bad-words.parser.d.ts +10 -0
  143. package/dist/parsers/bad-words.parser.js +21 -13
  144. package/dist/parsers/bad-words.parser.js.map +1 -0
  145. package/dist/parsers/complex-english.parser.d.ts +15 -0
  146. package/dist/parsers/complex-english.parser.js +33 -25
  147. package/dist/parsers/complex-english.parser.js.map +1 -0
  148. package/dist/parsers/data.parser.d.ts +14 -0
  149. package/dist/parsers/data.parser.js +12 -16
  150. package/dist/parsers/data.parser.js.map +1 -0
  151. package/dist/parsers/dictionary.parser.d.ts +19 -0
  152. package/dist/parsers/dictionary.parser.js +47 -39
  153. package/dist/parsers/dictionary.parser.js.map +1 -0
  154. package/dist/parsers/html.parser.d.ts +13 -0
  155. package/dist/parsers/html.parser.js +4 -8
  156. package/dist/parsers/html.parser.js.map +1 -0
  157. package/dist/parsers/hyperlinks.parser.d.ts +20 -0
  158. package/dist/parsers/hyperlinks.parser.js +82 -77
  159. package/dist/parsers/hyperlinks.parser.js.map +1 -0
  160. package/dist/parsers/image-tags.parser.d.ts +20 -0
  161. package/dist/parsers/image-tags.parser.js +38 -34
  162. package/dist/parsers/image-tags.parser.js.map +1 -0
  163. package/dist/parsers/jpeg.parser.d.ts +11 -0
  164. package/dist/parsers/jpeg.parser.js +28 -20
  165. package/dist/parsers/jpeg.parser.js.map +1 -0
  166. package/dist/parsers/paragraphs.parser.d.ts +13 -0
  167. package/dist/parsers/paragraphs.parser.js +33 -40
  168. package/dist/parsers/paragraphs.parser.js.map +1 -0
  169. package/dist/parsers/parser.d.ts +19 -0
  170. package/dist/parsers/parser.js +30 -17
  171. package/dist/parsers/parser.js.map +1 -0
  172. package/dist/parsers/php-error.parser.d.ts +12 -0
  173. package/dist/parsers/php-error.parser.js +42 -34
  174. package/dist/parsers/php-error.parser.js.map +1 -0
  175. package/dist/parsers/phrase.parser.d.ts +8 -0
  176. package/dist/parsers/phrase.parser.js +16 -11
  177. package/dist/parsers/phrase.parser.js.map +1 -0
  178. package/dist/parsers/regex.parser.d.ts +10 -0
  179. package/dist/parsers/regex.parser.js +30 -22
  180. package/dist/parsers/regex.parser.js.map +1 -0
  181. package/dist/parsers/server.parser.d.ts +12 -0
  182. package/dist/parsers/server.parser.js +66 -56
  183. package/dist/parsers/server.parser.js.map +1 -0
  184. package/dist/parsers/spelling.parser.d.ts +10 -0
  185. package/dist/parsers/spelling.parser.js +21 -13
  186. package/dist/parsers/spelling.parser.js.map +1 -0
  187. package/dist/parsers/string.parser.d.ts +8 -0
  188. package/dist/parsers/string.parser.js +5 -8
  189. package/dist/parsers/string.parser.js.map +1 -0
  190. package/dist/parsers/text.parser.d.ts +8 -0
  191. package/dist/parsers/text.parser.js +24 -18
  192. package/dist/parsers/text.parser.js.map +1 -0
  193. package/dist/parsers/words.parser.d.ts +11 -0
  194. package/dist/parsers/words.parser.js +32 -28
  195. package/dist/parsers/words.parser.js.map +1 -0
  196. package/dist/queries/complex-english.query.d.ts +2 -0
  197. package/dist/queries/complex-english.query.js +37 -38
  198. package/dist/queries/complex-english.query.js.map +1 -0
  199. package/dist/queries/flash-content.query.d.ts +2 -0
  200. package/dist/queries/flash-content.query.js +45 -32
  201. package/dist/queries/flash-content.query.js.map +1 -0
  202. package/dist/queries/linking-to-domains.query.d.ts +2 -0
  203. package/dist/queries/linking-to-domains.query.js +35 -27
  204. package/dist/queries/linking-to-domains.query.js.map +1 -0
  205. package/dist/queries/readability-score.query.d.ts +2 -0
  206. package/dist/queries/readability-score.query.js +21 -13
  207. package/dist/queries/readability-score.query.js.map +1 -0
  208. package/dist/servers/crawl.server.d.ts +35 -0
  209. package/dist/servers/crawl.server.js +133 -121
  210. package/dist/servers/crawl.server.js.map +1 -0
  211. package/dist/servers/express.server.d.ts +8 -0
  212. package/dist/servers/express.server.js +7 -10
  213. package/dist/servers/express.server.js.map +1 -0
  214. package/dist/servers/maintenance.server.d.ts +22 -0
  215. package/dist/servers/maintenance.server.js +42 -36
  216. package/dist/servers/maintenance.server.js.map +1 -0
  217. package/dist/servers/rest.server.d.ts +7 -0
  218. package/dist/servers/rest.server.js +40 -51
  219. package/dist/servers/rest.server.js.map +1 -0
  220. package/dist/servers/socket-io.server.d.ts +12 -0
  221. package/dist/servers/socket-io.server.js +48 -15
  222. package/dist/servers/socket-io.server.js.map +1 -0
  223. package/dist/services/database.service.d.ts +68 -0
  224. package/dist/services/database.service.js +527 -462
  225. package/dist/services/database.service.js.map +1 -0
  226. package/dist/types/tcrawl-config.d.ts +14 -0
  227. package/dist/types/tcrawl-config.js +14 -17
  228. package/dist/types/tcrawl-config.js.map +1 -0
  229. package/dist/types/thydra-config.d.ts +4 -0
  230. package/dist/types/thydra-config.js +4 -7
  231. package/dist/types/thydra-config.js.map +1 -0
  232. package/dist/types/tparser-ctor.d.ts +7 -0
  233. package/dist/types/tparser-ctor.js +2 -2
  234. package/dist/types/tparser-ctor.js.map +1 -0
  235. package/dist/types/tquery.d.ts +7 -0
  236. package/dist/types/tquery.js +2 -2
  237. package/dist/types/tquery.js.map +1 -0
  238. package/dist/types/trobots-config.d.ts +4 -0
  239. package/dist/types/trobots-config.js +4 -7
  240. package/dist/types/trobots-config.js.map +1 -0
  241. package/package.json +41 -29
  242. package/angular/10-es2015.bacd4ae5dd7913ce55f0.js +0 -1
  243. package/angular/10-es5.bacd4ae5dd7913ce55f0.js +0 -1
  244. package/angular/11-es2015.0f031dcf752d1e8eda6b.js +0 -1
  245. package/angular/11-es5.0f031dcf752d1e8eda6b.js +0 -1
  246. package/angular/3rdpartylicenses.txt +0 -1127
  247. package/angular/5-es2015.951498ca9c1bc74e57bf.js +0 -1
  248. package/angular/5-es5.951498ca9c1bc74e57bf.js +0 -1
  249. package/angular/6-es2015.65f680261a3506b88381.js +0 -1
  250. package/angular/6-es5.65f680261a3506b88381.js +0 -1
  251. package/angular/7-es2015.625197f3af1dbf3e805d.js +0 -1
  252. package/angular/7-es5.625197f3af1dbf3e805d.js +0 -1
  253. package/angular/8-es2015.55518901987a5b834309.js +0 -1
  254. package/angular/8-es5.55518901987a5b834309.js +0 -1
  255. package/angular/9-es2015.6cc9bde262564e7836f2.js +0 -1
  256. package/angular/9-es5.6cc9bde262564e7836f2.js +0 -1
  257. package/angular/Roboto-Black.41ed1105a6ebb8ffe34e.woff2 +0 -0
  258. package/angular/Roboto-Black.937491dfcbe64ca9a9f1.woff +0 -0
  259. package/angular/Roboto-BlackItalic.2e1ee657996854c6f427.woff +0 -0
  260. package/angular/Roboto-BlackItalic.50ca4c51ebc27e7e7d2f.woff2 +0 -0
  261. package/angular/Roboto-Bold.73288d91c325e82a5b92.woff +0 -0
  262. package/angular/Roboto-Bold.92fbd4e93cf0a5dbebaa.woff2 +0 -0
  263. package/angular/Roboto-BoldItalic.5f600d98a73d800ae575.woff2 +0 -0
  264. package/angular/Roboto-BoldItalic.6d89acbd21d7e3fbecb2.woff +0 -0
  265. package/angular/Roboto-Light.c27d89ac77468ae18f28.woff2 +0 -0
  266. package/angular/Roboto-Light.d923dfafc0c5183b59aa.woff +0 -0
  267. package/angular/Roboto-LightItalic.506274c7228cf81cae4d.woff2 +0 -0
  268. package/angular/Roboto-LightItalic.d4b8c137518d9d92bb28.woff +0 -0
  269. package/angular/Roboto-Medium.092c6130df8fd2199888.woff +0 -0
  270. package/angular/Roboto-Medium.1d3bced88509b0838984.woff2 +0 -0
  271. package/angular/Roboto-MediumItalic.18ff1628c628080166c1.woff +0 -0
  272. package/angular/Roboto-MediumItalic.d620b8f53f75966fe42e.woff2 +0 -0
  273. package/angular/Roboto-Regular.64cfb66c866ea50cad47.woff2 +0 -0
  274. package/angular/Roboto-Regular.e02e9d6ff5547f7e9962.woff +0 -0
  275. package/angular/Roboto-RegularItalic.4dd2af1e8df532f41db8.woff2 +0 -0
  276. package/angular/Roboto-RegularItalic.5ea38fff9eebef99c5df.woff +0 -0
  277. package/angular/Roboto-Thin.dbd56bd3357dc3617fe5.woff2 +0 -0
  278. package/angular/Roboto-Thin.e7f7c82374bd0ebef14b.woff +0 -0
  279. package/angular/Roboto-ThinItalic.5dd9349c940073834e9a.woff +0 -0
  280. package/angular/Roboto-ThinItalic.a8cef84f735ef887abdc.woff2 +0 -0
  281. package/angular/assets/config/app-config.json +0 -16
  282. package/angular/assets/images/splashbg.jpg +0 -0
  283. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff +0 -0
  284. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff2 +0 -0
  285. package/angular/assets/web-app-commons/fonts/material-icons/material-design-icons-community.css +0 -11293
  286. package/angular/favicon.ico +0 -0
  287. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNa.f2a0933406f783065152.woff +0 -0
  288. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.6467d9a24f234e8e8e07.woff2 +0 -0
  289. package/angular/index.html +0 -16
  290. package/angular/main-es2015.3a582572476c7f292e52.js +0 -1
  291. package/angular/main-es5.3a582572476c7f292e52.js +0 -1
  292. package/angular/polyfills-es2015.7df68534018bc2f6cb09.js +0 -1
  293. package/angular/polyfills-es5.e79468f406fae2989221.js +0 -1
  294. package/angular/runtime-es2015.6d2cff76cdb2790d3308.js +0 -1
  295. package/angular/runtime-es5.6d2cff76cdb2790d3308.js +0 -1
  296. package/angular/styles.c5c6c2534225b85c4ff0.css +0 -1
  297. package/config/bad-words.json +0 -1
  298. package/config/complex-english.json +0 -400
  299. package/config/hydra-auth.json +0 -8
  300. package/config/hydra-crawler.json +0 -84
  301. package/config/list-allow.json +0 -171
  302. package/config/list-deny.json +0 -248
  303. package/config/list-expiry.json +0 -7
  304. package/config/schedule.json +0 -25
  305. package/config/spelling.json +0 -1
@@ -1,10 +1,17 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
4
- const hydra_crawler_ts_assets_2 = require("hydra-crawler-ts-assets");
5
- const hydra_crawler_ts_assets_3 = require("hydra-crawler-ts-assets");
6
- const hydra_crawler_ts_assets_4 = require("hydra-crawler-ts-assets");
7
- const nodecommons_cli_1 = require("nodecommons-cli");
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { isIUrl } from 'hydra-crawler-ts-assets';
11
+ import { isTLink } from 'hydra-crawler-ts-assets';
12
+ import { isTDomain } from 'hydra-crawler-ts-assets';
13
+ import { EStatus } from 'hydra-crawler-ts-assets';
14
+ import { commonsOutputDoing, commonsOutputError, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
8
15
  const domainSort = (a, b) => {
9
16
  if (a.startsWith('http://'))
10
17
  a = a.substr(7);
@@ -24,31 +31,31 @@ const domainSort = (a, b) => {
24
31
  return 1;
25
32
  return 0;
26
33
  };
27
- const query = async (args, databaseService, _lists, _expiry, _parsersConfig) => {
34
+ export const query = (args, databaseService, _lists, _expiry, _parsersConfig) => __awaiter(void 0, void 0, void 0, function* () {
28
35
  const regexStr = args.getString('regex');
29
36
  if (!regexStr) {
30
- nodecommons_cli_1.CommonsOutput.error('No domain regex specified');
37
+ commonsOutputError('No domain regex specified');
31
38
  return;
32
39
  }
33
40
  const regex = new RegExp(regexStr);
34
- nodecommons_cli_1.CommonsOutput.doing('Searching for matching domains');
41
+ commonsOutputDoing('Searching for matching domains');
35
42
  const results = databaseService.getDomains().find({
36
43
  domain: regex
37
- });
38
- const domains = await databaseService.listQueryResults(results, hydra_crawler_ts_assets_3.isTDomain);
39
- nodecommons_cli_1.CommonsOutput.result(domains.length);
44
+ }, {});
45
+ const domains = yield databaseService.listQueryResults(results, isTDomain);
46
+ commonsOutputResult(domains.length);
40
47
  const domainUrls = new Map();
41
48
  for (const domain of domains) {
42
- nodecommons_cli_1.CommonsOutput.doing(`Searching for URLs for ${domain.domain}`);
49
+ commonsOutputDoing(`Searching for URLs for ${domain.domain}`);
43
50
  const results2 = databaseService.getUrls().find({
44
51
  domain: domain.domain,
45
- status: { $ne: hydra_crawler_ts_assets_4.EStatus.ARCHIVED }
46
- });
47
- const urls = await databaseService.listQueryResults(results2, hydra_crawler_ts_assets_1.isIUrl);
48
- nodecommons_cli_1.CommonsOutput.result(urls.length);
52
+ status: { $ne: EStatus.ARCHIVED }
53
+ }, {});
54
+ const urls = yield databaseService.listQueryResults(results2, isIUrl);
55
+ commonsOutputResult(urls.length);
49
56
  domainUrls.set(domain, urls);
50
57
  }
51
- nodecommons_cli_1.CommonsOutput.doing(`Searching for outgoing links to URLs`);
58
+ commonsOutputDoing('Searching for outgoing links to URLs');
52
59
  let tally = 0;
53
60
  const domainUrlLinks = new Map();
54
61
  for (const domain of domains) {
@@ -59,20 +66,20 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
59
66
  for (const url of urls) {
60
67
  const results3 = databaseService.getLinks().find({
61
68
  outgoing: url.url
62
- });
63
- const links = await databaseService.listQueryResults(results3, hydra_crawler_ts_assets_2.isTLink);
69
+ }, {});
70
+ const links = yield databaseService.listQueryResults(results3, isTLink);
64
71
  if (links.length === 0)
65
72
  continue;
66
73
  tally += links.length;
67
- nodecommons_cli_1.CommonsOutput.progress(tally);
74
+ commonsOutputProgress(tally);
68
75
  map.set(url, links);
69
76
  }
70
77
  if (map.size === 0)
71
78
  continue;
72
79
  domainUrlLinks.set(domain, map);
73
80
  }
74
- nodecommons_cli_1.CommonsOutput.result(tally);
75
- nodecommons_cli_1.CommonsOutput.doing(`Constructing JSON object`);
81
+ commonsOutputResult(tally);
82
+ commonsOutputDoing('Constructing JSON object');
76
83
  const outcome = {};
77
84
  for (const domain of domainUrlLinks.keys()) {
78
85
  outcome[domain.domain] = {};
@@ -81,7 +88,7 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
81
88
  .map((link) => link.url);
82
89
  }
83
90
  }
84
- nodecommons_cli_1.CommonsOutput.success();
91
+ commonsOutputSuccess();
85
92
  if (args.hasAttribute('domains-only')) {
86
93
  Object.keys(outcome)
87
94
  .sort(domainSort)
@@ -119,5 +126,6 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
119
126
  else if (args.hasAttribute('json-dump')) {
120
127
  console.log(JSON.stringify(outcome));
121
128
  }
122
- };
123
- exports.default = query;
129
+ });
130
+ // export default query;
131
+ //# sourceMappingURL=linking-to-domains.query.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"linking-to-domains.query.js","sourceRoot":"","sources":["../../src/queries/linking-to-domains.query.ts"],"names":[],"mappings":";;;;;;;;;AAIA,OAAO,EAAQ,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAS,OAAO,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAW,SAAS,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAe,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAiB3J,MAAM,UAAU,GAAqC,CAAC,CAAS,EAAE,CAAS,EAAU,EAAE;IACrF,IAAI,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC7C,IAAI,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC9C,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC1C,IAAI,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC7C,IAAI,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC9C,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAE1C,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACpB,OAAO,CAAC,CAAC;AACV,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,KAAK,GAAW,CAC3B,IAAiB,EACjB,eAAgC,EAChC,MAAa,EACb,OAAe,EACf,cAAyC,EAC1B,EAAE;IAClB,MAAM,QAAQ,GAAqB,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAC3D,IAAI,CAAC,QAAQ,EAAE;QACd,kBAAkB,CAAC,2BAA2B,CAAC,CAAC;QAChD,OAAO;KACP;IACD,MAAM,KAAK,GAAW,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC;IAE3C,kBAAkB,CAAC,gCAAgC,CAAC,CAAC;IACrD,MAAM,OAAO,GAAoB,eAAe,CAAC,UAAU,EAAE,CAAC,IAAI,CAChE;QACE,MAAM,EAAE,KAAK;KACd,EACD,EAAE,CACH,CAAC;IACF,MAAM,OAAO,GAAc,MAAM,eAAe,CAAC,gBAAgB,CAC/D,OAAO,EACP,SAAS,CACV,CAAC;IACF,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAEpC,MAAM,UAAU,GAAyB,IAAI,GAAG,EAAmB,CAAC;IACpE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;QAC7B,kBAAkB,CAAC,0BAA0B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAC9D,MAAM,QAAQ,GAAiB,eAAe,CAAC,OAAO,EAAE,CAAC,IAAI,CAC3D;YACE,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,QAAQ,EAAE;SAClC,EACD,EAAE,CACH,CAAC;QACF,MAAM,IAAI,GAAW,MAAM,eAAe,CAAC,gBAAgB,CACzD,QAAQ,EACR,MAAM,CACP,CAAC;QACF,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEjC,UAAU,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;KAC7B;IAED,kBAAkB,CAAC,sCAAsC,CAAC,CAAC;IAC3D,IAAI,KAAK,GAAW,CAAC,CAAC;IACtB,MAAM,cAAc,GAAqC,IAAI,GAAG,EAA+B,CAAC;IAChG,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;QAC7B,MAAM,IAAI,GAAqB,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACtD,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,SAAS;QAEpC,MAAM,GAAG,GAAuB,IAAI,GAAG,EAAiB,CAAC;QAEzD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;YACvB,MAAM,QAAQ,GAAkB,eAAe,CAAC,QAAQ,EAAE,CAAC,IAAI,CAC7D;gBACE,QAAQ,EAAE,GAAG,CAAC,GAAG;aAClB,EACD,EAAE,CACH,CAAC;YACF,MAAM,KAAK,GAAY,MAAM,eAAe,CAAC,gBAAgB,CAC3D,QAAQ,EACR,OAAO,CACR,CAAC;YAEF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAEjC,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC;YACtB,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAE7B,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;SACpB;QAED,IAAI,GAAG,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE7B,cAAc,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KAChC;IACD,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAE3B,kBAAkB,CAAC,0BAA0B,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,MAAM,IAAI,cAAc,CAAC,IAAI,EAAE,EAAE;QAC3C,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC;QAE5B,KAAK,MAAM,GAAG,IAAI,cAAc,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,IAAI,EAAE,EAAE;YACrD,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,GAAG,CAAC,GAAG,CAAE;iBACpE,GAAG,CAAC,CAAC,IAAW,EAAU,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAC1C;KACD;IACD,oBAAoB,EAAE,CAAC;IAEvB,IAAI,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,EAAE;QACtC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC;aACjB,IAAI,CAAC,UAAU,CAAC;aAChB,OAAO,CAAC,CAAC,MAAc,EAAQ,EAAE;YACjC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;KACL;SAAM,IAAI,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,EAAE;QACzC,MAAM,UAAU,GAA0B,IAAI,GAAG,EAAoB,CAAC;QAEtE,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE;YAC1C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE;gBAC/C,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,EAAE;oBACvC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC;wBAAE,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;oBAClD,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,QAAQ,CAAC,GAAG,CAAC;wBAAE,UAAU,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;iBACxE;aACD;SACD;QAED,MAAM,IAAI,GAAa,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;aACjD,IAAI,CAAC,UAAU,CAAC,CAAC;QACpB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;YACvB,UAAU,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAEtC,IAAI,KAAK,GAAY,IAAI,CAAC;YAC1B,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAE,EAAE;gBACvC,MAAM,GAAG,GAAa;oBACpB,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;oBAChB,GAAG;iBACJ,CAAC;gBACF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gBAE5B,KAAK,GAAG,KAAK,CAAC;aACd;SACD;KACD;SAAM,IAAI,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,EAAE;QAC1C,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;KACrC;AACF,CAAC,CAAA,CAAC;AAEF,wBAAwB"}
@@ -0,0 +1,2 @@
1
+ import { TQuery } from '../types/tquery';
2
+ export declare const query: TQuery;
@@ -1,17 +1,24 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
4
- const nodecommons_cli_1 = require("nodecommons-cli");
5
- const query = async (args, databaseService, _lists, _expiry, _parsersConfig) => {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { EStatus } from 'hydra-crawler-ts-assets';
11
+ import { commonsOutputDoing, commonsOutputError, commonsOutputProgress, commonsOutputResult } from 'nodecommons-es-cli';
12
+ export const query = (args, databaseService, _lists, _expiry, _parsersConfig) => __awaiter(void 0, void 0, void 0, function* () {
6
13
  const domain = args.getString('domain');
7
14
  if (!domain) {
8
- nodecommons_cli_1.CommonsOutput.error('No domain specified');
15
+ commonsOutputError('No domain specified');
9
16
  return;
10
17
  }
11
- nodecommons_cli_1.CommonsOutput.doing(`Scoring and ordering readability of DONE URLs for domain ${domain}`);
18
+ commonsOutputDoing(`Scoring and ordering readability of DONE URLs for domain ${domain}`);
12
19
  const results = databaseService.getUrls().aggregate([
13
20
  { $match: {
14
- status: { $ne: hydra_crawler_ts_assets_1.EStatus.ARCHIVED },
21
+ status: { $ne: EStatus.ARCHIVED },
15
22
  domain: domain,
16
23
  'headers.content-type': 'text/html'
17
24
  } },
@@ -57,16 +64,17 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
57
64
  while (true) {
58
65
  tally++;
59
66
  if ((tally % 100) === 0)
60
- nodecommons_cli_1.CommonsOutput.progress(`${tally}`);
61
- const row = await results.next();
67
+ commonsOutputProgress(`${tally}`);
68
+ const row = yield results.next();
62
69
  if (row === null)
63
70
  break;
64
71
  matches.push(row);
65
72
  }
66
- nodecommons_cli_1.CommonsOutput.result(tally);
73
+ commonsOutputResult(tally);
67
74
  console.log('url\tparagraphs\tsentences\twords\tfkre\tari\tscore');
68
75
  for (const match of matches) {
69
76
  console.log(`${match.url}\t${match.stats.paragraphs}\t${match.stats.sentences}\t${match.stats.words}\t${match.stats.fkre}\t${match.stats.ari}\t${match.score}`);
70
77
  }
71
- };
72
- exports.default = query;
78
+ });
79
+ // export default query;
80
+ //# sourceMappingURL=readability-score.query.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"readability-score.query.js","sourceRoot":"","sources":["../../src/queries/readability-score.query.ts"],"names":[],"mappings":";;;;;;;;;AAIA,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAe,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAuBrI,MAAM,CAAC,MAAM,KAAK,GAAW,CAC3B,IAAiB,EACjB,eAAgC,EAChC,MAAa,EACb,OAAe,EACf,cAAyC,EAC1B,EAAE;IAClB,MAAM,MAAM,GAAqB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAC1D,IAAI,CAAC,MAAM,EAAE;QACZ,kBAAkB,CAAC,qBAAqB,CAAC,CAAC;QAC1C,OAAO;KACP;IAED,kBAAkB,CAAC,4DAA4D,MAAM,EAAE,CAAC,CAAC;IAEzF,MAAM,OAAO,GAA+B,eAAe,CAAC,OAAO,EAAE,CAAC,SAAS,CAAU;QACvF,EAAE,MAAM,EAAE;gBACR,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,QAAQ,EAAE;gBACjC,MAAM,EAAE,MAAM;gBACd,sBAAsB,EAAE,WAAW;aACpC,EAAE;QACH,EAAE,MAAM,EAAE;gBACR,2BAA2B,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE;gBAC3C,0BAA0B,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE;aAC3C,EAAE;QACH,EAAE,QAAQ,EAAE;gBACV,GAAG,EAAE,IAAI;gBACT,KAAK,EAAE;oBACL,UAAU,EAAE,kCAAkC;oBAC9C,SAAS,EAAE,iCAAiC;oBAC5C,KAAK,EAAE,6BAA6B;oBACpC,IAAI,EAAE,4BAA4B;oBAClC,GAAG,EAAE,2BAA2B;iBACjC;aACF,EAAE;QACH,EAAE,QAAQ,EAAE;gBACV,GAAG,EAAE,IAAI;gBACT,KAAK,EAAE,IAAI;gBACX,KAAK,EAAE;oBACL,IAAI,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAE,GAAG,EAAE,aAAa,CAAE,EAAE,CAAE,EAAE;oBACtD,GAAG,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAE,EAAE,EAAE,YAAY,CAAE,EAAE,CAAE,EAAE;iBACpD;aACF,EAAE;QACH,EAAE,QAAQ,EAAE;gBACV,GAAG,EAAE,IAAI;gBACT,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE;oBACR,IAAI,EAAE,EAAE,OAAO,EAAE,CAAE,EAAE,SAAS,EAAE,CAAE,GAAG,EAAE,aAAa,CAAE,EAAE,EAAE,GAAG,CAAE,EAAE;oBACjE,GAAG,EAAE,EAAE,OAAO,EAAE,CAAE,EAAE,SAAS,EAAE,CAAE,YAAY,EAAE,CAAC,CAAE,EAAE,EAAE,EAAE,CAAE,EAAE;iBAC7D;aACF,EAAE;QACH,EAAE,QAAQ,EAAE;gBACV,GAAG,EAAE,IAAI;gBACT,KAAK,EAAE,IAAI;gBACX,KAAK,EAAE,EAAE,OAAO,EAAE,CAAE,EAAE,IAAI,EAAE,CAAE,gBAAgB,EAAE,eAAe,CAAE,EAAE,EAAE,CAAC,CAAE,EAAE;aAC3E,EAAE;QACH,EAAE,KAAK,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE;KACzB,CAAC,CAAC;IAEH,MAAM,OAAO,GAAc,EAAE,CAAC;IAC9B,IAAI,KAAK,GAAW,CAAC,CAAC;IACtB,OAAO,IAAI,EAAE;QACZ,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;YAAE,qBAAqB,CAAC,GAAG,KAAK,EAAE,CAAC,CAAC;QAE3D,MAAM,GAAG,GAAiB,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;QAC/C,IAAI,GAAG,KAAK,IAAI;YAAE,MAAM;QAExB,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;KAClB;IAED,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAE3B,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;IACnE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;QAC5B,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,KAAK,KAAK,CAAC,KAAK,CAAC,UAAU,KAAK,KAAK,CAAC,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,KAAK,CAAC,KAAK,KAAK,KAAK,CAAC,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,GAAG,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;KAChK;AACF,CAAC,CAAA,CAAC;AAEF,wBAAwB"}
@@ -0,0 +1,35 @@
1
+ import { TKeyObject } from 'tscommons-es-core';
2
+ import { Lists } from '../classes/lists';
3
+ import { Tracker } from '../classes/tracker';
4
+ import { DatabaseService } from '../services/database.service';
5
+ import { IParserConfig } from '../interfaces/iparser-config';
6
+ import { THydraConfig } from '../types/thydra-config';
7
+ import { TCrawlConfig } from '../types/tcrawl-config';
8
+ import { TRobotsConfig } from '../types/trobots-config';
9
+ import { TParserCtor } from '../types/tparser-ctor';
10
+ export declare class CrawlServer {
11
+ private database;
12
+ private hydraConfig;
13
+ private crawlConfig;
14
+ private parsersConfig;
15
+ private robotsConfig;
16
+ private lists;
17
+ private tracker;
18
+ private parsers;
19
+ private crawlers;
20
+ private strategies;
21
+ private isAborted;
22
+ private preDelayIds;
23
+ private isPaused;
24
+ constructor(database: DatabaseService, hydraConfig: THydraConfig, crawlConfig: TCrawlConfig, parsersConfig: TKeyObject<IParserConfig>, robotsConfig: TRobotsConfig, lists: Lists, tracker: Tracker);
25
+ addParser(parser: TParserCtor): void;
26
+ listParsers(): TParserCtor[];
27
+ pause(): void;
28
+ resume(): void;
29
+ private abort;
30
+ terminate(): Promise<void>;
31
+ private pauseCrawl;
32
+ private resumeCrawl;
33
+ start(): Promise<void>;
34
+ shutdown(): Promise<void>;
35
+ }
@@ -1,13 +1,19 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.CrawlServer = void 0;
4
- const tscommons_async_1 = require("tscommons-async");
5
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- const nodecommons_process_1 = require("nodecommons-process");
8
- const crawler_1 = require("../classes/crawler");
9
- const eavailable_strategy_1 = require("../enums/eavailable-strategy");
10
- class CrawlServer {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { commonsAsyncAbortTimeout, commonsAsyncTimeout } from 'tscommons-es-async';
11
+ import { EStatus } from 'hydra-crawler-ts-assets';
12
+ import { commonsOutputAlert, commonsOutputCompleted, commonsOutputDebug, commonsOutputDoing, commonsOutputProgress, commonsOutputStarting, commonsOutputSuccess } from 'nodecommons-es-cli';
13
+ import { commonsGracefulAbortAddCallback } from 'nodecommons-es-process';
14
+ import { Crawler } from '../classes/crawler';
15
+ import { EAvailableStrategy } from '../enums/eavailable-strategy';
16
+ export class CrawlServer {
11
17
  constructor(database, hydraConfig, crawlConfig, parsersConfig, robotsConfig, lists, tracker) {
12
18
  this.database = database;
13
19
  this.hydraConfig = hydraConfig;
@@ -22,11 +28,11 @@ class CrawlServer {
22
28
  this.isPaused = false;
23
29
  this.crawlers = new Map();
24
30
  this.strategies = new Map();
25
- nodecommons_process_1.CommonsGracefulAbort.addCallback(() => {
26
- nodecommons_cli_1.CommonsOutput.alert(`SIGINT abort flag is set. Aborting crawl server.`);
31
+ commonsGracefulAbortAddCallback(() => {
32
+ commonsOutputAlert('SIGINT abort flag is set. Aborting crawl server.');
27
33
  this.abort();
28
34
  for (const preDelayId of this.preDelayIds)
29
- tscommons_async_1.CommonsAsync.abortTimeout(preDelayId);
35
+ commonsAsyncAbortTimeout(preDelayId);
30
36
  });
31
37
  }
32
38
  addParser(parser) {
@@ -36,14 +42,14 @@ class CrawlServer {
36
42
  return this.parsers.slice();
37
43
  }
38
44
  pause() {
39
- nodecommons_cli_1.CommonsOutput.alert(`Pausing crawl server`);
45
+ commonsOutputAlert('Pausing crawl server');
40
46
  this.isPaused = true;
41
47
  for (const domain of this.crawlers.keys()) {
42
48
  this.pauseCrawl(domain);
43
49
  }
44
50
  }
45
51
  resume() {
46
- nodecommons_cli_1.CommonsOutput.completed(`Resuming crawl server`);
52
+ commonsOutputCompleted('Resuming crawl server');
47
53
  this.isPaused = false;
48
54
  for (const domain of this.crawlers.keys()) {
49
55
  this.resumeCrawl(domain);
@@ -51,132 +57,138 @@ class CrawlServer {
51
57
  }
52
58
  abort() {
53
59
  this.isAborted = true;
54
- tscommons_async_1.CommonsAsync.abortTimeout('find-new-available');
60
+ commonsAsyncAbortTimeout('find-new-available');
55
61
  }
56
- async terminate() {
57
- this.abort();
58
- await this.database.close();
59
- process.exit(0);
62
+ terminate() {
63
+ return __awaiter(this, void 0, void 0, function* () {
64
+ this.abort();
65
+ yield this.database.close();
66
+ process.exit(0);
67
+ });
60
68
  }
61
- async pauseCrawl(domain) {
69
+ pauseCrawl(domain) {
62
70
  if (!this.crawlers.has(domain))
63
71
  return;
64
- nodecommons_cli_1.CommonsOutput.alert(`Pausing crawler for ${domain}`);
72
+ commonsOutputAlert(`Pausing crawler for ${domain}`);
65
73
  this.crawlers.get(domain).pause();
66
74
  }
67
- async resumeCrawl(domain) {
75
+ resumeCrawl(domain) {
68
76
  if (!this.crawlers.has(domain))
69
77
  return;
70
- nodecommons_cli_1.CommonsOutput.completed(`Resuming crawler for ${domain}`);
78
+ commonsOutputCompleted(`Resuming crawler for ${domain}`);
71
79
  this.crawlers.get(domain).resume();
72
80
  }
73
- async start() {
74
- await this.database.resetActive();
75
- const added = await this.database.queue(this.hydraConfig.startUrl);
76
- if (added)
77
- this.tracker.delta(hydra_crawler_ts_assets_1.EStatus.QUEUED, 1);
78
- while (!this.isAborted) {
79
- if (!this.isPaused) {
80
- const existing = [...this.crawlers.keys()];
81
- const space = this.crawlConfig.maxCrawlers - existing.length;
82
- if (space > 0) {
83
- nodecommons_cli_1.CommonsOutput.debug(`Space for ${space} new crawls available`);
84
- const largestStrategyDomains = existing
85
- .filter((domain) => this.strategies.get(domain) === eavailable_strategy_1.EAvailableStrategy.LARGEST);
86
- const smallestStrategyDomains = existing
87
- .filter((domain) => this.strategies.get(domain) === eavailable_strategy_1.EAvailableStrategy.SMALLEST);
88
- nodecommons_cli_1.CommonsOutput.debug(`Current crawl has LARGEST=${largestStrategyDomains.length}; SMALLEST=${smallestStrategyDomains.length}`);
89
- let spaceForLargestDomains = 0;
90
- let spaceForSmallestDomains = space;
91
- if (largestStrategyDomains.length >= Math.floor(this.crawlConfig.maxCrawlers / 2)) {
92
- // already 50% large crawling, so all remaining space goes to small crawls
93
- }
94
- else {
95
- // allow a new large crawl, if enough space
96
- if (space > 1) {
97
- spaceForLargestDomains = 1;
98
- spaceForSmallestDomains--;
81
+ start() {
82
+ return __awaiter(this, void 0, void 0, function* () {
83
+ yield this.database.resetActive();
84
+ const added = yield this.database.queue(this.hydraConfig.startUrl);
85
+ if (added)
86
+ this.tracker.delta(EStatus.QUEUED, 1);
87
+ while (!this.isAborted) {
88
+ if (!this.isPaused) {
89
+ const existing = [...this.crawlers.keys()];
90
+ const space = this.crawlConfig.maxCrawlers - existing.length;
91
+ if (space > 0) {
92
+ commonsOutputDebug(`Space for ${space} new crawls available`);
93
+ const largestStrategyDomains = existing
94
+ .filter((domain) => this.strategies.get(domain) === EAvailableStrategy.LARGEST);
95
+ const smallestStrategyDomains = existing
96
+ .filter((domain) => this.strategies.get(domain) === EAvailableStrategy.SMALLEST);
97
+ commonsOutputDebug(`Current crawl has LARGEST=${largestStrategyDomains.length}; SMALLEST=${smallestStrategyDomains.length}`);
98
+ let spaceForLargestDomains = 0;
99
+ let spaceForSmallestDomains = space;
100
+ if (largestStrategyDomains.length >= Math.floor(this.crawlConfig.maxCrawlers / 2)) {
101
+ // already 50% large crawling, so all remaining space goes to small crawls
99
102
  }
100
- }
101
- nodecommons_cli_1.CommonsOutput.debug(`Space for LARGEST=${spaceForLargestDomains}; SMALLEST=${spaceForSmallestDomains}`);
102
- const availablesLargest = await this.database.available(eavailable_strategy_1.EAvailableStrategy.LARGEST, this.crawlConfig.availableStrategyThreshold, spaceForLargestDomains, existing);
103
- existing.push(...availablesLargest);
104
- const availablesSmallest = await this.database.available(eavailable_strategy_1.EAvailableStrategy.SMALLEST, this.crawlConfig.availableStrategyThreshold, spaceForSmallestDomains, existing);
105
- const availables = [
106
- ...availablesSmallest,
107
- ...availablesLargest
108
- ];
109
- if (!this.isAborted) {
110
- for (const domain of availables) {
111
- nodecommons_cli_1.CommonsOutput.starting(`Creating new crawl head for ${domain}`);
112
- const crawler = new crawler_1.Crawler(domain, this.database, this.crawlConfig, this.parsersConfig, this.robotsConfig, this.parsers, this.lists, this.tracker);
113
- this.crawlers.set(domain, crawler);
114
- if (availablesLargest.includes(domain)) {
115
- this.strategies.set(domain, eavailable_strategy_1.EAvailableStrategy.LARGEST);
103
+ else {
104
+ // allow a new large crawl, if enough space
105
+ if (space > 1) {
106
+ spaceForLargestDomains = 1;
107
+ spaceForSmallestDomains--;
116
108
  }
117
- else {
118
- this.strategies.set(domain, eavailable_strategy_1.EAvailableStrategy.SMALLEST);
119
- }
120
- // called without await in order to do parallel crawls
121
- (async () => {
122
- try {
123
- // delay randomly to prevent network socket request spikes
124
- const preDelayId = `predelay_${domain}`;
125
- this.preDelayIds.push(preDelayId);
109
+ }
110
+ commonsOutputDebug(`Space for LARGEST=${spaceForLargestDomains}; SMALLEST=${spaceForSmallestDomains}`);
111
+ const availablesLargest = yield this.database.available(EAvailableStrategy.LARGEST, this.crawlConfig.availableStrategyThreshold, spaceForLargestDomains, existing);
112
+ existing.push(...availablesLargest);
113
+ const availablesSmallest = yield this.database.available(EAvailableStrategy.SMALLEST, this.crawlConfig.availableStrategyThreshold, spaceForSmallestDomains, existing);
114
+ const availables = [
115
+ ...availablesSmallest,
116
+ ...availablesLargest
117
+ ];
118
+ if (!this.isAborted) {
119
+ for (const domain of availables) {
120
+ commonsOutputStarting(`Creating new crawl head for ${domain}`);
121
+ const crawler = new Crawler(domain, this.database, this.crawlConfig, this.parsersConfig, this.robotsConfig, this.parsers, this.lists, this.tracker);
122
+ this.crawlers.set(domain, crawler);
123
+ if (availablesLargest.includes(domain)) {
124
+ this.strategies.set(domain, EAvailableStrategy.LARGEST);
125
+ }
126
+ else {
127
+ this.strategies.set(domain, EAvailableStrategy.SMALLEST);
128
+ }
129
+ // called without await in order to do parallel crawls
130
+ void (() => __awaiter(this, void 0, void 0, function* () {
126
131
  try {
127
- await tscommons_async_1.CommonsAsync.timeout(Math.random() * (this.crawlConfig.findNewAvailableDelay - 1000), preDelayId);
128
- }
129
- catch (e) {
130
- if (e.message === 'abortTimeout called')
132
+ // delay randomly to prevent network socket request spikes
133
+ const preDelayId = `predelay_${domain}`;
134
+ this.preDelayIds.push(preDelayId);
135
+ try {
136
+ yield commonsAsyncTimeout(Math.random() * (this.crawlConfig.findNewAvailableDelay - 1000), preDelayId);
137
+ }
138
+ catch (e) {
139
+ if (e.message === 'abortTimeout called')
140
+ return;
141
+ throw e;
142
+ }
143
+ this.preDelayIds = this.preDelayIds
144
+ .filter((pid) => pid !== preDelayId);
145
+ if (this.isAborted)
131
146
  return;
132
- throw e;
147
+ if (this.isPaused)
148
+ this.pauseCrawl(domain);
149
+ commonsOutputStarting(`Starting crawler for ${domain}`);
150
+ yield crawler.crawl();
133
151
  }
134
- this.preDelayIds = this.preDelayIds
135
- .filter((pid) => pid !== preDelayId);
136
- if (this.isAborted)
137
- return;
138
- if (this.isPaused)
139
- this.pauseCrawl(domain);
140
- nodecommons_cli_1.CommonsOutput.starting(`Starting crawler for ${domain}`);
141
- await crawler.crawl();
142
- }
143
- catch (ex) {
144
- nodecommons_cli_1.CommonsOutput.debug('debug position 6');
145
- console.log(ex);
146
- }
147
- finally {
148
- this.crawlers.delete(domain); // doesn't get called until after doCrawl() is called, so have to do it here if aborted
149
- this.strategies.delete(domain);
150
- }
151
- })();
152
+ catch (ex) {
153
+ commonsOutputDebug('debug position 6');
154
+ console.log(ex);
155
+ }
156
+ finally {
157
+ this.crawlers.delete(domain); // doesn't get called until after doCrawl() is called, so have to do it here if aborted
158
+ this.strategies.delete(domain);
159
+ }
160
+ }))();
161
+ }
152
162
  }
153
163
  }
154
164
  }
165
+ try {
166
+ yield commonsAsyncTimeout(this.crawlConfig.findNewAvailableDelay, 'find-new-available');
167
+ }
168
+ catch (ex) {
169
+ // ignore
170
+ }
171
+ if (this.isAborted) {
172
+ commonsOutputAlert('SIGINT abort is set. Aborting new head loop.');
173
+ break;
174
+ }
155
175
  }
156
- try {
157
- await tscommons_async_1.CommonsAsync.timeout(this.crawlConfig.findNewAvailableDelay, 'find-new-available');
158
- }
159
- catch (ex) {
160
- // ignore
161
- }
162
- if (this.isAborted) {
163
- nodecommons_cli_1.CommonsOutput.alert(`SIGINT abort is set. Aborting new head loop.`);
164
- break;
165
- }
166
- }
176
+ });
167
177
  }
168
- async shutdown() {
169
- nodecommons_cli_1.CommonsOutput.doing('Waiting for all crawlers to abort');
170
- while (this.crawlers.size > 0) {
171
- nodecommons_cli_1.CommonsOutput.progress(this.crawlers.size);
172
- try {
173
- await tscommons_async_1.CommonsAsync.timeout(1000);
174
- }
175
- catch (ex) {
176
- // ignore
178
+ shutdown() {
179
+ return __awaiter(this, void 0, void 0, function* () {
180
+ commonsOutputDoing('Waiting for all crawlers to abort');
181
+ while (this.crawlers.size > 0) {
182
+ commonsOutputProgress(this.crawlers.size);
183
+ try {
184
+ yield commonsAsyncTimeout(1000);
185
+ }
186
+ catch (ex) {
187
+ // ignore
188
+ }
177
189
  }
178
- }
179
- nodecommons_cli_1.CommonsOutput.success();
190
+ commonsOutputSuccess();
191
+ });
180
192
  }
181
193
  }
182
- exports.CrawlServer = CrawlServer;
194
+ //# sourceMappingURL=crawl.server.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawl.server.js","sourceRoot":"","sources":["../../src/servers/crawl.server.ts"],"names":[],"mappings":";;;;;;;;;AACA,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAEnF,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC5L,OAAO,EAAE,+BAA+B,EAAE,MAAM,wBAAwB,CAAC;AAEzE,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAa7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAElE,MAAM,OAAO,WAAW;IAUvB,YACU,QAAyB,EACzB,WAAyB,EACzB,WAAyB,EACzB,aAAwC,EACxC,YAA2B,EAC3B,KAAY,EACZ,OAAgB;QANhB,aAAQ,GAAR,QAAQ,CAAiB;QACzB,gBAAW,GAAX,WAAW,CAAc;QACzB,gBAAW,GAAX,WAAW,CAAc;QACzB,kBAAa,GAAb,aAAa,CAA2B;QACxC,iBAAY,GAAZ,YAAY,CAAe;QAC3B,UAAK,GAAL,KAAK,CAAO;QACZ,YAAO,GAAP,OAAO,CAAS;QAhBlB,YAAO,GAAkB,EAAE,CAAC;QAG5B,cAAS,GAAY,KAAK,CAAC;QAE3B,gBAAW,GAAa,EAAE,CAAC;QAE3B,aAAQ,GAAY,KAAK,CAAC;QAWjC,IAAI,CAAC,QAAQ,GAAG,IAAI,GAAG,EAAmB,CAAC;QAC3C,IAAI,CAAC,UAAU,GAAG,IAAI,GAAG,EAA8B,CAAC;QAExD,+BAA+B,CAAC,GAAS,EAAE;YAC1C,kBAAkB,CAAC,kDAAkD,CAAC,CAAC;YACvE,IAAI,CAAC,KAAK,EAAE,CAAC;YAEb,KAAK,MAAM,UAAU,IAAI,IAAI,CAAC,WAAW;gBAAE,wBAAwB,CAAC,UAAU,CAAC,CAAC;QACjF,CAAC,CAAC,CAAC;IACJ,CAAC;IAEM,SAAS,CAAC,MAAmB;QACnC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3B,CAAC;IAEM,WAAW;QACjB,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IAC7B,CAAC;IAEM,KAAK;QACX,kBAAkB,CAAC,sBAAsB,CAAC,CAAC;QAE3C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QAErB,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE;YAC1C,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;SACxB;IACF,CAAC;IAEM,MAAM;QACZ,sBAAsB,CAAC,uBAAuB,CAAC,CAAC;QAEhD,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;QAEtB,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE;YAC1C,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;SACzB;IACF,CAAC;IAEO,KAAK;QACZ,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,wBAAwB,CAAC,oBAAoB,CAAC,CAAC;IAChD,CAAC;IAEY,SAAS;;YACrB,IAAI,CAAC,KAAK,EAAE,CAAC;YAEb,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;YAC5B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjB,CAAC;KAAA;IAEO,UAAU,CAAC,MAAc;QAChC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC;YAAE,OAAO;QAEvC,kBAAkB,CAAC,uBAAuB,MAAM,EAAE,CAAC,CAAC;QAEpD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,KAAK,EAAE,CAAC;IACpC,CAAC;IAEO,WAAW,CAAC,MAAc;QACjC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC;YAAE,OAAO;QAEvC,sBAAsB,CAAC,wBAAwB,MAAM,EAAE,CAAC,CAAC;QAEzD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,MAAM,EAAE,CAAC;IACrC,CAAC;IAEY,KAAK;;YACjB,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAElC,MAAM,KAAK,GAAY,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;YAC5E,IAAI,KAAK;gBAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YAEjD,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE;gBACvB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;oBACnB,MAAM,QAAQ,GAAa,CAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAE,CAAC;oBAEvD,MAAM,KAAK,GAAW,IAAI,CAAC,WAAW,CAAC,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC;oBAErE,IAAI,KAAK,GAAG,CAAC,EAAE;wBACd,kBAAkB,CAAC,aAAa,KAAK,uBAAuB,CAAC,CAAC;wBAE9D,MAAM,sBAAsB,GAAa,QAAQ;6BAC9C,MAAM,CAAC,CAAC,MAAc,EAAW,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,kBAAkB,CAAC,OAAO,CAAC,CAAC;wBAEnG,MAAM,uBAAuB,GAAa,QAAQ;6BAC/C,MAAM,CAAC,CAAC,MAAc,EAAW,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,kBAAkB,CAAC,QAAQ,CAAC,CAAC;wBAEpG,kBAAkB,CAAC,6BAA6B,sBAAsB,CAAC,MAAM,cAAc,uBAAuB,CAAC,MAAM,EAAE,CAAC,CAAC;wBAE7H,IAAI,sBAAsB,GAAW,CAAC,CAAC;wBACvC,IAAI,uBAAuB,GAAW,KAAK,CAAC;wBAE5C,IAAI,sBAAsB,CAAC,MAAM,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,WAAW,GAAG,CAAC,CAAC,EAAE;4BAClF,0EAA0E;yBAC1E;6BAAM;4BACN,2CAA2C;4BAC3C,IAAI,KAAK,GAAG,CAAC,EAAE;gCACd,sBAAsB,GAAG,CAAC,CAAC;gCAC3B,uBAAuB,EAAE,CAAC;6BAC1B;yBACD;wBAED,kBAAkB,CAAC,qBAAqB,sBAAsB,cAAc,uBAAuB,EAAE,CAAC,CAAC;wBAEvG,MAAM,iBAAiB,GAAa,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAC/D,kBAAkB,CAAC,OAAO,EAC1B,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAC3C,sBAAsB,EACtB,QAAQ,CACT,CAAC;wBAEF,QAAQ,CAAC,IAAI,CAAC,GAAG,iBAAiB,CAAC,CAAC;wBAEpC,MAAM,kBAAkB,GAAa,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAChE,kBAAkB,CAAC,QAAQ,EAC3B,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAC3C,uBAAuB,EACvB,QAAQ,CACT,CAAC;wBAEF,MAAM,UAAU,GAAa;4BAC3B,GAAG,kBAAkB;4BACrB,GAAG,iBAAiB;yBACrB,CAAC;wBAEF,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE;4BACpB,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE;gCAChC,qBAAqB,CAAC,+BAA+B,MAAM,EAAE,CAAC,CAAC;gCAE/D,MAAM,OAAO,GAAY,IAAI,OAAO,CAClC,MAAM,EACN,IAAI,CAAC,QAAQ,EACb,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,aAAa,EAClB,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,OAAO,EACZ,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,OAAO,CACb,CAAC;gCAEF,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;gCAEnC,IAAI,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;oCACvC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,EAAE,kBAAkB,CAAC,OAAO,CAAC,CAAC;iCACxD;qCAAM;oCACN,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,EAAE,kBAAkB,CAAC,QAAQ,CAAC,CAAC;iCACzD;gCAED,sDAAsD;gCACtD,KAAK,CAAC,GAAwB,EAAE;oCAC/B,IAAI;wCACH,0DAA0D;wCAC1D,MAAM,UAAU,GAAW,YAAY,MAAM,EAAE,CAAC;wCAChD,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;wCAClC,IAAI;4CACH,MAAM,mBAAmB,CACvB,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,IAAI,CAAC,EAC/D,UAAU,CACX,CAAC;yCACF;wCAAC,OAAO,CAAC,EAAE;4CACX,IAAK,CAAW,CAAC,OAAO,KAAK,qBAAqB;gDAAE,OAAO;4CAC3D,MAAM,CAAC,CAAC;yCACR;wCAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW;6CAChC,MAAM,CAAC,CAAC,GAAW,EAAW,EAAE,CAAC,GAAG,KAAK,UAAU,CAAC,CAAC;wCAExD,IAAI,IAAI,CAAC,SAAS;4CAAE,OAAO;wCAC3B,IAAI,IAAI,CAAC,QAAQ;4CAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;wCAE3C,qBAAqB,CAAC,wBAAwB,MAAM,EAAE,CAAC,CAAC;wCACxD,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;qCACtB;oCAAC,OAAO,EAAE,EAAE;wCACZ,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;wCACvC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;qCAChB;4CAAS;wCACT,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,uFAAuF;wCACrH,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;qCAC/B;gCACF,CAAC,CAAA,CAAC,EAAE,CAAC;6BACL;yBACD;qBACD;iBACD;gBAED,IAAI;oBACH,MAAM,mBAAmB,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,EAAE,oBAAoB,CAAC,CAAC;iBACxF;gBAAC,OAAO,EAAE,EAAE;oBACZ,SAAS;iBACT;gBAED,IAAI,IAAI,CAAC,SAAS,EAAE;oBACnB,kBAAkB,CAAC,8CAA8C,CAAC,CAAC;oBACnE,MAAM;iBACN;aACD;QACF,CAAC;KAAA;IAEY,QAAQ;;YACpB,kBAAkB,CAAC,mCAAmC,CAAC,CAAC;YACxD,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,EAAE;gBAC9B,qBAAqB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBAE1C,IAAI;oBACH,MAAM,mBAAmB,CAAC,IAAI,CAAC,CAAC;iBAChC;gBAAC,OAAO,EAAE,EAAE;oBACZ,SAAS;iBACT;aACD;YACD,oBAAoB,EAAE,CAAC;QACxB,CAAC;KAAA;CACD"}
@@ -0,0 +1,8 @@
1
+ /// <reference types="node" />
2
+ import * as http from 'http';
3
+ import * as express from 'express';
4
+ import { CommonsStrictExpressServer } from 'nodecommons-es-express';
5
+ import { ICommonsExpressConfig } from 'nodecommons-es-express';
6
+ export declare class ExpressServer extends CommonsStrictExpressServer {
7
+ constructor(ex: express.Express, server: http.Server, config: ICommonsExpressConfig);
8
+ }
@@ -1,16 +1,13 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ExpressServer = void 0;
4
- const nodecommons_express_1 = require("nodecommons-express");
5
- const nodecommons_process_1 = require("nodecommons-process");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- class ExpressServer extends nodecommons_express_1.CommonsExpressServer {
1
+ import { CommonsStrictExpressServer } from 'nodecommons-es-express';
2
+ import { commonsGracefulAbortAddCallback } from 'nodecommons-es-process';
3
+ import { commonsOutputAlert } from 'nodecommons-es-cli';
4
+ export class ExpressServer extends CommonsStrictExpressServer {
8
5
  constructor(ex, server, config) {
9
6
  super(ex, server, config.port);
10
- nodecommons_process_1.CommonsGracefulAbort.addCallback(() => {
11
- nodecommons_cli_1.CommonsOutput.alert(`SIGINT abort flag is set. Aborting Express server.`);
7
+ commonsGracefulAbortAddCallback(() => {
8
+ commonsOutputAlert('SIGINT abort flag is set. Aborting Express server.');
12
9
  super.close();
13
10
  });
14
11
  }
15
12
  }
16
- exports.ExpressServer = ExpressServer;
13
+ //# sourceMappingURL=express.server.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"express.server.js","sourceRoot":"","sources":["../../src/servers/express.server.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,0BAA0B,EAAE,MAAM,wBAAwB,CAAC;AAEpE,OAAO,EAAE,+BAA+B,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAExD,MAAM,OAAO,aAAc,SAAQ,0BAA0B;IAC5D,YACE,EAAmB,EACnB,MAAmB,EACnB,MAA6B;QAE9B,KAAK,CAAC,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QAE/B,+BAA+B,CAAC,GAAS,EAAE;YAC1C,kBAAkB,CAAC,oDAAoD,CAAC,CAAC;YACzE,KAAK,CAAC,KAAK,EAAE,CAAC;QACf,CAAC,CAAC,CAAC;IACJ,CAAC;CACD"}
@@ -0,0 +1,22 @@
1
+ import { TCommonsScheduleTime } from 'tscommons-es-async';
2
+ import { Expiry } from '../classes/expiry';
3
+ import { Lists } from '../classes/lists';
4
+ import { CrawlServer } from '../servers/crawl.server';
5
+ import { DatabaseService } from '../services/database.service';
6
+ declare enum EAction {
7
+ EXPIRE = "expire",
8
+ PURGE_EMPTY_DOMAINS = "purge-empty-domains",
9
+ PURGE_ORPHAN_URLS = "purge-orphan-urls"
10
+ }
11
+ export declare function toEAction(value: string): EAction | undefined;
12
+ export declare class MaintenanceServer {
13
+ private crawl;
14
+ private schedule;
15
+ private expirer;
16
+ private cleaner;
17
+ private isPaused;
18
+ constructor(times: TCommonsScheduleTime[], expiry: Expiry, lists: Lists, database: DatabaseService, crawl: CrawlServer);
19
+ private perform;
20
+ start(): void;
21
+ }
22
+ export {};