hydra-crawler 1.4.6 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. package/dist/apis/autocomplete.api.d.ts +7 -0
  2. package/dist/apis/autocomplete.api.js +15 -9
  3. package/dist/apis/autocomplete.api.js.map +1 -0
  4. package/dist/apis/bugs.api.d.ts +7 -0
  5. package/dist/apis/bugs.api.js +21 -15
  6. package/dist/apis/bugs.api.js.map +1 -0
  7. package/dist/apis/crawl.api.d.ts +7 -0
  8. package/dist/apis/crawl.api.js +15 -9
  9. package/dist/apis/crawl.api.js.map +1 -0
  10. package/dist/apis/domains.api.d.ts +7 -0
  11. package/dist/apis/domains.api.js +24 -19
  12. package/dist/apis/domains.api.js.map +1 -0
  13. package/dist/apis/images.api.d.ts +7 -0
  14. package/dist/apis/images.api.js +20 -14
  15. package/dist/apis/images.api.js.map +1 -0
  16. package/dist/apis/statistics.api.d.ts +8 -0
  17. package/dist/apis/statistics.api.js +27 -20
  18. package/dist/apis/statistics.api.js.map +1 -0
  19. package/dist/apis/test.api.d.ts +5 -0
  20. package/dist/apis/test.api.js +15 -9
  21. package/dist/apis/test.api.js.map +1 -0
  22. package/dist/apis/urls.api.d.ts +7 -0
  23. package/dist/apis/urls.api.js +21 -15
  24. package/dist/apis/urls.api.js.map +1 -0
  25. package/dist/apps/cleanup.app.d.ts +19 -0
  26. package/dist/apps/cleanup.app.js +118 -100
  27. package/dist/apps/cleanup.app.js.map +1 -0
  28. package/dist/apps/cross-populate-export.app.d.ts +12 -0
  29. package/dist/apps/cross-populate-export.app.js +60 -47
  30. package/dist/apps/cross-populate-export.app.js.map +1 -0
  31. package/dist/apps/cross-populate-import.app.d.ts +12 -0
  32. package/dist/apps/cross-populate-import.app.js +64 -51
  33. package/dist/apps/cross-populate-import.app.js.map +1 -0
  34. package/dist/apps/denylist.app.d.ts +17 -0
  35. package/dist/apps/denylist.app.js +115 -98
  36. package/dist/apps/denylist.app.js.map +1 -0
  37. package/dist/apps/expire.app.d.ts +19 -0
  38. package/dist/apps/expire.app.js +44 -31
  39. package/dist/apps/expire.app.js.map +1 -0
  40. package/dist/apps/extract-text.app.d.ts +8 -0
  41. package/dist/apps/extract-text.app.js +43 -35
  42. package/dist/apps/extract-text.app.js.map +1 -0
  43. package/dist/apps/hydra.app.d.ts +34 -0
  44. package/dist/apps/hydra.app.js +150 -137
  45. package/dist/apps/hydra.app.js.map +1 -0
  46. package/dist/apps/import.app.d.ts +11 -0
  47. package/dist/apps/import.app.js +44 -32
  48. package/dist/apps/import.app.js.map +1 -0
  49. package/dist/apps/internal-hydra-common.app.d.ts +28 -0
  50. package/dist/apps/internal-hydra-common.app.js +5 -11
  51. package/dist/apps/internal-hydra-common.app.js.map +1 -0
  52. package/dist/apps/query.app.d.ts +20 -0
  53. package/dist/apps/query.app.js +63 -49
  54. package/dist/apps/query.app.js.map +1 -0
  55. package/dist/apps/reattempt.app.d.ts +17 -0
  56. package/dist/apps/reattempt.app.js +66 -53
  57. package/dist/apps/reattempt.app.js.map +1 -0
  58. package/dist/apps/requeue-domain.app.d.ts +13 -0
  59. package/dist/apps/requeue-domain.app.js +50 -37
  60. package/dist/apps/requeue-domain.app.js.map +1 -0
  61. package/dist/apps/seed.app.d.ts +15 -0
  62. package/dist/apps/seed.app.js +53 -40
  63. package/dist/apps/seed.app.js.map +1 -0
  64. package/dist/apps/startup.app.d.ts +11 -0
  65. package/dist/apps/startup.app.js +51 -38
  66. package/dist/apps/startup.app.js.map +1 -0
  67. package/dist/apps/unarchive.app.d.ts +15 -0
  68. package/dist/apps/unarchive.app.js +67 -54
  69. package/dist/apps/unarchive.app.js.map +1 -0
  70. package/dist/classes/cleaner.d.ts +12 -0
  71. package/dist/classes/cleaner.js +227 -207
  72. package/dist/classes/cleaner.js.map +1 -0
  73. package/dist/classes/crawler.d.ts +34 -0
  74. package/dist/classes/crawler.js +248 -241
  75. package/dist/classes/crawler.js.map +1 -0
  76. package/dist/classes/dns.d.ts +3 -0
  77. package/dist/classes/dns.js +10 -13
  78. package/dist/classes/dns.js.map +1 -0
  79. package/dist/classes/expirer.d.ts +10 -0
  80. package/dist/classes/expirer.js +107 -94
  81. package/dist/classes/expirer.js.map +1 -0
  82. package/dist/classes/expiry.d.ts +8 -0
  83. package/dist/classes/expiry.js +16 -19
  84. package/dist/classes/expiry.js.map +1 -0
  85. package/dist/classes/lists.d.ts +9 -0
  86. package/dist/classes/lists.js +13 -18
  87. package/dist/classes/lists.js.map +1 -0
  88. package/dist/classes/robot.d.ts +15 -0
  89. package/dist/classes/robot.js +40 -30
  90. package/dist/classes/robot.js.map +1 -0
  91. package/dist/classes/tracker.d.ts +25 -0
  92. package/dist/classes/tracker.js +82 -64
  93. package/dist/classes/tracker.js.map +1 -0
  94. package/dist/cli.d.ts +1 -0
  95. package/dist/cli.js +72 -65
  96. package/dist/cli.js.map +1 -0
  97. package/dist/enums/eavailable-strategy.d.ts +4 -0
  98. package/dist/enums/eavailable-strategy.js +3 -5
  99. package/dist/enums/eavailable-strategy.js.map +1 -0
  100. package/dist/enums/elist.d.ts +7 -0
  101. package/dist/enums/elist.js +7 -11
  102. package/dist/enums/elist.js.map +1 -0
  103. package/dist/enums/eserver.d.ts +8 -0
  104. package/dist/enums/eserver.js +3 -5
  105. package/dist/enums/eserver.js.map +1 -0
  106. package/dist/enums/ex-powered-by.d.ts +6 -0
  107. package/dist/enums/ex-powered-by.js +3 -5
  108. package/dist/enums/ex-powered-by.js.map +1 -0
  109. package/dist/helpers/matcher.d.ts +5 -0
  110. package/dist/helpers/matcher.js +2 -5
  111. package/dist/helpers/matcher.js.map +1 -0
  112. package/dist/helpers/random.d.ts +4 -0
  113. package/dist/helpers/random.js +2 -5
  114. package/dist/helpers/random.js.map +1 -0
  115. package/dist/helpers/utf-decoder.d.ts +4 -0
  116. package/dist/helpers/utf-decoder.js +3 -6
  117. package/dist/helpers/utf-decoder.js.map +1 -0
  118. package/dist/interfaces/iexpiry.d.ts +7 -0
  119. package/dist/interfaces/iexpiry.js +9 -13
  120. package/dist/interfaces/iexpiry.js.map +1 -0
  121. package/dist/interfaces/imatch.d.ts +6 -0
  122. package/dist/interfaces/imatch.js +6 -9
  123. package/dist/interfaces/imatch.js.map +1 -0
  124. package/dist/interfaces/iparser-config.d.ts +4 -0
  125. package/dist/interfaces/iparser-config.js +4 -7
  126. package/dist/interfaces/iparser-config.js.map +1 -0
  127. package/dist/interfaces/iparser.d.ts +8 -0
  128. package/dist/interfaces/iparser.js +2 -2
  129. package/dist/interfaces/iparser.js.map +1 -0
  130. package/dist/interfaces/irequest-outcome.d.ts +11 -0
  131. package/dist/interfaces/irequest-outcome.js +2 -2
  132. package/dist/interfaces/irequest-outcome.js.map +1 -0
  133. package/dist/interfaces/iserver.d.ts +4 -0
  134. package/dist/interfaces/iserver.js +2 -2
  135. package/dist/interfaces/iserver.js.map +1 -0
  136. package/dist/parsers/accessibility-metrics.parser.d.ts +11 -0
  137. package/dist/parsers/accessibility-metrics.parser.js +34 -26
  138. package/dist/parsers/accessibility-metrics.parser.js.map +1 -0
  139. package/dist/parsers/asp-error.parser.d.ts +12 -0
  140. package/dist/parsers/asp-error.parser.js +36 -28
  141. package/dist/parsers/asp-error.parser.js.map +1 -0
  142. package/dist/parsers/bad-words.parser.d.ts +10 -0
  143. package/dist/parsers/bad-words.parser.js +21 -13
  144. package/dist/parsers/bad-words.parser.js.map +1 -0
  145. package/dist/parsers/complex-english.parser.d.ts +15 -0
  146. package/dist/parsers/complex-english.parser.js +33 -25
  147. package/dist/parsers/complex-english.parser.js.map +1 -0
  148. package/dist/parsers/data.parser.d.ts +14 -0
  149. package/dist/parsers/data.parser.js +12 -16
  150. package/dist/parsers/data.parser.js.map +1 -0
  151. package/dist/parsers/dictionary.parser.d.ts +19 -0
  152. package/dist/parsers/dictionary.parser.js +47 -39
  153. package/dist/parsers/dictionary.parser.js.map +1 -0
  154. package/dist/parsers/html.parser.d.ts +13 -0
  155. package/dist/parsers/html.parser.js +4 -8
  156. package/dist/parsers/html.parser.js.map +1 -0
  157. package/dist/parsers/hyperlinks.parser.d.ts +20 -0
  158. package/dist/parsers/hyperlinks.parser.js +82 -77
  159. package/dist/parsers/hyperlinks.parser.js.map +1 -0
  160. package/dist/parsers/image-tags.parser.d.ts +20 -0
  161. package/dist/parsers/image-tags.parser.js +38 -34
  162. package/dist/parsers/image-tags.parser.js.map +1 -0
  163. package/dist/parsers/jpeg.parser.d.ts +11 -0
  164. package/dist/parsers/jpeg.parser.js +28 -20
  165. package/dist/parsers/jpeg.parser.js.map +1 -0
  166. package/dist/parsers/paragraphs.parser.d.ts +13 -0
  167. package/dist/parsers/paragraphs.parser.js +33 -40
  168. package/dist/parsers/paragraphs.parser.js.map +1 -0
  169. package/dist/parsers/parser.d.ts +19 -0
  170. package/dist/parsers/parser.js +30 -17
  171. package/dist/parsers/parser.js.map +1 -0
  172. package/dist/parsers/php-error.parser.d.ts +12 -0
  173. package/dist/parsers/php-error.parser.js +42 -34
  174. package/dist/parsers/php-error.parser.js.map +1 -0
  175. package/dist/parsers/phrase.parser.d.ts +8 -0
  176. package/dist/parsers/phrase.parser.js +16 -11
  177. package/dist/parsers/phrase.parser.js.map +1 -0
  178. package/dist/parsers/regex.parser.d.ts +10 -0
  179. package/dist/parsers/regex.parser.js +30 -22
  180. package/dist/parsers/regex.parser.js.map +1 -0
  181. package/dist/parsers/server.parser.d.ts +12 -0
  182. package/dist/parsers/server.parser.js +66 -56
  183. package/dist/parsers/server.parser.js.map +1 -0
  184. package/dist/parsers/spelling.parser.d.ts +10 -0
  185. package/dist/parsers/spelling.parser.js +21 -13
  186. package/dist/parsers/spelling.parser.js.map +1 -0
  187. package/dist/parsers/string.parser.d.ts +8 -0
  188. package/dist/parsers/string.parser.js +5 -8
  189. package/dist/parsers/string.parser.js.map +1 -0
  190. package/dist/parsers/text.parser.d.ts +8 -0
  191. package/dist/parsers/text.parser.js +24 -18
  192. package/dist/parsers/text.parser.js.map +1 -0
  193. package/dist/parsers/words.parser.d.ts +11 -0
  194. package/dist/parsers/words.parser.js +32 -28
  195. package/dist/parsers/words.parser.js.map +1 -0
  196. package/dist/queries/complex-english.query.d.ts +2 -0
  197. package/dist/queries/complex-english.query.js +37 -38
  198. package/dist/queries/complex-english.query.js.map +1 -0
  199. package/dist/queries/flash-content.query.d.ts +2 -0
  200. package/dist/queries/flash-content.query.js +39 -30
  201. package/dist/queries/flash-content.query.js.map +1 -0
  202. package/dist/queries/linking-to-domains.query.d.ts +2 -0
  203. package/dist/queries/linking-to-domains.query.js +35 -27
  204. package/dist/queries/linking-to-domains.query.js.map +1 -0
  205. package/dist/queries/readability-score.query.d.ts +2 -0
  206. package/dist/queries/readability-score.query.js +21 -13
  207. package/dist/queries/readability-score.query.js.map +1 -0
  208. package/dist/servers/crawl.server.d.ts +35 -0
  209. package/dist/servers/crawl.server.js +133 -121
  210. package/dist/servers/crawl.server.js.map +1 -0
  211. package/dist/servers/express.server.d.ts +8 -0
  212. package/dist/servers/express.server.js +7 -10
  213. package/dist/servers/express.server.js.map +1 -0
  214. package/dist/servers/maintenance.server.d.ts +22 -0
  215. package/dist/servers/maintenance.server.js +42 -36
  216. package/dist/servers/maintenance.server.js.map +1 -0
  217. package/dist/servers/rest.server.d.ts +7 -0
  218. package/dist/servers/rest.server.js +40 -51
  219. package/dist/servers/rest.server.js.map +1 -0
  220. package/dist/servers/socket-io.server.d.ts +12 -0
  221. package/dist/servers/socket-io.server.js +48 -15
  222. package/dist/servers/socket-io.server.js.map +1 -0
  223. package/dist/services/database.service.d.ts +68 -0
  224. package/dist/services/database.service.js +528 -462
  225. package/dist/services/database.service.js.map +1 -0
  226. package/dist/types/tcrawl-config.d.ts +14 -0
  227. package/dist/types/tcrawl-config.js +14 -17
  228. package/dist/types/tcrawl-config.js.map +1 -0
  229. package/dist/types/thydra-config.d.ts +4 -0
  230. package/dist/types/thydra-config.js +4 -7
  231. package/dist/types/thydra-config.js.map +1 -0
  232. package/dist/types/tparser-ctor.d.ts +7 -0
  233. package/dist/types/tparser-ctor.js +2 -2
  234. package/dist/types/tparser-ctor.js.map +1 -0
  235. package/dist/types/tquery.d.ts +7 -0
  236. package/dist/types/tquery.js +2 -2
  237. package/dist/types/tquery.js.map +1 -0
  238. package/dist/types/trobots-config.d.ts +4 -0
  239. package/dist/types/trobots-config.js +4 -7
  240. package/dist/types/trobots-config.js.map +1 -0
  241. package/package.json +41 -29
  242. package/angular/10-es2015.bacd4ae5dd7913ce55f0.js +0 -1
  243. package/angular/10-es5.bacd4ae5dd7913ce55f0.js +0 -1
  244. package/angular/11-es2015.0f031dcf752d1e8eda6b.js +0 -1
  245. package/angular/11-es5.0f031dcf752d1e8eda6b.js +0 -1
  246. package/angular/3rdpartylicenses.txt +0 -1127
  247. package/angular/5-es2015.951498ca9c1bc74e57bf.js +0 -1
  248. package/angular/5-es5.951498ca9c1bc74e57bf.js +0 -1
  249. package/angular/6-es2015.65f680261a3506b88381.js +0 -1
  250. package/angular/6-es5.65f680261a3506b88381.js +0 -1
  251. package/angular/7-es2015.625197f3af1dbf3e805d.js +0 -1
  252. package/angular/7-es5.625197f3af1dbf3e805d.js +0 -1
  253. package/angular/8-es2015.55518901987a5b834309.js +0 -1
  254. package/angular/8-es5.55518901987a5b834309.js +0 -1
  255. package/angular/9-es2015.6cc9bde262564e7836f2.js +0 -1
  256. package/angular/9-es5.6cc9bde262564e7836f2.js +0 -1
  257. package/angular/Roboto-Black.41ed1105a6ebb8ffe34e.woff2 +0 -0
  258. package/angular/Roboto-Black.937491dfcbe64ca9a9f1.woff +0 -0
  259. package/angular/Roboto-BlackItalic.2e1ee657996854c6f427.woff +0 -0
  260. package/angular/Roboto-BlackItalic.50ca4c51ebc27e7e7d2f.woff2 +0 -0
  261. package/angular/Roboto-Bold.73288d91c325e82a5b92.woff +0 -0
  262. package/angular/Roboto-Bold.92fbd4e93cf0a5dbebaa.woff2 +0 -0
  263. package/angular/Roboto-BoldItalic.5f600d98a73d800ae575.woff2 +0 -0
  264. package/angular/Roboto-BoldItalic.6d89acbd21d7e3fbecb2.woff +0 -0
  265. package/angular/Roboto-Light.c27d89ac77468ae18f28.woff2 +0 -0
  266. package/angular/Roboto-Light.d923dfafc0c5183b59aa.woff +0 -0
  267. package/angular/Roboto-LightItalic.506274c7228cf81cae4d.woff2 +0 -0
  268. package/angular/Roboto-LightItalic.d4b8c137518d9d92bb28.woff +0 -0
  269. package/angular/Roboto-Medium.092c6130df8fd2199888.woff +0 -0
  270. package/angular/Roboto-Medium.1d3bced88509b0838984.woff2 +0 -0
  271. package/angular/Roboto-MediumItalic.18ff1628c628080166c1.woff +0 -0
  272. package/angular/Roboto-MediumItalic.d620b8f53f75966fe42e.woff2 +0 -0
  273. package/angular/Roboto-Regular.64cfb66c866ea50cad47.woff2 +0 -0
  274. package/angular/Roboto-Regular.e02e9d6ff5547f7e9962.woff +0 -0
  275. package/angular/Roboto-RegularItalic.4dd2af1e8df532f41db8.woff2 +0 -0
  276. package/angular/Roboto-RegularItalic.5ea38fff9eebef99c5df.woff +0 -0
  277. package/angular/Roboto-Thin.dbd56bd3357dc3617fe5.woff2 +0 -0
  278. package/angular/Roboto-Thin.e7f7c82374bd0ebef14b.woff +0 -0
  279. package/angular/Roboto-ThinItalic.5dd9349c940073834e9a.woff +0 -0
  280. package/angular/Roboto-ThinItalic.a8cef84f735ef887abdc.woff2 +0 -0
  281. package/angular/assets/config/app-config.json +0 -16
  282. package/angular/assets/images/splashbg.jpg +0 -0
  283. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff +0 -0
  284. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff2 +0 -0
  285. package/angular/assets/web-app-commons/fonts/material-icons/material-design-icons-community.css +0 -11293
  286. package/angular/favicon.ico +0 -0
  287. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNa.f2a0933406f783065152.woff +0 -0
  288. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.6467d9a24f234e8e8e07.woff2 +0 -0
  289. package/angular/index.html +0 -16
  290. package/angular/main-es2015.3a582572476c7f292e52.js +0 -1
  291. package/angular/main-es5.3a582572476c7f292e52.js +0 -1
  292. package/angular/polyfills-es2015.7df68534018bc2f6cb09.js +0 -1
  293. package/angular/polyfills-es5.e79468f406fae2989221.js +0 -1
  294. package/angular/runtime-es2015.6d2cff76cdb2790d3308.js +0 -1
  295. package/angular/runtime-es5.6d2cff76cdb2790d3308.js +0 -1
  296. package/angular/styles.c5c6c2534225b85c4ff0.css +0 -1
  297. package/config/bad-words.json +0 -1
  298. package/config/complex-english.json +0 -400
  299. package/config/hydra-auth.json +0 -8
  300. package/config/hydra-crawler.json +0 -84
  301. package/config/list-allow.json +0 -171
  302. package/config/list-deny.json +0 -248
  303. package/config/list-expiry.json +0 -7
  304. package/config/schedule.json +0 -25
  305. package/config/spelling.json +0 -1
@@ -1,17 +1,23 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.DenylistApp = void 0;
4
- const tscommons_core_1 = require("tscommons-core");
5
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- const nodecommons_cli_2 = require("nodecommons-cli");
8
- const nodecommons_app_1 = require("nodecommons-app");
9
- const lists_1 = require("../classes/lists");
10
- const elist_1 = require("../enums/elist");
11
- class DenylistApp extends nodecommons_app_1.CommonsApp {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { commonsArrayChunk } from 'tscommons-es-core';
11
+ import { EStatus } from 'hydra-crawler-ts-assets';
12
+ import { commonsOutputDoing, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
13
+ import { CommonsArgs } from 'nodecommons-es-cli';
14
+ import { CommonsApp } from 'nodecommons-es-app';
15
+ import { Lists } from '../classes/lists';
16
+ import { EList } from '../enums/elist';
17
+ export class DenylistApp extends CommonsApp {
12
18
  constructor() {
13
19
  super('hydra-crawler');
14
- this.lists = new lists_1.Lists();
20
+ this.lists = new Lists();
15
21
  }
16
22
  getAppName() {
17
23
  return 'Hydra - Denylist';
@@ -22,94 +28,105 @@ class DenylistApp extends nodecommons_app_1.CommonsApp {
22
28
  addToList(list, entries) {
23
29
  this.lists.add(list, entries);
24
30
  }
25
- async init() {
26
- if (!this.databaseService)
27
- throw new Error('Database service has not been set yet');
28
- nodecommons_cli_1.CommonsOutput.doing('Connecting to database');
29
- await this.databaseService.init();
30
- nodecommons_cli_1.CommonsOutput.success();
31
- await super.init();
31
+ init() {
32
+ const _super = Object.create(null, {
33
+ init: { get: () => super.init }
34
+ });
35
+ return __awaiter(this, void 0, void 0, function* () {
36
+ if (!this.databaseService)
37
+ throw new Error('Database service has not been set yet');
38
+ commonsOutputDoing('Connecting to database');
39
+ yield this.databaseService.init();
40
+ commonsOutputSuccess();
41
+ yield _super.init.call(this);
42
+ });
32
43
  }
33
- async forward(limit) {
34
- if (!this.databaseService)
35
- throw new Error('Database service has not been set');
36
- let tally = 0;
37
- let found = 0;
38
- nodecommons_cli_1.CommonsOutput.doing(`Searching for non-DENY urls on the denylist`);
39
- const urls = [];
40
- const result = this.databaseService.getUrls()
41
- .find({ status: { $nin: [hydra_crawler_ts_assets_1.EStatus.DENY, hydra_crawler_ts_assets_1.EStatus.ARCHIVED] } });
42
- while (true) {
43
- tally++;
44
- if ((tally % 100) === 0)
45
- nodecommons_cli_1.CommonsOutput.progress(`${tally}, ${found}`);
46
- if (limit !== undefined && tally > limit)
47
- break;
48
- const row = await result.next();
49
- if (row === null)
50
- break;
51
- if (!this.lists.match(elist_1.EList.DENY, row.url))
52
- continue;
53
- found++;
54
- urls.push(row.url);
55
- }
56
- nodecommons_cli_1.CommonsOutput.result(found);
57
- nodecommons_cli_1.CommonsOutput.doing(`Marking detected as DENY`);
58
- const batches = tscommons_core_1.CommonsArray.chunk(urls, 100);
59
- tally = 0;
60
- for (const batch of batches) {
61
- await this.databaseService.getUrls().updateMany({ url: { $in: batch } }, { $set: {
62
- status: hydra_crawler_ts_assets_1.EStatus.DENY
63
- }, $unset: {
64
- ttl: true
65
- } });
66
- tally += 100;
67
- nodecommons_cli_1.CommonsOutput.progress(tally);
68
- }
69
- nodecommons_cli_1.CommonsOutput.success();
44
+ forward(limit) {
45
+ return __awaiter(this, void 0, void 0, function* () {
46
+ if (!this.databaseService)
47
+ throw new Error('Database service has not been set');
48
+ let tally = 0;
49
+ let found = 0;
50
+ commonsOutputDoing('Searching for non-DENY urls on the denylist');
51
+ const urls = [];
52
+ const result = this.databaseService.getUrls()
53
+ .find({ status: { $nin: [EStatus.DENY, EStatus.ARCHIVED] } }, {});
54
+ while (true) {
55
+ tally++;
56
+ if ((tally % 100) === 0)
57
+ commonsOutputProgress(`${tally}, ${found}`);
58
+ if (limit !== undefined && tally > limit)
59
+ break;
60
+ const row = yield result.next();
61
+ if (row === null)
62
+ break;
63
+ if (!this.lists.match(EList.DENY, row.url))
64
+ continue;
65
+ found++;
66
+ urls.push(row.url);
67
+ }
68
+ commonsOutputResult(found);
69
+ commonsOutputDoing('Marking detected as DENY');
70
+ const batches = commonsArrayChunk(urls, 100);
71
+ tally = 0;
72
+ for (const batch of batches) {
73
+ yield this.databaseService.getUrls().updateMany({ url: { $in: batch } }, { $set: {
74
+ status: EStatus.DENY
75
+ }, $unset: {
76
+ ttl: true
77
+ } });
78
+ tally += 100;
79
+ commonsOutputProgress(tally);
80
+ }
81
+ commonsOutputSuccess();
82
+ });
70
83
  }
71
- async reverse(limit) {
72
- if (!this.databaseService)
73
- throw new Error('Database service has not been set');
74
- let tally = 0;
75
- let found = 0;
76
- nodecommons_cli_1.CommonsOutput.doing(`Searching for DENY urls not on the denylist`);
77
- const urls = [];
78
- const result = this.databaseService.getUrls()
79
- .find({ status: hydra_crawler_ts_assets_1.EStatus.DENY });
80
- while (true) {
81
- tally++;
82
- if ((tally % 100) === 0)
83
- nodecommons_cli_1.CommonsOutput.progress(`${tally}, ${found}`);
84
- if (limit !== undefined && tally > limit)
85
- break;
86
- const row = await result.next();
87
- if (row === null)
88
- break;
89
- if (this.lists.match(elist_1.EList.DENY, row.url))
90
- continue;
91
- found++;
92
- urls.push(row.url);
93
- }
94
- nodecommons_cli_1.CommonsOutput.result(found);
95
- nodecommons_cli_1.CommonsOutput.doing(`Marking detected as QUEUED`);
96
- const batches = tscommons_core_1.CommonsArray.chunk(urls, 100);
97
- tally = 0;
98
- for (const batch of batches) {
99
- await this.databaseService.getUrls().updateMany({ url: { $in: batch } }, { $set: {
100
- status: hydra_crawler_ts_assets_1.EStatus.QUEUED
101
- }, $unset: {
102
- ttl: true
103
- } });
104
- tally += 100;
105
- nodecommons_cli_1.CommonsOutput.progress(tally);
106
- }
107
- nodecommons_cli_1.CommonsOutput.success();
84
+ reverse(limit) {
85
+ return __awaiter(this, void 0, void 0, function* () {
86
+ if (!this.databaseService)
87
+ throw new Error('Database service has not been set');
88
+ let tally = 0;
89
+ let found = 0;
90
+ commonsOutputDoing('Searching for DENY urls not on the denylist');
91
+ const urls = [];
92
+ const result = this.databaseService.getUrls()
93
+ .find({ status: EStatus.DENY }, {});
94
+ while (true) {
95
+ tally++;
96
+ if ((tally % 100) === 0)
97
+ commonsOutputProgress(`${tally}, ${found}`);
98
+ if (limit !== undefined && tally > limit)
99
+ break;
100
+ const row = yield result.next();
101
+ if (row === null)
102
+ break;
103
+ if (this.lists.match(EList.DENY, row.url))
104
+ continue;
105
+ found++;
106
+ urls.push(row.url);
107
+ }
108
+ commonsOutputResult(found);
109
+ commonsOutputDoing('Marking detected as QUEUED');
110
+ const batches = commonsArrayChunk(urls, 100);
111
+ tally = 0;
112
+ for (const batch of batches) {
113
+ yield this.databaseService.getUrls().updateMany({ url: { $in: batch } }, { $set: {
114
+ status: EStatus.QUEUED
115
+ }, $unset: {
116
+ ttl: true
117
+ } });
118
+ tally += 100;
119
+ commonsOutputProgress(tally);
120
+ }
121
+ commonsOutputSuccess();
122
+ });
108
123
  }
109
- async run() {
110
- const args = new nodecommons_cli_2.CommonsArgs();
111
- await this.forward(args.getNumberOrUndefined('limit'));
112
- await this.reverse(args.getNumberOrUndefined('limit'));
124
+ run() {
125
+ return __awaiter(this, void 0, void 0, function* () {
126
+ const args = new CommonsArgs();
127
+ yield this.forward(args.getNumberOrUndefined('limit'));
128
+ yield this.reverse(args.getNumberOrUndefined('limit'));
129
+ });
113
130
  }
114
131
  }
115
- exports.DenylistApp = DenylistApp;
132
+ //# sourceMappingURL=denylist.app.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"denylist.app.js","sourceRoot":"","sources":["../../src/apps/denylist.app.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAEtD,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAGlD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAMzC,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAIvC,MAAM,OAAO,WAAY,SAAQ,UAAU;IAK1C;QACC,KAAK,CAAC,eAAe,CAAC,CAAC;QAEvB,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,kBAAkB,CAAC;IAC3B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEa,OAAO,CAAC,KAAc;;YACnC,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,kBAAkB,CAAC,6CAA6C,CAAC,CAAC;YAElE,MAAM,IAAI,GAAa,EAAE,CAAC;YAE1B,MAAM,MAAM,GAAiB,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACxD,IAAI,CACH,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,QAAQ,CAAE,EAAE,EAAE,EACxD,EAAE,CACH,CAAC;YAEJ,OAAO,IAAI,EAAE;gBACZ,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,qBAAqB,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBACrE,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,KAAK;oBAAE,MAAM;gBAEhD,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAErD,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;aACnB;YAED,mBAAmB,CAAC,KAAK,CAAC,CAAC;YAE3B,kBAAkB,CAAC,0BAA0B,CAAC,CAAC;YAE/C,MAAM,OAAO,GAAe,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEzD,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;gBAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB,EAAE,IAAI,EAAE;wBACN,MAAM,EAAE,OAAO,CAAC,IAAI;qBACrB,EAAE,MAAM,EAAE;wBACT,GAAG,EAAE,IAAI;qBACV,EAAE,CACJ,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,qBAAqB,CAAC,KAAK,CAAC,CAAC;aAC7B;YACD,oBAAoB,EAAE,CAAC;QACxB,CAAC;KAAA;IAEa,OAAO,CAAC,KAAc;;YACnC,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,kBAAkB,CAAC,6CAA6C,CAAC,CAAC;YAElE,MAAM,IAAI,GAAa,EAAE,CAAC;YAE1B,MAAM,MAAM,GAAiB,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACxD,IAAI,CACH,EAAE,MAAM,EAAE,OAAO,CAAC,IAAI,EAAE,EACxB,EAAE,CACH,CAAC;YAEJ,OAAO,IAAI,EAAE;gBACZ,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,qBAAqB,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBACrE,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,KAAK;oBAAE,MAAM;gBAEhD,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAEpD,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;aACnB;YAED,mBAAmB,CAAC,KAAK,CAAC,CAAC;YAE3B,kBAAkB,CAAC,4BAA4B,CAAC,CAAC;YAEjD,MAAM,OAAO,GAAe,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEzD,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;gBAC5B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB,EAAE,IAAI,EAAE;wBACN,MAAM,EAAE,OAAO,CAAC,MAAM;qBACvB,EAAE,MAAM,EAAE;wBACT,GAAG,EAAE,IAAI;qBACV,EAAE,CACJ,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,qBAAqB,CAAC,KAAK,CAAC,CAAC;aAC7B;YACD,oBAAoB,EAAE,CAAC;QACxB,CAAC;KAAA;IAEY,GAAG;;YACf,MAAM,IAAI,GAAgB,IAAI,WAAW,EAAE,CAAC;YAE5C,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC,CAAC;YACvD,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC,CAAC;QACxD,CAAC;KAAA;CACD"}
@@ -0,0 +1,19 @@
1
+ import { CommonsApp } from 'nodecommons-es-app';
2
+ import { DatabaseService } from '../services/database.service';
3
+ import { IMatch } from '../interfaces/imatch';
4
+ import { IExpiry } from '../interfaces/iexpiry';
5
+ import { EList } from '../enums/elist';
6
+ import { IInternalHydraCommonListApp, IInternalHydraCommonExpiryApp } from './internal-hydra-common.app';
7
+ export declare class ExpireApp extends CommonsApp implements IInternalHydraCommonListApp, IInternalHydraCommonExpiryApp {
8
+ private threshold;
9
+ private databaseService;
10
+ private lists;
11
+ private expiry;
12
+ constructor(threshold: Date | undefined);
13
+ getAppName(): string;
14
+ setDatabaseService(databaseService: DatabaseService): void;
15
+ addToList(list: EList, entries: IMatch[]): void;
16
+ addToExpiry(expiries: IExpiry[]): void;
17
+ init(): Promise<void>;
18
+ run(): Promise<void>;
19
+ }
@@ -1,18 +1,24 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ExpireApp = void 0;
4
- const nodecommons_cli_1 = require("nodecommons-cli");
5
- const nodecommons_cli_2 = require("nodecommons-cli");
6
- const nodecommons_app_1 = require("nodecommons-app");
7
- const lists_1 = require("../classes/lists");
8
- const expirer_1 = require("../classes/expirer");
9
- const expiry_1 = require("../classes/expiry");
10
- class ExpireApp extends nodecommons_app_1.CommonsApp {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { commonsOutputDoing, commonsOutputSuccess } from 'nodecommons-es-cli';
11
+ import { CommonsArgs } from 'nodecommons-es-cli';
12
+ import { CommonsApp } from 'nodecommons-es-app';
13
+ import { Lists } from '../classes/lists';
14
+ import { Expirer } from '../classes/expirer';
15
+ import { Expiry } from '../classes/expiry';
16
+ export class ExpireApp extends CommonsApp {
11
17
  constructor(threshold) {
12
18
  super('hydra-crawler');
13
19
  this.threshold = threshold;
14
- this.lists = new lists_1.Lists();
15
- this.expiry = new expiry_1.Expiry();
20
+ this.lists = new Lists();
21
+ this.expiry = new Expiry();
16
22
  }
17
23
  getAppName() {
18
24
  return 'Hydra - Expire';
@@ -26,25 +32,32 @@ class ExpireApp extends nodecommons_app_1.CommonsApp {
26
32
  addToExpiry(expiries) {
27
33
  this.expiry.add(expiries);
28
34
  }
29
- async init() {
30
- if (!this.databaseService)
31
- throw new Error('Database service has not been set yet');
32
- nodecommons_cli_1.CommonsOutput.doing('Connecting to database');
33
- await this.databaseService.init();
34
- nodecommons_cli_1.CommonsOutput.success();
35
- await super.init();
35
+ init() {
36
+ const _super = Object.create(null, {
37
+ init: { get: () => super.init }
38
+ });
39
+ return __awaiter(this, void 0, void 0, function* () {
40
+ if (!this.databaseService)
41
+ throw new Error('Database service has not been set yet');
42
+ commonsOutputDoing('Connecting to database');
43
+ yield this.databaseService.init();
44
+ commonsOutputSuccess();
45
+ yield _super.init.call(this);
46
+ });
36
47
  }
37
- async run() {
38
- if (!this.databaseService)
39
- throw new Error('Database service has not been set');
40
- const args = new nodecommons_cli_2.CommonsArgs();
41
- const expirer = new expirer_1.Expirer(this.expiry, this.databaseService);
42
- if (this.threshold !== undefined) {
43
- await expirer.expireFixed(this.threshold);
44
- }
45
- else {
46
- await expirer.expire(args.getNumberOrUndefined('limit'));
47
- }
48
+ run() {
49
+ return __awaiter(this, void 0, void 0, function* () {
50
+ if (!this.databaseService)
51
+ throw new Error('Database service has not been set');
52
+ const args = new CommonsArgs();
53
+ const expirer = new Expirer(this.expiry, this.databaseService);
54
+ if (this.threshold !== undefined) {
55
+ yield expirer.expireFixed(this.threshold);
56
+ }
57
+ else {
58
+ yield expirer.expire(args.getNumberOrUndefined('limit'));
59
+ }
60
+ });
48
61
  }
49
62
  }
50
- exports.ExpireApp = ExpireApp;
63
+ //# sourceMappingURL=expire.app.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"expire.app.js","sourceRoot":"","sources":["../../src/apps/expire.app.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC9E,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAW3C,MAAM,OAAO,SAAU,SAAQ,UAAU;IAKxC,YACU,SAAyB;QAElC,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,cAAS,GAAT,SAAS,CAAgB;QAIlC,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;IAC5B,CAAC;IAEM,UAAU;QAChB,OAAO,gBAAgB,CAAC;IACzB,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEM,WAAW,CAChB,QAAmB;QAEpB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC3B,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,IAAI,GAAgB,IAAI,WAAW,EAAE,CAAC;YAE5C,MAAM,OAAO,GAAY,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC;YAExE,IAAI,IAAI,CAAC,SAAS,KAAK,SAAS,EAAE;gBACjC,MAAM,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;aAC1C;iBAAM;gBACN,MAAM,OAAO,CAAC,MAAM,CAClB,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,CACnC,CAAC;aACF;QACF,CAAC;KAAA;CACD"}
@@ -0,0 +1,8 @@
1
+ import { CommonsApp } from 'nodecommons-es-app';
2
+ import { IInternalHydraCommonApp } from './internal-hydra-common.app';
3
+ export declare class ExtractTextApp extends CommonsApp implements IInternalHydraCommonApp {
4
+ private url;
5
+ constructor(url: string);
6
+ getAppName(): string;
7
+ run(): Promise<void>;
8
+ }
@@ -1,22 +1,28 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ExtractTextApp = void 0;
4
- const http = require("http");
5
- const https = require("https");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- const nodecommons_app_1 = require("nodecommons-app");
8
- const crawler_1 = require("../classes/crawler");
9
- const text_parser_1 = require("../parsers/text.parser");
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import * as http from 'http';
11
+ import * as https from 'https';
12
+ import { commonsOutputDoing, commonsOutputSuccess } from 'nodecommons-es-cli';
13
+ import { CommonsApp } from 'nodecommons-es-app';
14
+ import { Crawler } from '../classes/crawler';
15
+ import { TextParser } from '../parsers/text.parser';
10
16
  // extract the text from the HTML of a URL
11
- class TextParserInstance extends text_parser_1.TextParser {
12
- constructor(outcome) {
13
- super(outcome);
14
- }
15
- async parseText(_database, text) {
16
- console.log(text);
17
+ class TextParserInstance extends TextParser {
18
+ // eslint-disable-next-line @typescript-eslint/require-await
19
+ parseText(_database, text) {
20
+ return __awaiter(this, void 0, void 0, function* () {
21
+ console.log(text);
22
+ });
17
23
  }
18
24
  }
19
- class ExtractTextApp extends nodecommons_app_1.CommonsApp {
25
+ export class ExtractTextApp extends CommonsApp {
20
26
  constructor(url) {
21
27
  super('hydra-crawler');
22
28
  this.url = url;
@@ -24,24 +30,26 @@ class ExtractTextApp extends nodecommons_app_1.CommonsApp {
24
30
  getAppName() {
25
31
  return 'Hydra - Extract Text';
26
32
  }
27
- async run() {
28
- nodecommons_cli_1.CommonsOutput.doing(`Downloading from URL at ${this.url}`);
29
- const whatwg = new URL(this.url);
30
- let handler;
31
- switch (whatwg.protocol) {
32
- case 'http:':
33
- handler = http;
34
- break;
35
- case 'https:':
36
- handler = https;
37
- break;
38
- default:
39
- throw new Error(`unable to handle protocol ${whatwg.protocol}`);
40
- }
41
- const outcome = await crawler_1.Crawler.request(handler, this.url, 10000, 4096000);
42
- nodecommons_cli_1.CommonsOutput.success();
43
- const parser = new TextParserInstance(outcome);
44
- await parser.parse(undefined);
33
+ run() {
34
+ return __awaiter(this, void 0, void 0, function* () {
35
+ commonsOutputDoing(`Downloading from URL at ${this.url}`);
36
+ const whatwg = new URL(this.url);
37
+ let handler;
38
+ switch (whatwg.protocol) {
39
+ case 'http:':
40
+ handler = http;
41
+ break;
42
+ case 'https:':
43
+ handler = https;
44
+ break;
45
+ default:
46
+ throw new Error(`unable to handle protocol ${whatwg.protocol}`);
47
+ }
48
+ const outcome = yield Crawler.request(handler, this.url, 10000, 4096000);
49
+ commonsOutputSuccess();
50
+ const parser = new TextParserInstance(outcome);
51
+ yield parser.parse(undefined);
52
+ });
45
53
  }
46
54
  }
47
- exports.ExtractTextApp = ExtractTextApp;
55
+ //# sourceMappingURL=extract-text.app.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extract-text.app.js","sourceRoot":"","sources":["../../src/apps/extract-text.app.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAE/B,OAAO,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC9E,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAI7C,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAOpD,0CAA0C;AAE1C,MAAM,kBAAmB,SAAQ,UAAyB;IACzD,4DAA4D;IAC5C,SAAS,CACvB,SAA0B,EAC1B,IAAY;;YAEb,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;KAAA;CACD;AAED,MAAM,OAAO,cAAe,SAAQ,UAAU;IAC7C,YACU,GAAW;QAEpB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,QAAG,GAAH,GAAG,CAAQ;IAGrB,CAAC;IAEM,UAAU;QAChB,OAAO,sBAAsB,CAAC;IAC/B,CAAC;IAEY,GAAG;;YACf,kBAAkB,CAAC,2BAA2B,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;YAE1D,MAAM,MAAM,GAAQ,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAEtC,IAAI,OAA2C,CAAC;YAChD,QAAQ,MAAM,CAAC,QAAQ,EAAE;gBACxB,KAAK,OAAO;oBAAE,OAAO,GAAG,IAAI,CAAC;oBAAC,MAAM;gBACpC,KAAK,QAAQ;oBAAE,OAAO,GAAG,KAAK,CAAC;oBAAC,MAAM;gBACtC;oBACC,MAAM,IAAI,KAAK,CAAC,6BAA6B,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;aACjE;YAED,MAAM,OAAO,GAAoB,MAAM,OAAO,CAAC,OAAO,CACpD,OAAO,EACP,IAAI,CAAC,GAAG,EACR,KAAK,EACL,OAAO,CACR,CAAC;YACF,oBAAoB,EAAE,CAAC;YAEvB,MAAM,MAAM,GAAuB,IAAI,kBAAkB,CACvD,OAAO,CACR,CAAC;YACF,MAAM,MAAM,CAAC,KAAK,CAAC,SAAuC,CAAC,CAAC;QAC7D,CAAC;KAAA;CACD"}
@@ -0,0 +1,34 @@
1
+ import { TCommonsScheduleTime } from 'tscommons-es-async';
2
+ import { ICommonsExpressConfig } from 'nodecommons-es-express';
3
+ import { CommonsStrictExpressServer } from 'nodecommons-es-express';
4
+ import { CommonsSocketIoApp } from 'nodecommons-es-app-socket-io';
5
+ import { SocketIoServer } from '../servers/socket-io.server';
6
+ import { DatabaseService } from '../services/database.service';
7
+ import { IMatch } from '../interfaces/imatch';
8
+ import { IExpiry } from '../interfaces/iexpiry';
9
+ import { EList } from '../enums/elist';
10
+ import { IInternalHydraCommonListApp, IInternalHydraCommonExpiryApp, IInternalHydraCommonMaintenanceApp } from './internal-hydra-common.app';
11
+ export declare class HydraApp extends CommonsSocketIoApp<SocketIoServer> implements IInternalHydraCommonListApp, IInternalHydraCommonExpiryApp, IInternalHydraCommonMaintenanceApp {
12
+ private databaseService;
13
+ private lists;
14
+ private expiry;
15
+ private hydraConfig;
16
+ private crawlConfig;
17
+ private parsersConfig;
18
+ private robotsConfig;
19
+ private crawlServer;
20
+ private maintenanceTimes;
21
+ private maintenanceServer;
22
+ private tracker;
23
+ constructor();
24
+ protected buildSocketIoServer(expressServer: CommonsStrictExpressServer, expressConfig: ICommonsExpressConfig): SocketIoServer;
25
+ getAppName(): string;
26
+ setDatabaseService(databaseService: DatabaseService): void;
27
+ addToList(list: EList, entries: IMatch[]): void;
28
+ addToExpiry(expiries: IExpiry[]): void;
29
+ setMaintenanceSchedule(times: TCommonsScheduleTime[]): void;
30
+ init(): Promise<void>;
31
+ protected listening(): void;
32
+ run(): Promise<void>;
33
+ protected shutdown(): Promise<void>;
34
+ }