hydra-crawler 1.4.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305)
  1. package/dist/apis/autocomplete.api.d.ts +7 -0
  2. package/dist/apis/autocomplete.api.js +15 -9
  3. package/dist/apis/autocomplete.api.js.map +1 -0
  4. package/dist/apis/bugs.api.d.ts +7 -0
  5. package/dist/apis/bugs.api.js +21 -15
  6. package/dist/apis/bugs.api.js.map +1 -0
  7. package/dist/apis/crawl.api.d.ts +7 -0
  8. package/dist/apis/crawl.api.js +15 -9
  9. package/dist/apis/crawl.api.js.map +1 -0
  10. package/dist/apis/domains.api.d.ts +7 -0
  11. package/dist/apis/domains.api.js +24 -19
  12. package/dist/apis/domains.api.js.map +1 -0
  13. package/dist/apis/images.api.d.ts +7 -0
  14. package/dist/apis/images.api.js +20 -14
  15. package/dist/apis/images.api.js.map +1 -0
  16. package/dist/apis/statistics.api.d.ts +8 -0
  17. package/dist/apis/statistics.api.js +27 -20
  18. package/dist/apis/statistics.api.js.map +1 -0
  19. package/dist/apis/test.api.d.ts +5 -0
  20. package/dist/apis/test.api.js +15 -9
  21. package/dist/apis/test.api.js.map +1 -0
  22. package/dist/apis/urls.api.d.ts +7 -0
  23. package/dist/apis/urls.api.js +21 -15
  24. package/dist/apis/urls.api.js.map +1 -0
  25. package/dist/apps/cleanup.app.d.ts +19 -0
  26. package/dist/apps/cleanup.app.js +118 -100
  27. package/dist/apps/cleanup.app.js.map +1 -0
  28. package/dist/apps/cross-populate-export.app.d.ts +12 -0
  29. package/dist/apps/cross-populate-export.app.js +60 -47
  30. package/dist/apps/cross-populate-export.app.js.map +1 -0
  31. package/dist/apps/cross-populate-import.app.d.ts +12 -0
  32. package/dist/apps/cross-populate-import.app.js +64 -51
  33. package/dist/apps/cross-populate-import.app.js.map +1 -0
  34. package/dist/apps/denylist.app.d.ts +17 -0
  35. package/dist/apps/denylist.app.js +115 -98
  36. package/dist/apps/denylist.app.js.map +1 -0
  37. package/dist/apps/expire.app.d.ts +19 -0
  38. package/dist/apps/expire.app.js +44 -31
  39. package/dist/apps/expire.app.js.map +1 -0
  40. package/dist/apps/extract-text.app.d.ts +8 -0
  41. package/dist/apps/extract-text.app.js +43 -35
  42. package/dist/apps/extract-text.app.js.map +1 -0
  43. package/dist/apps/hydra.app.d.ts +34 -0
  44. package/dist/apps/hydra.app.js +150 -137
  45. package/dist/apps/hydra.app.js.map +1 -0
  46. package/dist/apps/import.app.d.ts +11 -0
  47. package/dist/apps/import.app.js +44 -32
  48. package/dist/apps/import.app.js.map +1 -0
  49. package/dist/apps/internal-hydra-common.app.d.ts +28 -0
  50. package/dist/apps/internal-hydra-common.app.js +5 -11
  51. package/dist/apps/internal-hydra-common.app.js.map +1 -0
  52. package/dist/apps/query.app.d.ts +20 -0
  53. package/dist/apps/query.app.js +63 -49
  54. package/dist/apps/query.app.js.map +1 -0
  55. package/dist/apps/reattempt.app.d.ts +17 -0
  56. package/dist/apps/reattempt.app.js +66 -53
  57. package/dist/apps/reattempt.app.js.map +1 -0
  58. package/dist/apps/requeue-domain.app.d.ts +13 -0
  59. package/dist/apps/requeue-domain.app.js +50 -37
  60. package/dist/apps/requeue-domain.app.js.map +1 -0
  61. package/dist/apps/seed.app.d.ts +15 -0
  62. package/dist/apps/seed.app.js +53 -40
  63. package/dist/apps/seed.app.js.map +1 -0
  64. package/dist/apps/startup.app.d.ts +11 -0
  65. package/dist/apps/startup.app.js +51 -38
  66. package/dist/apps/startup.app.js.map +1 -0
  67. package/dist/apps/unarchive.app.d.ts +15 -0
  68. package/dist/apps/unarchive.app.js +67 -54
  69. package/dist/apps/unarchive.app.js.map +1 -0
  70. package/dist/classes/cleaner.d.ts +12 -0
  71. package/dist/classes/cleaner.js +227 -207
  72. package/dist/classes/cleaner.js.map +1 -0
  73. package/dist/classes/crawler.d.ts +34 -0
  74. package/dist/classes/crawler.js +248 -241
  75. package/dist/classes/crawler.js.map +1 -0
  76. package/dist/classes/dns.d.ts +3 -0
  77. package/dist/classes/dns.js +10 -13
  78. package/dist/classes/dns.js.map +1 -0
  79. package/dist/classes/expirer.d.ts +10 -0
  80. package/dist/classes/expirer.js +107 -94
  81. package/dist/classes/expirer.js.map +1 -0
  82. package/dist/classes/expiry.d.ts +8 -0
  83. package/dist/classes/expiry.js +16 -19
  84. package/dist/classes/expiry.js.map +1 -0
  85. package/dist/classes/lists.d.ts +9 -0
  86. package/dist/classes/lists.js +13 -18
  87. package/dist/classes/lists.js.map +1 -0
  88. package/dist/classes/robot.d.ts +15 -0
  89. package/dist/classes/robot.js +40 -30
  90. package/dist/classes/robot.js.map +1 -0
  91. package/dist/classes/tracker.d.ts +25 -0
  92. package/dist/classes/tracker.js +82 -64
  93. package/dist/classes/tracker.js.map +1 -0
  94. package/dist/cli.d.ts +1 -0
  95. package/dist/cli.js +72 -65
  96. package/dist/cli.js.map +1 -0
  97. package/dist/enums/eavailable-strategy.d.ts +4 -0
  98. package/dist/enums/eavailable-strategy.js +3 -5
  99. package/dist/enums/eavailable-strategy.js.map +1 -0
  100. package/dist/enums/elist.d.ts +7 -0
  101. package/dist/enums/elist.js +7 -11
  102. package/dist/enums/elist.js.map +1 -0
  103. package/dist/enums/eserver.d.ts +8 -0
  104. package/dist/enums/eserver.js +3 -5
  105. package/dist/enums/eserver.js.map +1 -0
  106. package/dist/enums/ex-powered-by.d.ts +6 -0
  107. package/dist/enums/ex-powered-by.js +3 -5
  108. package/dist/enums/ex-powered-by.js.map +1 -0
  109. package/dist/helpers/matcher.d.ts +5 -0
  110. package/dist/helpers/matcher.js +2 -5
  111. package/dist/helpers/matcher.js.map +1 -0
  112. package/dist/helpers/random.d.ts +4 -0
  113. package/dist/helpers/random.js +2 -5
  114. package/dist/helpers/random.js.map +1 -0
  115. package/dist/helpers/utf-decoder.d.ts +4 -0
  116. package/dist/helpers/utf-decoder.js +3 -6
  117. package/dist/helpers/utf-decoder.js.map +1 -0
  118. package/dist/interfaces/iexpiry.d.ts +7 -0
  119. package/dist/interfaces/iexpiry.js +9 -13
  120. package/dist/interfaces/iexpiry.js.map +1 -0
  121. package/dist/interfaces/imatch.d.ts +6 -0
  122. package/dist/interfaces/imatch.js +6 -9
  123. package/dist/interfaces/imatch.js.map +1 -0
  124. package/dist/interfaces/iparser-config.d.ts +4 -0
  125. package/dist/interfaces/iparser-config.js +4 -7
  126. package/dist/interfaces/iparser-config.js.map +1 -0
  127. package/dist/interfaces/iparser.d.ts +8 -0
  128. package/dist/interfaces/iparser.js +2 -2
  129. package/dist/interfaces/iparser.js.map +1 -0
  130. package/dist/interfaces/irequest-outcome.d.ts +11 -0
  131. package/dist/interfaces/irequest-outcome.js +2 -2
  132. package/dist/interfaces/irequest-outcome.js.map +1 -0
  133. package/dist/interfaces/iserver.d.ts +4 -0
  134. package/dist/interfaces/iserver.js +2 -2
  135. package/dist/interfaces/iserver.js.map +1 -0
  136. package/dist/parsers/accessibility-metrics.parser.d.ts +11 -0
  137. package/dist/parsers/accessibility-metrics.parser.js +34 -26
  138. package/dist/parsers/accessibility-metrics.parser.js.map +1 -0
  139. package/dist/parsers/asp-error.parser.d.ts +12 -0
  140. package/dist/parsers/asp-error.parser.js +36 -28
  141. package/dist/parsers/asp-error.parser.js.map +1 -0
  142. package/dist/parsers/bad-words.parser.d.ts +10 -0
  143. package/dist/parsers/bad-words.parser.js +21 -13
  144. package/dist/parsers/bad-words.parser.js.map +1 -0
  145. package/dist/parsers/complex-english.parser.d.ts +15 -0
  146. package/dist/parsers/complex-english.parser.js +33 -25
  147. package/dist/parsers/complex-english.parser.js.map +1 -0
  148. package/dist/parsers/data.parser.d.ts +14 -0
  149. package/dist/parsers/data.parser.js +12 -16
  150. package/dist/parsers/data.parser.js.map +1 -0
  151. package/dist/parsers/dictionary.parser.d.ts +19 -0
  152. package/dist/parsers/dictionary.parser.js +47 -39
  153. package/dist/parsers/dictionary.parser.js.map +1 -0
  154. package/dist/parsers/html.parser.d.ts +13 -0
  155. package/dist/parsers/html.parser.js +4 -8
  156. package/dist/parsers/html.parser.js.map +1 -0
  157. package/dist/parsers/hyperlinks.parser.d.ts +20 -0
  158. package/dist/parsers/hyperlinks.parser.js +82 -77
  159. package/dist/parsers/hyperlinks.parser.js.map +1 -0
  160. package/dist/parsers/image-tags.parser.d.ts +20 -0
  161. package/dist/parsers/image-tags.parser.js +38 -34
  162. package/dist/parsers/image-tags.parser.js.map +1 -0
  163. package/dist/parsers/jpeg.parser.d.ts +11 -0
  164. package/dist/parsers/jpeg.parser.js +28 -20
  165. package/dist/parsers/jpeg.parser.js.map +1 -0
  166. package/dist/parsers/paragraphs.parser.d.ts +13 -0
  167. package/dist/parsers/paragraphs.parser.js +33 -40
  168. package/dist/parsers/paragraphs.parser.js.map +1 -0
  169. package/dist/parsers/parser.d.ts +19 -0
  170. package/dist/parsers/parser.js +30 -17
  171. package/dist/parsers/parser.js.map +1 -0
  172. package/dist/parsers/php-error.parser.d.ts +12 -0
  173. package/dist/parsers/php-error.parser.js +42 -34
  174. package/dist/parsers/php-error.parser.js.map +1 -0
  175. package/dist/parsers/phrase.parser.d.ts +8 -0
  176. package/dist/parsers/phrase.parser.js +16 -11
  177. package/dist/parsers/phrase.parser.js.map +1 -0
  178. package/dist/parsers/regex.parser.d.ts +10 -0
  179. package/dist/parsers/regex.parser.js +30 -22
  180. package/dist/parsers/regex.parser.js.map +1 -0
  181. package/dist/parsers/server.parser.d.ts +12 -0
  182. package/dist/parsers/server.parser.js +66 -56
  183. package/dist/parsers/server.parser.js.map +1 -0
  184. package/dist/parsers/spelling.parser.d.ts +10 -0
  185. package/dist/parsers/spelling.parser.js +21 -13
  186. package/dist/parsers/spelling.parser.js.map +1 -0
  187. package/dist/parsers/string.parser.d.ts +8 -0
  188. package/dist/parsers/string.parser.js +5 -8
  189. package/dist/parsers/string.parser.js.map +1 -0
  190. package/dist/parsers/text.parser.d.ts +8 -0
  191. package/dist/parsers/text.parser.js +24 -18
  192. package/dist/parsers/text.parser.js.map +1 -0
  193. package/dist/parsers/words.parser.d.ts +11 -0
  194. package/dist/parsers/words.parser.js +32 -28
  195. package/dist/parsers/words.parser.js.map +1 -0
  196. package/dist/queries/complex-english.query.d.ts +2 -0
  197. package/dist/queries/complex-english.query.js +37 -38
  198. package/dist/queries/complex-english.query.js.map +1 -0
  199. package/dist/queries/flash-content.query.d.ts +2 -0
  200. package/dist/queries/flash-content.query.js +45 -32
  201. package/dist/queries/flash-content.query.js.map +1 -0
  202. package/dist/queries/linking-to-domains.query.d.ts +2 -0
  203. package/dist/queries/linking-to-domains.query.js +35 -27
  204. package/dist/queries/linking-to-domains.query.js.map +1 -0
  205. package/dist/queries/readability-score.query.d.ts +2 -0
  206. package/dist/queries/readability-score.query.js +21 -13
  207. package/dist/queries/readability-score.query.js.map +1 -0
  208. package/dist/servers/crawl.server.d.ts +35 -0
  209. package/dist/servers/crawl.server.js +133 -121
  210. package/dist/servers/crawl.server.js.map +1 -0
  211. package/dist/servers/express.server.d.ts +8 -0
  212. package/dist/servers/express.server.js +7 -10
  213. package/dist/servers/express.server.js.map +1 -0
  214. package/dist/servers/maintenance.server.d.ts +22 -0
  215. package/dist/servers/maintenance.server.js +42 -36
  216. package/dist/servers/maintenance.server.js.map +1 -0
  217. package/dist/servers/rest.server.d.ts +7 -0
  218. package/dist/servers/rest.server.js +40 -51
  219. package/dist/servers/rest.server.js.map +1 -0
  220. package/dist/servers/socket-io.server.d.ts +12 -0
  221. package/dist/servers/socket-io.server.js +48 -15
  222. package/dist/servers/socket-io.server.js.map +1 -0
  223. package/dist/services/database.service.d.ts +68 -0
  224. package/dist/services/database.service.js +528 -462
  225. package/dist/services/database.service.js.map +1 -0
  226. package/dist/types/tcrawl-config.d.ts +14 -0
  227. package/dist/types/tcrawl-config.js +14 -17
  228. package/dist/types/tcrawl-config.js.map +1 -0
  229. package/dist/types/thydra-config.d.ts +4 -0
  230. package/dist/types/thydra-config.js +4 -7
  231. package/dist/types/thydra-config.js.map +1 -0
  232. package/dist/types/tparser-ctor.d.ts +7 -0
  233. package/dist/types/tparser-ctor.js +2 -2
  234. package/dist/types/tparser-ctor.js.map +1 -0
  235. package/dist/types/tquery.d.ts +7 -0
  236. package/dist/types/tquery.js +2 -2
  237. package/dist/types/tquery.js.map +1 -0
  238. package/dist/types/trobots-config.d.ts +4 -0
  239. package/dist/types/trobots-config.js +4 -7
  240. package/dist/types/trobots-config.js.map +1 -0
  241. package/package.json +41 -29
  242. package/angular/10-es2015.bacd4ae5dd7913ce55f0.js +0 -1
  243. package/angular/10-es5.bacd4ae5dd7913ce55f0.js +0 -1
  244. package/angular/11-es2015.0f031dcf752d1e8eda6b.js +0 -1
  245. package/angular/11-es5.0f031dcf752d1e8eda6b.js +0 -1
  246. package/angular/3rdpartylicenses.txt +0 -1127
  247. package/angular/5-es2015.951498ca9c1bc74e57bf.js +0 -1
  248. package/angular/5-es5.951498ca9c1bc74e57bf.js +0 -1
  249. package/angular/6-es2015.65f680261a3506b88381.js +0 -1
  250. package/angular/6-es5.65f680261a3506b88381.js +0 -1
  251. package/angular/7-es2015.625197f3af1dbf3e805d.js +0 -1
  252. package/angular/7-es5.625197f3af1dbf3e805d.js +0 -1
  253. package/angular/8-es2015.55518901987a5b834309.js +0 -1
  254. package/angular/8-es5.55518901987a5b834309.js +0 -1
  255. package/angular/9-es2015.6cc9bde262564e7836f2.js +0 -1
  256. package/angular/9-es5.6cc9bde262564e7836f2.js +0 -1
  257. package/angular/Roboto-Black.41ed1105a6ebb8ffe34e.woff2 +0 -0
  258. package/angular/Roboto-Black.937491dfcbe64ca9a9f1.woff +0 -0
  259. package/angular/Roboto-BlackItalic.2e1ee657996854c6f427.woff +0 -0
  260. package/angular/Roboto-BlackItalic.50ca4c51ebc27e7e7d2f.woff2 +0 -0
  261. package/angular/Roboto-Bold.73288d91c325e82a5b92.woff +0 -0
  262. package/angular/Roboto-Bold.92fbd4e93cf0a5dbebaa.woff2 +0 -0
  263. package/angular/Roboto-BoldItalic.5f600d98a73d800ae575.woff2 +0 -0
  264. package/angular/Roboto-BoldItalic.6d89acbd21d7e3fbecb2.woff +0 -0
  265. package/angular/Roboto-Light.c27d89ac77468ae18f28.woff2 +0 -0
  266. package/angular/Roboto-Light.d923dfafc0c5183b59aa.woff +0 -0
  267. package/angular/Roboto-LightItalic.506274c7228cf81cae4d.woff2 +0 -0
  268. package/angular/Roboto-LightItalic.d4b8c137518d9d92bb28.woff +0 -0
  269. package/angular/Roboto-Medium.092c6130df8fd2199888.woff +0 -0
  270. package/angular/Roboto-Medium.1d3bced88509b0838984.woff2 +0 -0
  271. package/angular/Roboto-MediumItalic.18ff1628c628080166c1.woff +0 -0
  272. package/angular/Roboto-MediumItalic.d620b8f53f75966fe42e.woff2 +0 -0
  273. package/angular/Roboto-Regular.64cfb66c866ea50cad47.woff2 +0 -0
  274. package/angular/Roboto-Regular.e02e9d6ff5547f7e9962.woff +0 -0
  275. package/angular/Roboto-RegularItalic.4dd2af1e8df532f41db8.woff2 +0 -0
  276. package/angular/Roboto-RegularItalic.5ea38fff9eebef99c5df.woff +0 -0
  277. package/angular/Roboto-Thin.dbd56bd3357dc3617fe5.woff2 +0 -0
  278. package/angular/Roboto-Thin.e7f7c82374bd0ebef14b.woff +0 -0
  279. package/angular/Roboto-ThinItalic.5dd9349c940073834e9a.woff +0 -0
  280. package/angular/Roboto-ThinItalic.a8cef84f735ef887abdc.woff2 +0 -0
  281. package/angular/assets/config/app-config.json +0 -16
  282. package/angular/assets/images/splashbg.jpg +0 -0
  283. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff +0 -0
  284. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff2 +0 -0
  285. package/angular/assets/web-app-commons/fonts/material-icons/material-design-icons-community.css +0 -11293
  286. package/angular/favicon.ico +0 -0
  287. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNa.f2a0933406f783065152.woff +0 -0
  288. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.6467d9a24f234e8e8e07.woff2 +0 -0
  289. package/angular/index.html +0 -16
  290. package/angular/main-es2015.3a582572476c7f292e52.js +0 -1
  291. package/angular/main-es5.3a582572476c7f292e52.js +0 -1
  292. package/angular/polyfills-es2015.7df68534018bc2f6cb09.js +0 -1
  293. package/angular/polyfills-es5.e79468f406fae2989221.js +0 -1
  294. package/angular/runtime-es2015.6d2cff76cdb2790d3308.js +0 -1
  295. package/angular/runtime-es5.6d2cff76cdb2790d3308.js +0 -1
  296. package/angular/styles.c5c6c2534225b85c4ff0.css +0 -1
  297. package/config/bad-words.json +0 -1
  298. package/config/complex-english.json +0 -400
  299. package/config/hydra-auth.json +0 -8
  300. package/config/hydra-crawler.json +0 -84
  301. package/config/list-allow.json +0 -171
  302. package/config/list-deny.json +0 -248
  303. package/config/list-expiry.json +0 -7
  304. package/config/schedule.json +0 -25
  305. package/config/spelling.json +0 -1
@@ -1,18 +1,25 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.CleanupApp = void 0;
4
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
5
- const hydra_crawler_ts_assets_2 = require("hydra-crawler-ts-assets");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- const nodecommons_app_1 = require("nodecommons-app");
8
- const lists_1 = require("../classes/lists");
9
- const cleaner_1 = require("../classes/cleaner");
10
- class CleanupApp extends nodecommons_app_1.CommonsApp {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { commonsTypeHasPropertyNumber } from 'tscommons-es-core';
11
+ import { isIUrl } from 'hydra-crawler-ts-assets';
12
+ import { EStatus } from 'hydra-crawler-ts-assets';
13
+ import { commonsOutputDoing, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
14
+ import { CommonsApp } from 'nodecommons-es-app';
15
+ import { Lists } from '../classes/lists';
16
+ import { Cleaner } from '../classes/cleaner';
17
+ export class CleanupApp extends CommonsApp {
11
18
  constructor(includeReset = false, includeLinks = false) {
12
19
  super('hydra-crawler');
13
20
  this.includeReset = includeReset;
14
21
  this.includeLinks = includeLinks;
15
- this.lists = new lists_1.Lists();
22
+ this.lists = new Lists();
16
23
  }
17
24
  getAppName() {
18
25
  return 'Hydra - Cleanup';
@@ -23,100 +30,111 @@ class CleanupApp extends nodecommons_app_1.CommonsApp {
23
30
  addToList(list, entries) {
24
31
  this.lists.add(list, entries);
25
32
  }
26
- async init() {
27
- if (!this.databaseService)
28
- throw new Error('Database service has not been set yet');
29
- nodecommons_cli_1.CommonsOutput.doing('Connecting to database');
30
- await this.databaseService.init();
31
- nodecommons_cli_1.CommonsOutput.success();
32
- await super.init();
33
+ init() {
34
+ const _super = Object.create(null, {
35
+ init: { get: () => super.init }
36
+ });
37
+ return __awaiter(this, void 0, void 0, function* () {
38
+ if (!this.databaseService)
39
+ throw new Error('Database service has not been set yet');
40
+ commonsOutputDoing('Connecting to database');
41
+ yield this.databaseService.init();
42
+ commonsOutputSuccess();
43
+ yield _super.init.call(this);
44
+ });
33
45
  }
34
- async reset() {
35
- if (!this.databaseService)
36
- throw new Error('Database service has not been set');
37
- nodecommons_cli_1.CommonsOutput.doing(`Reseting urls for DENY, FAILED, DEAD and DISALLOWED`);
38
- let tally = 0;
39
- while (true) {
40
- const result = this.databaseService.getUrls()
41
- .find({ status: { $in: [
42
- hydra_crawler_ts_assets_2.EStatus.DENY,
43
- hydra_crawler_ts_assets_2.EStatus.FAILED,
44
- hydra_crawler_ts_assets_2.EStatus.DEAD,
45
- hydra_crawler_ts_assets_2.EStatus.DISALLOWED
46
- ] } })
47
- .sort({ _id: 1 })
48
- .skip(tally);
49
- try {
50
- while (true) {
51
- tally++;
52
- if ((tally % 100) === 0)
53
- nodecommons_cli_1.CommonsOutput.progress(tally);
54
- const row = await result.next();
55
- if (row === null)
56
- break;
57
- await this.databaseService.getLinks().deleteMany({ url: row.url });
46
+ reset() {
47
+ return __awaiter(this, void 0, void 0, function* () {
48
+ if (!this.databaseService)
49
+ throw new Error('Database service has not been set');
50
+ commonsOutputDoing('Reseting urls for DENY, FAILED, DEAD and DISALLOWED');
51
+ let tally = 0;
52
+ while (true) {
53
+ const result = this.databaseService.getUrls()
54
+ .find({ status: { $in: [
55
+ EStatus.DENY,
56
+ EStatus.FAILED,
57
+ EStatus.DEAD,
58
+ EStatus.DISALLOWED
59
+ ] } })
60
+ .sort({ _id: 1 })
61
+ .skip(tally);
62
+ try {
63
+ while (true) {
64
+ tally++;
65
+ if ((tally % 100) === 0)
66
+ commonsOutputProgress(tally);
67
+ const row = yield result.next();
68
+ if (row === null)
69
+ break;
70
+ yield this.databaseService.getLinks().deleteMany({ url: row.url });
71
+ }
72
+ break;
73
+ }
74
+ catch (err) {
75
+ if (!commonsTypeHasPropertyNumber(err, 'code') || err.code !== 43)
76
+ throw err;
58
77
  }
59
- break;
60
- }
61
- catch (err) {
62
- if (err.code !== 43)
63
- throw err;
64
78
  }
65
- }
66
- nodecommons_cli_1.CommonsOutput.result(tally);
79
+ commonsOutputResult(tally);
80
+ });
67
81
  }
68
- async links() {
69
- if (!this.databaseService)
70
- throw new Error('Database service has not been set');
71
- nodecommons_cli_1.CommonsOutput.doing(`Searching for link orphans`);
72
- let tally = 0;
73
- const result = this.databaseService.getLinks().find();
74
- let batch = [];
75
- const orphans = [];
76
- while (true) {
77
- tally++;
78
- if ((tally % 1000) === 0)
79
- nodecommons_cli_1.CommonsOutput.progress(`${tally}, ${orphans.length}`);
80
- const link = await result.next();
81
- if (link === null)
82
- break;
83
- if (!batch.includes(link.url))
84
- batch.push(link.url);
85
- if (!batch.includes(link.outgoing))
86
- batch.push(link.outgoing);
87
- if (batch.length < 10000)
88
- continue;
89
- const result2 = this.databaseService.getUrls().find({ url: { $in: batch } });
90
- const matches = (await this.databaseService.listQueryResults(result2, hydra_crawler_ts_assets_1.isIUrl))
91
- .map((u) => u.url);
92
- for (const l of batch) {
93
- if (!matches.includes(l))
94
- orphans.push(l);
82
+ links() {
83
+ return __awaiter(this, void 0, void 0, function* () {
84
+ if (!this.databaseService)
85
+ throw new Error('Database service has not been set');
86
+ commonsOutputDoing('Searching for link orphans');
87
+ let tally = 0;
88
+ const result = this.databaseService.getLinks().find({}, {});
89
+ let batch = [];
90
+ const orphans = [];
91
+ while (true) {
92
+ tally++;
93
+ if ((tally % 1000) === 0)
94
+ commonsOutputProgress(`${tally}, ${orphans.length}`);
95
+ const link = yield result.next();
96
+ if (link === null)
97
+ break;
98
+ if (!batch.includes(link.url))
99
+ batch.push(link.url);
100
+ if (!batch.includes(link.outgoing))
101
+ batch.push(link.outgoing);
102
+ if (batch.length < 10000)
103
+ continue;
104
+ const result2 = this.databaseService.getUrls().find({ url: { $in: batch } });
105
+ const matches = (yield this.databaseService.listQueryResults(result2, isIUrl))
106
+ .map((u) => u.url);
107
+ for (const l of batch) {
108
+ if (!matches.includes(l))
109
+ orphans.push(l);
110
+ }
111
+ batch = [];
112
+ }
113
+ commonsOutputResult(orphans.length);
114
+ commonsOutputDoing('Removing orphan links');
115
+ tally = 0;
116
+ for (const orphan of orphans) {
117
+ tally++;
118
+ if ((tally % 100) === 0)
119
+ commonsOutputProgress(`${tally}`);
120
+ yield this.databaseService.getLinks().deleteMany({ url: orphan });
121
+ yield this.databaseService.getLinks().deleteMany({ outgoing: orphan });
95
122
  }
96
- batch = [];
97
- }
98
- nodecommons_cli_1.CommonsOutput.result(orphans.length);
99
- nodecommons_cli_1.CommonsOutput.doing(`Removing orphan links`);
100
- tally = 0;
101
- for (const orphan of orphans) {
102
- tally++;
103
- if ((tally % 100) === 0)
104
- nodecommons_cli_1.CommonsOutput.progress(`${tally}`);
105
- await this.databaseService.getLinks().deleteMany({ url: orphan });
106
- await this.databaseService.getLinks().deleteMany({ outgoing: orphan });
107
- }
108
- nodecommons_cli_1.CommonsOutput.success();
123
+ commonsOutputSuccess();
124
+ });
109
125
  }
110
- async run() {
111
- if (!this.databaseService)
112
- throw new Error('Database service has not been set');
113
- if (!this.includeReset)
114
- await this.reset();
115
- if (!this.includeLinks)
116
- await this.links();
117
- const cleaner = new cleaner_1.Cleaner(this.lists, this.databaseService);
118
- await cleaner.purgeOrphanUrls();
119
- await cleaner.purgeEmptyDomains();
126
+ run() {
127
+ return __awaiter(this, void 0, void 0, function* () {
128
+ if (!this.databaseService)
129
+ throw new Error('Database service has not been set');
130
+ if (!this.includeReset)
131
+ yield this.reset();
132
+ if (!this.includeLinks)
133
+ yield this.links();
134
+ const cleaner = new Cleaner(this.lists, this.databaseService);
135
+ yield cleaner.purgeOrphanUrls();
136
+ yield cleaner.purgeEmptyDomains();
137
+ });
120
138
  }
121
139
  }
122
- exports.CleanupApp = CleanupApp;
140
+ //# sourceMappingURL=cleanup.app.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cleanup.app.js","sourceRoot":"","sources":["../../src/apps/cleanup.app.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,4BAA4B,EAAE,MAAM,mBAAmB,CAAC;AAEjE,OAAO,EAAQ,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAEvD,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAU7C,MAAM,OAAO,UAAW,SAAQ,UAAU;IAKzC,YACU,eAAwB,KAAK,EAC7B,eAAwB,KAAK;QAEtC,KAAK,CAAC,eAAe,CAAC,CAAC;QAHd,iBAAY,GAAZ,YAAY,CAAiB;QAC7B,iBAAY,GAAZ,YAAY,CAAiB;QAItC,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,iBAAiB,CAAC;IAC1B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEa,KAAK;;YAClB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,kBAAkB,CAAC,qDAAqD,CAAC,CAAC;YAE1E,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,OAAO,IAAI,EAAE;gBACZ,MAAM,MAAM,GAAiB,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;qBACxD,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE;4BACrB,OAAO,CAAC,IAAI;4BACZ,OAAO,CAAC,MAAM;4BACd,OAAO,CAAC,IAAI;4BACZ,OAAO,CAAC,UAAU;yBACnB,EAAE,EAAE,CAAC;qBACL,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;qBAChB,IAAI,CAAC,KAAK,CAAC,CAAC;gBAEf,IAAI;oBACH,OAAO,IAAI,EAAE;wBACZ,KAAK,EAAE,CAAC;wBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;4BAAE,qBAAqB,CAAC,KAAK,CAAC,CAAC;wBAEtD,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;wBAC3C,IAAI,GAAG,KAAK,IAAI;4BAAE,MAAM;wBAExB,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC;qBACnE;oBAED,MAAM;iBACN;gBAAC,OAAO,GAAG,EAAE;oBACb,IAAI,CAAC,4BAA4B,CAAC,GAAG,E
AAE,MAAM,CAAC,IAAI,GAAG,CAAC,IAAI,KAAK,EAAE;wBAAE,MAAM,GAAG,CAAC;iBAC7E;aACD;YAED,mBAAmB,CAAC,KAAK,CAAC,CAAC;QAC5B,CAAC;KAAA;IAEa,KAAK;;YAClB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,kBAAkB,CAAC,4BAA4B,CAAC,CAAC;YACjD,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,MAAM,MAAM,GAAkB,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAQ,EAAE,EAAE,EAAE,CAAC,CAAC;YAElF,IAAI,KAAK,GAAa,EAAE,CAAC;YACzB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,OAAO,IAAI,EAAE;gBACZ,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;oBAAE,qBAAqB,CAAC,GAAG,KAAK,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;gBAE/E,MAAM,IAAI,GAAe,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC7C,IAAI,IAAI,KAAK,IAAI;oBAAE,MAAM;gBAEzB,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpD,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAE9D,IAAI,KAAK,CAAC,MAAM,GAAG,KAAK;oBAAE,SAAS;gBAEnC,MAAM,OAAO,GAAiB,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,IAAI,CAC/D,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,CACxB,CAAC;gBAEF,MAAM,OAAO,GAAa,CAAC,MAAM,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;qBACrF,GAAG,CAAC,CAAC,CAAO,EAAU,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAEnC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE;oBACtB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;wBAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;iBAC1C;gBAED,KAAK,GAAG,EAAE,CAAC;aACX;YACD,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YAEpC,kBAAkB,CAAC,uBAAuB,CAAC,CAAC;YAC5C,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;gBAC7B,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,qBAAqB,CAAC,GAAG,KAAK,EAAE,CAAC,CAAC;gBAC3D,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;gBAClE,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC;aACvE;YACD,oBAAoB,EAAE,CAAC;QACxB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,IAAI,CAAC,IAAI,CAAC,YAAY;gBAAE,MAAM,IAAI,CAAC,KAAK,EAAE
,CAAC;YAC3C,IAAI,CAAC,IAAI,CAAC,YAAY;gBAAE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YAE3C,MAAM,OAAO,GAAY,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC;YACvE,MAAM,OAAO,CAAC,eAAe,EAAE,CAAC;YAChC,MAAM,OAAO,CAAC,iBAAiB,EAAE,CAAC;QACnC,CAAC;KAAA;CACD"}
@@ -0,0 +1,12 @@
1
+ import { CommonsApp } from 'nodecommons-es-app';
2
+ import { DatabaseService } from '../services/database.service';
3
+ import { IInternalHydraCommonDbApp } from './internal-hydra-common.app';
4
+ export declare class CrossPopulateExportApp extends CommonsApp implements IInternalHydraCommonDbApp {
5
+ private filename;
6
+ private databaseService;
7
+ constructor(filename: string);
8
+ getAppName(): string;
9
+ setDatabaseService(databaseService: DatabaseService): void;
10
+ init(): Promise<void>;
11
+ run(): Promise<void>;
12
+ }
@@ -1,12 +1,18 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.CrossPopulateExportApp = void 0;
4
- const fs = require("fs");
5
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- const nodecommons_app_1 = require("nodecommons-app");
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import * as fs from 'fs';
11
+ import { EStatus } from 'hydra-crawler-ts-assets';
12
+ import { commonsOutputDoing, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
13
+ import { CommonsApp } from 'nodecommons-es-app';
8
14
  // export the QUEUED, DONE and FAILED urls for importing into another instance
9
- class CrossPopulateExportApp extends nodecommons_app_1.CommonsApp {
15
+ export class CrossPopulateExportApp extends CommonsApp {
10
16
  constructor(filename) {
11
17
  super('hydra-crawler');
12
18
  this.filename = filename;
@@ -17,46 +23,53 @@ class CrossPopulateExportApp extends nodecommons_app_1.CommonsApp {
17
23
  setDatabaseService(databaseService) {
18
24
  this.databaseService = databaseService;
19
25
  }
20
- async init() {
21
- if (!this.databaseService)
22
- throw new Error('Database service has not been set yet');
23
- nodecommons_cli_1.CommonsOutput.doing('Connecting to database');
24
- await this.databaseService.init();
25
- nodecommons_cli_1.CommonsOutput.success();
26
- await super.init();
26
+ init() {
27
+ const _super = Object.create(null, {
28
+ init: { get: () => super.init }
29
+ });
30
+ return __awaiter(this, void 0, void 0, function* () {
31
+ if (!this.databaseService)
32
+ throw new Error('Database service has not been set yet');
33
+ commonsOutputDoing('Connecting to database');
34
+ yield this.databaseService.init();
35
+ commonsOutputSuccess();
36
+ yield _super.init.call(this);
37
+ });
27
38
  }
28
- async run() {
29
- if (!this.databaseService)
30
- throw new Error('Database service has not been set');
31
- const results = this.databaseService.getUrls().find({
32
- status: { $in: [
33
- hydra_crawler_ts_assets_1.EStatus.QUEUED,
34
- hydra_crawler_ts_assets_1.EStatus.DONE,
35
- hydra_crawler_ts_assets_1.EStatus.FAILED
36
- ] }
39
+ run() {
40
+ return __awaiter(this, void 0, void 0, function* () {
41
+ if (!this.databaseService)
42
+ throw new Error('Database service has not been set');
43
+ const results = this.databaseService.getUrls().find({
44
+ status: { $in: [
45
+ EStatus.QUEUED,
46
+ EStatus.DONE,
47
+ EStatus.FAILED
48
+ ] }
49
+ }, {});
50
+ // safer to do this directly rather than a call to listQueryResults
51
+ // ditto not using CommonsFile
52
+ const fd = fs.openSync(this.filename, 'w');
53
+ commonsOutputDoing('Exporting QUEUED, DONE and FAILED urls');
54
+ let tally = 0;
55
+ while (true) {
56
+ if (tally % 1000 === 0)
57
+ commonsOutputProgress(tally);
58
+ const row = yield results.next();
59
+ if (row === null)
60
+ break;
61
+ fs.writeSync(fd, JSON.stringify({
62
+ domain: row.domain,
63
+ url: row.url
64
+ }) + '\n');
65
+ if (tally % 1000 === 0)
66
+ fs.fdatasyncSync(fd);
67
+ tally++;
68
+ }
69
+ fs.fdatasyncSync(fd);
70
+ fs.closeSync(fd);
71
+ commonsOutputResult(tally);
37
72
  });
38
- // safer to do this directly rather than a call to listQueryResults
39
- // ditto not using CommonsFile
40
- const fd = fs.openSync(this.filename, 'w');
41
- nodecommons_cli_1.CommonsOutput.doing('Exporting QUEUED, DONE and FAILED urls');
42
- let tally = 0;
43
- while (true) {
44
- if (tally % 1000 === 0)
45
- nodecommons_cli_1.CommonsOutput.progress(tally);
46
- const row = await results.next();
47
- if (row === null)
48
- break;
49
- fs.writeSync(fd, JSON.stringify({
50
- domain: row.domain,
51
- url: row.url
52
- }) + '\n');
53
- if (tally % 1000 === 0)
54
- fs.fdatasyncSync(fd);
55
- tally++;
56
- }
57
- fs.fdatasyncSync(fd);
58
- fs.closeSync(fd);
59
- nodecommons_cli_1.CommonsOutput.result(tally);
60
73
  }
61
74
  }
62
- exports.CrossPopulateExportApp = CrossPopulateExportApp;
75
+ //# sourceMappingURL=cross-populate-export.app.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cross-populate-export.app.js","sourceRoot":"","sources":["../../src/apps/cross-populate-export.app.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AAKzB,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAMhD,8EAA8E;AAE9E,MAAM,OAAO,sBAAuB,SAAQ,UAAU;IAGrD,YACU,QAAgB;QAEzB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,aAAQ,GAAR,QAAQ,CAAQ;IAG1B,CAAC;IAEM,UAAU;QAChB,OAAO,+BAA+B,CAAC;IACxC,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,OAAO,GAAiB,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,IAAI,CAAO;gBACtE,MAAM,EAAE,EAAE,GAAG,EAAE;wBACb,OAAO,CAAC,MAAM;wBACd,OAAO,CAAC,IAAI;wBACZ,OAAO,CAAC,MAAM;qBACf,EAAE;aACJ,EAAE,EAAE,CAAC,CAAC;YAEP,mEAAmE;YACnE,8BAA8B;YAE9B,MAAM,EAAE,GAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;YAEnD,kBAAkB,CAAC,wCAAwC,CAAC,CAAC;YAE7D,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,OAAO,IAAI,EAAE;gBACZ,IAAI,KAAK,GAAG,IAAI,KAAK,CAAC;oBAAE,qBAAqB,CAAC,KAAK,CAAC,CAAC;gBAErD,MAAM,GAAG,GAAc,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,EAAE,CAAC,SAAS,CACV,EAAE,EACF,IAAI,CAAC,SAAS,CAAC;oBACb,MAAM,EAAE,GAAG,CAAC,MAAM;oBAClB,GAAG,EAAE,GAAG,CAAC,GAAG;iBACb,CAAC,GAAG,IAAI,CACV,CAAC;gBAEF,IAAI,KAAK,GAAG,IAAI,KAAK,CAAC;oBAAE,EAAE,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;gBAC7C,KAAK,EAAE,CAAC;aACR;YAED,EAAE,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;YACrB,EAAE,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;YAEjB,mBAAmB,CAAC,KAAK,CAAC,CAAC;QAC5B,CAAC;KAAA;CACD"}
@@ -0,0 +1,12 @@
1
+ import { CommonsApp } from 'nodecommons-es-app';
2
+ import { DatabaseService } from '../services/database.service';
3
+ import { IInternalHydraCommonDbApp } from './internal-hydra-common.app';
4
+ export declare class CrossPopulateImportApp extends CommonsApp implements IInternalHydraCommonDbApp {
5
+ private filename;
6
+ private databaseService;
7
+ constructor(filename: string);
8
+ getAppName(): string;
9
+ setDatabaseService(databaseService: DatabaseService): void;
10
+ init(): Promise<void>;
11
+ run(): Promise<void>;
12
+ }
@@ -1,4 +1,12 @@
1
- "use strict";
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
2
10
  var __asyncValues = (this && this.__asyncValues) || function (o) {
3
11
  if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
4
12
  var m = o[Symbol.asyncIterator], i;
@@ -6,21 +14,19 @@ var __asyncValues = (this && this.__asyncValues) || function (o) {
6
14
  function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
7
15
  function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
8
16
  };
9
- Object.defineProperty(exports, "__esModule", { value: true });
10
- exports.CrossPopulateImportApp = void 0;
11
- const fs = require("fs");
12
- const readline = require("readline");
13
- const tscommons_core_1 = require("tscommons-core");
14
- const nodecommons_cli_1 = require("nodecommons-cli");
15
- const nodecommons_app_1 = require("nodecommons-app");
17
+ import * as fs from 'fs';
18
+ import * as readline from 'readline';
19
+ import { commonsTypeHasPropertyString } from 'tscommons-es-core';
20
+ import { commonsOutputDoing, commonsOutputError, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
21
+ import { CommonsApp } from 'nodecommons-es-app';
16
22
  function isTRow(test) {
17
- if (!tscommons_core_1.CommonsType.hasPropertyString(test, 'domain'))
23
+ if (!commonsTypeHasPropertyString(test, 'domain'))
18
24
  return false;
19
- if (!tscommons_core_1.CommonsType.hasPropertyString(test, 'url'))
25
+ if (!commonsTypeHasPropertyString(test, 'url'))
20
26
  return false;
21
27
  return true;
22
28
  }
23
- class CrossPopulateImportApp extends nodecommons_app_1.CommonsApp {
29
+ export class CrossPopulateImportApp extends CommonsApp {
24
30
  constructor(filename) {
25
31
  super('hydra-crawler');
26
32
  this.filename = filename;
@@ -31,50 +37,57 @@ class CrossPopulateImportApp extends nodecommons_app_1.CommonsApp {
31
37
  setDatabaseService(databaseService) {
32
38
  this.databaseService = databaseService;
33
39
  }
34
- async init() {
35
- if (!this.databaseService)
36
- throw new Error('Database service has not been set yet');
37
- nodecommons_cli_1.CommonsOutput.doing('Connecting to database');
38
- await this.databaseService.init();
39
- nodecommons_cli_1.CommonsOutput.success();
40
- await super.init();
40
+ init() {
41
+ const _super = Object.create(null, {
42
+ init: { get: () => super.init }
43
+ });
44
+ return __awaiter(this, void 0, void 0, function* () {
45
+ if (!this.databaseService)
46
+ throw new Error('Database service has not been set yet');
47
+ commonsOutputDoing('Connecting to database');
48
+ yield this.databaseService.init();
49
+ commonsOutputSuccess();
50
+ yield _super.init.call(this);
51
+ });
41
52
  }
42
- async run() {
53
+ run() {
43
54
  var e_1, _a;
44
- if (!this.databaseService)
45
- throw new Error('Database service has not been set');
46
- const fileStream = fs.createReadStream(this.filename);
47
- const rl = readline.createInterface({
48
- input: fileStream,
49
- crlfDelay: Infinity
50
- });
51
- nodecommons_cli_1.CommonsOutput.doing('Importing urls');
52
- let tally = 0;
53
- try {
54
- for (var rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = await rl_1.next(), !rl_1_1.done;) {
55
- const line = rl_1_1.value;
56
- if (line.trim() === '')
57
- continue;
58
- if (tally % 1000 === 0)
59
- nodecommons_cli_1.CommonsOutput.progress(tally);
60
- const json = JSON.parse(line);
61
- if (!isTRow(json)) {
62
- nodecommons_cli_1.CommonsOutput.error('Invalid row. Skipping');
63
- continue;
55
+ return __awaiter(this, void 0, void 0, function* () {
56
+ if (!this.databaseService)
57
+ throw new Error('Database service has not been set');
58
+ const fileStream = fs.createReadStream(this.filename);
59
+ const rl = readline.createInterface({
60
+ input: fileStream,
61
+ crlfDelay: Infinity
62
+ });
63
+ commonsOutputDoing('Importing urls');
64
+ let tally = 0;
65
+ try {
66
+ for (var rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), !rl_1_1.done;) {
67
+ const line = rl_1_1.value;
68
+ if (line.trim() === '')
69
+ continue;
70
+ if (tally % 1000 === 0)
71
+ commonsOutputProgress(tally);
72
+ const json = JSON.parse(line);
73
+ if (!isTRow(json)) {
74
+ commonsOutputError('Invalid row. Skipping');
75
+ continue;
76
+ }
77
+ yield this.databaseService.queue(json.url, false);
78
+ tally++;
64
79
  }
65
- await this.databaseService.queue(json.url, false);
66
- tally++;
67
80
  }
68
- }
69
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
70
- finally {
71
- try {
72
- if (rl_1_1 && !rl_1_1.done && (_a = rl_1.return)) await _a.call(rl_1);
81
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
82
+ finally {
83
+ try {
84
+ if (rl_1_1 && !rl_1_1.done && (_a = rl_1.return)) yield _a.call(rl_1);
85
+ }
86
+ finally { if (e_1) throw e_1.error; }
73
87
  }
74
- finally { if (e_1) throw e_1.error; }
75
- }
76
- nodecommons_cli_1.CommonsOutput.result(tally);
77
- fileStream.close();
88
+ commonsOutputResult(tally);
89
+ fileStream.close();
90
+ });
78
91
  }
79
92
  }
80
- exports.CrossPopulateImportApp = CrossPopulateImportApp;
93
+ //# sourceMappingURL=cross-populate-import.app.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cross-populate-import.app.js","sourceRoot":"","sources":["../../src/apps/cross-populate-import.app.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,QAAQ,MAAM,UAAU,CAAC;AAErC,OAAO,EAAE,4BAA4B,EAAE,MAAM,mBAAmB,CAAC;AAEjE,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC9I,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAahD,SAAS,MAAM,CAAC,IAAa;IAC5B,IAAI,CAAC,4BAA4B,CAAC,IAAI,EAAE,QAAQ,CAAC;QAAE,OAAO,KAAK,CAAC;IAChE,IAAI,CAAC,4BAA4B,CAAC,IAAI,EAAE,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAE7D,OAAO,IAAI,CAAC;AACb,CAAC;AAED,MAAM,OAAO,sBAAuB,SAAQ,UAAU;IAGrD,YACU,QAAgB;QAEzB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,aAAQ,GAAR,QAAQ,CAAQ;IAG1B,CAAC;IAEM,UAAU;QAChB,OAAO,+BAA+B,CAAC;IACxC,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,UAAU,GAAkB,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAErE,MAAM,EAAE,GAAuB,QAAQ,CAAC,eAAe,CAAC;gBACtD,KAAK,EAAE,UAAU;gBACjB,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YAEH,kBAAkB,CAAC,gBAAgB,CAAC,CAAC;YAErC,IAAI,KAAK,GAAW,CAAC,CAAC;;gBACtB,KAAyB,IAAA,OAAA,cAAA,EAAE,CAAA,QAAA;oBAAhB,MAAM,IAAI,eAAA,CAAA;oBACpB,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE;wBAAE,SAAS;oBAEjC,IAAI,KAAK,GAAG,IAAI,KAAK,CAAC;wBAAE,qBAAqB,CAAC,KAAK,CAAC,CAAC;oBAErD,MAAM,IAAI,GAAY,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBACvC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;wBAClB,kBAAkB,CAAC,uBAAuB,CAAC,CAAC;wBAC5C,SAAS;qBACT;oBAED,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;oBAClD,KAAK,EAAE,CAAC;iBACR;;;;;;;;;YACD,mBAAmB,CAAC,KAAK,CAAC,CAAC;YAE3B,UAAU,CAAC,KAAK,EAAE,CAAC;;KACnB;CACD"}
@@ -0,0 +1,17 @@
1
+ import { CommonsApp } from 'nodecommons-es-app';
2
+ import { DatabaseService } from '../services/database.service';
3
+ import { IMatch } from '../interfaces/imatch';
4
+ import { EList } from '../enums/elist';
5
+ import { IInternalHydraCommonListApp } from './internal-hydra-common.app';
6
+ export declare class DenylistApp extends CommonsApp implements IInternalHydraCommonListApp {
7
+ private databaseService;
8
+ private lists;
9
+ constructor();
10
+ getAppName(): string;
11
+ setDatabaseService(databaseService: DatabaseService): void;
12
+ addToList(list: EList, entries: IMatch[]): void;
13
+ init(): Promise<void>;
14
+ private forward;
15
+ private reverse;
16
+ run(): Promise<void>;
17
+ }