hydra-crawler 1.4.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305):
  1. package/dist/apis/autocomplete.api.d.ts +7 -0
  2. package/dist/apis/autocomplete.api.js +15 -9
  3. package/dist/apis/autocomplete.api.js.map +1 -0
  4. package/dist/apis/bugs.api.d.ts +7 -0
  5. package/dist/apis/bugs.api.js +21 -15
  6. package/dist/apis/bugs.api.js.map +1 -0
  7. package/dist/apis/crawl.api.d.ts +7 -0
  8. package/dist/apis/crawl.api.js +15 -9
  9. package/dist/apis/crawl.api.js.map +1 -0
  10. package/dist/apis/domains.api.d.ts +7 -0
  11. package/dist/apis/domains.api.js +24 -19
  12. package/dist/apis/domains.api.js.map +1 -0
  13. package/dist/apis/images.api.d.ts +7 -0
  14. package/dist/apis/images.api.js +20 -14
  15. package/dist/apis/images.api.js.map +1 -0
  16. package/dist/apis/statistics.api.d.ts +8 -0
  17. package/dist/apis/statistics.api.js +27 -20
  18. package/dist/apis/statistics.api.js.map +1 -0
  19. package/dist/apis/test.api.d.ts +5 -0
  20. package/dist/apis/test.api.js +15 -9
  21. package/dist/apis/test.api.js.map +1 -0
  22. package/dist/apis/urls.api.d.ts +7 -0
  23. package/dist/apis/urls.api.js +21 -15
  24. package/dist/apis/urls.api.js.map +1 -0
  25. package/dist/apps/cleanup.app.d.ts +19 -0
  26. package/dist/apps/cleanup.app.js +118 -100
  27. package/dist/apps/cleanup.app.js.map +1 -0
  28. package/dist/apps/cross-populate-export.app.d.ts +12 -0
  29. package/dist/apps/cross-populate-export.app.js +60 -47
  30. package/dist/apps/cross-populate-export.app.js.map +1 -0
  31. package/dist/apps/cross-populate-import.app.d.ts +12 -0
  32. package/dist/apps/cross-populate-import.app.js +64 -51
  33. package/dist/apps/cross-populate-import.app.js.map +1 -0
  34. package/dist/apps/denylist.app.d.ts +17 -0
  35. package/dist/apps/denylist.app.js +115 -98
  36. package/dist/apps/denylist.app.js.map +1 -0
  37. package/dist/apps/expire.app.d.ts +19 -0
  38. package/dist/apps/expire.app.js +44 -31
  39. package/dist/apps/expire.app.js.map +1 -0
  40. package/dist/apps/extract-text.app.d.ts +8 -0
  41. package/dist/apps/extract-text.app.js +43 -35
  42. package/dist/apps/extract-text.app.js.map +1 -0
  43. package/dist/apps/hydra.app.d.ts +34 -0
  44. package/dist/apps/hydra.app.js +150 -137
  45. package/dist/apps/hydra.app.js.map +1 -0
  46. package/dist/apps/import.app.d.ts +11 -0
  47. package/dist/apps/import.app.js +44 -32
  48. package/dist/apps/import.app.js.map +1 -0
  49. package/dist/apps/internal-hydra-common.app.d.ts +28 -0
  50. package/dist/apps/internal-hydra-common.app.js +5 -11
  51. package/dist/apps/internal-hydra-common.app.js.map +1 -0
  52. package/dist/apps/query.app.d.ts +20 -0
  53. package/dist/apps/query.app.js +63 -49
  54. package/dist/apps/query.app.js.map +1 -0
  55. package/dist/apps/reattempt.app.d.ts +17 -0
  56. package/dist/apps/reattempt.app.js +66 -53
  57. package/dist/apps/reattempt.app.js.map +1 -0
  58. package/dist/apps/requeue-domain.app.d.ts +13 -0
  59. package/dist/apps/requeue-domain.app.js +50 -37
  60. package/dist/apps/requeue-domain.app.js.map +1 -0
  61. package/dist/apps/seed.app.d.ts +15 -0
  62. package/dist/apps/seed.app.js +53 -40
  63. package/dist/apps/seed.app.js.map +1 -0
  64. package/dist/apps/startup.app.d.ts +11 -0
  65. package/dist/apps/startup.app.js +51 -38
  66. package/dist/apps/startup.app.js.map +1 -0
  67. package/dist/apps/unarchive.app.d.ts +15 -0
  68. package/dist/apps/unarchive.app.js +67 -54
  69. package/dist/apps/unarchive.app.js.map +1 -0
  70. package/dist/classes/cleaner.d.ts +12 -0
  71. package/dist/classes/cleaner.js +227 -207
  72. package/dist/classes/cleaner.js.map +1 -0
  73. package/dist/classes/crawler.d.ts +34 -0
  74. package/dist/classes/crawler.js +248 -241
  75. package/dist/classes/crawler.js.map +1 -0
  76. package/dist/classes/dns.d.ts +3 -0
  77. package/dist/classes/dns.js +10 -13
  78. package/dist/classes/dns.js.map +1 -0
  79. package/dist/classes/expirer.d.ts +10 -0
  80. package/dist/classes/expirer.js +107 -94
  81. package/dist/classes/expirer.js.map +1 -0
  82. package/dist/classes/expiry.d.ts +8 -0
  83. package/dist/classes/expiry.js +16 -19
  84. package/dist/classes/expiry.js.map +1 -0
  85. package/dist/classes/lists.d.ts +9 -0
  86. package/dist/classes/lists.js +13 -18
  87. package/dist/classes/lists.js.map +1 -0
  88. package/dist/classes/robot.d.ts +15 -0
  89. package/dist/classes/robot.js +40 -30
  90. package/dist/classes/robot.js.map +1 -0
  91. package/dist/classes/tracker.d.ts +25 -0
  92. package/dist/classes/tracker.js +82 -64
  93. package/dist/classes/tracker.js.map +1 -0
  94. package/dist/cli.d.ts +1 -0
  95. package/dist/cli.js +72 -65
  96. package/dist/cli.js.map +1 -0
  97. package/dist/enums/eavailable-strategy.d.ts +4 -0
  98. package/dist/enums/eavailable-strategy.js +3 -5
  99. package/dist/enums/eavailable-strategy.js.map +1 -0
  100. package/dist/enums/elist.d.ts +7 -0
  101. package/dist/enums/elist.js +7 -11
  102. package/dist/enums/elist.js.map +1 -0
  103. package/dist/enums/eserver.d.ts +8 -0
  104. package/dist/enums/eserver.js +3 -5
  105. package/dist/enums/eserver.js.map +1 -0
  106. package/dist/enums/ex-powered-by.d.ts +6 -0
  107. package/dist/enums/ex-powered-by.js +3 -5
  108. package/dist/enums/ex-powered-by.js.map +1 -0
  109. package/dist/helpers/matcher.d.ts +5 -0
  110. package/dist/helpers/matcher.js +2 -5
  111. package/dist/helpers/matcher.js.map +1 -0
  112. package/dist/helpers/random.d.ts +4 -0
  113. package/dist/helpers/random.js +2 -5
  114. package/dist/helpers/random.js.map +1 -0
  115. package/dist/helpers/utf-decoder.d.ts +4 -0
  116. package/dist/helpers/utf-decoder.js +3 -6
  117. package/dist/helpers/utf-decoder.js.map +1 -0
  118. package/dist/interfaces/iexpiry.d.ts +7 -0
  119. package/dist/interfaces/iexpiry.js +9 -13
  120. package/dist/interfaces/iexpiry.js.map +1 -0
  121. package/dist/interfaces/imatch.d.ts +6 -0
  122. package/dist/interfaces/imatch.js +6 -9
  123. package/dist/interfaces/imatch.js.map +1 -0
  124. package/dist/interfaces/iparser-config.d.ts +4 -0
  125. package/dist/interfaces/iparser-config.js +4 -7
  126. package/dist/interfaces/iparser-config.js.map +1 -0
  127. package/dist/interfaces/iparser.d.ts +8 -0
  128. package/dist/interfaces/iparser.js +2 -2
  129. package/dist/interfaces/iparser.js.map +1 -0
  130. package/dist/interfaces/irequest-outcome.d.ts +11 -0
  131. package/dist/interfaces/irequest-outcome.js +2 -2
  132. package/dist/interfaces/irequest-outcome.js.map +1 -0
  133. package/dist/interfaces/iserver.d.ts +4 -0
  134. package/dist/interfaces/iserver.js +2 -2
  135. package/dist/interfaces/iserver.js.map +1 -0
  136. package/dist/parsers/accessibility-metrics.parser.d.ts +11 -0
  137. package/dist/parsers/accessibility-metrics.parser.js +34 -26
  138. package/dist/parsers/accessibility-metrics.parser.js.map +1 -0
  139. package/dist/parsers/asp-error.parser.d.ts +12 -0
  140. package/dist/parsers/asp-error.parser.js +36 -28
  141. package/dist/parsers/asp-error.parser.js.map +1 -0
  142. package/dist/parsers/bad-words.parser.d.ts +10 -0
  143. package/dist/parsers/bad-words.parser.js +21 -13
  144. package/dist/parsers/bad-words.parser.js.map +1 -0
  145. package/dist/parsers/complex-english.parser.d.ts +15 -0
  146. package/dist/parsers/complex-english.parser.js +33 -25
  147. package/dist/parsers/complex-english.parser.js.map +1 -0
  148. package/dist/parsers/data.parser.d.ts +14 -0
  149. package/dist/parsers/data.parser.js +12 -16
  150. package/dist/parsers/data.parser.js.map +1 -0
  151. package/dist/parsers/dictionary.parser.d.ts +19 -0
  152. package/dist/parsers/dictionary.parser.js +47 -39
  153. package/dist/parsers/dictionary.parser.js.map +1 -0
  154. package/dist/parsers/html.parser.d.ts +13 -0
  155. package/dist/parsers/html.parser.js +4 -8
  156. package/dist/parsers/html.parser.js.map +1 -0
  157. package/dist/parsers/hyperlinks.parser.d.ts +20 -0
  158. package/dist/parsers/hyperlinks.parser.js +82 -77
  159. package/dist/parsers/hyperlinks.parser.js.map +1 -0
  160. package/dist/parsers/image-tags.parser.d.ts +20 -0
  161. package/dist/parsers/image-tags.parser.js +38 -34
  162. package/dist/parsers/image-tags.parser.js.map +1 -0
  163. package/dist/parsers/jpeg.parser.d.ts +11 -0
  164. package/dist/parsers/jpeg.parser.js +28 -20
  165. package/dist/parsers/jpeg.parser.js.map +1 -0
  166. package/dist/parsers/paragraphs.parser.d.ts +13 -0
  167. package/dist/parsers/paragraphs.parser.js +33 -40
  168. package/dist/parsers/paragraphs.parser.js.map +1 -0
  169. package/dist/parsers/parser.d.ts +19 -0
  170. package/dist/parsers/parser.js +30 -17
  171. package/dist/parsers/parser.js.map +1 -0
  172. package/dist/parsers/php-error.parser.d.ts +12 -0
  173. package/dist/parsers/php-error.parser.js +42 -34
  174. package/dist/parsers/php-error.parser.js.map +1 -0
  175. package/dist/parsers/phrase.parser.d.ts +8 -0
  176. package/dist/parsers/phrase.parser.js +16 -11
  177. package/dist/parsers/phrase.parser.js.map +1 -0
  178. package/dist/parsers/regex.parser.d.ts +10 -0
  179. package/dist/parsers/regex.parser.js +30 -22
  180. package/dist/parsers/regex.parser.js.map +1 -0
  181. package/dist/parsers/server.parser.d.ts +12 -0
  182. package/dist/parsers/server.parser.js +66 -56
  183. package/dist/parsers/server.parser.js.map +1 -0
  184. package/dist/parsers/spelling.parser.d.ts +10 -0
  185. package/dist/parsers/spelling.parser.js +21 -13
  186. package/dist/parsers/spelling.parser.js.map +1 -0
  187. package/dist/parsers/string.parser.d.ts +8 -0
  188. package/dist/parsers/string.parser.js +5 -8
  189. package/dist/parsers/string.parser.js.map +1 -0
  190. package/dist/parsers/text.parser.d.ts +8 -0
  191. package/dist/parsers/text.parser.js +24 -18
  192. package/dist/parsers/text.parser.js.map +1 -0
  193. package/dist/parsers/words.parser.d.ts +11 -0
  194. package/dist/parsers/words.parser.js +32 -28
  195. package/dist/parsers/words.parser.js.map +1 -0
  196. package/dist/queries/complex-english.query.d.ts +2 -0
  197. package/dist/queries/complex-english.query.js +37 -38
  198. package/dist/queries/complex-english.query.js.map +1 -0
  199. package/dist/queries/flash-content.query.d.ts +2 -0
  200. package/dist/queries/flash-content.query.js +45 -32
  201. package/dist/queries/flash-content.query.js.map +1 -0
  202. package/dist/queries/linking-to-domains.query.d.ts +2 -0
  203. package/dist/queries/linking-to-domains.query.js +35 -27
  204. package/dist/queries/linking-to-domains.query.js.map +1 -0
  205. package/dist/queries/readability-score.query.d.ts +2 -0
  206. package/dist/queries/readability-score.query.js +21 -13
  207. package/dist/queries/readability-score.query.js.map +1 -0
  208. package/dist/servers/crawl.server.d.ts +35 -0
  209. package/dist/servers/crawl.server.js +133 -121
  210. package/dist/servers/crawl.server.js.map +1 -0
  211. package/dist/servers/express.server.d.ts +8 -0
  212. package/dist/servers/express.server.js +7 -10
  213. package/dist/servers/express.server.js.map +1 -0
  214. package/dist/servers/maintenance.server.d.ts +22 -0
  215. package/dist/servers/maintenance.server.js +42 -36
  216. package/dist/servers/maintenance.server.js.map +1 -0
  217. package/dist/servers/rest.server.d.ts +7 -0
  218. package/dist/servers/rest.server.js +40 -51
  219. package/dist/servers/rest.server.js.map +1 -0
  220. package/dist/servers/socket-io.server.d.ts +12 -0
  221. package/dist/servers/socket-io.server.js +48 -15
  222. package/dist/servers/socket-io.server.js.map +1 -0
  223. package/dist/services/database.service.d.ts +68 -0
  224. package/dist/services/database.service.js +528 -462
  225. package/dist/services/database.service.js.map +1 -0
  226. package/dist/types/tcrawl-config.d.ts +14 -0
  227. package/dist/types/tcrawl-config.js +14 -17
  228. package/dist/types/tcrawl-config.js.map +1 -0
  229. package/dist/types/thydra-config.d.ts +4 -0
  230. package/dist/types/thydra-config.js +4 -7
  231. package/dist/types/thydra-config.js.map +1 -0
  232. package/dist/types/tparser-ctor.d.ts +7 -0
  233. package/dist/types/tparser-ctor.js +2 -2
  234. package/dist/types/tparser-ctor.js.map +1 -0
  235. package/dist/types/tquery.d.ts +7 -0
  236. package/dist/types/tquery.js +2 -2
  237. package/dist/types/tquery.js.map +1 -0
  238. package/dist/types/trobots-config.d.ts +4 -0
  239. package/dist/types/trobots-config.js +4 -7
  240. package/dist/types/trobots-config.js.map +1 -0
  241. package/package.json +41 -29
  242. package/angular/10-es2015.bacd4ae5dd7913ce55f0.js +0 -1
  243. package/angular/10-es5.bacd4ae5dd7913ce55f0.js +0 -1
  244. package/angular/11-es2015.0f031dcf752d1e8eda6b.js +0 -1
  245. package/angular/11-es5.0f031dcf752d1e8eda6b.js +0 -1
  246. package/angular/3rdpartylicenses.txt +0 -1127
  247. package/angular/5-es2015.951498ca9c1bc74e57bf.js +0 -1
  248. package/angular/5-es5.951498ca9c1bc74e57bf.js +0 -1
  249. package/angular/6-es2015.65f680261a3506b88381.js +0 -1
  250. package/angular/6-es5.65f680261a3506b88381.js +0 -1
  251. package/angular/7-es2015.625197f3af1dbf3e805d.js +0 -1
  252. package/angular/7-es5.625197f3af1dbf3e805d.js +0 -1
  253. package/angular/8-es2015.55518901987a5b834309.js +0 -1
  254. package/angular/8-es5.55518901987a5b834309.js +0 -1
  255. package/angular/9-es2015.6cc9bde262564e7836f2.js +0 -1
  256. package/angular/9-es5.6cc9bde262564e7836f2.js +0 -1
  257. package/angular/Roboto-Black.41ed1105a6ebb8ffe34e.woff2 +0 -0
  258. package/angular/Roboto-Black.937491dfcbe64ca9a9f1.woff +0 -0
  259. package/angular/Roboto-BlackItalic.2e1ee657996854c6f427.woff +0 -0
  260. package/angular/Roboto-BlackItalic.50ca4c51ebc27e7e7d2f.woff2 +0 -0
  261. package/angular/Roboto-Bold.73288d91c325e82a5b92.woff +0 -0
  262. package/angular/Roboto-Bold.92fbd4e93cf0a5dbebaa.woff2 +0 -0
  263. package/angular/Roboto-BoldItalic.5f600d98a73d800ae575.woff2 +0 -0
  264. package/angular/Roboto-BoldItalic.6d89acbd21d7e3fbecb2.woff +0 -0
  265. package/angular/Roboto-Light.c27d89ac77468ae18f28.woff2 +0 -0
  266. package/angular/Roboto-Light.d923dfafc0c5183b59aa.woff +0 -0
  267. package/angular/Roboto-LightItalic.506274c7228cf81cae4d.woff2 +0 -0
  268. package/angular/Roboto-LightItalic.d4b8c137518d9d92bb28.woff +0 -0
  269. package/angular/Roboto-Medium.092c6130df8fd2199888.woff +0 -0
  270. package/angular/Roboto-Medium.1d3bced88509b0838984.woff2 +0 -0
  271. package/angular/Roboto-MediumItalic.18ff1628c628080166c1.woff +0 -0
  272. package/angular/Roboto-MediumItalic.d620b8f53f75966fe42e.woff2 +0 -0
  273. package/angular/Roboto-Regular.64cfb66c866ea50cad47.woff2 +0 -0
  274. package/angular/Roboto-Regular.e02e9d6ff5547f7e9962.woff +0 -0
  275. package/angular/Roboto-RegularItalic.4dd2af1e8df532f41db8.woff2 +0 -0
  276. package/angular/Roboto-RegularItalic.5ea38fff9eebef99c5df.woff +0 -0
  277. package/angular/Roboto-Thin.dbd56bd3357dc3617fe5.woff2 +0 -0
  278. package/angular/Roboto-Thin.e7f7c82374bd0ebef14b.woff +0 -0
  279. package/angular/Roboto-ThinItalic.5dd9349c940073834e9a.woff +0 -0
  280. package/angular/Roboto-ThinItalic.a8cef84f735ef887abdc.woff2 +0 -0
  281. package/angular/assets/config/app-config.json +0 -16
  282. package/angular/assets/images/splashbg.jpg +0 -0
  283. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff +0 -0
  284. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff2 +0 -0
  285. package/angular/assets/web-app-commons/fonts/material-icons/material-design-icons-community.css +0 -11293
  286. package/angular/favicon.ico +0 -0
  287. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNa.f2a0933406f783065152.woff +0 -0
  288. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.6467d9a24f234e8e8e07.woff2 +0 -0
  289. package/angular/index.html +0 -16
  290. package/angular/main-es2015.3a582572476c7f292e52.js +0 -1
  291. package/angular/main-es5.3a582572476c7f292e52.js +0 -1
  292. package/angular/polyfills-es2015.7df68534018bc2f6cb09.js +0 -1
  293. package/angular/polyfills-es5.e79468f406fae2989221.js +0 -1
  294. package/angular/runtime-es2015.6d2cff76cdb2790d3308.js +0 -1
  295. package/angular/runtime-es5.6d2cff76cdb2790d3308.js +0 -1
  296. package/angular/styles.c5c6c2534225b85c4ff0.css +0 -1
  297. package/config/bad-words.json +0 -1
  298. package/config/complex-english.json +0 -400
  299. package/config/hydra-auth.json +0 -8
  300. package/config/hydra-crawler.json +0 -84
  301. package/config/list-allow.json +0 -171
  302. package/config/list-deny.json +0 -248
  303. package/config/list-expiry.json +0 -7
  304. package/config/schedule.json +0 -25
  305. package/config/spelling.json +0 -1
@@ -1,42 +1,37 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.DatabaseService = exports.isTMongoIdTallyRow = exports.isTMongoIdRow = void 0;
4
- const url_1 = require("url");
5
- const tscommons_core_1 = require("tscommons-core");
6
- const tscommons_core_2 = require("tscommons-core");
7
- const tscommons_core_3 = require("tscommons-core");
8
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
9
- const hydra_crawler_ts_assets_2 = require("hydra-crawler-ts-assets");
10
- const hydra_crawler_ts_assets_3 = require("hydra-crawler-ts-assets");
11
- const hydra_crawler_ts_assets_4 = require("hydra-crawler-ts-assets");
12
- const hydra_crawler_ts_assets_5 = require("hydra-crawler-ts-assets");
13
- const hydra_crawler_ts_assets_6 = require("hydra-crawler-ts-assets");
14
- const hydra_crawler_ts_assets_7 = require("hydra-crawler-ts-assets");
15
- const nodecommons_cli_1 = require("nodecommons-cli");
16
- const nodecommons_database_mongodb_1 = require("nodecommons-database-mongodb");
17
- const eavailable_strategy_1 = require("../enums/eavailable-strategy");
18
- function isTMongoIdRow(test) {
19
- if (!tscommons_core_1.CommonsType.hasPropertyString(test, '_id'))
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { URL } from 'url';
11
+ import { commonsObjectStripNulls, commonsStringRegexLike, commonsTypeAttemptNumber, commonsTypeHasPropertyNumber, commonsTypeHasPropertyString, commonsTypeHasPropertyTArray } from 'tscommons-es-core';
12
+ import { isIUrl } from 'hydra-crawler-ts-assets';
13
+ import { isTPhpError } from 'hydra-crawler-ts-assets';
14
+ import { isTAspError } from 'hydra-crawler-ts-assets';
15
+ import { isTDomain } from 'hydra-crawler-ts-assets';
16
+ import { isTLink } from 'hydra-crawler-ts-assets';
17
+ import { EStatus, toEStatus } from 'hydra-crawler-ts-assets';
18
+ import { EComparator } from 'hydra-crawler-ts-assets';
19
+ import { commonsOutputDebug, commonsOutputError } from 'nodecommons-es-cli';
20
+ import { CommonsMongodbService } from 'nodecommons-es-database-mongodb';
21
+ import { EAvailableStrategy } from '../enums/eavailable-strategy';
22
+ export function isTMongoIdRow(test) {
23
+ if (!commonsTypeHasPropertyString(test, '_id'))
20
24
  return false;
21
25
  return true;
22
26
  }
23
- exports.isTMongoIdRow = isTMongoIdRow;
24
- function isTMongoIdTallyRow(test) {
27
+ export function isTMongoIdTallyRow(test) {
25
28
  if (!isTMongoIdRow(test))
26
29
  return false;
27
- if (!tscommons_core_1.CommonsType.hasPropertyNumber(test, 'tally'))
30
+ if (!commonsTypeHasPropertyNumber(test, 'tally'))
28
31
  return false;
29
32
  return true;
30
33
  }
31
- exports.isTMongoIdTallyRow = isTMongoIdTallyRow;
32
- class DatabaseService extends nodecommons_database_mongodb_1.CommonsMongodbService {
33
- constructor(credentials) {
34
- super(credentials);
35
- // CommonsGracefulAbort.addCallback((): void => {
36
- // CommonsOutput.alert(`SIGINT abort flag is set. Closing MongoDB connection.`);
37
- // super.close();
38
- // });
39
- }
34
+ export class DatabaseService extends CommonsMongodbService {
40
35
  getDomains() {
41
36
  if (!this.domains)
42
37
  throw new Error('Domains collected has not been instantiated yet');
@@ -52,454 +47,525 @@ class DatabaseService extends nodecommons_database_mongodb_1.CommonsMongodbServi
52
47
  throw new Error('Links collected has not been instantiated yet');
53
48
  return this.links;
54
49
  }
55
- async init() {
56
- await super.init();
57
- if (!this.database)
58
- throw new Error('Database has not been instantiated yet');
59
- this.domains = await this.ensureCollection('domains');
60
- await this.domains.createIndex({ domain: 1 }, { unique: true });
61
- await this.domains.createIndex({ ip: 1 }, { unique: false });
62
- this.urls = await this.ensureCollection('urls');
63
- await this.urls.createIndex({ url: 1 }, { unique: true });
64
- await this.urls.createIndex({ domain: 1 }, { unique: false });
65
- await this.urls.createIndex({ status: 1 }, { unique: false });
66
- await this.urls.createIndex({ attempted: 1 }, { unique: false });
67
- await this.urls.createIndex({ done: 1 }, { unique: false });
68
- await this.urls.createIndex({ orphan: 1 }, { unique: false });
69
- await this.urls.createIndex({ statusCode: 1 }, { unique: false });
70
- await this.urls.createIndex({ reason: 1 }, { unique: false });
71
- await this.urls.createIndex({ 'headers.content-type': 1 }, { unique: false });
72
- this.links = await this.ensureCollection('links');
73
- await this.links.createIndex({ url: 1 }, { unique: false });
74
- await this.links.createIndex({ outgoing: 1 }, { unique: false });
75
- await this.links.createIndex({ url: 1, outgoing: 1 }, { unique: true });
76
- }
77
- async initParser(ctor) {
78
- const parser = new ctor();
79
- await parser.init(this);
50
+ init() {
51
+ const _super = Object.create(null, {
52
+ init: { get: () => super.init }
53
+ });
54
+ return __awaiter(this, void 0, void 0, function* () {
55
+ yield _super.init.call(this);
56
+ if (!this.database)
57
+ throw new Error('Database has not been instantiated yet');
58
+ this.domains = yield this.ensureCollection('domains');
59
+ yield this.domains.createIndex({ domain: 1 }, { unique: true });
60
+ yield this.domains.createIndex({ ip: 1 }, { unique: false });
61
+ this.urls = yield this.ensureCollection('urls');
62
+ yield this.urls.createIndex({ url: 1 }, { unique: true });
63
+ yield this.urls.createIndex({ domain: 1 }, { unique: false });
64
+ yield this.urls.createIndex({ status: 1 }, { unique: false });
65
+ yield this.urls.createIndex({ attempted: 1 }, { unique: false });
66
+ yield this.urls.createIndex({ done: 1 }, { unique: false });
67
+ yield this.urls.createIndex({ orphan: 1 }, { unique: false });
68
+ yield this.urls.createIndex({ statusCode: 1 }, { unique: false });
69
+ yield this.urls.createIndex({ reason: 1 }, { unique: false });
70
+ yield this.urls.createIndex({ 'headers.content-type': 1 }, { unique: false });
71
+ this.links = yield this.ensureCollection('links');
72
+ yield this.links.createIndex({ url: 1 }, { unique: false });
73
+ yield this.links.createIndex({ outgoing: 1 }, { unique: false });
74
+ yield this.links.createIndex({ url: 1, outgoing: 1 }, { unique: true });
75
+ });
76
+ }
77
+ initParser(ctor) {
78
+ return __awaiter(this, void 0, void 0, function* () {
79
+ const parser = new ctor();
80
+ yield parser.init(this);
81
+ });
80
82
  }
81
83
  getRawDatabase() {
82
84
  return super.getRawDatabase();
83
85
  }
84
- async wipe() {
85
- await this.getLinks().deleteMany({});
86
- await this.getUrls().deleteMany({});
87
- await this.getDomains().deleteMany({});
86
+ wipe() {
87
+ return __awaiter(this, void 0, void 0, function* () {
88
+ yield this.getLinks().deleteMany({});
89
+ yield this.getUrls().deleteMany({});
90
+ yield this.getDomains().deleteMany({});
91
+ });
88
92
  }
89
- async resetActive() {
90
- await this.getUrls().updateMany({ status: hydra_crawler_ts_assets_6.EStatus.ACTIVE }, { $set: { status: hydra_crawler_ts_assets_6.EStatus.QUEUED } });
93
+ resetActive() {
94
+ return __awaiter(this, void 0, void 0, function* () {
95
+ yield this.getUrls().updateMany({ status: EStatus.ACTIVE }, { $set: { status: EStatus.QUEUED } });
96
+ });
91
97
  }
92
- async domain(domain, ip) {
93
- try {
94
- await this.getDomains().updateOne({ domain: domain }, { $set: { ip: ip } }, { upsert: true });
95
- return true;
96
- }
97
- catch (ex) {
98
- nodecommons_cli_1.CommonsOutput.debug('debug position 7');
99
- console.log(ex);
100
- return false;
101
- }
102
- }
103
- async queue(url, isDeny) {
104
- const whatwg = new url_1.URL(url);
105
- if (!whatwg.protocol.match(/^http(s?):$/))
106
- return false;
107
- try {
108
- const status = isDeny ? hydra_crawler_ts_assets_6.EStatus.DENY : hydra_crawler_ts_assets_6.EStatus.QUEUED;
109
- // un-archive if currently archived
110
- await this.getUrls().deleteOne({ url: url, status: hydra_crawler_ts_assets_6.EStatus.ARCHIVED });
111
- await this.getUrls().insertOne({
112
- url: url,
113
- domain: whatwg.hostname,
114
- status: status
115
- });
116
- return true;
117
- }
118
- catch (ex) {
119
- return false;
120
- }
121
- }
122
- async available(strategy, threshold, limit, existing) {
123
- if (limit === 0)
124
- return [];
125
- const comparator = strategy === eavailable_strategy_1.EAvailableStrategy.LARGEST ? hydra_crawler_ts_assets_7.EComparator.GTE : hydra_crawler_ts_assets_7.EComparator.LT;
126
- let thresholdMatch = {};
127
- switch (comparator) {
128
- case hydra_crawler_ts_assets_7.EComparator.GTE:
129
- thresholdMatch = { tally: { $gte: threshold } };
130
- break;
131
- case hydra_crawler_ts_assets_7.EComparator.LT:
132
- thresholdMatch = { tally: { $lt: threshold } };
133
- break;
134
- }
135
- const sortOrder = strategy === eavailable_strategy_1.EAvailableStrategy.SMALLEST ? 1 : -1;
136
- const results = this.getUrls().aggregate([
137
- { $match: { status: hydra_crawler_ts_assets_6.EStatus.QUEUED, domain: { $nin: existing } } },
138
- { $group: { _id: '$domain', tally: { $sum: 1 } } },
139
- { $match: thresholdMatch },
140
- { $sort: { tally: sortOrder } },
141
- { $limit: limit }
142
- ], { allowDiskUse: true });
143
- const entries = await this.listQueryResults(results, isTMongoIdTallyRow);
144
- return entries
145
- .map((entry) => entry._id);
146
- }
147
- async next(domain) {
148
- const next = await this.getUrls().findOne({
149
- status: hydra_crawler_ts_assets_6.EStatus.QUEUED,
150
- domain: domain
151
- });
152
- if (next === null)
153
- return undefined;
154
- return next.url;
155
- }
156
- async setStatus(url, status) {
157
- const updates = {};
158
- updates['status'] = status;
159
- if (![hydra_crawler_ts_assets_6.EStatus.QUEUED, hydra_crawler_ts_assets_6.EStatus.ACTIVE].includes(status))
160
- updates['attempted'] = new Date();
161
- if (status === hydra_crawler_ts_assets_6.EStatus.DONE)
162
- updates['done'] = new Date();
163
- try {
164
- await this.getUrls().updateOne({ url: url }, { $set: updates });
165
- return true;
166
- }
167
- catch (ex) {
168
- console.error(ex);
169
- return false;
170
- }
171
- }
172
- async setStatusCode(url, code) {
173
- try {
174
- await this.getUrls().updateOne({ url: url }, { $set: { statusCode: code }
175
- });
176
- return true;
177
- }
178
- catch (ex) {
179
- console.error(ex);
180
- return false;
181
- }
182
- }
183
- async setHeaders(url, headers) {
184
- try {
185
- await this.getUrls().updateOne({ url: url }, { $set: { headers: headers }
186
- });
187
- return true;
188
- }
189
- catch (ex) {
190
- console.error(ex);
191
- return false;
192
- }
193
- }
194
- async setData(url, context, data) {
195
- try {
196
- const update = {};
197
- update[context] = data;
198
- await this.getUrls().updateOne({ url: url }, { $set: update });
199
- return true;
200
- }
201
- catch (ex) {
202
- console.error(ex);
203
- return false;
204
- }
205
- }
206
- async unsetData(url, context) {
207
- try {
208
- const update = {};
209
- update[context] = true;
210
- await this.getUrls().updateOne({ url: url }, { $unset: update });
211
- return true;
212
- }
213
- catch (ex) {
214
- console.error(ex);
215
- return false;
216
- }
217
- }
218
- async getTtl(url) {
219
- const row = await this.getUrls().findOne({ url: url });
220
- if (row === null)
221
- return undefined;
222
- if (!tscommons_core_1.CommonsType.hasPropertyNumber(row, 'ttl'))
223
- return undefined;
224
- return row['ttl'];
225
- }
226
- async setTtl(url, ttl) {
227
- try {
228
- await this.getUrls().updateOne({ url: url }, { $set: { ttl: ttl }
229
- });
230
- return true;
231
- }
232
- catch (ex) {
233
- console.error(ex);
234
- return false;
235
- }
236
- }
237
- async unsetTtl(url) {
238
- try {
239
- await this.getUrls().updateOne({ url: url }, { $unset: { ttl: true }
240
- });
241
- return true;
242
- }
243
- catch (ex) {
244
- console.error(ex);
245
- return false;
246
- }
247
- }
248
- async getHash(url) {
249
- const row = await this.getUrls().findOne({ url: url });
250
- if (row === null)
251
- return undefined;
252
- if (!tscommons_core_1.CommonsType.hasPropertyString(row, 'hash'))
253
- return undefined;
254
- return row['hash'];
255
- }
256
- async setHash(url, hash) {
257
- try {
258
- await this.getUrls().updateOne({ url: url }, { $set: {
259
- hash: hash,
260
- hashSet: new Date()
261
- } });
262
- return true;
263
- }
264
- catch (ex) {
265
- console.error(ex);
266
- return false;
267
- }
268
- }
269
- async unsetHash(url) {
270
- try {
271
- await this.getUrls().updateOne({ url: url }, { $unset: {
272
- hash: true,
273
- hashSet: true
274
- } });
275
- return true;
276
- }
277
- catch (ex) {
278
- console.error(ex);
279
- return false;
280
- }
281
- }
282
- async setFailReason(url, reason) {
283
- try {
284
- await this.getUrls().updateOne({ url: url }, { $set: {
285
- reason: reason
286
- } });
287
- return true;
288
- }
289
- catch (ex) {
290
- console.error(ex);
291
- return false;
292
- }
293
- }
294
- async unsetFailReason(url) {
295
- try {
296
- await this.getUrls().updateOne({ url: url }, { $unset: {
297
- reason: true
298
- } });
299
- return true;
300
- }
301
- catch (ex) {
302
- console.error(ex);
303
- return false;
304
- }
305
- }
306
- async link(url, links) {
307
- // more efficient to only remove removed and only add new
308
- // rather than just wiping all existing and re-adding
309
- const find = this.getLinks().find({
310
- url: url
311
- });
312
- const existing = (await this.listQueryResults(find, hydra_crawler_ts_assets_5.isTLink))
313
- .map((link) => link.outgoing);
314
- const removed = [];
315
- const added = [];
316
- for (const link of links) {
317
- if (!existing.includes(link) && !added.includes(link))
318
- added.push(link);
319
- }
320
- for (const link of existing) {
321
- if (!links.includes(link))
322
- removed.push(link);
323
- }
324
- for (const outgoing of removed) {
98
+ domain(domain, ip) {
99
+ return __awaiter(this, void 0, void 0, function* () {
325
100
  try {
326
- await this.getLinks().deleteMany({
327
- url: url,
328
- outgoing: outgoing
329
- });
101
+ yield this.getDomains().updateOne({ domain: domain }, { $set: { ip: ip } }, { upsert: true });
102
+ return true;
330
103
  }
331
104
  catch (ex) {
332
- /* do nothing */
105
+ commonsOutputDebug('debug position 7');
106
+ console.log(ex);
107
+ return false;
333
108
  }
334
- }
335
- for (const outgoing of added) {
109
+ });
110
+ }
111
+ queue(url, isDeny) {
112
+ return __awaiter(this, void 0, void 0, function* () {
113
+ const whatwg = new URL(url);
114
+ if (!whatwg.protocol.match(/^http(s?):$/))
115
+ return false;
336
116
  try {
337
- await this.getLinks().insertOne({
117
+ const status = isDeny ? EStatus.DENY : EStatus.QUEUED;
118
+ // un-archive if currently archived
119
+ yield this.getUrls().deleteOne({ url: url, status: EStatus.ARCHIVED });
120
+ yield this.getUrls().insertOne({
338
121
  url: url,
339
- outgoing: outgoing
122
+ domain: whatwg.hostname,
123
+ status: status
340
124
  });
125
+ return true;
341
126
  }
342
127
  catch (ex) {
343
- switch (ex.code) {
344
- case 11000:
345
- // ignore duplicates
346
- nodecommons_cli_1.CommonsOutput.error(`DUPLICATE: ${url}, ${outgoing}`);
347
- break;
348
- case 17280:
349
- case 17282:
350
- nodecommons_cli_1.CommonsOutput.error(`INDEX TOO LARGE: ${url}, ${outgoing}`);
351
- // ignore index too large
352
- break;
353
- default:
354
- nodecommons_cli_1.CommonsOutput.debug('debug position 8');
355
- console.log(ex);
356
- throw ex;
357
- }
128
+ return false;
129
+ }
130
+ });
131
+ }
132
+ available(strategy, threshold, limit, existing) {
133
+ return __awaiter(this, void 0, void 0, function* () {
134
+ if (limit === 0)
135
+ return [];
136
+ const comparator = strategy === EAvailableStrategy.LARGEST ? EComparator.GTE : EComparator.LT;
137
+ const thresholdMatch = { tally: {} };
138
+ switch (comparator) {
139
+ case EComparator.GTE:
140
+ thresholdMatch.tally = { $gte: threshold };
141
+ break;
142
+ case EComparator.LT:
143
+ thresholdMatch.tally = { $lt: threshold };
144
+ break;
145
+ }
146
+ const sortOrder = strategy === EAvailableStrategy.SMALLEST ? 1 : -1;
147
+ const results = this.getUrls().aggregate([
148
+ { $match: { status: EStatus.QUEUED, domain: { $nin: existing } } },
149
+ { $group: { _id: '$domain', tally: { $sum: 1 } } },
150
+ { $match: thresholdMatch },
151
+ { $sort: { tally: sortOrder } },
152
+ { $limit: limit }
153
+ ], { allowDiskUse: true });
154
+ const entries = yield this.listQueryResults(results, isTMongoIdTallyRow);
155
+ return entries
156
+ // eslint-disable-next-line no-underscore-dangle
157
+ .map((entry) => entry._id);
158
+ });
159
+ }
160
+ next(domain) {
161
+ return __awaiter(this, void 0, void 0, function* () {
162
+ const next = yield this.getUrls().findOne({
163
+ status: EStatus.QUEUED,
164
+ domain: domain
165
+ });
166
+ if (next === null)
167
+ return undefined;
168
+ return next.url;
169
+ });
170
+ }
171
+ setStatus(url, status) {
172
+ return __awaiter(this, void 0, void 0, function* () {
173
+ const updates = { status: status };
174
+ if (![EStatus.QUEUED, EStatus.ACTIVE].includes(status))
175
+ updates['attempted'] = new Date();
176
+ if (status === EStatus.DONE)
177
+ updates['done'] = new Date();
178
+ try {
179
+ yield this.getUrls().updateOne({ url: url }, { $set: updates });
180
+ return true;
181
+ }
182
+ catch (ex) {
183
+ console.error(ex);
184
+ return false;
185
+ }
186
+ });
187
+ }
188
+ setStatusCode(url, code) {
189
+ return __awaiter(this, void 0, void 0, function* () {
190
+ try {
191
+ yield this.getUrls().updateOne({ url: url }, { $set: { statusCode: code } });
192
+ return true;
193
+ }
194
+ catch (ex) {
195
+ console.error(ex);
196
+ return false;
197
+ }
198
+ });
199
+ }
200
+ setHeaders(url, headers) {
201
+ return __awaiter(this, void 0, void 0, function* () {
202
+ try {
203
+ yield this.getUrls().updateOne({ url: url }, { $set: { headers: headers } });
204
+ return true;
205
+ }
206
+ catch (ex) {
207
+ console.error(ex);
208
+ return false;
209
+ }
210
+ });
211
+ }
212
+ setData(url, context, data) {
213
+ return __awaiter(this, void 0, void 0, function* () {
214
+ try {
215
+ const update = {};
216
+ update[context] = data;
217
+ yield this.getUrls().updateOne({ url: url }, { $set: update });
218
+ return true;
219
+ }
220
+ catch (ex) {
221
+ console.error(ex);
222
+ return false;
223
+ }
224
+ });
225
+ }
226
+ unsetData(url, context) {
227
+ return __awaiter(this, void 0, void 0, function* () {
228
+ try {
229
+ const update = {};
230
+ update[context] = true;
231
+ yield this.getUrls().updateOne({ url: url }, { $unset: update });
232
+ return true;
358
233
  }
359
- }
360
- return true;
234
+ catch (ex) {
235
+ console.error(ex);
236
+ return false;
237
+ }
238
+ });
361
239
  }
362
- async markDead(domain) {
363
- try {
364
- await this.getUrls().updateMany({ domain: domain, status: { $in: [hydra_crawler_ts_assets_6.EStatus.QUEUED, hydra_crawler_ts_assets_6.EStatus.ACTIVE] } }, { $set: { status: hydra_crawler_ts_assets_6.EStatus.DEAD, attempted: new Date() } });
240
+ getTtl(url) {
241
+ return __awaiter(this, void 0, void 0, function* () {
242
+ const row = yield this.getUrls().findOne({ url: url });
243
+ if (row === null)
244
+ return undefined;
245
+ if (!commonsTypeHasPropertyNumber(row, 'ttl'))
246
+ return undefined;
247
+ return row['ttl'];
248
+ });
249
+ }
250
+ setTtl(url, ttl) {
251
+ return __awaiter(this, void 0, void 0, function* () {
252
+ try {
253
+ yield this.getUrls().updateOne({ url: url }, { $set: { ttl: ttl } });
254
+ return true;
255
+ }
256
+ catch (ex) {
257
+ console.error(ex);
258
+ return false;
259
+ }
260
+ });
261
+ }
262
+ unsetTtl(url) {
263
+ return __awaiter(this, void 0, void 0, function* () {
264
+ try {
265
+ yield this.getUrls().updateOne({ url: url }, { $unset: { ttl: true } });
266
+ return true;
267
+ }
268
+ catch (ex) {
269
+ console.error(ex);
270
+ return false;
271
+ }
272
+ });
273
+ }
274
+ getHash(url) {
275
+ return __awaiter(this, void 0, void 0, function* () {
276
+ const row = yield this.getUrls().findOne({ url: url });
277
+ if (row === null)
278
+ return undefined;
279
+ if (!commonsTypeHasPropertyString(row, 'hash'))
280
+ return undefined;
281
+ return row['hash'];
282
+ });
283
+ }
284
+ setHash(url, hash) {
285
+ return __awaiter(this, void 0, void 0, function* () {
286
+ try {
287
+ yield this.getUrls().updateOne({ url: url }, { $set: {
288
+ hash: hash,
289
+ hashSet: new Date()
290
+ } });
291
+ return true;
292
+ }
293
+ catch (ex) {
294
+ console.error(ex);
295
+ return false;
296
+ }
297
+ });
298
+ }
299
+ unsetHash(url) {
300
+ return __awaiter(this, void 0, void 0, function* () {
301
+ try {
302
+ yield this.getUrls().updateOne({ url: url }, { $unset: {
303
+ hash: true,
304
+ hashSet: true
305
+ } });
306
+ return true;
307
+ }
308
+ catch (ex) {
309
+ console.error(ex);
310
+ return false;
311
+ }
312
+ });
313
+ }
314
+ setFailReason(url, reason) {
315
+ return __awaiter(this, void 0, void 0, function* () {
316
+ try {
317
+ yield this.getUrls().updateOne({ url: url }, { $set: {
318
+ reason: reason
319
+ } });
320
+ return true;
321
+ }
322
+ catch (ex) {
323
+ console.error(ex);
324
+ return false;
325
+ }
326
+ });
327
+ }
328
+ unsetFailReason(url) {
329
+ return __awaiter(this, void 0, void 0, function* () {
330
+ try {
331
+ yield this.getUrls().updateOne({ url: url }, { $unset: {
332
+ reason: true
333
+ } });
334
+ return true;
335
+ }
336
+ catch (ex) {
337
+ console.error(ex);
338
+ return false;
339
+ }
340
+ });
341
+ }
342
+ link(url, links) {
343
+ return __awaiter(this, void 0, void 0, function* () {
344
+ // more efficient to only remove removed and only add new
345
+ // rather than just wiping all existing and re-adding
346
+ const find = this.getLinks().find({
347
+ url: url
348
+ }, {});
349
+ const existing = (yield this.listQueryResults(find, isTLink))
350
+ .map((link) => link.outgoing);
351
+ const removed = [];
352
+ const added = [];
353
+ for (const link of links) {
354
+ if (!existing.includes(link) && !added.includes(link))
355
+ added.push(link);
356
+ }
357
+ for (const link of existing) {
358
+ if (!links.includes(link))
359
+ removed.push(link);
360
+ }
361
+ for (const outgoing of removed) {
362
+ try {
363
+ yield this.getLinks().deleteMany({
364
+ url: url,
365
+ outgoing: outgoing
366
+ });
367
+ }
368
+ catch (ex) {
369
+ /* do nothing */
370
+ }
371
+ }
372
+ for (const outgoing of added) {
373
+ try {
374
+ yield this.getLinks().insertOne({
375
+ url: url,
376
+ outgoing: outgoing
377
+ });
378
+ }
379
+ catch (ex) {
380
+ switch (ex.code || -1) {
381
+ case 11000:
382
+ // ignore duplicates
383
+ commonsOutputError(`DUPLICATE: ${url}, ${outgoing}`);
384
+ break;
385
+ case 17280:
386
+ case 17282:
387
+ commonsOutputError(`INDEX TOO LARGE: ${url}, ${outgoing}`);
388
+ // ignore index too large
389
+ break;
390
+ default:
391
+ commonsOutputDebug('debug position 8');
392
+ console.log(ex);
393
+ throw ex;
394
+ }
395
+ }
396
+ }
365
397
  return true;
366
- }
367
- catch (ex) {
368
- console.error(ex);
369
- return false;
370
- }
371
- }
372
- async listStatusTallies() {
373
- const results = this.getUrls().aggregate([
374
- { $match: { status: { $ne: hydra_crawler_ts_assets_6.EStatus.ARCHIVED } } },
375
- { $group: {
376
- _id: '$status',
377
- tally: { $sum: 1 }
378
- } }
379
- ]);
380
- const rows = await this.listQueryResults(results, isTMongoIdTallyRow);
381
- const map = new Map();
382
- for (const row of rows) {
383
- const status = hydra_crawler_ts_assets_6.toEStatus(row._id);
384
- if (status)
385
- map.set(status, row.tally);
386
- }
387
- return map;
388
- }
389
- async getLinkTalliesCount() {
390
- return await this.getLinks().countDocuments();
391
- }
392
- async getDomainTalliesCount() {
393
- return await this.getDomains().countDocuments();
394
- }
395
- async listDomainQueuedTallies() {
396
- const results = this.getUrls().aggregate([
397
- { $match: {
398
- status: hydra_crawler_ts_assets_6.EStatus.QUEUED
399
- } },
400
- { $group: {
401
- _id: '$domain',
402
- tally: { $sum: 1 }
403
- } }
404
- ]);
405
- const rows = await this.listQueryResults(results, isTMongoIdTallyRow);
406
- const map = new Map();
407
- for (const row of rows) {
408
- map.set(row._id, row.tally);
409
- }
410
- return map;
411
- }
412
- async listPhpErrors() {
413
- const results = this.getUrls().find({
414
- status: { $ne: hydra_crawler_ts_assets_6.EStatus.ARCHIVED },
415
- phpErrors: { $exists: true }
416
- });
417
- return (await this.listQueryResults(results, hydra_crawler_ts_assets_1.isIUrl))
418
- .map((url) => {
419
- if (!tscommons_core_1.CommonsType.hasPropertyTArray(url, 'phpErrors', hydra_crawler_ts_assets_2.isTPhpError))
420
- throw new Error('Invalid PHP error object');
421
- return {
422
- url: url.url,
423
- errors: url['phpErrors']
424
- };
425
- });
426
- }
427
- async listAspErrors() {
428
- const results = this.getUrls().find({
429
- status: { $ne: hydra_crawler_ts_assets_6.EStatus.ARCHIVED },
430
- aspErrors: { $exists: true }
431
- });
432
- return (await this.listQueryResults(results, hydra_crawler_ts_assets_1.isIUrl))
433
- .map((url) => {
434
- if (!tscommons_core_1.CommonsType.hasPropertyTArray(url, 'aspErrors', hydra_crawler_ts_assets_3.isTAspError))
435
- throw new Error('Invalid PHP error object');
436
- return {
437
- url: url.url,
438
- errors: url['aspErrors']
439
- };
440
- });
441
- }
442
- async listDone200DomainUrls(domain) {
443
- const results = this.getUrls().find({ $and: [
444
- { domain: domain },
445
- { status: hydra_crawler_ts_assets_6.EStatus.DONE },
446
- { statusCode: { $gte: 200 } },
447
- { statusCode: { $lt: 300 } }
448
- ] });
449
- return (await this.listQueryResults(results, hydra_crawler_ts_assets_1.isIUrl))
450
- .map((url) => url.url);
451
- }
452
- async listDomains() {
453
- const results = this.getDomains().find({ ip: { $exists: true, $ne: null } } // this is ok, despite the type objection to null
454
- );
455
- // since we're doing $ne: null above, we don't need to strip nulls, as there won't be any
456
- return await this.listQueryResults(results, hydra_crawler_ts_assets_4.isTDomain);
457
- }
458
- async listDomainsByLike(term) {
459
- const results = this.getDomains().find({
460
- domain: new RegExp(tscommons_core_3.CommonsString.regexLike(`%${term}%`), 'i')
461
- });
462
- return (await this.listQueryResults(results, hydra_crawler_ts_assets_4.isTDomain))
463
- .map((encoded) => {
464
- return tscommons_core_2.CommonsObject.stripNulls(encoded);
465
- });
466
- }
467
- async listInboundLinks(url) {
468
- const results = this.getLinks().find({ outgoing: url });
469
- return (await this.listQueryResults(results, hydra_crawler_ts_assets_5.isTLink))
470
- .map((link) => link.url);
471
- }
472
- async listOutboundLinks(url) {
473
- const results = this.getLinks().find({ url: url });
474
- return (await this.listQueryResults(results, hydra_crawler_ts_assets_5.isTLink))
475
- .map((link) => link.outgoing);
476
- }
477
- async listImagesBySizeThreshold(size, comparator) {
478
- const queries = [
479
- { status: hydra_crawler_ts_assets_6.EStatus.DONE },
480
- { 'headers.content-type': /^image\/(jpeg)/ },
481
- { 'headers.content-length': { $exists: true } }
482
- ];
483
- switch (comparator) {
484
- case hydra_crawler_ts_assets_7.EComparator.GT:
485
- queries.push({ 'headers.content-length': { $gt: size } });
486
- break;
487
- case hydra_crawler_ts_assets_7.EComparator.LT:
488
- queries.push({ 'headers.content-length': { $lt: size } });
489
- break;
490
- case hydra_crawler_ts_assets_7.EComparator.GTE:
491
- queries.push({ 'headers.content-length': { $gte: size } });
492
- break;
493
- case hydra_crawler_ts_assets_7.EComparator.LTE:
494
- queries.push({ 'headers.content-length': { $lte: size } });
495
- break;
496
- }
497
- const results = this.getUrls().find({ $and: queries });
498
- return (await this.listQueryResults(results, hydra_crawler_ts_assets_1.isIUrl))
499
- .map((row) => ({
500
- url: row.url,
501
- size: row['headers']['content-length']
502
- }));
398
+ });
399
+ }
400
+ markDead(domain) {
401
+ return __awaiter(this, void 0, void 0, function* () {
402
+ try {
403
+ yield this.getUrls().updateMany({ domain: domain, status: { $in: [EStatus.QUEUED, EStatus.ACTIVE] } }, { $set: { status: EStatus.DEAD, attempted: new Date() } });
404
+ return true;
405
+ }
406
+ catch (ex) {
407
+ console.error(ex);
408
+ return false;
409
+ }
410
+ });
411
+ }
412
+ listStatusTallies() {
413
+ return __awaiter(this, void 0, void 0, function* () {
414
+ const results = this.getUrls().aggregate([
415
+ { $match: { status: { $ne: EStatus.ARCHIVED } } },
416
+ { $group: {
417
+ _id: '$status',
418
+ tally: { $sum: 1 }
419
+ } }
420
+ ]);
421
+ const rows = yield this.listQueryResults(results, isTMongoIdTallyRow);
422
+ const map = new Map();
423
+ for (const row of rows) {
424
+ // eslint-disable-next-line no-underscore-dangle
425
+ const status = toEStatus(row._id);
426
+ if (status)
427
+ map.set(status, row.tally);
428
+ }
429
+ return map;
430
+ });
431
+ }
432
+ getLinkTalliesCount() {
433
+ return __awaiter(this, void 0, void 0, function* () {
434
+ return yield this.getLinks().countDocuments();
435
+ });
436
+ }
437
+ getDomainTalliesCount() {
438
+ return __awaiter(this, void 0, void 0, function* () {
439
+ return yield this.getDomains().countDocuments();
440
+ });
441
+ }
442
+ listDomainQueuedTallies() {
443
+ return __awaiter(this, void 0, void 0, function* () {
444
+ const results = this.getUrls().aggregate([
445
+ { $match: {
446
+ status: EStatus.QUEUED
447
+ } },
448
+ { $group: {
449
+ _id: '$domain',
450
+ tally: { $sum: 1 }
451
+ } }
452
+ ]);
453
+ const rows = yield this.listQueryResults(results, isTMongoIdTallyRow);
454
+ const map = new Map();
455
+ for (const row of rows) {
456
+ // eslint-disable-next-line no-underscore-dangle
457
+ map.set(row._id, row.tally);
458
+ }
459
+ return map;
460
+ });
461
+ }
462
+ listPhpErrors() {
463
+ return __awaiter(this, void 0, void 0, function* () {
464
+ const results = this.getUrls().find({
465
+ status: { $ne: EStatus.ARCHIVED },
466
+ phpErrors: { $exists: true }
467
+ }, {});
468
+ return (yield this.listQueryResults(results, isIUrl))
469
+ .map((url) => {
470
+ if (!commonsTypeHasPropertyTArray(url, 'phpErrors', isTPhpError))
471
+ throw new Error('Invalid PHP error object');
472
+ return {
473
+ url: url.url,
474
+ errors: url.phpErrors
475
+ };
476
+ });
477
+ });
478
+ }
479
+ listAspErrors() {
480
+ return __awaiter(this, void 0, void 0, function* () {
481
+ const results = this.getUrls().find({
482
+ status: { $ne: EStatus.ARCHIVED },
483
+ aspErrors: { $exists: true }
484
+ }, {});
485
+ return (yield this.listQueryResults(results, isIUrl))
486
+ .map((url) => {
487
+ if (!commonsTypeHasPropertyTArray(url, 'aspErrors', isTAspError))
488
+ throw new Error('Invalid PHP error object');
489
+ return {
490
+ url: url.url,
491
+ errors: url.aspErrors
492
+ };
493
+ });
494
+ });
495
+ }
496
+ listDone200DomainUrls(domain) {
497
+ return __awaiter(this, void 0, void 0, function* () {
498
+ const results = this.getUrls().find({ $and: [
499
+ { domain: domain },
500
+ { status: EStatus.DONE },
501
+ { statusCode: { $gte: 200 } },
502
+ { statusCode: { $lt: 300 } }
503
+ ] }, {});
504
+ return (yield this.listQueryResults(results, isIUrl))
505
+ .map((url) => url.url);
506
+ });
507
+ }
508
+ listDomains() {
509
+ return __awaiter(this, void 0, void 0, function* () {
510
+ const results = this.getDomains().find(
511
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
512
+ { ip: { $exists: true, $ne: null } }, // this is ok, despite the type objection to null
513
+ {});
514
+ // since we're doing $ne: null above, we don't need to strip nulls, as there won't be any
515
+ return yield this.listQueryResults(results, isTDomain);
516
+ });
517
+ }
518
+ listDomainsByLike(term) {
519
+ return __awaiter(this, void 0, void 0, function* () {
520
+ const results = this.getDomains().find({
521
+ domain: new RegExp(commonsStringRegexLike(`%${term}%`), 'i')
522
+ }, {});
523
+ return (yield this.listQueryResults(results, isTDomain))
524
+ .map((encoded) => commonsObjectStripNulls(encoded));
525
+ });
526
+ }
527
+ listInboundLinks(url) {
528
+ return __awaiter(this, void 0, void 0, function* () {
529
+ const results = this.getLinks().find({ outgoing: url }, {});
530
+ return (yield this.listQueryResults(results, isTLink))
531
+ .map((link) => link.url);
532
+ });
533
+ }
534
+ listOutboundLinks(url) {
535
+ return __awaiter(this, void 0, void 0, function* () {
536
+ const results = this.getLinks().find({ url: url }, {});
537
+ return (yield this.listQueryResults(results, isTLink))
538
+ .map((link) => link.outgoing);
539
+ });
540
+ }
541
+ listImagesBySizeThreshold(size, comparator) {
542
+ return __awaiter(this, void 0, void 0, function* () {
543
+ const queries = [
544
+ { status: EStatus.DONE },
545
+ { 'headers.content-type': /^image\/(jpeg)/ },
546
+ { 'headers.content-length': { $exists: true } }
547
+ ];
548
+ switch (comparator) {
549
+ case EComparator.GT:
550
+ queries.push({ 'headers.content-length': { $gt: size } });
551
+ break;
552
+ case EComparator.LT:
553
+ queries.push({ 'headers.content-length': { $lt: size } });
554
+ break;
555
+ case EComparator.GTE:
556
+ queries.push({ 'headers.content-length': { $gte: size } });
557
+ break;
558
+ case EComparator.LTE:
559
+ queries.push({ 'headers.content-length': { $lte: size } });
560
+ break;
561
+ }
562
+ const results = this.getUrls().find({ $and: queries }, {});
563
+ return (yield this.listQueryResults(results, isIUrl))
564
+ .map((row) => ({
565
+ url: row.url,
566
+ size: commonsTypeAttemptNumber(row['headers']['content-length']) || -1
567
+ }));
568
+ });
503
569
  }
504
570
  }
505
- exports.DatabaseService = DatabaseService;
571
+ //# sourceMappingURL=database.service.js.map