hydra-crawler 1.4.6 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. package/dist/apis/autocomplete.api.d.ts +7 -0
  2. package/dist/apis/autocomplete.api.js +15 -9
  3. package/dist/apis/autocomplete.api.js.map +1 -0
  4. package/dist/apis/bugs.api.d.ts +7 -0
  5. package/dist/apis/bugs.api.js +21 -15
  6. package/dist/apis/bugs.api.js.map +1 -0
  7. package/dist/apis/crawl.api.d.ts +7 -0
  8. package/dist/apis/crawl.api.js +15 -9
  9. package/dist/apis/crawl.api.js.map +1 -0
  10. package/dist/apis/domains.api.d.ts +7 -0
  11. package/dist/apis/domains.api.js +24 -19
  12. package/dist/apis/domains.api.js.map +1 -0
  13. package/dist/apis/images.api.d.ts +7 -0
  14. package/dist/apis/images.api.js +20 -14
  15. package/dist/apis/images.api.js.map +1 -0
  16. package/dist/apis/statistics.api.d.ts +8 -0
  17. package/dist/apis/statistics.api.js +27 -20
  18. package/dist/apis/statistics.api.js.map +1 -0
  19. package/dist/apis/test.api.d.ts +5 -0
  20. package/dist/apis/test.api.js +15 -9
  21. package/dist/apis/test.api.js.map +1 -0
  22. package/dist/apis/urls.api.d.ts +7 -0
  23. package/dist/apis/urls.api.js +21 -15
  24. package/dist/apis/urls.api.js.map +1 -0
  25. package/dist/apps/cleanup.app.d.ts +19 -0
  26. package/dist/apps/cleanup.app.js +118 -100
  27. package/dist/apps/cleanup.app.js.map +1 -0
  28. package/dist/apps/cross-populate-export.app.d.ts +12 -0
  29. package/dist/apps/cross-populate-export.app.js +60 -47
  30. package/dist/apps/cross-populate-export.app.js.map +1 -0
  31. package/dist/apps/cross-populate-import.app.d.ts +12 -0
  32. package/dist/apps/cross-populate-import.app.js +64 -51
  33. package/dist/apps/cross-populate-import.app.js.map +1 -0
  34. package/dist/apps/denylist.app.d.ts +17 -0
  35. package/dist/apps/denylist.app.js +115 -98
  36. package/dist/apps/denylist.app.js.map +1 -0
  37. package/dist/apps/expire.app.d.ts +19 -0
  38. package/dist/apps/expire.app.js +44 -31
  39. package/dist/apps/expire.app.js.map +1 -0
  40. package/dist/apps/extract-text.app.d.ts +8 -0
  41. package/dist/apps/extract-text.app.js +43 -35
  42. package/dist/apps/extract-text.app.js.map +1 -0
  43. package/dist/apps/hydra.app.d.ts +34 -0
  44. package/dist/apps/hydra.app.js +150 -137
  45. package/dist/apps/hydra.app.js.map +1 -0
  46. package/dist/apps/import.app.d.ts +11 -0
  47. package/dist/apps/import.app.js +44 -32
  48. package/dist/apps/import.app.js.map +1 -0
  49. package/dist/apps/internal-hydra-common.app.d.ts +28 -0
  50. package/dist/apps/internal-hydra-common.app.js +5 -11
  51. package/dist/apps/internal-hydra-common.app.js.map +1 -0
  52. package/dist/apps/query.app.d.ts +20 -0
  53. package/dist/apps/query.app.js +63 -49
  54. package/dist/apps/query.app.js.map +1 -0
  55. package/dist/apps/reattempt.app.d.ts +17 -0
  56. package/dist/apps/reattempt.app.js +66 -53
  57. package/dist/apps/reattempt.app.js.map +1 -0
  58. package/dist/apps/requeue-domain.app.d.ts +13 -0
  59. package/dist/apps/requeue-domain.app.js +50 -37
  60. package/dist/apps/requeue-domain.app.js.map +1 -0
  61. package/dist/apps/seed.app.d.ts +15 -0
  62. package/dist/apps/seed.app.js +53 -40
  63. package/dist/apps/seed.app.js.map +1 -0
  64. package/dist/apps/startup.app.d.ts +11 -0
  65. package/dist/apps/startup.app.js +51 -38
  66. package/dist/apps/startup.app.js.map +1 -0
  67. package/dist/apps/unarchive.app.d.ts +15 -0
  68. package/dist/apps/unarchive.app.js +67 -54
  69. package/dist/apps/unarchive.app.js.map +1 -0
  70. package/dist/classes/cleaner.d.ts +12 -0
  71. package/dist/classes/cleaner.js +227 -207
  72. package/dist/classes/cleaner.js.map +1 -0
  73. package/dist/classes/crawler.d.ts +34 -0
  74. package/dist/classes/crawler.js +248 -241
  75. package/dist/classes/crawler.js.map +1 -0
  76. package/dist/classes/dns.d.ts +3 -0
  77. package/dist/classes/dns.js +10 -13
  78. package/dist/classes/dns.js.map +1 -0
  79. package/dist/classes/expirer.d.ts +10 -0
  80. package/dist/classes/expirer.js +107 -94
  81. package/dist/classes/expirer.js.map +1 -0
  82. package/dist/classes/expiry.d.ts +8 -0
  83. package/dist/classes/expiry.js +16 -19
  84. package/dist/classes/expiry.js.map +1 -0
  85. package/dist/classes/lists.d.ts +9 -0
  86. package/dist/classes/lists.js +13 -18
  87. package/dist/classes/lists.js.map +1 -0
  88. package/dist/classes/robot.d.ts +15 -0
  89. package/dist/classes/robot.js +40 -30
  90. package/dist/classes/robot.js.map +1 -0
  91. package/dist/classes/tracker.d.ts +25 -0
  92. package/dist/classes/tracker.js +82 -64
  93. package/dist/classes/tracker.js.map +1 -0
  94. package/dist/cli.d.ts +1 -0
  95. package/dist/cli.js +72 -65
  96. package/dist/cli.js.map +1 -0
  97. package/dist/enums/eavailable-strategy.d.ts +4 -0
  98. package/dist/enums/eavailable-strategy.js +3 -5
  99. package/dist/enums/eavailable-strategy.js.map +1 -0
  100. package/dist/enums/elist.d.ts +7 -0
  101. package/dist/enums/elist.js +7 -11
  102. package/dist/enums/elist.js.map +1 -0
  103. package/dist/enums/eserver.d.ts +8 -0
  104. package/dist/enums/eserver.js +3 -5
  105. package/dist/enums/eserver.js.map +1 -0
  106. package/dist/enums/ex-powered-by.d.ts +6 -0
  107. package/dist/enums/ex-powered-by.js +3 -5
  108. package/dist/enums/ex-powered-by.js.map +1 -0
  109. package/dist/helpers/matcher.d.ts +5 -0
  110. package/dist/helpers/matcher.js +2 -5
  111. package/dist/helpers/matcher.js.map +1 -0
  112. package/dist/helpers/random.d.ts +4 -0
  113. package/dist/helpers/random.js +2 -5
  114. package/dist/helpers/random.js.map +1 -0
  115. package/dist/helpers/utf-decoder.d.ts +4 -0
  116. package/dist/helpers/utf-decoder.js +3 -6
  117. package/dist/helpers/utf-decoder.js.map +1 -0
  118. package/dist/interfaces/iexpiry.d.ts +7 -0
  119. package/dist/interfaces/iexpiry.js +9 -13
  120. package/dist/interfaces/iexpiry.js.map +1 -0
  121. package/dist/interfaces/imatch.d.ts +6 -0
  122. package/dist/interfaces/imatch.js +6 -9
  123. package/dist/interfaces/imatch.js.map +1 -0
  124. package/dist/interfaces/iparser-config.d.ts +4 -0
  125. package/dist/interfaces/iparser-config.js +4 -7
  126. package/dist/interfaces/iparser-config.js.map +1 -0
  127. package/dist/interfaces/iparser.d.ts +8 -0
  128. package/dist/interfaces/iparser.js +2 -2
  129. package/dist/interfaces/iparser.js.map +1 -0
  130. package/dist/interfaces/irequest-outcome.d.ts +11 -0
  131. package/dist/interfaces/irequest-outcome.js +2 -2
  132. package/dist/interfaces/irequest-outcome.js.map +1 -0
  133. package/dist/interfaces/iserver.d.ts +4 -0
  134. package/dist/interfaces/iserver.js +2 -2
  135. package/dist/interfaces/iserver.js.map +1 -0
  136. package/dist/parsers/accessibility-metrics.parser.d.ts +11 -0
  137. package/dist/parsers/accessibility-metrics.parser.js +34 -26
  138. package/dist/parsers/accessibility-metrics.parser.js.map +1 -0
  139. package/dist/parsers/asp-error.parser.d.ts +12 -0
  140. package/dist/parsers/asp-error.parser.js +36 -28
  141. package/dist/parsers/asp-error.parser.js.map +1 -0
  142. package/dist/parsers/bad-words.parser.d.ts +10 -0
  143. package/dist/parsers/bad-words.parser.js +21 -13
  144. package/dist/parsers/bad-words.parser.js.map +1 -0
  145. package/dist/parsers/complex-english.parser.d.ts +15 -0
  146. package/dist/parsers/complex-english.parser.js +33 -25
  147. package/dist/parsers/complex-english.parser.js.map +1 -0
  148. package/dist/parsers/data.parser.d.ts +14 -0
  149. package/dist/parsers/data.parser.js +12 -16
  150. package/dist/parsers/data.parser.js.map +1 -0
  151. package/dist/parsers/dictionary.parser.d.ts +19 -0
  152. package/dist/parsers/dictionary.parser.js +47 -39
  153. package/dist/parsers/dictionary.parser.js.map +1 -0
  154. package/dist/parsers/html.parser.d.ts +13 -0
  155. package/dist/parsers/html.parser.js +4 -8
  156. package/dist/parsers/html.parser.js.map +1 -0
  157. package/dist/parsers/hyperlinks.parser.d.ts +20 -0
  158. package/dist/parsers/hyperlinks.parser.js +82 -77
  159. package/dist/parsers/hyperlinks.parser.js.map +1 -0
  160. package/dist/parsers/image-tags.parser.d.ts +19 -0
  161. package/dist/parsers/image-tags.parser.js +31 -35
  162. package/dist/parsers/image-tags.parser.js.map +1 -0
  163. package/dist/parsers/jpeg.parser.d.ts +11 -0
  164. package/dist/parsers/jpeg.parser.js +28 -20
  165. package/dist/parsers/jpeg.parser.js.map +1 -0
  166. package/dist/parsers/paragraphs.parser.d.ts +13 -0
  167. package/dist/parsers/paragraphs.parser.js +33 -40
  168. package/dist/parsers/paragraphs.parser.js.map +1 -0
  169. package/dist/parsers/parser.d.ts +19 -0
  170. package/dist/parsers/parser.js +30 -17
  171. package/dist/parsers/parser.js.map +1 -0
  172. package/dist/parsers/php-error.parser.d.ts +12 -0
  173. package/dist/parsers/php-error.parser.js +42 -34
  174. package/dist/parsers/php-error.parser.js.map +1 -0
  175. package/dist/parsers/phrase.parser.d.ts +8 -0
  176. package/dist/parsers/phrase.parser.js +16 -11
  177. package/dist/parsers/phrase.parser.js.map +1 -0
  178. package/dist/parsers/regex.parser.d.ts +10 -0
  179. package/dist/parsers/regex.parser.js +30 -22
  180. package/dist/parsers/regex.parser.js.map +1 -0
  181. package/dist/parsers/server.parser.d.ts +11 -0
  182. package/dist/parsers/server.parser.js +58 -57
  183. package/dist/parsers/server.parser.js.map +1 -0
  184. package/dist/parsers/spelling.parser.d.ts +10 -0
  185. package/dist/parsers/spelling.parser.js +21 -13
  186. package/dist/parsers/spelling.parser.js.map +1 -0
  187. package/dist/parsers/string.parser.d.ts +8 -0
  188. package/dist/parsers/string.parser.js +5 -8
  189. package/dist/parsers/string.parser.js.map +1 -0
  190. package/dist/parsers/text.parser.d.ts +8 -0
  191. package/dist/parsers/text.parser.js +24 -18
  192. package/dist/parsers/text.parser.js.map +1 -0
  193. package/dist/parsers/words.parser.d.ts +11 -0
  194. package/dist/parsers/words.parser.js +32 -28
  195. package/dist/parsers/words.parser.js.map +1 -0
  196. package/dist/queries/complex-english.query.d.ts +2 -0
  197. package/dist/queries/complex-english.query.js +37 -38
  198. package/dist/queries/complex-english.query.js.map +1 -0
  199. package/dist/queries/flash-content.query.d.ts +2 -0
  200. package/dist/queries/flash-content.query.js +39 -30
  201. package/dist/queries/flash-content.query.js.map +1 -0
  202. package/dist/queries/linking-to-domains.query.d.ts +2 -0
  203. package/dist/queries/linking-to-domains.query.js +35 -27
  204. package/dist/queries/linking-to-domains.query.js.map +1 -0
  205. package/dist/queries/readability-score.query.d.ts +2 -0
  206. package/dist/queries/readability-score.query.js +21 -13
  207. package/dist/queries/readability-score.query.js.map +1 -0
  208. package/dist/servers/crawl.server.d.ts +35 -0
  209. package/dist/servers/crawl.server.js +133 -121
  210. package/dist/servers/crawl.server.js.map +1 -0
  211. package/dist/servers/express.server.d.ts +8 -0
  212. package/dist/servers/express.server.js +7 -10
  213. package/dist/servers/express.server.js.map +1 -0
  214. package/dist/servers/maintenance.server.d.ts +22 -0
  215. package/dist/servers/maintenance.server.js +42 -36
  216. package/dist/servers/maintenance.server.js.map +1 -0
  217. package/dist/servers/rest.server.d.ts +7 -0
  218. package/dist/servers/rest.server.js +40 -51
  219. package/dist/servers/rest.server.js.map +1 -0
  220. package/dist/servers/socket-io.server.d.ts +12 -0
  221. package/dist/servers/socket-io.server.js +48 -15
  222. package/dist/servers/socket-io.server.js.map +1 -0
  223. package/dist/services/database.service.d.ts +68 -0
  224. package/dist/services/database.service.js +527 -462
  225. package/dist/services/database.service.js.map +1 -0
  226. package/dist/types/tcrawl-config.d.ts +14 -0
  227. package/dist/types/tcrawl-config.js +14 -17
  228. package/dist/types/tcrawl-config.js.map +1 -0
  229. package/dist/types/thydra-config.d.ts +4 -0
  230. package/dist/types/thydra-config.js +4 -7
  231. package/dist/types/thydra-config.js.map +1 -0
  232. package/dist/types/tparser-ctor.d.ts +7 -0
  233. package/dist/types/tparser-ctor.js +2 -2
  234. package/dist/types/tparser-ctor.js.map +1 -0
  235. package/dist/types/tquery.d.ts +7 -0
  236. package/dist/types/tquery.js +2 -2
  237. package/dist/types/tquery.js.map +1 -0
  238. package/dist/types/trobots-config.d.ts +4 -0
  239. package/dist/types/trobots-config.js +4 -7
  240. package/dist/types/trobots-config.js.map +1 -0
  241. package/package.json +41 -29
  242. package/angular/10-es2015.bacd4ae5dd7913ce55f0.js +0 -1
  243. package/angular/10-es5.bacd4ae5dd7913ce55f0.js +0 -1
  244. package/angular/11-es2015.0f031dcf752d1e8eda6b.js +0 -1
  245. package/angular/11-es5.0f031dcf752d1e8eda6b.js +0 -1
  246. package/angular/3rdpartylicenses.txt +0 -1127
  247. package/angular/5-es2015.951498ca9c1bc74e57bf.js +0 -1
  248. package/angular/5-es5.951498ca9c1bc74e57bf.js +0 -1
  249. package/angular/6-es2015.65f680261a3506b88381.js +0 -1
  250. package/angular/6-es5.65f680261a3506b88381.js +0 -1
  251. package/angular/7-es2015.625197f3af1dbf3e805d.js +0 -1
  252. package/angular/7-es5.625197f3af1dbf3e805d.js +0 -1
  253. package/angular/8-es2015.55518901987a5b834309.js +0 -1
  254. package/angular/8-es5.55518901987a5b834309.js +0 -1
  255. package/angular/9-es2015.6cc9bde262564e7836f2.js +0 -1
  256. package/angular/9-es5.6cc9bde262564e7836f2.js +0 -1
  257. package/angular/Roboto-Black.41ed1105a6ebb8ffe34e.woff2 +0 -0
  258. package/angular/Roboto-Black.937491dfcbe64ca9a9f1.woff +0 -0
  259. package/angular/Roboto-BlackItalic.2e1ee657996854c6f427.woff +0 -0
  260. package/angular/Roboto-BlackItalic.50ca4c51ebc27e7e7d2f.woff2 +0 -0
  261. package/angular/Roboto-Bold.73288d91c325e82a5b92.woff +0 -0
  262. package/angular/Roboto-Bold.92fbd4e93cf0a5dbebaa.woff2 +0 -0
  263. package/angular/Roboto-BoldItalic.5f600d98a73d800ae575.woff2 +0 -0
  264. package/angular/Roboto-BoldItalic.6d89acbd21d7e3fbecb2.woff +0 -0
  265. package/angular/Roboto-Light.c27d89ac77468ae18f28.woff2 +0 -0
  266. package/angular/Roboto-Light.d923dfafc0c5183b59aa.woff +0 -0
  267. package/angular/Roboto-LightItalic.506274c7228cf81cae4d.woff2 +0 -0
  268. package/angular/Roboto-LightItalic.d4b8c137518d9d92bb28.woff +0 -0
  269. package/angular/Roboto-Medium.092c6130df8fd2199888.woff +0 -0
  270. package/angular/Roboto-Medium.1d3bced88509b0838984.woff2 +0 -0
  271. package/angular/Roboto-MediumItalic.18ff1628c628080166c1.woff +0 -0
  272. package/angular/Roboto-MediumItalic.d620b8f53f75966fe42e.woff2 +0 -0
  273. package/angular/Roboto-Regular.64cfb66c866ea50cad47.woff2 +0 -0
  274. package/angular/Roboto-Regular.e02e9d6ff5547f7e9962.woff +0 -0
  275. package/angular/Roboto-RegularItalic.4dd2af1e8df532f41db8.woff2 +0 -0
  276. package/angular/Roboto-RegularItalic.5ea38fff9eebef99c5df.woff +0 -0
  277. package/angular/Roboto-Thin.dbd56bd3357dc3617fe5.woff2 +0 -0
  278. package/angular/Roboto-Thin.e7f7c82374bd0ebef14b.woff +0 -0
  279. package/angular/Roboto-ThinItalic.5dd9349c940073834e9a.woff +0 -0
  280. package/angular/Roboto-ThinItalic.a8cef84f735ef887abdc.woff2 +0 -0
  281. package/angular/assets/config/app-config.json +0 -16
  282. package/angular/assets/images/splashbg.jpg +0 -0
  283. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff +0 -0
  284. package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff2 +0 -0
  285. package/angular/assets/web-app-commons/fonts/material-icons/material-design-icons-community.css +0 -11293
  286. package/angular/favicon.ico +0 -0
  287. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNa.f2a0933406f783065152.woff +0 -0
  288. package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.6467d9a24f234e8e8e07.woff2 +0 -0
  289. package/angular/index.html +0 -16
  290. package/angular/main-es2015.3a582572476c7f292e52.js +0 -1
  291. package/angular/main-es5.3a582572476c7f292e52.js +0 -1
  292. package/angular/polyfills-es2015.7df68534018bc2f6cb09.js +0 -1
  293. package/angular/polyfills-es5.e79468f406fae2989221.js +0 -1
  294. package/angular/runtime-es2015.6d2cff76cdb2790d3308.js +0 -1
  295. package/angular/runtime-es5.6d2cff76cdb2790d3308.js +0 -1
  296. package/angular/styles.c5c6c2534225b85c4ff0.css +0 -1
  297. package/config/bad-words.json +0 -1
  298. package/config/complex-english.json +0 -400
  299. package/config/hydra-auth.json +0 -8
  300. package/config/hydra-crawler.json +0 -84
  301. package/config/list-allow.json +0 -171
  302. package/config/list-deny.json +0 -248
  303. package/config/list-expiry.json +0 -7
  304. package/config/schedule.json +0 -25
  305. package/config/spelling.json +0 -1
@@ -1,19 +1,27 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.SpellingParser = void 0;
4
- const dictionary_parser_1 = require("./dictionary.parser");
5
- class SpellingParser extends dictionary_parser_1.DictionaryParser {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { DictionaryParser } from './dictionary.parser';
11
+ export class SpellingParser extends DictionaryParser {
6
12
  constructor(url, outcome, config) {
7
13
  super(outcome, config, 'spelling');
8
14
  this.url = url;
9
15
  }
10
- async parseMatches(database, matches, _nonMatches) {
11
- if (!this.url)
12
- return;
13
- if (matches.length > 0)
14
- await database.setData(this.url, 'spelling', matches);
15
- else
16
- await database.unsetData(this.url, 'spelling');
16
+ parseMatches(database, matches, _nonMatches) {
17
+ return __awaiter(this, void 0, void 0, function* () {
18
+ if (!this.url)
19
+ return;
20
+ if (matches.length > 0)
21
+ yield database.setData(this.url, 'spelling', matches);
22
+ else
23
+ yield database.unsetData(this.url, 'spelling');
24
+ });
17
25
  }
18
26
  }
19
- exports.SpellingParser = SpellingParser;
27
+ //# sourceMappingURL=spelling.parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"spelling.parser.js","sourceRoot":"","sources":["../../src/parsers/spelling.parser.ts"],"names":[],"mappings":";;;;;;;;;AAOA,OAAO,EAAE,gBAAgB,EAAqB,MAAM,qBAAqB,CAAC;AAE1E,MAAM,OAAO,cAAe,SAAQ,gBAAmC;IACtE,YACU,GAAY,EACpB,OAAyB,EACzB,MAAkC;QAEnC,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAJ1B,QAAG,GAAH,GAAG,CAAS;IAKtB,CAAC;IAEe,YAAY,CAAC,QAAyB,EAAE,OAAiB,EAAE,WAAqB;;YAC/F,IAAI,CAAC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEtB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAM,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;;gBACzE,MAAM,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QACrD,CAAC;KAAA;CACD"}
@@ -0,0 +1,8 @@
1
+ import { TKeyObject } from 'tscommons-es-core';
2
+ import { IRequestOutcome } from '../interfaces/irequest-outcome';
3
+ import { IParserConfig } from '../interfaces/iparser-config';
4
+ import { DataParser, IDataConfig } from './data.parser';
5
+ export declare abstract class StringParser<T extends IDataConfig> extends DataParser<T> {
6
+ protected stringData: string | undefined;
7
+ constructor(outcome?: IRequestOutcome, config?: TKeyObject<IParserConfig>, configKey?: string);
8
+ }
@@ -1,19 +1,16 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.StringParser = void 0;
4
- const utf_decoder_1 = require("../helpers/utf-decoder");
5
- const data_parser_1 = require("./data.parser");
6
- class StringParser extends data_parser_1.DataParser {
1
+ import { UtfDecoder } from '../helpers/utf-decoder';
2
+ import { DataParser } from './data.parser';
3
+ export class StringParser extends DataParser {
7
4
  constructor(outcome, config, configKey) {
8
5
  super(outcome, config, configKey);
9
6
  if (!this.data)
10
7
  return;
11
8
  try {
12
- this.stringData = utf_decoder_1.UtfDecoder.fromBuffer(this.data);
9
+ this.stringData = UtfDecoder.fromBuffer(this.data);
13
10
  }
14
11
  catch (ex) {
15
12
  // ignore
16
13
  }
17
14
  }
18
15
  }
19
- exports.StringParser = StringParser;
16
+ //# sourceMappingURL=string.parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"string.parser.js","sourceRoot":"","sources":["../../src/parsers/string.parser.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAKpD,OAAO,EAAE,UAAU,EAAe,MAAM,eAAe,CAAC;AAExD,MAAM,OAAgB,YAAoC,SAAQ,UAAa;IAG9E,YACE,OAAyB,EACzB,MAAkC,EAClC,SAAkB;QAEnB,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QAElC,IAAI,CAAC,IAAI,CAAC,IAAI;YAAE,OAAO;QAEvB,IAAI;YACH,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACnD;QAAC,OAAO,EAAE,EAAE;YACZ,SAAS;SACT;IACF,CAAC;CACD"}
@@ -0,0 +1,8 @@
1
+ import { DatabaseService } from '../services/database.service';
2
+ import { HtmlParser } from './html.parser';
3
+ import { IDataConfig } from './data.parser';
4
+ export declare abstract class TextParser<T extends IDataConfig> extends HtmlParser<T> {
5
+ static attemptExtract(html: string): string;
6
+ protected abstract parseText(database: DatabaseService, text: string): Promise<void>;
7
+ parse(database: DatabaseService): Promise<void>;
8
+ }
@@ -1,11 +1,18 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.TextParser = void 0;
4
- const extractor = require("unfluff");
5
- const html_parser_1 = require("./html.parser");
6
- class TextParser extends html_parser_1.HtmlParser {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import extractor from 'unfluff';
11
+ import { HtmlParser } from './html.parser';
12
+ export class TextParser extends HtmlParser {
7
13
  static attemptExtract(html) {
8
14
  try {
15
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-call
9
16
  const extracted = extractor(html);
10
17
  if (!extracted)
11
18
  throw new Error('Unable to run unfluff');
@@ -19,17 +26,16 @@ class TextParser extends html_parser_1.HtmlParser {
19
26
  return '';
20
27
  }
21
28
  }
22
- constructor(outcome, config, configKey) {
23
- super(outcome, config, configKey);
24
- }
25
- async parse(database) {
26
- if (!this.dom) {
27
- await this.parseText(database, '');
28
- return;
29
- }
30
- const html = this.dom.html();
31
- const extracted = TextParser.attemptExtract(html);
32
- await this.parseText(database, extracted);
29
+ parse(database) {
30
+ return __awaiter(this, void 0, void 0, function* () {
31
+ if (!this.dom) {
32
+ yield this.parseText(database, '');
33
+ return;
34
+ }
35
+ const html = this.dom.html();
36
+ const extracted = TextParser.attemptExtract(html);
37
+ yield this.parseText(database, extracted);
38
+ });
33
39
  }
34
40
  }
35
- exports.TextParser = TextParser;
41
+ //# sourceMappingURL=text.parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.parser.js","sourceRoot":"","sources":["../../src/parsers/text.parser.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,SAAS,MAAM,SAAS,CAAC;AAIhC,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAG3C,MAAM,OAAgB,UAAkC,SAAQ,UAAa;IACrE,MAAM,CAAC,cAAc,CAAC,IAAY;QACxC,IAAI;YACH,sGAAsG;YACtG,MAAM,SAAS,GAAqB,SAAS,CAAC,IAAI,CAAC,CAAC;YACpD,IAAI,CAAC,SAAS;gBAAE,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;YAEzD,OAAO,SAAS,CAAC,IAAI;iBAClB,KAAK,CAAC,KAAK,CAAC;iBACZ,GAAG,CAAC,CAAC,CAAS,EAAU,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;iBACpC,MAAM,CAAC,CAAC,CAAS,EAAW,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC;iBACxC,IAAI,CAAC,IAAI,CAAC,CAAC;SACd;QAAC,OAAO,EAAE,EAAE;YACZ,OAAO,EAAE,CAAC;SACV;IACF,CAAC;IAIY,KAAK,CAAC,QAAyB;;YAC3C,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACd,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;gBACnC,OAAO;aACP;YAED,MAAM,IAAI,GAAW,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;YACrC,MAAM,SAAS,GAAW,UAAU,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;YAE1D,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;QAC3C,CAAC;KAAA;CACD"}
@@ -0,0 +1,11 @@
1
+ import { DatabaseService } from '../services/database.service';
2
+ import { TextParser } from './text.parser';
3
+ import { IDataConfig } from './data.parser';
4
+ export interface IWordsConfig extends IDataConfig {
5
+ allowHyphenatedWords?: boolean;
6
+ }
7
+ export declare function isIWordsConfig(test: unknown): test is IWordsConfig;
8
+ export declare abstract class WordsParser<T extends IWordsConfig> extends TextParser<T> {
9
+ protected abstract parseWords(database: DatabaseService, words: string[]): Promise<void>;
10
+ protected parseText(database: DatabaseService, text: string): Promise<void>;
11
+ }
@@ -1,36 +1,40 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.WordsParser = exports.isIWordsConfig = void 0;
4
- const tscommons_core_1 = require("tscommons-core");
5
- const text_parser_1 = require("./text.parser");
6
- const data_parser_1 = require("./data.parser");
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { commonsTypeHasPropertyBooleanOrUndefined } from 'tscommons-es-core';
11
+ import { TextParser } from './text.parser';
12
+ import { isIDataConfig } from './data.parser';
7
13
  const WORD_WITH_HYPHEN = '(?<![a-z0-9])[a-z0-9](?:[-\']?[a-z0-9])*(?![a-z0-9])';
8
14
  const WORD_WITHOUT_HYPHEN = '(?<![a-z0-9])[a-z0-9](?:[\']?[a-z0-9])*(?![a-z0-9])';
9
- function isIWordsConfig(test) {
10
- if (!data_parser_1.isIDataConfig(test))
15
+ export function isIWordsConfig(test) {
16
+ if (!isIDataConfig(test))
11
17
  return false;
12
- if (!tscommons_core_1.CommonsType.hasPropertyBooleanOrUndefined(test, 'allowHyphenatedWords'))
18
+ if (!commonsTypeHasPropertyBooleanOrUndefined(test, 'allowHyphenatedWords'))
13
19
  return false;
14
20
  return true;
15
21
  }
16
- exports.isIWordsConfig = isIWordsConfig;
17
- class WordsParser extends text_parser_1.TextParser {
18
- constructor(outcome, config, configKey) {
19
- super(outcome, config, configKey);
20
- }
21
- async parseText(database, text) {
22
- const wordsConfig = this.getConfig(isIWordsConfig);
23
- if (!wordsConfig)
24
- return;
25
- const pattern = wordsConfig.allowHyphenatedWords ? new RegExp(WORD_WITH_HYPHEN, 'ig') : new RegExp(WORD_WITHOUT_HYPHEN, 'ig');
26
- const words = [];
27
- while (true) {
28
- const result = pattern.exec(text);
29
- if (result === null)
30
- break;
31
- words.push(result[0]);
32
- }
33
- await this.parseWords(database, words);
22
+ export class WordsParser extends TextParser {
23
+ parseText(database, text) {
24
+ return __awaiter(this, void 0, void 0, function* () {
25
+ const wordsConfig = this.getConfig(isIWordsConfig);
26
+ if (!wordsConfig)
27
+ return;
28
+ const pattern = wordsConfig.allowHyphenatedWords ? new RegExp(WORD_WITH_HYPHEN, 'ig') : new RegExp(WORD_WITHOUT_HYPHEN, 'ig');
29
+ const words = [];
30
+ while (true) {
31
+ const result = pattern.exec(text);
32
+ if (result === null)
33
+ break;
34
+ words.push(result[0]);
35
+ }
36
+ yield this.parseWords(database, words);
37
+ });
34
38
  }
35
39
  }
36
- exports.WordsParser = WordsParser;
40
+ //# sourceMappingURL=words.parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"words.parser.js","sourceRoot":"","sources":["../../src/parsers/words.parser.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,wCAAwC,EAAE,MAAM,mBAAmB,CAAC;AAI7E,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAC3C,OAAO,EAAe,aAAa,EAAE,MAAM,eAAe,CAAC;AAE3D,MAAM,gBAAgB,GAAW,sDAAsD,CAAC;AACxF,MAAM,mBAAmB,GAAW,qDAAqD,CAAC;AAK1F,MAAM,UAAU,cAAc,CAAC,IAAa;IAC3C,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAEvC,IAAI,CAAC,wCAAwC,CAAC,IAAI,EAAE,sBAAsB,CAAC;QAAE,OAAO,KAAK,CAAC;IAE1F,OAAO,IAAI,CAAC;AACb,CAAC;AAED,MAAM,OAAgB,WAAoC,SAAQ,UAAa;IAG9D,SAAS,CAAC,QAAyB,EAAE,IAAY;;YAChE,MAAM,WAAW,GAA2B,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;YAC3E,IAAI,CAAC,WAAW;gBAAE,OAAO;YAEzB,MAAM,OAAO,GAAW,WAAW,CAAC,oBAAoB,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,mBAAmB,EAAE,IAAI,CAAC,CAAC;YAEtI,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,OAAO,IAAI,EAAE;gBACZ,MAAM,MAAM,GAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACxD,IAAI,MAAM,KAAK,IAAI;oBAAE,MAAM;gBAE3B,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;aACtB;YAED,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACxC,CAAC;KAAA;CACD"}
@@ -0,0 +1,2 @@
1
+ import { TQuery } from '../types/tquery';
2
+ export declare const query: TQuery;
@@ -1,42 +1,49 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const cheerio = require("cheerio");
4
- const tscommons_core_1 = require("tscommons-core");
5
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- const nodecommons_url_1 = require("nodecommons-url");
8
- const complex_english_parser_1 = require("../parsers/complex-english.parser");
9
- const elist_1 = require("../enums/elist");
10
- const query = async (args, databaseService, lists, _expiry, parsersConfig) => {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import * as cheerio from 'cheerio';
11
+ import { commonsArrayUnique } from 'tscommons-es-core';
12
+ import { EStatus } from 'hydra-crawler-ts-assets';
13
+ import { commonsOutputDoing, commonsOutputError, commonsOutputProgress, commonsOutputResult } from 'nodecommons-es-cli';
14
+ import { commonsHttpReadUrlAsBuffer } from 'nodecommons-es-http';
15
+ import { ComplexEnglishParser } from '../parsers/complex-english.parser';
16
+ import { EList } from '../enums/elist';
17
+ export const query = (args, databaseService, lists, _expiry, parsersConfig) => __awaiter(void 0, void 0, void 0, function* () {
11
18
  const domain = args.getString('domain');
12
19
  if (!domain) {
13
- nodecommons_cli_1.CommonsOutput.error('No domain specified');
20
+ commonsOutputError('No domain specified');
14
21
  return;
15
22
  }
16
- const parser = new complex_english_parser_1.ComplexEnglishParser(undefined, undefined, parsersConfig);
23
+ const parser = new ComplexEnglishParser(undefined, undefined, parsersConfig);
17
24
  const regexs = parser.getRegExs();
18
25
  const keyDictionary = parser.getDictionary();
19
26
  if (!keyDictionary)
20
27
  throw new Error('No dictionary available');
21
- nodecommons_cli_1.CommonsOutput.doing(`Searching for complex english detections for domain ${domain}`);
28
+ commonsOutputDoing(`Searching for complex english detections for domain ${domain}`);
22
29
  const result = databaseService.getUrls().find({
23
30
  domain: domain,
24
- status: hydra_crawler_ts_assets_1.EStatus.DONE,
31
+ status: EStatus.DONE,
25
32
  statusCode: 200,
26
33
  complexEnglish: { $exists: true }
27
- });
34
+ }, {});
28
35
  const matches = [];
29
36
  let tally = 0;
30
37
  while (true) {
31
38
  tally++;
32
39
  if ((tally % 100) === 0)
33
- nodecommons_cli_1.CommonsOutput.progress(`${tally}`);
34
- const row = await result.next();
40
+ commonsOutputProgress(`${tally}`);
41
+ const row = yield result.next();
35
42
  if (row === null)
36
43
  break;
37
44
  const typecast = row;
38
45
  if (typecast.headers !== undefined && typecast.headers['content-type'] !== undefined) {
39
- if (!parser.supports(typecast.headers['content-type'], lists.match(elist_1.EList.ALLOW, row.url)))
46
+ if (!parser.supports(typecast.headers['content-type'], lists.match(EList.ALLOW, row.url)))
40
47
  continue;
41
48
  }
42
49
  matches.push({
@@ -44,44 +51,35 @@ const query = async (args, databaseService, lists, _expiry, parsersConfig) => {
44
51
  complexEnglish: typecast.complexEnglish
45
52
  });
46
53
  }
47
- nodecommons_cli_1.CommonsOutput.result(tally);
54
+ commonsOutputResult(tally);
48
55
  for (const match of matches) {
49
56
  console.log('----------------------------------------------------');
50
57
  console.log(match.url);
51
- const data = await nodecommons_url_1.CommonsUrl.readUrlAsBuffer(match.url);
58
+ const data = yield commonsHttpReadUrlAsBuffer(match.url);
52
59
  if (!data) {
53
- nodecommons_cli_1.CommonsOutput.error('Unable to read URL. Skipping');
60
+ commonsOutputError('Unable to read URL. Skipping');
54
61
  continue;
55
62
  }
56
- // @ts-ignore
57
63
  const dom = cheerio.load(data);
58
64
  if (!dom) {
59
- nodecommons_cli_1.CommonsOutput.error('Unable to parse HTML. Skipping');
65
+ commonsOutputError('Unable to parse HTML. Skipping');
60
66
  continue;
61
67
  }
62
- // @ts-ignore
63
- // tslint:disable:no-invalid-this
64
68
  const nodes = dom('*')
65
69
  .contents()
66
- .filter(function () {
67
- // @ts-ignore
68
- return this.nodeType === 3;
69
- });
70
- // tslint:enable:no-invalid-this
70
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
71
+ .filter((_index, element) => element.nodeType === 3);
71
72
  const deconstruct = [];
72
- // tslint:disable:no-invalid-this
73
- dom(nodes).each(function () {
74
- // @ts-ignore
75
- deconstruct.push(dom(this).text());
73
+ dom(nodes).each((_index, element) => {
74
+ deconstruct.push(dom(element).text());
76
75
  });
77
- // tslint:enable:no-invalid-this
78
76
  const lines = deconstruct
79
77
  .join('\n')
80
78
  .replace(/[\t\r\n]+/g, '\n')
81
79
  .split('\n')
82
80
  .map((s) => s.trim())
83
81
  .filter((s) => s !== '');
84
- const unique = tscommons_core_1.CommonsArray.unique(lines);
82
+ const unique = commonsArrayUnique(lines);
85
83
  for (const line of unique) {
86
84
  let changed = line;
87
85
  for (const complex of match.complexEnglish) {
@@ -102,5 +100,6 @@ const query = async (args, databaseService, lists, _expiry, parsersConfig) => {
102
100
  }
103
101
  }
104
102
  }
105
- };
106
- exports.default = query;
103
+ });
104
+ // export default query;
105
+ //# sourceMappingURL=complex-english.query.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"complex-english.query.js","sourceRoot":"","sources":["../../src/queries/complex-english.query.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,OAAO,EAAE,kBAAkB,EAAc,MAAM,mBAAmB,CAAC;AAEnE,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAGlD,OAAO,EAAe,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACrI,OAAO,EAAE,0BAA0B,EAAE,MAAM,qBAAqB,CAAC;AAOjE,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AAMzE,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAUvC,MAAM,CAAC,MAAM,KAAK,GAAW,CAC3B,IAAiB,EACjB,eAAgC,EAChC,KAAY,EACZ,OAAe,EACf,aAAwC,EACzB,EAAE;IAClB,MAAM,MAAM,GAAqB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAC1D,IAAI,CAAC,MAAM,EAAE;QACZ,kBAAkB,CAAC,qBAAqB,CAAC,CAAC;QAC1C,OAAO;KACP;IAED,MAAM,MAAM,GAAyB,IAAI,oBAAoB,CAC3D,SAAS,EACT,SAAS,EACT,aAAa,CACd,CAAC;IACF,MAAM,MAAM,GAAwB,MAAM,CAAC,SAAS,EAAE,CAAC;IAEvD,MAAM,aAAa,GAAmC,MAAM,CAAC,aAAa,EAAE,CAAC;IAC7E,IAAI,CAAC,aAAa;QAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;IAE/D,kBAAkB,CAAC,uDAAuD,MAAM,EAAE,CAAC,CAAC;IAEpF,MAAM,MAAM,GAAiB,eAAe,CAAC,OAAO,EAAE,CAAC,IAAI,CACzD;QACE,MAAM,EAAE,MAAM;QACd,MAAM,EAAE,OAAO,CAAC,IAAI;QACpB,UAAU,EAAE,GAAG;QACf,cAAc,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE;KAClC,EACD,EAAE,CACH,CAAC;IAEF,MAAM,OAAO,GAAoB,EAAE,CAAC;IACpC,IAAI,KAAK,GAAW,CAAC,CAAC;IACtB,OAAO,IAAI,EAAE;QACZ,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;YAAE,qBAAqB,CAAC,GAAG,KAAK,EAAE,CAAC,CAAC;QAE3D,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;QAC3C,IAAI,GAAG,KAAK,IAAI;YAAE,MAAM;QAExB,MAAM,QAAQ,GAAkB,GAA+B,CAAC;QAEhE,IAAI,QAAQ,CAAC,OAAO,KAAK,SAAS,IAAI,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,KAAK,SAAS,EAAE;YACrF,IAAI,CAAC,MAAM,CAAC,QAAQ,CAClB,QAAQ,CAAC,OAAO,CAAC,cAAc,CAAC,EAChC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC,CAClC;gBAAE,SAAS;SACZ;QAED,OAAO,CAAC,IAAI,CAAC;YACX,GAAG,EAAE,GAAG,CAAC,GAAG;YACZ,cAAc,EAAE,QAAQ,CAAC,cAAc;SACxC,CAAC,CAAC;KACH;IAED,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAE3B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;QAC5B,OAAO,CAAC,GAAG,CAAC,sDAAsD,CAAC,CAAC;QACpE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAEvB,MAAM,IAAI,GAAqB,MAAM,0BAA0B,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3E,IAAI,CAAC,IAAI,EAAE;YACV,kBAAkB,CAAC,8BAA8B,CAAC,CAAC;YACnD,SAAS;SACT;QAED,MAAM,GAAG,GAA2B,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,IAAI,CAAC,GAAG,EAAE;YACT,kBAAkB,CAAC,gCAAgC,CAAC,CAAC;YACrD,SAAS;SACT;QAED,MAAM,KAAK,GAAoB,GAAG,CAAC,GAAG,CAAC;aACpC,QAAQ,EAAE;YACX,sEAAsE;aACrE,MAAM,CAAC,CAAC,MAAc,EAAE,OAAwB,EAAW,EAAE,CAAE,OAAe,CAAC,QAAQ,KAAK,CAAC,CAAC,CAAC;QAElG,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,MAAc,EAAE,OAAwB,EAAQ,EAAE;YAClE,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;QAEH,MAAM,KAAK,GAAa,WAAW;aAChC,IAAI,CAAC,IAAI,CAAC;aACV,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC;aAC3B,KAAK,CAAC,IAAI,CAAC;aACX,GAAG,CAAC,CAAC,CAAS,EAAU,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpC,MAAM,CAAC,CAAC,CAAS,EAAW,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QAE5C,MAAM,MAAM,GAAa,kBAAkB,CAAC,KAAK,CAAC,CAAC;QAEnD,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE;YAC1B,IAAI,OAAO,GAAW,IAAI,CAAC;YAE3B,KAAK,MAAM,OAAO,IAAI,KAAK,CAAC,cAAc,EAAE;gBAC3C,MAAM,KAAK,GAAqB,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;gBACpD,IAAI,CAAC,KAAK;oBAAE,SAAS;gBAErB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;oBAAE,SAAS;gBAEhC,MAAM,WAAW,GAAa,aAAa,CAAC,OAAO,CAAC,CAAC;gBAErD,MAAM,KAAK,GAAa,CAAE,KAAK,OAAO,EAAE,CAAE,CAAC;gBAC3C,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,KAAK,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAErE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;aAC7D;YAED,IAAI,OAAO,KAAK,IAAI,EAAE;gBACrB,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;gBACrB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;aAChB;SACD;KACD;AACF,CAAC,CAAA,CAAC;AAEF,wBAAwB"}
@@ -0,0 +1,2 @@
1
+ import { TQuery } from '../types/tquery';
2
+ export declare const query: TQuery;
@@ -1,42 +1,50 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
4
- const hydra_crawler_ts_assets_2 = require("hydra-crawler-ts-assets");
5
- const hydra_crawler_ts_assets_3 = require("hydra-crawler-ts-assets");
6
- const nodecommons_cli_1 = require("nodecommons-cli");
7
- const nodecommons_file_1 = require("nodecommons-file");
8
- const query = async (args, databaseService, _lists, _expiry, _parsersConfig) => {
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { ECommonsCsvColumnType } from 'tscommons-es-format';
11
+ import { isIUrl } from 'hydra-crawler-ts-assets';
12
+ import { isTLink } from 'hydra-crawler-ts-assets';
13
+ import { EStatus } from 'hydra-crawler-ts-assets';
14
+ import { commonsOutputDoing, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
15
+ import { CommonsCsv } from 'nodecommons-es-file';
16
+ export const query = (args, databaseService, _lists, _expiry, _parsersConfig) => __awaiter(void 0, void 0, void 0, function* () {
9
17
  const filename = args.getString('filename');
10
- nodecommons_cli_1.CommonsOutput.doing('Searching for FLV and SWF URLs');
18
+ commonsOutputDoing('Searching for FLV and SWF URLs');
11
19
  const results = databaseService.getUrls().find({
12
20
  url: /\.(flv|swf)$/i,
13
21
  status: { $in: [
14
- hydra_crawler_ts_assets_3.EStatus.ACTIVE,
15
- hydra_crawler_ts_assets_3.EStatus.DENY,
16
- hydra_crawler_ts_assets_3.EStatus.DISALLOWED,
17
- hydra_crawler_ts_assets_3.EStatus.DONE,
18
- hydra_crawler_ts_assets_3.EStatus.FAILED,
19
- hydra_crawler_ts_assets_3.EStatus.QUEUED
22
+ EStatus.ACTIVE,
23
+ EStatus.DENY,
24
+ EStatus.DISALLOWED,
25
+ EStatus.DONE,
26
+ EStatus.FAILED,
27
+ EStatus.QUEUED
20
28
  ] }
21
- });
22
- const urls = await databaseService.listQueryResults(results, hydra_crawler_ts_assets_1.isIUrl);
23
- nodecommons_cli_1.CommonsOutput.result(urls.length);
24
- nodecommons_cli_1.CommonsOutput.doing(`Searching for outgoing links to URLs`);
29
+ }, {});
30
+ const urls = yield databaseService.listQueryResults(results, isIUrl);
31
+ commonsOutputResult(urls.length);
32
+ commonsOutputDoing('Searching for outgoing links to URLs');
25
33
  let tally = 0;
26
34
  const urlLinks = new Map();
27
35
  for (const url of urls) {
28
36
  const results3 = databaseService.getLinks().find({
29
37
  outgoing: url.url
30
- });
31
- const links = await databaseService.listQueryResults(results3, hydra_crawler_ts_assets_2.isTLink);
38
+ }, {});
39
+ const links = yield databaseService.listQueryResults(results3, isTLink);
32
40
  if (links.length === 0)
33
41
  continue;
34
42
  tally += links.length;
35
- nodecommons_cli_1.CommonsOutput.progress(tally);
43
+ commonsOutputProgress(tally);
36
44
  urlLinks.set(url, links);
37
45
  }
38
- nodecommons_cli_1.CommonsOutput.result(tally);
39
- nodecommons_cli_1.CommonsOutput.doing(`Building CSV array`);
46
+ commonsOutputResult(tally);
47
+ commonsOutputDoing('Building CSV array');
40
48
  const rows = [];
41
49
  for (const url of urls) {
42
50
  for (const link of (urlLinks.get(url) || [])) {
@@ -58,17 +66,18 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
58
66
  return 1;
59
67
  return 0;
60
68
  });
61
- nodecommons_cli_1.CommonsOutput.success();
62
- const csv = new nodecommons_file_1.CommonsCsv([
69
+ commonsOutputSuccess();
70
+ const csv = new CommonsCsv([
63
71
  {
64
72
  name: 'src',
65
- type: nodecommons_file_1.ECsvColumnType.STRING
73
+ type: ECommonsCsvColumnType.STRING
66
74
  },
67
75
  {
68
76
  name: 'dest',
69
- type: nodecommons_file_1.ECsvColumnType.STRING
77
+ type: ECommonsCsvColumnType.STRING
70
78
  }
71
79
  ]);
72
80
  csv.save(rows, filename, true);
73
- };
74
- exports.default = query;
81
+ });
82
+ // export default query;
83
+ //# sourceMappingURL=flash-content.query.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"flash-content.query.js","sourceRoot":"","sources":["../../src/queries/flash-content.query.ts"],"names":[],"mappings":";;;;;;;;;AAGA,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAE5D,OAAO,EAAQ,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAS,OAAO,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAe,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AACvI,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAgBjD,MAAM,CAAC,MAAM,KAAK,GAAW,CAC3B,IAAiB,EACjB,eAAgC,EAChC,MAAa,EACb,OAAe,EACf,cAAyC,EAC1B,EAAE;IAClB,MAAM,QAAQ,GAAW,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAEpD,kBAAkB,CAAC,gCAAgC,CAAC,CAAC;IACrD,MAAM,OAAO,GAAiB,eAAe,CAAC,OAAO,EAAE,CAAC,IAAI,CAC1D;QACE,GAAG,EAAE,eAAe;QACpB,MAAM,EAAE,EAAE,GAAG,EAAE;gBACb,OAAO,CAAC,MAAM;gBACd,OAAO,CAAC,IAAI;gBACZ,OAAO,CAAC,UAAU;gBAClB,OAAO,CAAC,IAAI;gBACZ,OAAO,CAAC,MAAM;gBACd,OAAO,CAAC,MAAM;aACf,EAAE;KACJ,EACD,EAAE,CACH,CAAC;IACF,MAAM,IAAI,GAAW,MAAM,eAAe,CAAC,gBAAgB,CACzD,OAAO,EACP,MAAM,CACP,CAAC;IACF,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEjC,kBAAkB,CAAC,sCAAsC,CAAC,CAAC;IAC3D,IAAI,KAAK,GAAW,CAAC,CAAC;IACtB,MAAM,QAAQ,GAAuB,IAAI,GAAG,EAAiB,CAAC;IAC9D,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;QACvB,MAAM,QAAQ,GAAkB,eAAe,CAAC,QAAQ,EAAE,CAAC,IAAI,CAC7D;YACE,QAAQ,EAAE,GAAG,CAAC,GAAG;SAClB,EACD,EAAE,CACH,CAAC;QACF,MAAM,KAAK,GAAY,MAAM,eAAe,CAAC,gBAAgB,CAC3D,QAAQ,EACR,OAAO,CACR,CAAC;QAEF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEjC,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC;QACtB,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAE7B,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;KACzB;IACD,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAE3B,kBAAkB,CAAC,oBAAoB,CAAC,CAAC;IAEzC,MAAM,IAAI,GAAW,EAAE,CAAC;IACxB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;QACvB,KAAK,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,EAAE;YAC7C,IAAI,CAAC,IAAI,CAAC;gBACR,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,IAAI,EAAE,GAAG,CAAC,GAAG;aACd,CAAC,CAAC;SACH;KACD;IACD,IAAI;SACD,IAAI,CAAC,CAAC,CAAO,EAAE,CAAO,EAAU,EAAE;QAClC,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG;YAAE,OAAO,CAAC,CAAC,CAAC;QAC7B,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG;YAAE,OAAO,CAAC,CAAC;QAC5B,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC,CAAC;QAC/B,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QAC9B,OAAO,CAAC,CAAC;IACV,CAAC,CAAC,CAAC;IAEL,oBAAoB,EAAE,CAAC;IAEvB,MAAM,GAAG,GAAe,IAAI,UAAU,CAAC;QACrC;YACE,IAAI,EAAE,KAAK;YACX,IAAI,EAAE,qBAAqB,CAAC,MAAM;SACnC;QACD;YACE,IAAI,EAAE,MAAM;YACZ,IAAI,EAAE,qBAAqB,CAAC,MAAM;SACnC;KACF,CAAC,CAAC;IACH,GAAG,CAAC,IAAI,CACN,IAAI,EACJ,QAAQ,EACR,IAAI,CACL,CAAC;AACH,CAAC,CAAA,CAAC;AAEF,wBAAwB"}
@@ -0,0 +1,2 @@
1
+ import { TQuery } from '../types/tquery';
2
+ export declare const query: TQuery;
@@ -1,10 +1,17 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const hydra_crawler_ts_assets_1 = require("hydra-crawler-ts-assets");
4
- const hydra_crawler_ts_assets_2 = require("hydra-crawler-ts-assets");
5
- const hydra_crawler_ts_assets_3 = require("hydra-crawler-ts-assets");
6
- const hydra_crawler_ts_assets_4 = require("hydra-crawler-ts-assets");
7
- const nodecommons_cli_1 = require("nodecommons-cli");
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { isIUrl } from 'hydra-crawler-ts-assets';
11
+ import { isTLink } from 'hydra-crawler-ts-assets';
12
+ import { isTDomain } from 'hydra-crawler-ts-assets';
13
+ import { EStatus } from 'hydra-crawler-ts-assets';
14
+ import { commonsOutputDoing, commonsOutputError, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
8
15
  const domainSort = (a, b) => {
9
16
  if (a.startsWith('http://'))
10
17
  a = a.substr(7);
@@ -24,31 +31,31 @@ const domainSort = (a, b) => {
24
31
  return 1;
25
32
  return 0;
26
33
  };
27
- const query = async (args, databaseService, _lists, _expiry, _parsersConfig) => {
34
+ export const query = (args, databaseService, _lists, _expiry, _parsersConfig) => __awaiter(void 0, void 0, void 0, function* () {
28
35
  const regexStr = args.getString('regex');
29
36
  if (!regexStr) {
30
- nodecommons_cli_1.CommonsOutput.error('No domain regex specified');
37
+ commonsOutputError('No domain regex specified');
31
38
  return;
32
39
  }
33
40
  const regex = new RegExp(regexStr);
34
- nodecommons_cli_1.CommonsOutput.doing('Searching for matching domains');
41
+ commonsOutputDoing('Searching for matching domains');
35
42
  const results = databaseService.getDomains().find({
36
43
  domain: regex
37
- });
38
- const domains = await databaseService.listQueryResults(results, hydra_crawler_ts_assets_3.isTDomain);
39
- nodecommons_cli_1.CommonsOutput.result(domains.length);
44
+ }, {});
45
+ const domains = yield databaseService.listQueryResults(results, isTDomain);
46
+ commonsOutputResult(domains.length);
40
47
  const domainUrls = new Map();
41
48
  for (const domain of domains) {
42
- nodecommons_cli_1.CommonsOutput.doing(`Searching for URLs for ${domain.domain}`);
49
+ commonsOutputDoing(`Searching for URLs for ${domain.domain}`);
43
50
  const results2 = databaseService.getUrls().find({
44
51
  domain: domain.domain,
45
- status: { $ne: hydra_crawler_ts_assets_4.EStatus.ARCHIVED }
46
- });
47
- const urls = await databaseService.listQueryResults(results2, hydra_crawler_ts_assets_1.isIUrl);
48
- nodecommons_cli_1.CommonsOutput.result(urls.length);
52
+ status: { $ne: EStatus.ARCHIVED }
53
+ }, {});
54
+ const urls = yield databaseService.listQueryResults(results2, isIUrl);
55
+ commonsOutputResult(urls.length);
49
56
  domainUrls.set(domain, urls);
50
57
  }
51
- nodecommons_cli_1.CommonsOutput.doing(`Searching for outgoing links to URLs`);
58
+ commonsOutputDoing('Searching for outgoing links to URLs');
52
59
  let tally = 0;
53
60
  const domainUrlLinks = new Map();
54
61
  for (const domain of domains) {
@@ -59,20 +66,20 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
59
66
  for (const url of urls) {
60
67
  const results3 = databaseService.getLinks().find({
61
68
  outgoing: url.url
62
- });
63
- const links = await databaseService.listQueryResults(results3, hydra_crawler_ts_assets_2.isTLink);
69
+ }, {});
70
+ const links = yield databaseService.listQueryResults(results3, isTLink);
64
71
  if (links.length === 0)
65
72
  continue;
66
73
  tally += links.length;
67
- nodecommons_cli_1.CommonsOutput.progress(tally);
74
+ commonsOutputProgress(tally);
68
75
  map.set(url, links);
69
76
  }
70
77
  if (map.size === 0)
71
78
  continue;
72
79
  domainUrlLinks.set(domain, map);
73
80
  }
74
- nodecommons_cli_1.CommonsOutput.result(tally);
75
- nodecommons_cli_1.CommonsOutput.doing(`Constructing JSON object`);
81
+ commonsOutputResult(tally);
82
+ commonsOutputDoing('Constructing JSON object');
76
83
  const outcome = {};
77
84
  for (const domain of domainUrlLinks.keys()) {
78
85
  outcome[domain.domain] = {};
@@ -81,7 +88,7 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
81
88
  .map((link) => link.url);
82
89
  }
83
90
  }
84
- nodecommons_cli_1.CommonsOutput.success();
91
+ commonsOutputSuccess();
85
92
  if (args.hasAttribute('domains-only')) {
86
93
  Object.keys(outcome)
87
94
  .sort(domainSort)
@@ -119,5 +126,6 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
119
126
  else if (args.hasAttribute('json-dump')) {
120
127
  console.log(JSON.stringify(outcome));
121
128
  }
122
- };
123
- exports.default = query;
129
+ });
130
+ // export default query;
131
+ //# sourceMappingURL=linking-to-domains.query.js.map