hydra-crawler 1.4.6 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/apis/autocomplete.api.d.ts +7 -0
- package/dist/apis/autocomplete.api.js +15 -9
- package/dist/apis/autocomplete.api.js.map +1 -0
- package/dist/apis/bugs.api.d.ts +7 -0
- package/dist/apis/bugs.api.js +21 -15
- package/dist/apis/bugs.api.js.map +1 -0
- package/dist/apis/crawl.api.d.ts +7 -0
- package/dist/apis/crawl.api.js +15 -9
- package/dist/apis/crawl.api.js.map +1 -0
- package/dist/apis/domains.api.d.ts +7 -0
- package/dist/apis/domains.api.js +24 -19
- package/dist/apis/domains.api.js.map +1 -0
- package/dist/apis/images.api.d.ts +7 -0
- package/dist/apis/images.api.js +20 -14
- package/dist/apis/images.api.js.map +1 -0
- package/dist/apis/statistics.api.d.ts +8 -0
- package/dist/apis/statistics.api.js +27 -20
- package/dist/apis/statistics.api.js.map +1 -0
- package/dist/apis/test.api.d.ts +5 -0
- package/dist/apis/test.api.js +15 -9
- package/dist/apis/test.api.js.map +1 -0
- package/dist/apis/urls.api.d.ts +7 -0
- package/dist/apis/urls.api.js +21 -15
- package/dist/apis/urls.api.js.map +1 -0
- package/dist/apps/cleanup.app.d.ts +19 -0
- package/dist/apps/cleanup.app.js +118 -100
- package/dist/apps/cleanup.app.js.map +1 -0
- package/dist/apps/cross-populate-export.app.d.ts +12 -0
- package/dist/apps/cross-populate-export.app.js +60 -47
- package/dist/apps/cross-populate-export.app.js.map +1 -0
- package/dist/apps/cross-populate-import.app.d.ts +12 -0
- package/dist/apps/cross-populate-import.app.js +64 -51
- package/dist/apps/cross-populate-import.app.js.map +1 -0
- package/dist/apps/denylist.app.d.ts +17 -0
- package/dist/apps/denylist.app.js +115 -98
- package/dist/apps/denylist.app.js.map +1 -0
- package/dist/apps/expire.app.d.ts +19 -0
- package/dist/apps/expire.app.js +44 -31
- package/dist/apps/expire.app.js.map +1 -0
- package/dist/apps/extract-text.app.d.ts +8 -0
- package/dist/apps/extract-text.app.js +43 -35
- package/dist/apps/extract-text.app.js.map +1 -0
- package/dist/apps/hydra.app.d.ts +34 -0
- package/dist/apps/hydra.app.js +150 -137
- package/dist/apps/hydra.app.js.map +1 -0
- package/dist/apps/import.app.d.ts +11 -0
- package/dist/apps/import.app.js +44 -32
- package/dist/apps/import.app.js.map +1 -0
- package/dist/apps/internal-hydra-common.app.d.ts +28 -0
- package/dist/apps/internal-hydra-common.app.js +5 -11
- package/dist/apps/internal-hydra-common.app.js.map +1 -0
- package/dist/apps/query.app.d.ts +20 -0
- package/dist/apps/query.app.js +63 -49
- package/dist/apps/query.app.js.map +1 -0
- package/dist/apps/reattempt.app.d.ts +17 -0
- package/dist/apps/reattempt.app.js +66 -53
- package/dist/apps/reattempt.app.js.map +1 -0
- package/dist/apps/requeue-domain.app.d.ts +13 -0
- package/dist/apps/requeue-domain.app.js +50 -37
- package/dist/apps/requeue-domain.app.js.map +1 -0
- package/dist/apps/seed.app.d.ts +15 -0
- package/dist/apps/seed.app.js +53 -40
- package/dist/apps/seed.app.js.map +1 -0
- package/dist/apps/startup.app.d.ts +11 -0
- package/dist/apps/startup.app.js +51 -38
- package/dist/apps/startup.app.js.map +1 -0
- package/dist/apps/unarchive.app.d.ts +15 -0
- package/dist/apps/unarchive.app.js +67 -54
- package/dist/apps/unarchive.app.js.map +1 -0
- package/dist/classes/cleaner.d.ts +12 -0
- package/dist/classes/cleaner.js +227 -207
- package/dist/classes/cleaner.js.map +1 -0
- package/dist/classes/crawler.d.ts +34 -0
- package/dist/classes/crawler.js +248 -241
- package/dist/classes/crawler.js.map +1 -0
- package/dist/classes/dns.d.ts +3 -0
- package/dist/classes/dns.js +10 -13
- package/dist/classes/dns.js.map +1 -0
- package/dist/classes/expirer.d.ts +10 -0
- package/dist/classes/expirer.js +107 -94
- package/dist/classes/expirer.js.map +1 -0
- package/dist/classes/expiry.d.ts +8 -0
- package/dist/classes/expiry.js +16 -19
- package/dist/classes/expiry.js.map +1 -0
- package/dist/classes/lists.d.ts +9 -0
- package/dist/classes/lists.js +13 -18
- package/dist/classes/lists.js.map +1 -0
- package/dist/classes/robot.d.ts +15 -0
- package/dist/classes/robot.js +40 -30
- package/dist/classes/robot.js.map +1 -0
- package/dist/classes/tracker.d.ts +25 -0
- package/dist/classes/tracker.js +82 -64
- package/dist/classes/tracker.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +72 -65
- package/dist/cli.js.map +1 -0
- package/dist/enums/eavailable-strategy.d.ts +4 -0
- package/dist/enums/eavailable-strategy.js +3 -5
- package/dist/enums/eavailable-strategy.js.map +1 -0
- package/dist/enums/elist.d.ts +7 -0
- package/dist/enums/elist.js +7 -11
- package/dist/enums/elist.js.map +1 -0
- package/dist/enums/eserver.d.ts +8 -0
- package/dist/enums/eserver.js +3 -5
- package/dist/enums/eserver.js.map +1 -0
- package/dist/enums/ex-powered-by.d.ts +6 -0
- package/dist/enums/ex-powered-by.js +3 -5
- package/dist/enums/ex-powered-by.js.map +1 -0
- package/dist/helpers/matcher.d.ts +5 -0
- package/dist/helpers/matcher.js +2 -5
- package/dist/helpers/matcher.js.map +1 -0
- package/dist/helpers/random.d.ts +4 -0
- package/dist/helpers/random.js +2 -5
- package/dist/helpers/random.js.map +1 -0
- package/dist/helpers/utf-decoder.d.ts +4 -0
- package/dist/helpers/utf-decoder.js +3 -6
- package/dist/helpers/utf-decoder.js.map +1 -0
- package/dist/interfaces/iexpiry.d.ts +7 -0
- package/dist/interfaces/iexpiry.js +9 -13
- package/dist/interfaces/iexpiry.js.map +1 -0
- package/dist/interfaces/imatch.d.ts +6 -0
- package/dist/interfaces/imatch.js +6 -9
- package/dist/interfaces/imatch.js.map +1 -0
- package/dist/interfaces/iparser-config.d.ts +4 -0
- package/dist/interfaces/iparser-config.js +4 -7
- package/dist/interfaces/iparser-config.js.map +1 -0
- package/dist/interfaces/iparser.d.ts +8 -0
- package/dist/interfaces/iparser.js +2 -2
- package/dist/interfaces/iparser.js.map +1 -0
- package/dist/interfaces/irequest-outcome.d.ts +11 -0
- package/dist/interfaces/irequest-outcome.js +2 -2
- package/dist/interfaces/irequest-outcome.js.map +1 -0
- package/dist/interfaces/iserver.d.ts +4 -0
- package/dist/interfaces/iserver.js +2 -2
- package/dist/interfaces/iserver.js.map +1 -0
- package/dist/parsers/accessibility-metrics.parser.d.ts +11 -0
- package/dist/parsers/accessibility-metrics.parser.js +34 -26
- package/dist/parsers/accessibility-metrics.parser.js.map +1 -0
- package/dist/parsers/asp-error.parser.d.ts +12 -0
- package/dist/parsers/asp-error.parser.js +36 -28
- package/dist/parsers/asp-error.parser.js.map +1 -0
- package/dist/parsers/bad-words.parser.d.ts +10 -0
- package/dist/parsers/bad-words.parser.js +21 -13
- package/dist/parsers/bad-words.parser.js.map +1 -0
- package/dist/parsers/complex-english.parser.d.ts +15 -0
- package/dist/parsers/complex-english.parser.js +33 -25
- package/dist/parsers/complex-english.parser.js.map +1 -0
- package/dist/parsers/data.parser.d.ts +14 -0
- package/dist/parsers/data.parser.js +12 -16
- package/dist/parsers/data.parser.js.map +1 -0
- package/dist/parsers/dictionary.parser.d.ts +19 -0
- package/dist/parsers/dictionary.parser.js +47 -39
- package/dist/parsers/dictionary.parser.js.map +1 -0
- package/dist/parsers/html.parser.d.ts +13 -0
- package/dist/parsers/html.parser.js +4 -8
- package/dist/parsers/html.parser.js.map +1 -0
- package/dist/parsers/hyperlinks.parser.d.ts +20 -0
- package/dist/parsers/hyperlinks.parser.js +82 -77
- package/dist/parsers/hyperlinks.parser.js.map +1 -0
- package/dist/parsers/image-tags.parser.d.ts +19 -0
- package/dist/parsers/image-tags.parser.js +31 -35
- package/dist/parsers/image-tags.parser.js.map +1 -0
- package/dist/parsers/jpeg.parser.d.ts +11 -0
- package/dist/parsers/jpeg.parser.js +28 -20
- package/dist/parsers/jpeg.parser.js.map +1 -0
- package/dist/parsers/paragraphs.parser.d.ts +13 -0
- package/dist/parsers/paragraphs.parser.js +33 -40
- package/dist/parsers/paragraphs.parser.js.map +1 -0
- package/dist/parsers/parser.d.ts +19 -0
- package/dist/parsers/parser.js +30 -17
- package/dist/parsers/parser.js.map +1 -0
- package/dist/parsers/php-error.parser.d.ts +12 -0
- package/dist/parsers/php-error.parser.js +42 -34
- package/dist/parsers/php-error.parser.js.map +1 -0
- package/dist/parsers/phrase.parser.d.ts +8 -0
- package/dist/parsers/phrase.parser.js +16 -11
- package/dist/parsers/phrase.parser.js.map +1 -0
- package/dist/parsers/regex.parser.d.ts +10 -0
- package/dist/parsers/regex.parser.js +30 -22
- package/dist/parsers/regex.parser.js.map +1 -0
- package/dist/parsers/server.parser.d.ts +11 -0
- package/dist/parsers/server.parser.js +58 -57
- package/dist/parsers/server.parser.js.map +1 -0
- package/dist/parsers/spelling.parser.d.ts +10 -0
- package/dist/parsers/spelling.parser.js +21 -13
- package/dist/parsers/spelling.parser.js.map +1 -0
- package/dist/parsers/string.parser.d.ts +8 -0
- package/dist/parsers/string.parser.js +5 -8
- package/dist/parsers/string.parser.js.map +1 -0
- package/dist/parsers/text.parser.d.ts +8 -0
- package/dist/parsers/text.parser.js +24 -18
- package/dist/parsers/text.parser.js.map +1 -0
- package/dist/parsers/words.parser.d.ts +11 -0
- package/dist/parsers/words.parser.js +32 -28
- package/dist/parsers/words.parser.js.map +1 -0
- package/dist/queries/complex-english.query.d.ts +2 -0
- package/dist/queries/complex-english.query.js +37 -38
- package/dist/queries/complex-english.query.js.map +1 -0
- package/dist/queries/flash-content.query.d.ts +2 -0
- package/dist/queries/flash-content.query.js +39 -30
- package/dist/queries/flash-content.query.js.map +1 -0
- package/dist/queries/linking-to-domains.query.d.ts +2 -0
- package/dist/queries/linking-to-domains.query.js +35 -27
- package/dist/queries/linking-to-domains.query.js.map +1 -0
- package/dist/queries/readability-score.query.d.ts +2 -0
- package/dist/queries/readability-score.query.js +21 -13
- package/dist/queries/readability-score.query.js.map +1 -0
- package/dist/servers/crawl.server.d.ts +35 -0
- package/dist/servers/crawl.server.js +133 -121
- package/dist/servers/crawl.server.js.map +1 -0
- package/dist/servers/express.server.d.ts +8 -0
- package/dist/servers/express.server.js +7 -10
- package/dist/servers/express.server.js.map +1 -0
- package/dist/servers/maintenance.server.d.ts +22 -0
- package/dist/servers/maintenance.server.js +42 -36
- package/dist/servers/maintenance.server.js.map +1 -0
- package/dist/servers/rest.server.d.ts +7 -0
- package/dist/servers/rest.server.js +40 -51
- package/dist/servers/rest.server.js.map +1 -0
- package/dist/servers/socket-io.server.d.ts +12 -0
- package/dist/servers/socket-io.server.js +48 -15
- package/dist/servers/socket-io.server.js.map +1 -0
- package/dist/services/database.service.d.ts +68 -0
- package/dist/services/database.service.js +527 -462
- package/dist/services/database.service.js.map +1 -0
- package/dist/types/tcrawl-config.d.ts +14 -0
- package/dist/types/tcrawl-config.js +14 -17
- package/dist/types/tcrawl-config.js.map +1 -0
- package/dist/types/thydra-config.d.ts +4 -0
- package/dist/types/thydra-config.js +4 -7
- package/dist/types/thydra-config.js.map +1 -0
- package/dist/types/tparser-ctor.d.ts +7 -0
- package/dist/types/tparser-ctor.js +2 -2
- package/dist/types/tparser-ctor.js.map +1 -0
- package/dist/types/tquery.d.ts +7 -0
- package/dist/types/tquery.js +2 -2
- package/dist/types/tquery.js.map +1 -0
- package/dist/types/trobots-config.d.ts +4 -0
- package/dist/types/trobots-config.js +4 -7
- package/dist/types/trobots-config.js.map +1 -0
- package/package.json +41 -29
- package/angular/10-es2015.bacd4ae5dd7913ce55f0.js +0 -1
- package/angular/10-es5.bacd4ae5dd7913ce55f0.js +0 -1
- package/angular/11-es2015.0f031dcf752d1e8eda6b.js +0 -1
- package/angular/11-es5.0f031dcf752d1e8eda6b.js +0 -1
- package/angular/3rdpartylicenses.txt +0 -1127
- package/angular/5-es2015.951498ca9c1bc74e57bf.js +0 -1
- package/angular/5-es5.951498ca9c1bc74e57bf.js +0 -1
- package/angular/6-es2015.65f680261a3506b88381.js +0 -1
- package/angular/6-es5.65f680261a3506b88381.js +0 -1
- package/angular/7-es2015.625197f3af1dbf3e805d.js +0 -1
- package/angular/7-es5.625197f3af1dbf3e805d.js +0 -1
- package/angular/8-es2015.55518901987a5b834309.js +0 -1
- package/angular/8-es5.55518901987a5b834309.js +0 -1
- package/angular/9-es2015.6cc9bde262564e7836f2.js +0 -1
- package/angular/9-es5.6cc9bde262564e7836f2.js +0 -1
- package/angular/Roboto-Black.41ed1105a6ebb8ffe34e.woff2 +0 -0
- package/angular/Roboto-Black.937491dfcbe64ca9a9f1.woff +0 -0
- package/angular/Roboto-BlackItalic.2e1ee657996854c6f427.woff +0 -0
- package/angular/Roboto-BlackItalic.50ca4c51ebc27e7e7d2f.woff2 +0 -0
- package/angular/Roboto-Bold.73288d91c325e82a5b92.woff +0 -0
- package/angular/Roboto-Bold.92fbd4e93cf0a5dbebaa.woff2 +0 -0
- package/angular/Roboto-BoldItalic.5f600d98a73d800ae575.woff2 +0 -0
- package/angular/Roboto-BoldItalic.6d89acbd21d7e3fbecb2.woff +0 -0
- package/angular/Roboto-Light.c27d89ac77468ae18f28.woff2 +0 -0
- package/angular/Roboto-Light.d923dfafc0c5183b59aa.woff +0 -0
- package/angular/Roboto-LightItalic.506274c7228cf81cae4d.woff2 +0 -0
- package/angular/Roboto-LightItalic.d4b8c137518d9d92bb28.woff +0 -0
- package/angular/Roboto-Medium.092c6130df8fd2199888.woff +0 -0
- package/angular/Roboto-Medium.1d3bced88509b0838984.woff2 +0 -0
- package/angular/Roboto-MediumItalic.18ff1628c628080166c1.woff +0 -0
- package/angular/Roboto-MediumItalic.d620b8f53f75966fe42e.woff2 +0 -0
- package/angular/Roboto-Regular.64cfb66c866ea50cad47.woff2 +0 -0
- package/angular/Roboto-Regular.e02e9d6ff5547f7e9962.woff +0 -0
- package/angular/Roboto-RegularItalic.4dd2af1e8df532f41db8.woff2 +0 -0
- package/angular/Roboto-RegularItalic.5ea38fff9eebef99c5df.woff +0 -0
- package/angular/Roboto-Thin.dbd56bd3357dc3617fe5.woff2 +0 -0
- package/angular/Roboto-Thin.e7f7c82374bd0ebef14b.woff +0 -0
- package/angular/Roboto-ThinItalic.5dd9349c940073834e9a.woff +0 -0
- package/angular/Roboto-ThinItalic.a8cef84f735ef887abdc.woff2 +0 -0
- package/angular/assets/config/app-config.json +0 -16
- package/angular/assets/images/splashbg.jpg +0 -0
- package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff +0 -0
- package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff2 +0 -0
- package/angular/assets/web-app-commons/fonts/material-icons/material-design-icons-community.css +0 -11293
- package/angular/favicon.ico +0 -0
- package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNa.f2a0933406f783065152.woff +0 -0
- package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.6467d9a24f234e8e8e07.woff2 +0 -0
- package/angular/index.html +0 -16
- package/angular/main-es2015.3a582572476c7f292e52.js +0 -1
- package/angular/main-es5.3a582572476c7f292e52.js +0 -1
- package/angular/polyfills-es2015.7df68534018bc2f6cb09.js +0 -1
- package/angular/polyfills-es5.e79468f406fae2989221.js +0 -1
- package/angular/runtime-es2015.6d2cff76cdb2790d3308.js +0 -1
- package/angular/runtime-es5.6d2cff76cdb2790d3308.js +0 -1
- package/angular/styles.c5c6c2534225b85c4ff0.css +0 -1
- package/config/bad-words.json +0 -1
- package/config/complex-english.json +0 -400
- package/config/hydra-auth.json +0 -8
- package/config/hydra-crawler.json +0 -84
- package/config/list-allow.json +0 -171
- package/config/list-deny.json +0 -248
- package/config/list-expiry.json +0 -7
- package/config/schedule.json +0 -25
- package/config/spelling.json +0 -1
|
@@ -1,56 +1,49 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { commonsStringSplitWords, commonsTypeHasPropertyNumberOrUndefined } from 'tscommons-es-core';
|
|
11
|
+
import { HtmlParser } from './html.parser';
|
|
12
|
+
import { isIDataConfig } from './data.parser';
|
|
13
|
+
export function isIParagraphsConfig(test) {
|
|
14
|
+
if (!isIDataConfig(test))
|
|
10
15
|
return false;
|
|
11
|
-
if (!
|
|
16
|
+
if (!commonsTypeHasPropertyNumberOrUndefined(test, 'paragraphWordsThreshold'))
|
|
12
17
|
return false;
|
|
13
18
|
return true;
|
|
14
19
|
}
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
static parseParagraphs(
|
|
18
|
-
// @ts-ignore
|
|
19
|
-
dom, paragraphsConfig) {
|
|
20
|
-
// tslint:disable:no-invalid-this
|
|
21
|
-
// @ts-ignore
|
|
20
|
+
export class ParagraphsParser extends HtmlParser {
|
|
21
|
+
static parseParagraphs(dom, paragraphsConfig) {
|
|
22
22
|
const nodes = dom('p,dd')
|
|
23
23
|
.contents()
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
return this.nodeType === 3;
|
|
27
|
-
});
|
|
28
|
-
// tslint:enable:no-invalid-this
|
|
24
|
+
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
|
|
25
|
+
.filter((_index, element) => element.nodeType === 3);
|
|
29
26
|
const paragraphs = [];
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
const paragraph = dom(this).text();
|
|
34
|
-
if (paragraphsConfig.paragraphWordsThreshold && tscommons_core_2.CommonsString.splitWords(paragraph).length >= paragraphsConfig.paragraphWordsThreshold) {
|
|
27
|
+
dom(nodes).each((_index, element) => {
|
|
28
|
+
const paragraph = dom(element).text();
|
|
29
|
+
if (paragraphsConfig.paragraphWordsThreshold && commonsStringSplitWords(paragraph).length >= paragraphsConfig.paragraphWordsThreshold) {
|
|
35
30
|
paragraphs.push(paragraph);
|
|
36
31
|
}
|
|
37
32
|
});
|
|
38
|
-
// tslint:enable:no-invalid-this
|
|
39
33
|
return paragraphs
|
|
40
34
|
.map((p) => p.trim())
|
|
41
35
|
.filter((p) => p !== '');
|
|
42
36
|
}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
await this.parseParagraphs(database, paragraphs);
|
|
37
|
+
parse(database) {
|
|
38
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
39
|
+
if (!this.dom)
|
|
40
|
+
return; // invalid parse
|
|
41
|
+
const paragraphsConfig = this.getConfig(isIParagraphsConfig);
|
|
42
|
+
if (!paragraphsConfig)
|
|
43
|
+
return;
|
|
44
|
+
const paragraphs = ParagraphsParser.parseParagraphs(this.dom, paragraphsConfig);
|
|
45
|
+
yield this.parseParagraphs(database, paragraphs);
|
|
46
|
+
});
|
|
54
47
|
}
|
|
55
48
|
}
|
|
56
|
-
|
|
49
|
+
//# sourceMappingURL=paragraphs.parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"paragraphs.parser.js","sourceRoot":"","sources":["../../src/parsers/paragraphs.parser.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,uBAAuB,EAAE,uCAAuC,EAAE,MAAM,mBAAmB,CAAC;AAIrG,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAC3C,OAAO,EAAe,aAAa,EAAE,MAAM,eAAe,CAAC;AAK3D,MAAM,UAAU,mBAAmB,CAAC,IAAa;IAChD,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAEvC,IAAI,CAAC,uCAAuC,CAAC,IAAI,EAAE,yBAAyB,CAAC;QAAE,OAAO,KAAK,CAAC;IAE5F,OAAO,IAAI,CAAC;AACb,CAAC;AAED,MAAM,OAAgB,gBAA8C,SAAQ,UAAa;IACjF,MAAM,CAAC,eAAe,CAC3B,GAAiB,EACjB,gBAAmC;QAEpC,MAAM,KAAK,GAAoB,GAAG,CAAC,MAAM,CAAC;aACvC,QAAQ,EAAE;YACX,sEAAsE;aACrE,MAAM,CAAC,CAAC,MAAc,EAAE,OAAwB,EAAW,EAAE,CAAE,OAAe,CAAC,QAAQ,KAAK,CAAC,CAAC,CAAC;QAElG,MAAM,UAAU,GAAa,EAAE,CAAC;QAEhC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,CACb,CAAC,MAAc,EAAE,OAAwB,EAAQ,EAAE;YAClD,MAAM,SAAS,GAAW,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;YAE9C,IAAI,gBAAgB,CAAC,uBAAuB,IAAI,uBAAuB,CAAC,SAAS,CAAC,CAAC,MAAM,IAAI,gBAAgB,CAAC,uBAAuB,EAAE;gBACtI,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;aAC3B;QACF,CAAC,CACF,CAAC;QAEF,OAAO,UAAU;aACd,GAAG,CAAC,CAAC,CAAS,EAAU,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpC,MAAM,CAAC,CAAC,CAAS,EAAW,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;IAC7C,CAAC;IAIY,KAAK,CAAC,QAAyB;;YAC3C,IAAI,CAAC,IAAI,CAAC,GAAG;gBAAE,OAAO,CAAC,gBAAgB;YAEvC,MAAM,gBAAgB,GAAgC,IAAI,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;YAC1F,IAAI,CAAC,gBAAgB;gBAAE,OAAO;YAE9B,MAAM,UAAU,GAAa,gBAAgB,CAAC,eAAe,CAC3D,IAAI,CAAC,GAAG,EACR,gBAAgB,CACjB,CAAC;YAEF,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAClD,CAAC;KAAA;CACD"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { TKeyObject } from 'tscommons-es-core';
|
|
2
|
+
import { DatabaseService } from '../services/database.service';
|
|
3
|
+
import { IParser } from '../interfaces/iparser';
|
|
4
|
+
import { IRequestOutcome } from '../interfaces/irequest-outcome';
|
|
5
|
+
import { IParserConfig } from '../interfaces/iparser-config';
|
|
6
|
+
export declare abstract class Parser<T extends IParserConfig = IParserConfig> implements IParser {
|
|
7
|
+
protected outcome?: IRequestOutcome | undefined;
|
|
8
|
+
private configKey?;
|
|
9
|
+
private enabled;
|
|
10
|
+
private parserConfig;
|
|
11
|
+
constructor(outcome?: IRequestOutcome | undefined, config?: TKeyObject<IParserConfig>, configKey?: string | undefined);
|
|
12
|
+
protected getConfig<P>(// this allows subclasses to get their config without having to have all the T attributes of their own children
|
|
13
|
+
checker: (test: unknown) => test is P): P | undefined;
|
|
14
|
+
isEnabled(): boolean;
|
|
15
|
+
abstract supports(contentType: string, isAllow: boolean): boolean;
|
|
16
|
+
init(_database: DatabaseService): Promise<void>;
|
|
17
|
+
parse(_database: DatabaseService): Promise<void>;
|
|
18
|
+
links(): Promise<string[]>;
|
|
19
|
+
}
|
package/dist/parsers/parser.js
CHANGED
|
@@ -1,10 +1,16 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { commonsTypeHasPropertyT } from 'tscommons-es-core';
|
|
11
|
+
import { commonsOutputError } from 'nodecommons-es-cli';
|
|
12
|
+
import { isIParserConfig } from '../interfaces/iparser-config';
|
|
13
|
+
export class Parser {
|
|
8
14
|
// these all have to be optional because of the Ctor approach used elsewhere
|
|
9
15
|
// similarly, we don't check types of the config (T) other than the enabled in TParserConfig
|
|
10
16
|
constructor(outcome, config, configKey) {
|
|
@@ -13,8 +19,8 @@ class Parser {
|
|
|
13
19
|
this.enabled = false;
|
|
14
20
|
if (!config || !this.configKey)
|
|
15
21
|
return;
|
|
16
|
-
if (!
|
|
17
|
-
|
|
22
|
+
if (!commonsTypeHasPropertyT(config, this.configKey, isIParserConfig)) {
|
|
23
|
+
commonsOutputError(`Invalid config for Parser (${this.configKey})`);
|
|
18
24
|
}
|
|
19
25
|
this.parserConfig = config[this.configKey];
|
|
20
26
|
this.enabled = this.parserConfig.enabled;
|
|
@@ -30,15 +36,22 @@ class Parser {
|
|
|
30
36
|
isEnabled() {
|
|
31
37
|
return this.enabled;
|
|
32
38
|
}
|
|
33
|
-
|
|
34
|
-
|
|
39
|
+
init(_database) {
|
|
40
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
41
|
+
// subclasses can implement
|
|
42
|
+
});
|
|
35
43
|
}
|
|
36
|
-
|
|
37
|
-
|
|
44
|
+
parse(_database) {
|
|
45
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
46
|
+
// subclasses can implement
|
|
47
|
+
});
|
|
38
48
|
}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
return
|
|
49
|
+
// eslint-disable-next-line @typescript-eslint/require-await
|
|
50
|
+
links() {
|
|
51
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
52
|
+
// subclasses can implement
|
|
53
|
+
return [];
|
|
54
|
+
});
|
|
42
55
|
}
|
|
43
56
|
}
|
|
44
|
-
|
|
57
|
+
//# sourceMappingURL=parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/parsers/parser.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,uBAAuB,EAAc,MAAM,mBAAmB,CAAC;AAExE,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAMxD,OAAO,EAAiB,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAE9E,MAAM,OAAgB,MAAM;IAI3B,4EAA4E;IAC5E,4FAA4F;IAC5F,YACY,OAAyB,EACnC,MAAkC,EAC1B,SAAkB;QAFhB,YAAO,GAAP,OAAO,CAAkB;QAE3B,cAAS,GAAT,SAAS,CAAS;QARpB,YAAO,GAAY,KAAK,CAAC;QAUhC,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO;QAEvC,IAAI,CAAC,uBAAuB,CAAgB,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,EAAE;YACrF,kBAAkB,CAAC,8BAA8B,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC;SACpE;QAED,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,SAAS,CAAM,CAAC;QAChD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;IAC1C,CAAC;IAES,SAAS,CAAK,+GAA+G;IACrI,OAAqC;QAEtC,IAAI,CAAC,IAAI,CAAC,YAAY;YAAE,OAAO,SAAS,CAAC;QAEzC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,IAAI,CAAC,SAAU,EAAE,CAAC,CAAC;QAEjG,OAAO,IAAI,CAAC,YAAiB,CAAC;IAC/B,CAAC;IAEM,SAAS;QACf,OAAO,IAAI,CAAC,OAAO,CAAC;IACrB,CAAC;IAIY,IAAI,CAAC,SAA0B;;YAC3C,2BAA2B;QAC5B,CAAC;KAAA;IAEY,KAAK,CAAC,SAA0B;;YAC5C,2BAA2B;QAC5B,CAAC;KAAA;IAED,4DAA4D;IAC/C,KAAK;;YACjB,2BAA2B;YAE3B,OAAO,EAAE,CAAC;QACX,CAAC;KAAA;CACD"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { TKeyObject } from 'tscommons-es-core';
|
|
2
|
+
import { DatabaseService } from '../services/database.service';
|
|
3
|
+
import { IParserConfig } from '../interfaces/iparser-config';
|
|
4
|
+
import { IRequestOutcome } from '../interfaces/irequest-outcome';
|
|
5
|
+
import { StringParser } from './string.parser';
|
|
6
|
+
import { IDataConfig } from './data.parser';
|
|
7
|
+
export declare class PhpErrorParser extends StringParser<IDataConfig> {
|
|
8
|
+
private url?;
|
|
9
|
+
constructor(url?: string | undefined, outcome?: IRequestOutcome, config?: TKeyObject<IParserConfig>);
|
|
10
|
+
parse(database: DatabaseService): Promise<void>;
|
|
11
|
+
supports(_contentType: string, isAllow: boolean): boolean;
|
|
12
|
+
}
|
|
@@ -1,44 +1,52 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { deriveEPhpErrorType } from 'hydra-crawler-ts-assets';
|
|
11
|
+
import { StringParser } from './string.parser';
|
|
6
12
|
const PHP_ERROR = '<br />\\s{1,2}<b>([A-Za-z ]+?)</b>: ([^<]+?) in <b>([^<]+?)</b> on line <b>([0-9]+?)</b><br />';
|
|
7
|
-
class PhpErrorParser extends
|
|
13
|
+
export class PhpErrorParser extends StringParser {
|
|
8
14
|
constructor(url, outcome, config) {
|
|
9
15
|
super(outcome, config, 'phpError');
|
|
10
16
|
this.url = url;
|
|
11
17
|
}
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
18
|
+
parse(database) {
|
|
19
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
20
|
+
if (!this.stringData || !this.url)
|
|
21
|
+
return;
|
|
22
|
+
const pattern = new RegExp(PHP_ERROR, 'g');
|
|
23
|
+
const errors = [];
|
|
24
|
+
while (true) {
|
|
25
|
+
const result = pattern.exec(this.stringData);
|
|
26
|
+
if (result === null)
|
|
27
|
+
break;
|
|
28
|
+
const type = deriveEPhpErrorType(result[1]);
|
|
29
|
+
if (type === undefined)
|
|
30
|
+
continue;
|
|
31
|
+
const message = result[2];
|
|
32
|
+
const file = result[3];
|
|
33
|
+
const line = parseInt(result[4], 10);
|
|
34
|
+
const error = {
|
|
35
|
+
type: type,
|
|
36
|
+
message: message,
|
|
37
|
+
file: file,
|
|
38
|
+
line: line
|
|
39
|
+
};
|
|
40
|
+
errors.push(error);
|
|
41
|
+
}
|
|
42
|
+
if (errors.length > 0)
|
|
43
|
+
yield database.setData(this.url, 'phpErrors', errors);
|
|
44
|
+
else
|
|
45
|
+
yield database.unsetData(this.url, 'phpErrors');
|
|
46
|
+
});
|
|
39
47
|
}
|
|
40
48
|
supports(_contentType, isAllow) {
|
|
41
49
|
return isAllow; // everything allowlisted
|
|
42
50
|
}
|
|
43
51
|
}
|
|
44
|
-
|
|
52
|
+
//# sourceMappingURL=php-error.parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"php-error.parser.js","sourceRoot":"","sources":["../../src/parsers/php-error.parser.ts"],"names":[],"mappings":";;;;;;;;;AAGA,OAAO,EAAiB,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAO7E,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAG/C,MAAM,SAAS,GAAW,iGAAiG,CAAC;AAE5H,MAAM,OAAO,cAAe,SAAQ,YAAyB;IAC5D,YACU,GAAY,EACpB,OAAyB,EACzB,MAAkC;QAEnC,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAJ1B,QAAG,GAAH,GAAG,CAAS;IAKtB,CAAC;IAEY,KAAK,CAAC,QAAyB;;YAC3C,IAAI,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAE1C,MAAM,OAAO,GAAW,IAAI,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;YAEnD,MAAM,MAAM,GAAgB,EAAE,CAAC;YAC/B,OAAO,IAAI,EAAE;gBACZ,MAAM,MAAM,GAAyB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBACnE,IAAI,MAAM,KAAK,IAAI;oBAAE,MAAM;gBAE3B,MAAM,IAAI,GAA4B,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACrE,IAAI,IAAI,KAAK,SAAS;oBAAE,SAAS;gBAEjC,MAAM,OAAO,GAAW,MAAM,CAAC,CAAC,CAAC,CAAC;gBAClC,MAAM,IAAI,GAAW,MAAM,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,IAAI,GAAW,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAE7C,MAAM,KAAK,GAAc;oBACvB,IAAI,EAAE,IAAI;oBACV,OAAO,EAAE,OAAO;oBAChB,IAAI,EAAE,IAAI;oBACV,IAAI,EAAE,IAAI;iBACX,CAAC;gBACF,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACnB;YAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAM,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;;gBACxE,MAAM,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;QACtD,CAAC;KAAA;IAEM,QAAQ,CAAC,YAAoB,EAAE,OAAgB;QACrD,OAAO,OAAO,CAAC,CAAC,yBAAyB;IAC1C,CAAC;CACD"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { DatabaseService } from '../services/database.service';
|
|
2
|
+
import { RegexParser } from './regex.parser';
|
|
3
|
+
import { IDictionaryConfig } from './dictionary.parser';
|
|
4
|
+
export declare abstract class PhraseParser<T extends IDictionaryConfig> extends RegexParser<T> {
|
|
5
|
+
setPhrases(phrases: string[], caseInsensitive: boolean): void;
|
|
6
|
+
protected abstract parsePhrases(database: DatabaseService, phrases: string[]): Promise<void>;
|
|
7
|
+
protected parseRegExs(database: DatabaseService, matches: Map<string, RegExpExecArray[]>): Promise<void>;
|
|
8
|
+
}
|
|
@@ -1,11 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
}
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { RegexParser } from './regex.parser';
|
|
11
|
+
export class PhraseParser extends RegexParser {
|
|
9
12
|
setPhrases(phrases, caseInsensitive) {
|
|
10
13
|
const map = new Map();
|
|
11
14
|
for (const phrase of phrases) {
|
|
@@ -14,8 +17,10 @@ class PhraseParser extends regex_parser_1.RegexParser {
|
|
|
14
17
|
}
|
|
15
18
|
this.setRegExs(map);
|
|
16
19
|
}
|
|
17
|
-
|
|
18
|
-
this
|
|
20
|
+
parseRegExs(database, matches) {
|
|
21
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
22
|
+
yield this.parsePhrases(database, Array.from(matches.keys()));
|
|
23
|
+
});
|
|
19
24
|
}
|
|
20
25
|
}
|
|
21
|
-
|
|
26
|
+
//# sourceMappingURL=phrase.parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phrase.parser.js","sourceRoot":"","sources":["../../src/parsers/phrase.parser.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAG7C,MAAM,OAAgB,YAA0C,SAAQ,WAAc;IAC9E,UAAU,CACf,OAAiB,EACjB,eAAwB;QAEzB,MAAM,GAAG,GAAwB,IAAI,GAAG,EAAkB,CAAC;QAC3D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;YAC7B,MAAM,KAAK,GAAW,IAAI,MAAM,CAAC,mBAAmB,MAAM,kBAAkB,EAAE,IAAI,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAChH,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;SACvB;QAED,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IACrB,CAAC;IAIe,WAAW,CAAC,QAAyB,EAAE,OAAuC;;YAC7F,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC/D,CAAC;KAAA;CACD"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { DatabaseService } from '../services/database.service';
|
|
2
|
+
import { TextParser } from './text.parser';
|
|
3
|
+
import { IDataConfig } from './data.parser';
|
|
4
|
+
export declare abstract class RegexParser<T extends IDataConfig> extends TextParser<T> {
|
|
5
|
+
private regexs;
|
|
6
|
+
protected setRegExs(regexs: Map<string, RegExp>): void;
|
|
7
|
+
getRegExs(): Map<string, RegExp>;
|
|
8
|
+
protected abstract parseRegExs(database: DatabaseService, matches: Map<string, RegExpExecArray[]>): Promise<void>;
|
|
9
|
+
protected parseText(database: DatabaseService, text: string): Promise<void>;
|
|
10
|
+
}
|
|
@@ -1,10 +1,16 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { TextParser } from './text.parser';
|
|
11
|
+
export class RegexParser extends TextParser {
|
|
12
|
+
constructor() {
|
|
13
|
+
super(...arguments);
|
|
8
14
|
this.regexs = new Map();
|
|
9
15
|
}
|
|
10
16
|
setRegExs(regexs) {
|
|
@@ -15,21 +21,23 @@ class RegexParser extends text_parser_1.TextParser {
|
|
|
15
21
|
getRegExs() {
|
|
16
22
|
return this.regexs;
|
|
17
23
|
}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
const
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
parseText(database, text) {
|
|
25
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
26
|
+
const matches = new Map();
|
|
27
|
+
for (const key of this.regexs.keys()) {
|
|
28
|
+
const regex = this.regexs.get(key);
|
|
29
|
+
const results = [];
|
|
30
|
+
while (true) {
|
|
31
|
+
const result = regex.exec(text);
|
|
32
|
+
if (!result || result === null)
|
|
33
|
+
break;
|
|
34
|
+
results.push(result);
|
|
35
|
+
}
|
|
36
|
+
if (results.length > 0)
|
|
37
|
+
matches.set(key, results);
|
|
28
38
|
}
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
}
|
|
32
|
-
await this.parseRegExs(database, matches);
|
|
39
|
+
yield this.parseRegExs(database, matches);
|
|
40
|
+
});
|
|
33
41
|
}
|
|
34
42
|
}
|
|
35
|
-
|
|
43
|
+
//# sourceMappingURL=regex.parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"regex.parser.js","sourceRoot":"","sources":["../../src/parsers/regex.parser.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAG3C,MAAM,OAAgB,WAAmC,SAAQ,UAAa;IAA9E;;QACS,WAAM,GAAwB,IAAI,GAAG,EAAkB,CAAC;IAgCjE,CAAC;IA9BU,SAAS,CAAC,MAA2B;QAC9C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,EAAE,EAAE;YAChC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,CAAC;SACvC;IACF,CAAC;IAEM,SAAS;QACf,OAAO,IAAI,CAAC,MAAM,CAAC;IACpB,CAAC;IAIe,SAAS,CAAC,QAAyB,EAAE,IAAY;;YAChE,MAAM,OAAO,GAAmC,IAAI,GAAG,EAA6B,CAAC;YACrF,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE;gBACrC,MAAM,KAAK,GAAW,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC;gBAE5C,MAAM,OAAO,GAAsB,EAAE,CAAC;gBACtC,OAAO,IAAI,EAAE;oBACZ,MAAM,MAAM,GAAyB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACtD,IAAI,CAAC,MAAM,IAAI,MAAM,KAAK,IAAI;wBAAE,MAAM;oBAEtC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;iBACrB;gBAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;oBAAE,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;aAClD;YAED,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC3C,CAAC;KAAA;CACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { TKeyObject } from 'tscommons-es-core';
|
|
2
|
+
import { DatabaseService } from '../services/database.service';
|
|
3
|
+
import { IRequestOutcome } from '../interfaces/irequest-outcome';
|
|
4
|
+
import { IParserConfig } from '../interfaces/iparser-config';
|
|
5
|
+
import { Parser } from './parser';
|
|
6
|
+
export declare class ServerParser extends Parser<IParserConfig> {
|
|
7
|
+
private url?;
|
|
8
|
+
constructor(url?: string | undefined, outcome?: IRequestOutcome, config?: TKeyObject<IParserConfig>);
|
|
9
|
+
supports(_contentType: string): boolean;
|
|
10
|
+
parse(database: DatabaseService): Promise<void>;
|
|
11
|
+
}
|
|
@@ -1,66 +1,67 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { EServer } from '../enums/eserver';
|
|
11
|
+
import { EXPoweredBy } from '../enums/ex-powered-by';
|
|
12
|
+
import { Parser } from './parser';
|
|
13
|
+
export class ServerParser extends Parser {
|
|
8
14
|
constructor(url, outcome, config) {
|
|
9
15
|
super(outcome, config, 'server');
|
|
10
16
|
this.url = url;
|
|
11
17
|
}
|
|
12
|
-
async init(database) {
|
|
13
|
-
const urls = database.getUrls();
|
|
14
|
-
if (!urls)
|
|
15
|
-
return;
|
|
16
|
-
await urls.createIndex({ 'server.server': 1 }, { unique: false });
|
|
17
|
-
await urls.createIndex({ 'server.xPoweredBy': 1 }, { unique: false });
|
|
18
|
-
}
|
|
19
18
|
supports(_contentType) {
|
|
20
19
|
return true; // everything
|
|
21
20
|
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
if (
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
if (
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
21
|
+
parse(database) {
|
|
22
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
23
|
+
if (!this.outcome || !this.url)
|
|
24
|
+
return;
|
|
25
|
+
const server = this.outcome.headers['server'] || undefined;
|
|
26
|
+
const xPoweredBy = this.outcome.headers['x-powered-by'] || undefined;
|
|
27
|
+
if (server === undefined && xPoweredBy === undefined) {
|
|
28
|
+
yield database.unsetData(this.url, 'server');
|
|
29
|
+
return;
|
|
30
|
+
}
|
|
31
|
+
const data = {};
|
|
32
|
+
if (server) {
|
|
33
|
+
if (/^Apache($|\/)/.test(server))
|
|
34
|
+
data.server = EServer.APACHE;
|
|
35
|
+
if (/^Microsoft-IIS($|\/)/.test(server))
|
|
36
|
+
data.server = EServer.IIS;
|
|
37
|
+
if (/^nginx($|\/)/.test(server))
|
|
38
|
+
data.server = EServer.NGINX;
|
|
39
|
+
if (/^cloudflare($|\/)/.test(server))
|
|
40
|
+
data.server = EServer.CLOUDFLARE;
|
|
41
|
+
if (/^AmazonS3($|\/)/.test(server))
|
|
42
|
+
data.server = EServer.AMAZONS3;
|
|
43
|
+
if (/^Jetty($|\/)/.test(server))
|
|
44
|
+
data.server = EServer.JETTY;
|
|
45
|
+
}
|
|
46
|
+
if (xPoweredBy) {
|
|
47
|
+
if (/^PHP($|\/)/.test(xPoweredBy))
|
|
48
|
+
data.xPoweredBy = EXPoweredBy.PHP;
|
|
49
|
+
if (/^ASP.NET($|\/)/.test(xPoweredBy))
|
|
50
|
+
data.xPoweredBy = EXPoweredBy.ASPNET;
|
|
51
|
+
if (/^Express($|\/)/.test(xPoweredBy))
|
|
52
|
+
data.xPoweredBy = EXPoweredBy.EXPRESS;
|
|
53
|
+
if (/^Zope($|\/)/.test(xPoweredBy))
|
|
54
|
+
data.xPoweredBy = EXPoweredBy.ZOPE;
|
|
55
|
+
}
|
|
56
|
+
if (data.server === undefined && data.xPoweredBy === undefined) {
|
|
57
|
+
yield database.unsetData(this.url, 'server');
|
|
58
|
+
return;
|
|
59
|
+
}
|
|
60
|
+
try {
|
|
61
|
+
yield database.setData(this.url, 'server', data);
|
|
62
|
+
}
|
|
63
|
+
catch (ex) { /* do nothing */ }
|
|
64
|
+
});
|
|
64
65
|
}
|
|
65
66
|
}
|
|
66
|
-
|
|
67
|
+
//# sourceMappingURL=server.parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"server.parser.js","sourceRoot":"","sources":["../../src/parsers/server.parser.ts"],"names":[],"mappings":";;;;;;;;;AAQA,OAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAErD,OAAO,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAElC,MAAM,OAAO,YAAa,SAAQ,MAAqB;IACtD,YACU,GAAY,EACpB,OAAyB,EACzB,MAAkC;QAEnC,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;QAJxB,QAAG,GAAH,GAAG,CAAS;IAKtB,CAAC;IAEM,QAAQ,CAAC,YAAoB;QACnC,OAAO,IAAI,CAAC,CAAC,aAAa;IAC3B,CAAC;IAEY,KAAK,CAAC,QAAyB;;YAC3C,IAAI,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEvC,MAAM,MAAM,GAAsB,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAY,IAAI,SAAS,CAAC;YACzF,MAAM,UAAU,GAAsB,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,cAAc,CAAY,IAAI,SAAS,CAAC;YAEnG,IAAI,MAAM,KAAK,SAAS,IAAI,UAAU,KAAK,SAAS,EAAE;gBACrD,MAAM,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;gBAC7C,OAAO;aACP;YAED,MAAM,IAAI,GAAY,EAAE,CAAC;YAEzB,IAAI,MAAM,EAAE;gBACX,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC;oBAAE,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;gBAC/D,IAAI,sBAAsB,CAAC,IAAI,CAAC,MAAM,CAAC;oBAAE,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC;gBACnE,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC;oBAAE,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC;gBAC7D,IAAI,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC;oBAAE,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC;gBACvE,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC;oBAAE,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC;gBACnE,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC;oBAAE,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC;aAC7D;YAED,IAAI,UAAU,EAAE;gBACf,IAAI,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC;oBAAE,IAAI,CAAC,UAAU,GAAG,WAAW,CAAC,GAAG,CAAC;gBACrE,IAAI,gBAAgB,CAAC,IAAI,CAAC,UAAU,CAAC;oBAAE,IAAI,CAAC,UAAU,GAAG,WAAW,CAAC,MAAM,CAAC;gBAC5E,IAAI,gBAAgB,CAAC,IAAI,CAAC,UAAU,CAAC;oBAAE,IAAI,CAAC,UAAU,GAAG,WAAW,CAAC,OAAO,CAAC;gBAC7E,IAAI,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC;oBAAE,IAAI,CAAC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC;aACvE;YAED,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,IAAI,CAAC,UAAU,KAAK,SAAS,EAAE;gBAC/D,MAAM,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;gBAC7C,OAAO;aACP;YAED,IAAI;gBACH,MAAM,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC;aACjD;YAAC,OAAO,EAAE,EAAE,EAAE,gBAAgB,EAAE;QAClC,CAAC;KAAA;CACD"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { TKeyObject } from 'tscommons-es-core';
|
|
2
|
+
import { DatabaseService } from '../services/database.service';
|
|
3
|
+
import { IParserConfig } from '../interfaces/iparser-config';
|
|
4
|
+
import { IRequestOutcome } from '../interfaces/irequest-outcome';
|
|
5
|
+
import { DictionaryParser, IDictionaryConfig } from './dictionary.parser';
|
|
6
|
+
export declare class SpellingParser extends DictionaryParser<IDictionaryConfig> {
|
|
7
|
+
private url?;
|
|
8
|
+
constructor(url?: string | undefined, outcome?: IRequestOutcome, config?: TKeyObject<IParserConfig>);
|
|
9
|
+
protected parseMatches(database: DatabaseService, matches: string[], _nonMatches: string[]): Promise<void>;
|
|
10
|
+
}
|