hydra-crawler 1.4.6 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/apis/autocomplete.api.d.ts +7 -0
- package/dist/apis/autocomplete.api.js +15 -9
- package/dist/apis/autocomplete.api.js.map +1 -0
- package/dist/apis/bugs.api.d.ts +7 -0
- package/dist/apis/bugs.api.js +21 -15
- package/dist/apis/bugs.api.js.map +1 -0
- package/dist/apis/crawl.api.d.ts +7 -0
- package/dist/apis/crawl.api.js +15 -9
- package/dist/apis/crawl.api.js.map +1 -0
- package/dist/apis/domains.api.d.ts +7 -0
- package/dist/apis/domains.api.js +24 -19
- package/dist/apis/domains.api.js.map +1 -0
- package/dist/apis/images.api.d.ts +7 -0
- package/dist/apis/images.api.js +20 -14
- package/dist/apis/images.api.js.map +1 -0
- package/dist/apis/statistics.api.d.ts +8 -0
- package/dist/apis/statistics.api.js +27 -20
- package/dist/apis/statistics.api.js.map +1 -0
- package/dist/apis/test.api.d.ts +5 -0
- package/dist/apis/test.api.js +15 -9
- package/dist/apis/test.api.js.map +1 -0
- package/dist/apis/urls.api.d.ts +7 -0
- package/dist/apis/urls.api.js +21 -15
- package/dist/apis/urls.api.js.map +1 -0
- package/dist/apps/cleanup.app.d.ts +19 -0
- package/dist/apps/cleanup.app.js +118 -100
- package/dist/apps/cleanup.app.js.map +1 -0
- package/dist/apps/cross-populate-export.app.d.ts +12 -0
- package/dist/apps/cross-populate-export.app.js +60 -47
- package/dist/apps/cross-populate-export.app.js.map +1 -0
- package/dist/apps/cross-populate-import.app.d.ts +12 -0
- package/dist/apps/cross-populate-import.app.js +64 -51
- package/dist/apps/cross-populate-import.app.js.map +1 -0
- package/dist/apps/denylist.app.d.ts +17 -0
- package/dist/apps/denylist.app.js +115 -98
- package/dist/apps/denylist.app.js.map +1 -0
- package/dist/apps/expire.app.d.ts +19 -0
- package/dist/apps/expire.app.js +44 -31
- package/dist/apps/expire.app.js.map +1 -0
- package/dist/apps/extract-text.app.d.ts +8 -0
- package/dist/apps/extract-text.app.js +43 -35
- package/dist/apps/extract-text.app.js.map +1 -0
- package/dist/apps/hydra.app.d.ts +34 -0
- package/dist/apps/hydra.app.js +150 -137
- package/dist/apps/hydra.app.js.map +1 -0
- package/dist/apps/import.app.d.ts +11 -0
- package/dist/apps/import.app.js +44 -32
- package/dist/apps/import.app.js.map +1 -0
- package/dist/apps/internal-hydra-common.app.d.ts +28 -0
- package/dist/apps/internal-hydra-common.app.js +5 -11
- package/dist/apps/internal-hydra-common.app.js.map +1 -0
- package/dist/apps/query.app.d.ts +20 -0
- package/dist/apps/query.app.js +63 -49
- package/dist/apps/query.app.js.map +1 -0
- package/dist/apps/reattempt.app.d.ts +17 -0
- package/dist/apps/reattempt.app.js +66 -53
- package/dist/apps/reattempt.app.js.map +1 -0
- package/dist/apps/requeue-domain.app.d.ts +13 -0
- package/dist/apps/requeue-domain.app.js +50 -37
- package/dist/apps/requeue-domain.app.js.map +1 -0
- package/dist/apps/seed.app.d.ts +15 -0
- package/dist/apps/seed.app.js +53 -40
- package/dist/apps/seed.app.js.map +1 -0
- package/dist/apps/startup.app.d.ts +11 -0
- package/dist/apps/startup.app.js +51 -38
- package/dist/apps/startup.app.js.map +1 -0
- package/dist/apps/unarchive.app.d.ts +15 -0
- package/dist/apps/unarchive.app.js +67 -54
- package/dist/apps/unarchive.app.js.map +1 -0
- package/dist/classes/cleaner.d.ts +12 -0
- package/dist/classes/cleaner.js +227 -207
- package/dist/classes/cleaner.js.map +1 -0
- package/dist/classes/crawler.d.ts +34 -0
- package/dist/classes/crawler.js +248 -241
- package/dist/classes/crawler.js.map +1 -0
- package/dist/classes/dns.d.ts +3 -0
- package/dist/classes/dns.js +10 -13
- package/dist/classes/dns.js.map +1 -0
- package/dist/classes/expirer.d.ts +10 -0
- package/dist/classes/expirer.js +107 -94
- package/dist/classes/expirer.js.map +1 -0
- package/dist/classes/expiry.d.ts +8 -0
- package/dist/classes/expiry.js +16 -19
- package/dist/classes/expiry.js.map +1 -0
- package/dist/classes/lists.d.ts +9 -0
- package/dist/classes/lists.js +13 -18
- package/dist/classes/lists.js.map +1 -0
- package/dist/classes/robot.d.ts +15 -0
- package/dist/classes/robot.js +40 -30
- package/dist/classes/robot.js.map +1 -0
- package/dist/classes/tracker.d.ts +25 -0
- package/dist/classes/tracker.js +82 -64
- package/dist/classes/tracker.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +72 -65
- package/dist/cli.js.map +1 -0
- package/dist/enums/eavailable-strategy.d.ts +4 -0
- package/dist/enums/eavailable-strategy.js +3 -5
- package/dist/enums/eavailable-strategy.js.map +1 -0
- package/dist/enums/elist.d.ts +7 -0
- package/dist/enums/elist.js +7 -11
- package/dist/enums/elist.js.map +1 -0
- package/dist/enums/eserver.d.ts +8 -0
- package/dist/enums/eserver.js +3 -5
- package/dist/enums/eserver.js.map +1 -0
- package/dist/enums/ex-powered-by.d.ts +6 -0
- package/dist/enums/ex-powered-by.js +3 -5
- package/dist/enums/ex-powered-by.js.map +1 -0
- package/dist/helpers/matcher.d.ts +5 -0
- package/dist/helpers/matcher.js +2 -5
- package/dist/helpers/matcher.js.map +1 -0
- package/dist/helpers/random.d.ts +4 -0
- package/dist/helpers/random.js +2 -5
- package/dist/helpers/random.js.map +1 -0
- package/dist/helpers/utf-decoder.d.ts +4 -0
- package/dist/helpers/utf-decoder.js +3 -6
- package/dist/helpers/utf-decoder.js.map +1 -0
- package/dist/interfaces/iexpiry.d.ts +7 -0
- package/dist/interfaces/iexpiry.js +9 -13
- package/dist/interfaces/iexpiry.js.map +1 -0
- package/dist/interfaces/imatch.d.ts +6 -0
- package/dist/interfaces/imatch.js +6 -9
- package/dist/interfaces/imatch.js.map +1 -0
- package/dist/interfaces/iparser-config.d.ts +4 -0
- package/dist/interfaces/iparser-config.js +4 -7
- package/dist/interfaces/iparser-config.js.map +1 -0
- package/dist/interfaces/iparser.d.ts +8 -0
- package/dist/interfaces/iparser.js +2 -2
- package/dist/interfaces/iparser.js.map +1 -0
- package/dist/interfaces/irequest-outcome.d.ts +11 -0
- package/dist/interfaces/irequest-outcome.js +2 -2
- package/dist/interfaces/irequest-outcome.js.map +1 -0
- package/dist/interfaces/iserver.d.ts +4 -0
- package/dist/interfaces/iserver.js +2 -2
- package/dist/interfaces/iserver.js.map +1 -0
- package/dist/parsers/accessibility-metrics.parser.d.ts +11 -0
- package/dist/parsers/accessibility-metrics.parser.js +34 -26
- package/dist/parsers/accessibility-metrics.parser.js.map +1 -0
- package/dist/parsers/asp-error.parser.d.ts +12 -0
- package/dist/parsers/asp-error.parser.js +36 -28
- package/dist/parsers/asp-error.parser.js.map +1 -0
- package/dist/parsers/bad-words.parser.d.ts +10 -0
- package/dist/parsers/bad-words.parser.js +21 -13
- package/dist/parsers/bad-words.parser.js.map +1 -0
- package/dist/parsers/complex-english.parser.d.ts +15 -0
- package/dist/parsers/complex-english.parser.js +33 -25
- package/dist/parsers/complex-english.parser.js.map +1 -0
- package/dist/parsers/data.parser.d.ts +14 -0
- package/dist/parsers/data.parser.js +12 -16
- package/dist/parsers/data.parser.js.map +1 -0
- package/dist/parsers/dictionary.parser.d.ts +19 -0
- package/dist/parsers/dictionary.parser.js +47 -39
- package/dist/parsers/dictionary.parser.js.map +1 -0
- package/dist/parsers/html.parser.d.ts +13 -0
- package/dist/parsers/html.parser.js +4 -8
- package/dist/parsers/html.parser.js.map +1 -0
- package/dist/parsers/hyperlinks.parser.d.ts +20 -0
- package/dist/parsers/hyperlinks.parser.js +82 -77
- package/dist/parsers/hyperlinks.parser.js.map +1 -0
- package/dist/parsers/image-tags.parser.d.ts +19 -0
- package/dist/parsers/image-tags.parser.js +31 -35
- package/dist/parsers/image-tags.parser.js.map +1 -0
- package/dist/parsers/jpeg.parser.d.ts +11 -0
- package/dist/parsers/jpeg.parser.js +28 -20
- package/dist/parsers/jpeg.parser.js.map +1 -0
- package/dist/parsers/paragraphs.parser.d.ts +13 -0
- package/dist/parsers/paragraphs.parser.js +33 -40
- package/dist/parsers/paragraphs.parser.js.map +1 -0
- package/dist/parsers/parser.d.ts +19 -0
- package/dist/parsers/parser.js +30 -17
- package/dist/parsers/parser.js.map +1 -0
- package/dist/parsers/php-error.parser.d.ts +12 -0
- package/dist/parsers/php-error.parser.js +42 -34
- package/dist/parsers/php-error.parser.js.map +1 -0
- package/dist/parsers/phrase.parser.d.ts +8 -0
- package/dist/parsers/phrase.parser.js +16 -11
- package/dist/parsers/phrase.parser.js.map +1 -0
- package/dist/parsers/regex.parser.d.ts +10 -0
- package/dist/parsers/regex.parser.js +30 -22
- package/dist/parsers/regex.parser.js.map +1 -0
- package/dist/parsers/server.parser.d.ts +11 -0
- package/dist/parsers/server.parser.js +58 -57
- package/dist/parsers/server.parser.js.map +1 -0
- package/dist/parsers/spelling.parser.d.ts +10 -0
- package/dist/parsers/spelling.parser.js +21 -13
- package/dist/parsers/spelling.parser.js.map +1 -0
- package/dist/parsers/string.parser.d.ts +8 -0
- package/dist/parsers/string.parser.js +5 -8
- package/dist/parsers/string.parser.js.map +1 -0
- package/dist/parsers/text.parser.d.ts +8 -0
- package/dist/parsers/text.parser.js +24 -18
- package/dist/parsers/text.parser.js.map +1 -0
- package/dist/parsers/words.parser.d.ts +11 -0
- package/dist/parsers/words.parser.js +32 -28
- package/dist/parsers/words.parser.js.map +1 -0
- package/dist/queries/complex-english.query.d.ts +2 -0
- package/dist/queries/complex-english.query.js +37 -38
- package/dist/queries/complex-english.query.js.map +1 -0
- package/dist/queries/flash-content.query.d.ts +2 -0
- package/dist/queries/flash-content.query.js +39 -30
- package/dist/queries/flash-content.query.js.map +1 -0
- package/dist/queries/linking-to-domains.query.d.ts +2 -0
- package/dist/queries/linking-to-domains.query.js +35 -27
- package/dist/queries/linking-to-domains.query.js.map +1 -0
- package/dist/queries/readability-score.query.d.ts +2 -0
- package/dist/queries/readability-score.query.js +21 -13
- package/dist/queries/readability-score.query.js.map +1 -0
- package/dist/servers/crawl.server.d.ts +35 -0
- package/dist/servers/crawl.server.js +133 -121
- package/dist/servers/crawl.server.js.map +1 -0
- package/dist/servers/express.server.d.ts +8 -0
- package/dist/servers/express.server.js +7 -10
- package/dist/servers/express.server.js.map +1 -0
- package/dist/servers/maintenance.server.d.ts +22 -0
- package/dist/servers/maintenance.server.js +42 -36
- package/dist/servers/maintenance.server.js.map +1 -0
- package/dist/servers/rest.server.d.ts +7 -0
- package/dist/servers/rest.server.js +40 -51
- package/dist/servers/rest.server.js.map +1 -0
- package/dist/servers/socket-io.server.d.ts +12 -0
- package/dist/servers/socket-io.server.js +48 -15
- package/dist/servers/socket-io.server.js.map +1 -0
- package/dist/services/database.service.d.ts +68 -0
- package/dist/services/database.service.js +527 -462
- package/dist/services/database.service.js.map +1 -0
- package/dist/types/tcrawl-config.d.ts +14 -0
- package/dist/types/tcrawl-config.js +14 -17
- package/dist/types/tcrawl-config.js.map +1 -0
- package/dist/types/thydra-config.d.ts +4 -0
- package/dist/types/thydra-config.js +4 -7
- package/dist/types/thydra-config.js.map +1 -0
- package/dist/types/tparser-ctor.d.ts +7 -0
- package/dist/types/tparser-ctor.js +2 -2
- package/dist/types/tparser-ctor.js.map +1 -0
- package/dist/types/tquery.d.ts +7 -0
- package/dist/types/tquery.js +2 -2
- package/dist/types/tquery.js.map +1 -0
- package/dist/types/trobots-config.d.ts +4 -0
- package/dist/types/trobots-config.js +4 -7
- package/dist/types/trobots-config.js.map +1 -0
- package/package.json +41 -29
- package/angular/10-es2015.bacd4ae5dd7913ce55f0.js +0 -1
- package/angular/10-es5.bacd4ae5dd7913ce55f0.js +0 -1
- package/angular/11-es2015.0f031dcf752d1e8eda6b.js +0 -1
- package/angular/11-es5.0f031dcf752d1e8eda6b.js +0 -1
- package/angular/3rdpartylicenses.txt +0 -1127
- package/angular/5-es2015.951498ca9c1bc74e57bf.js +0 -1
- package/angular/5-es5.951498ca9c1bc74e57bf.js +0 -1
- package/angular/6-es2015.65f680261a3506b88381.js +0 -1
- package/angular/6-es5.65f680261a3506b88381.js +0 -1
- package/angular/7-es2015.625197f3af1dbf3e805d.js +0 -1
- package/angular/7-es5.625197f3af1dbf3e805d.js +0 -1
- package/angular/8-es2015.55518901987a5b834309.js +0 -1
- package/angular/8-es5.55518901987a5b834309.js +0 -1
- package/angular/9-es2015.6cc9bde262564e7836f2.js +0 -1
- package/angular/9-es5.6cc9bde262564e7836f2.js +0 -1
- package/angular/Roboto-Black.41ed1105a6ebb8ffe34e.woff2 +0 -0
- package/angular/Roboto-Black.937491dfcbe64ca9a9f1.woff +0 -0
- package/angular/Roboto-BlackItalic.2e1ee657996854c6f427.woff +0 -0
- package/angular/Roboto-BlackItalic.50ca4c51ebc27e7e7d2f.woff2 +0 -0
- package/angular/Roboto-Bold.73288d91c325e82a5b92.woff +0 -0
- package/angular/Roboto-Bold.92fbd4e93cf0a5dbebaa.woff2 +0 -0
- package/angular/Roboto-BoldItalic.5f600d98a73d800ae575.woff2 +0 -0
- package/angular/Roboto-BoldItalic.6d89acbd21d7e3fbecb2.woff +0 -0
- package/angular/Roboto-Light.c27d89ac77468ae18f28.woff2 +0 -0
- package/angular/Roboto-Light.d923dfafc0c5183b59aa.woff +0 -0
- package/angular/Roboto-LightItalic.506274c7228cf81cae4d.woff2 +0 -0
- package/angular/Roboto-LightItalic.d4b8c137518d9d92bb28.woff +0 -0
- package/angular/Roboto-Medium.092c6130df8fd2199888.woff +0 -0
- package/angular/Roboto-Medium.1d3bced88509b0838984.woff2 +0 -0
- package/angular/Roboto-MediumItalic.18ff1628c628080166c1.woff +0 -0
- package/angular/Roboto-MediumItalic.d620b8f53f75966fe42e.woff2 +0 -0
- package/angular/Roboto-Regular.64cfb66c866ea50cad47.woff2 +0 -0
- package/angular/Roboto-Regular.e02e9d6ff5547f7e9962.woff +0 -0
- package/angular/Roboto-RegularItalic.4dd2af1e8df532f41db8.woff2 +0 -0
- package/angular/Roboto-RegularItalic.5ea38fff9eebef99c5df.woff +0 -0
- package/angular/Roboto-Thin.dbd56bd3357dc3617fe5.woff2 +0 -0
- package/angular/Roboto-Thin.e7f7c82374bd0ebef14b.woff +0 -0
- package/angular/Roboto-ThinItalic.5dd9349c940073834e9a.woff +0 -0
- package/angular/Roboto-ThinItalic.a8cef84f735ef887abdc.woff2 +0 -0
- package/angular/assets/config/app-config.json +0 -16
- package/angular/assets/images/splashbg.jpg +0 -0
- package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff +0 -0
- package/angular/assets/web-app-commons/fonts/material-icons/MaterialDesignIcons-Community-2.7.94.woff2 +0 -0
- package/angular/assets/web-app-commons/fonts/material-icons/material-design-icons-community.css +0 -11293
- package/angular/favicon.ico +0 -0
- package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNa.f2a0933406f783065152.woff +0 -0
- package/angular/flUhRq6tzZclQEJ-Vdg-IuiaDsNc.6467d9a24f234e8e8e07.woff2 +0 -0
- package/angular/index.html +0 -16
- package/angular/main-es2015.3a582572476c7f292e52.js +0 -1
- package/angular/main-es5.3a582572476c7f292e52.js +0 -1
- package/angular/polyfills-es2015.7df68534018bc2f6cb09.js +0 -1
- package/angular/polyfills-es5.e79468f406fae2989221.js +0 -1
- package/angular/runtime-es2015.6d2cff76cdb2790d3308.js +0 -1
- package/angular/runtime-es5.6d2cff76cdb2790d3308.js +0 -1
- package/angular/styles.c5c6c2534225b85c4ff0.css +0 -1
- package/config/bad-words.json +0 -1
- package/config/complex-english.json +0 -400
- package/config/hydra-auth.json +0 -8
- package/config/hydra-crawler.json +0 -84
- package/config/list-allow.json +0 -171
- package/config/list-deny.json +0 -248
- package/config/list-expiry.json +0 -7
- package/config/schedule.json +0 -25
- package/config/spelling.json +0 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"linking-to-domains.query.js","sourceRoot":"","sources":["../../src/queries/linking-to-domains.query.ts"],"names":[],"mappings":";;;;;;;;;AAIA,OAAO,EAAQ,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAS,OAAO,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAW,SAAS,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAe,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAiB3J,MAAM,UAAU,GAAqC,CAAC,CAAS,EAAE,CAAS,EAAU,EAAE;IACrF,IAAI,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC7C,IAAI,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC9C,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC1C,IAAI,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC7C,IAAI,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC9C,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAE1C,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACpB,OAAO,CAAC,CAAC;AACV,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,KAAK,GAAW,CAC3B,IAAiB,EACjB,eAAgC,EAChC,MAAa,EACb,OAAe,EACf,cAAyC,EAC1B,EAAE;IAClB,MAAM,QAAQ,GAAqB,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAC3D,IAAI,CAAC,QAAQ,EAAE;QACd,kBAAkB,CAAC,2BAA2B,CAAC,CAAC;QAChD,OAAO;KACP;IACD,MAAM,KAAK,GAAW,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC;IAE3C,kBAAkB,CAAC,gCAAgC,CAAC,CAAC;IACrD,MAAM,OAAO,GAAoB,eAAe,CAAC,UAAU,EAAE,CAAC,IAAI,CAChE;QACE,MAAM,EAAE,KAAK;KACd,EACD,EAAE,CACH,CAAC;IACF,MAAM,OAAO,GAAc,MAAM,eAAe,CAAC,gBAAgB,CAC/D,OAAO,EACP,SAAS,CACV,CAAC;IACF,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAEpC,MAAM,UAAU,GAAyB,IAAI,GAAG,EAAmB,CAAC;IACpE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;QAC7B,kBAAkB,CAAC,0BAA0B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;QAC9D,MAAM,QAAQ,GAAiB,eAAe,CAAC,OAAO,EAAE,CAAC,IAAI,CAC3D;YACE,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,QAAQ,EAAE;SAClC,EACD,EAAE,CACH,CAAC;QACF,MAAM,IAAI,GAAW,MAAM,eAAe,CAAC,gBAAgB,CACzD,QAAQ,EACR,MAAM,CACP,CAAC;QACF,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEjC,UAAU,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;KAC7B;IAED,kBAAkB,CAAC,sCAAsC,CAAC,CAAC;IAC3D,IAAI,KAAK,GAAW,CAAC,CAAC;IACtB,MAAM,cAAc,GAAqC,IAAI,GAAG,EAA+B,CAAC;IAChG,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;QAC7B,MAAM,IAAI,GAAqB,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACtD,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,SAAS;QAEpC,MAAM,GAAG,GAAuB,IAAI,GAAG,EAAiB,CAAC;QAEzD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;YACvB,MAAM,QAAQ,GAAkB,eAAe,CAAC,QAAQ,EAAE,CAAC,IAAI,CAC7D;gBACE,QAAQ,EAAE,GAAG,CAAC,GAAG;aAClB,EACD,EAAE,CACH,CAAC;YACF,MAAM,KAAK,GAAY,MAAM,eAAe,CAAC,gBAAgB,CAC3D,QAAQ,EACR,OAAO,CACR,CAAC;YAEF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAEjC,KAAK,IAAI,KAAK,CAAC,MAAM,CAAC;YACtB,qBAAqB,CAAC,KAAK,CAAC,CAAC;YAE7B,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;SACpB;QAED,IAAI,GAAG,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE7B,cAAc,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KAChC;IACD,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAE3B,kBAAkB,CAAC,0BAA0B,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,MAAM,IAAI,cAAc,CAAC,IAAI,EAAE,EAAE;QAC3C,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC;QAE5B,KAAK,MAAM,GAAG,IAAI,cAAc,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,IAAI,EAAE,EAAE;YACrD,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,cAAc,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,GAAG,CAAC,GAAG,CAAE;iBACpE,GAAG,CAAC,CAAC,IAAW,EAAU,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAC1C;KACD;IACD,oBAAoB,EAAE,CAAC;IAEvB,IAAI,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,EAAE;QACtC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC;aACjB,IAAI,CAAC,UAAU,CAAC;aAChB,OAAO,CAAC,CAAC,MAAc,EAAQ,EAAE;YACjC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;KACL;SAAM,IAAI,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,EAAE;QACzC,MAAM,UAAU,GAA0B,IAAI,GAAG,EAAoB,CAAC;QAEtE,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE;YAC1C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE;gBAC/C,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,EAAE;oBACvC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC;wBAAE,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;oBAClD,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,QAAQ,CAAC,GAAG,CAAC;wBAAE,UAAU,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;iBACxE;aACD;SACD;QAED,MAAM,IAAI,GAAa,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;aACjD,IAAI,CAAC,UAAU,CAAC,CAAC;QACpB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;YACvB,UAAU,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAEtC,IAAI,KAAK,GAAY,IAAI,CAAC;YAC1B,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAE,EAAE;gBACvC,MAAM,GAAG,GAAa;oBACpB,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;oBAChB,GAAG;iBACJ,CAAC;gBACF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gBAE5B,KAAK,GAAG,KAAK,CAAC;aACd;SACD;KACD;SAAM,IAAI,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,EAAE;QAC1C,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;KACrC;AACF,CAAC,CAAA,CAAC;AAEF,wBAAwB"}
|
|
@@ -1,17 +1,24 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { EStatus } from 'hydra-crawler-ts-assets';
|
|
11
|
+
import { commonsOutputDoing, commonsOutputError, commonsOutputProgress, commonsOutputResult } from 'nodecommons-es-cli';
|
|
12
|
+
export const query = (args, databaseService, _lists, _expiry, _parsersConfig) => __awaiter(void 0, void 0, void 0, function* () {
|
|
6
13
|
const domain = args.getString('domain');
|
|
7
14
|
if (!domain) {
|
|
8
|
-
|
|
15
|
+
commonsOutputError('No domain specified');
|
|
9
16
|
return;
|
|
10
17
|
}
|
|
11
|
-
|
|
18
|
+
commonsOutputDoing(`Scoring and ordering readability of DONE URLs for domain ${domain}`);
|
|
12
19
|
const results = databaseService.getUrls().aggregate([
|
|
13
20
|
{ $match: {
|
|
14
|
-
status: { $ne:
|
|
21
|
+
status: { $ne: EStatus.ARCHIVED },
|
|
15
22
|
domain: domain,
|
|
16
23
|
'headers.content-type': 'text/html'
|
|
17
24
|
} },
|
|
@@ -57,16 +64,17 @@ const query = async (args, databaseService, _lists, _expiry, _parsersConfig) =>
|
|
|
57
64
|
while (true) {
|
|
58
65
|
tally++;
|
|
59
66
|
if ((tally % 100) === 0)
|
|
60
|
-
|
|
61
|
-
const row =
|
|
67
|
+
commonsOutputProgress(`${tally}`);
|
|
68
|
+
const row = yield results.next();
|
|
62
69
|
if (row === null)
|
|
63
70
|
break;
|
|
64
71
|
matches.push(row);
|
|
65
72
|
}
|
|
66
|
-
|
|
73
|
+
commonsOutputResult(tally);
|
|
67
74
|
console.log('url\tparagraphs\tsentences\twords\tfkre\tari\tscore');
|
|
68
75
|
for (const match of matches) {
|
|
69
76
|
console.log(`${match.url}\t${match.stats.paragraphs}\t${match.stats.sentences}\t${match.stats.words}\t${match.stats.fkre}\t${match.stats.ari}\t${match.score}`);
|
|
70
77
|
}
|
|
71
|
-
};
|
|
72
|
-
|
|
78
|
+
});
|
|
79
|
+
// export default query;
|
|
80
|
+
//# sourceMappingURL=readability-score.query.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"readability-score.query.js","sourceRoot":"","sources":["../../src/queries/readability-score.query.ts"],"names":[],"mappings":";;;;;;;;;AAIA,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAe,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAuBrI,MAAM,CAAC,MAAM,KAAK,GAAW,CAC3B,IAAiB,EACjB,eAAgC,EAChC,MAAa,EACb,OAAe,EACf,cAAyC,EAC1B,EAAE;IAClB,MAAM,MAAM,GAAqB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAC1D,IAAI,CAAC,MAAM,EAAE;QACZ,kBAAkB,CAAC,qBAAqB,CAAC,CAAC;QAC1C,OAAO;KACP;IAED,kBAAkB,CAAC,4DAA4D,MAAM,EAAE,CAAC,CAAC;IAEzF,MAAM,OAAO,GAA+B,eAAe,CAAC,OAAO,EAAE,CAAC,SAAS,CAAU;QACvF,EAAE,MAAM,EAAE;gBACR,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,QAAQ,EAAE;gBACjC,MAAM,EAAE,MAAM;gBACd,sBAAsB,EAAE,WAAW;aACpC,EAAE;QACH,EAAE,MAAM,EAAE;gBACR,2BAA2B,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE;gBAC3C,0BAA0B,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE;aAC3C,EAAE;QACH,EAAE,QAAQ,EAAE;gBACV,GAAG,EAAE,IAAI;gBACT,KAAK,EAAE;oBACL,UAAU,EAAE,kCAAkC;oBAC9C,SAAS,EAAE,iCAAiC;oBAC5C,KAAK,EAAE,6BAA6B;oBACpC,IAAI,EAAE,4BAA4B;oBAClC,GAAG,EAAE,2BAA2B;iBACjC;aACF,EAAE;QACH,EAAE,QAAQ,EAAE;gBACV,GAAG,EAAE,IAAI;gBACT,KAAK,EAAE,IAAI;gBACX,KAAK,EAAE;oBACL,IAAI,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAE,GAAG,EAAE,aAAa,CAAE,EAAE,CAAE,EAAE;oBACtD,GAAG,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAE,EAAE,EAAE,YAAY,CAAE,EAAE,CAAE,EAAE;iBACpD;aACF,EAAE;QACH,EAAE,QAAQ,EAAE;gBACV,GAAG,EAAE,IAAI;gBACT,KAAK,EAAE,IAAI;gBACX,QAAQ,EAAE;oBACR,IAAI,EAAE,EAAE,OAAO,EAAE,CAAE,EAAE,SAAS,EAAE,CAAE,GAAG,EAAE,aAAa,CAAE,EAAE,EAAE,GAAG,CAAE,EAAE;oBACjE,GAAG,EAAE,EAAE,OAAO,EAAE,CAAE,EAAE,SAAS,EAAE,CAAE,YAAY,EAAE,CAAC,CAAE,EAAE,EAAE,EAAE,CAAE,EAAE;iBAC7D;aACF,EAAE;QACH,EAAE,QAAQ,EAAE;gBACV,GAAG,EAAE,IAAI;gBACT,KAAK,EAAE,IAAI;gBACX,KAAK,EAAE,EAAE,OAAO,EAAE,CAAE,EAAE,IAAI,EAAE,CAAE,gBAAgB,EAAE,eAAe,CAAE,EAAE,EAAE,CAAC,CAAE,EAAE;aAC3E,EAAE;QACH,EAAE,KAAK,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE;KACzB,CAAC,CAAC;IAEH,MAAM,OAAO,GAAc,EAAE,CAAC;IAC9B,IAAI,KAAK,GAAW,CAAC,CAAC;IACtB,OAAO,IAAI,EAAE;QACZ,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;YAAE,qBAAqB,CAAC,GAAG,KAAK,EAAE,CAAC,CAAC;QAE3D,MAAM,GAAG,GAAiB,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;QAC/C,IAAI,GAAG,KAAK,IAAI;YAAE,MAAM;QAExB,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;KAClB;IAED,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAE3B,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;IACnE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;QAC5B,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,KAAK,KAAK,CAAC,KAAK,CAAC,UAAU,KAAK,KAAK,CAAC,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC,KAAK,CAAC,KAAK,KAAK,KAAK,CAAC,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,GAAG,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;KAChK;AACF,CAAC,CAAA,CAAC;AAEF,wBAAwB"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { TKeyObject } from 'tscommons-es-core';
|
|
2
|
+
import { Lists } from '../classes/lists';
|
|
3
|
+
import { Tracker } from '../classes/tracker';
|
|
4
|
+
import { DatabaseService } from '../services/database.service';
|
|
5
|
+
import { IParserConfig } from '../interfaces/iparser-config';
|
|
6
|
+
import { THydraConfig } from '../types/thydra-config';
|
|
7
|
+
import { TCrawlConfig } from '../types/tcrawl-config';
|
|
8
|
+
import { TRobotsConfig } from '../types/trobots-config';
|
|
9
|
+
import { TParserCtor } from '../types/tparser-ctor';
|
|
10
|
+
export declare class CrawlServer {
|
|
11
|
+
private database;
|
|
12
|
+
private hydraConfig;
|
|
13
|
+
private crawlConfig;
|
|
14
|
+
private parsersConfig;
|
|
15
|
+
private robotsConfig;
|
|
16
|
+
private lists;
|
|
17
|
+
private tracker;
|
|
18
|
+
private parsers;
|
|
19
|
+
private crawlers;
|
|
20
|
+
private strategies;
|
|
21
|
+
private isAborted;
|
|
22
|
+
private preDelayIds;
|
|
23
|
+
private isPaused;
|
|
24
|
+
constructor(database: DatabaseService, hydraConfig: THydraConfig, crawlConfig: TCrawlConfig, parsersConfig: TKeyObject<IParserConfig>, robotsConfig: TRobotsConfig, lists: Lists, tracker: Tracker);
|
|
25
|
+
addParser(parser: TParserCtor): void;
|
|
26
|
+
listParsers(): TParserCtor[];
|
|
27
|
+
pause(): void;
|
|
28
|
+
resume(): void;
|
|
29
|
+
private abort;
|
|
30
|
+
terminate(): Promise<void>;
|
|
31
|
+
private pauseCrawl;
|
|
32
|
+
private resumeCrawl;
|
|
33
|
+
start(): Promise<void>;
|
|
34
|
+
shutdown(): Promise<void>;
|
|
35
|
+
}
|
|
@@ -1,13 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { commonsAsyncAbortTimeout, commonsAsyncTimeout } from 'tscommons-es-async';
|
|
11
|
+
import { EStatus } from 'hydra-crawler-ts-assets';
|
|
12
|
+
import { commonsOutputAlert, commonsOutputCompleted, commonsOutputDebug, commonsOutputDoing, commonsOutputProgress, commonsOutputStarting, commonsOutputSuccess } from 'nodecommons-es-cli';
|
|
13
|
+
import { commonsGracefulAbortAddCallback } from 'nodecommons-es-process';
|
|
14
|
+
import { Crawler } from '../classes/crawler';
|
|
15
|
+
import { EAvailableStrategy } from '../enums/eavailable-strategy';
|
|
16
|
+
export class CrawlServer {
|
|
11
17
|
constructor(database, hydraConfig, crawlConfig, parsersConfig, robotsConfig, lists, tracker) {
|
|
12
18
|
this.database = database;
|
|
13
19
|
this.hydraConfig = hydraConfig;
|
|
@@ -22,11 +28,11 @@ class CrawlServer {
|
|
|
22
28
|
this.isPaused = false;
|
|
23
29
|
this.crawlers = new Map();
|
|
24
30
|
this.strategies = new Map();
|
|
25
|
-
|
|
26
|
-
|
|
31
|
+
commonsGracefulAbortAddCallback(() => {
|
|
32
|
+
commonsOutputAlert('SIGINT abort flag is set. Aborting crawl server.');
|
|
27
33
|
this.abort();
|
|
28
34
|
for (const preDelayId of this.preDelayIds)
|
|
29
|
-
|
|
35
|
+
commonsAsyncAbortTimeout(preDelayId);
|
|
30
36
|
});
|
|
31
37
|
}
|
|
32
38
|
addParser(parser) {
|
|
@@ -36,14 +42,14 @@ class CrawlServer {
|
|
|
36
42
|
return this.parsers.slice();
|
|
37
43
|
}
|
|
38
44
|
pause() {
|
|
39
|
-
|
|
45
|
+
commonsOutputAlert('Pausing crawl server');
|
|
40
46
|
this.isPaused = true;
|
|
41
47
|
for (const domain of this.crawlers.keys()) {
|
|
42
48
|
this.pauseCrawl(domain);
|
|
43
49
|
}
|
|
44
50
|
}
|
|
45
51
|
resume() {
|
|
46
|
-
|
|
52
|
+
commonsOutputCompleted('Resuming crawl server');
|
|
47
53
|
this.isPaused = false;
|
|
48
54
|
for (const domain of this.crawlers.keys()) {
|
|
49
55
|
this.resumeCrawl(domain);
|
|
@@ -51,132 +57,138 @@ class CrawlServer {
|
|
|
51
57
|
}
|
|
52
58
|
abort() {
|
|
53
59
|
this.isAborted = true;
|
|
54
|
-
|
|
60
|
+
commonsAsyncAbortTimeout('find-new-available');
|
|
55
61
|
}
|
|
56
|
-
|
|
57
|
-
this
|
|
58
|
-
|
|
59
|
-
|
|
62
|
+
terminate() {
|
|
63
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
64
|
+
this.abort();
|
|
65
|
+
yield this.database.close();
|
|
66
|
+
process.exit(0);
|
|
67
|
+
});
|
|
60
68
|
}
|
|
61
|
-
|
|
69
|
+
pauseCrawl(domain) {
|
|
62
70
|
if (!this.crawlers.has(domain))
|
|
63
71
|
return;
|
|
64
|
-
|
|
72
|
+
commonsOutputAlert(`Pausing crawler for ${domain}`);
|
|
65
73
|
this.crawlers.get(domain).pause();
|
|
66
74
|
}
|
|
67
|
-
|
|
75
|
+
resumeCrawl(domain) {
|
|
68
76
|
if (!this.crawlers.has(domain))
|
|
69
77
|
return;
|
|
70
|
-
|
|
78
|
+
commonsOutputCompleted(`Resuming crawler for ${domain}`);
|
|
71
79
|
this.crawlers.get(domain).resume();
|
|
72
80
|
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
else {
|
|
95
|
-
// allow a new large crawl, if enough space
|
|
96
|
-
if (space > 1) {
|
|
97
|
-
spaceForLargestDomains = 1;
|
|
98
|
-
spaceForSmallestDomains--;
|
|
81
|
+
start() {
|
|
82
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
83
|
+
yield this.database.resetActive();
|
|
84
|
+
const added = yield this.database.queue(this.hydraConfig.startUrl);
|
|
85
|
+
if (added)
|
|
86
|
+
this.tracker.delta(EStatus.QUEUED, 1);
|
|
87
|
+
while (!this.isAborted) {
|
|
88
|
+
if (!this.isPaused) {
|
|
89
|
+
const existing = [...this.crawlers.keys()];
|
|
90
|
+
const space = this.crawlConfig.maxCrawlers - existing.length;
|
|
91
|
+
if (space > 0) {
|
|
92
|
+
commonsOutputDebug(`Space for ${space} new crawls available`);
|
|
93
|
+
const largestStrategyDomains = existing
|
|
94
|
+
.filter((domain) => this.strategies.get(domain) === EAvailableStrategy.LARGEST);
|
|
95
|
+
const smallestStrategyDomains = existing
|
|
96
|
+
.filter((domain) => this.strategies.get(domain) === EAvailableStrategy.SMALLEST);
|
|
97
|
+
commonsOutputDebug(`Current crawl has LARGEST=${largestStrategyDomains.length}; SMALLEST=${smallestStrategyDomains.length}`);
|
|
98
|
+
let spaceForLargestDomains = 0;
|
|
99
|
+
let spaceForSmallestDomains = space;
|
|
100
|
+
if (largestStrategyDomains.length >= Math.floor(this.crawlConfig.maxCrawlers / 2)) {
|
|
101
|
+
// already 50% large crawling, so all remaining space goes to small crawls
|
|
99
102
|
}
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
const availables = [
|
|
106
|
-
...availablesSmallest,
|
|
107
|
-
...availablesLargest
|
|
108
|
-
];
|
|
109
|
-
if (!this.isAborted) {
|
|
110
|
-
for (const domain of availables) {
|
|
111
|
-
nodecommons_cli_1.CommonsOutput.starting(`Creating new crawl head for ${domain}`);
|
|
112
|
-
const crawler = new crawler_1.Crawler(domain, this.database, this.crawlConfig, this.parsersConfig, this.robotsConfig, this.parsers, this.lists, this.tracker);
|
|
113
|
-
this.crawlers.set(domain, crawler);
|
|
114
|
-
if (availablesLargest.includes(domain)) {
|
|
115
|
-
this.strategies.set(domain, eavailable_strategy_1.EAvailableStrategy.LARGEST);
|
|
103
|
+
else {
|
|
104
|
+
// allow a new large crawl, if enough space
|
|
105
|
+
if (space > 1) {
|
|
106
|
+
spaceForLargestDomains = 1;
|
|
107
|
+
spaceForSmallestDomains--;
|
|
116
108
|
}
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
109
|
+
}
|
|
110
|
+
commonsOutputDebug(`Space for LARGEST=${spaceForLargestDomains}; SMALLEST=${spaceForSmallestDomains}`);
|
|
111
|
+
const availablesLargest = yield this.database.available(EAvailableStrategy.LARGEST, this.crawlConfig.availableStrategyThreshold, spaceForLargestDomains, existing);
|
|
112
|
+
existing.push(...availablesLargest);
|
|
113
|
+
const availablesSmallest = yield this.database.available(EAvailableStrategy.SMALLEST, this.crawlConfig.availableStrategyThreshold, spaceForSmallestDomains, existing);
|
|
114
|
+
const availables = [
|
|
115
|
+
...availablesSmallest,
|
|
116
|
+
...availablesLargest
|
|
117
|
+
];
|
|
118
|
+
if (!this.isAborted) {
|
|
119
|
+
for (const domain of availables) {
|
|
120
|
+
commonsOutputStarting(`Creating new crawl head for ${domain}`);
|
|
121
|
+
const crawler = new Crawler(domain, this.database, this.crawlConfig, this.parsersConfig, this.robotsConfig, this.parsers, this.lists, this.tracker);
|
|
122
|
+
this.crawlers.set(domain, crawler);
|
|
123
|
+
if (availablesLargest.includes(domain)) {
|
|
124
|
+
this.strategies.set(domain, EAvailableStrategy.LARGEST);
|
|
125
|
+
}
|
|
126
|
+
else {
|
|
127
|
+
this.strategies.set(domain, EAvailableStrategy.SMALLEST);
|
|
128
|
+
}
|
|
129
|
+
// called without await in order to do parallel crawls
|
|
130
|
+
void (() => __awaiter(this, void 0, void 0, function* () {
|
|
126
131
|
try {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
132
|
+
// delay randomly to prevent network socket request spikes
|
|
133
|
+
const preDelayId = `predelay_${domain}`;
|
|
134
|
+
this.preDelayIds.push(preDelayId);
|
|
135
|
+
try {
|
|
136
|
+
yield commonsAsyncTimeout(Math.random() * (this.crawlConfig.findNewAvailableDelay - 1000), preDelayId);
|
|
137
|
+
}
|
|
138
|
+
catch (e) {
|
|
139
|
+
if (e.message === 'abortTimeout called')
|
|
140
|
+
return;
|
|
141
|
+
throw e;
|
|
142
|
+
}
|
|
143
|
+
this.preDelayIds = this.preDelayIds
|
|
144
|
+
.filter((pid) => pid !== preDelayId);
|
|
145
|
+
if (this.isAborted)
|
|
131
146
|
return;
|
|
132
|
-
|
|
147
|
+
if (this.isPaused)
|
|
148
|
+
this.pauseCrawl(domain);
|
|
149
|
+
commonsOutputStarting(`Starting crawler for ${domain}`);
|
|
150
|
+
yield crawler.crawl();
|
|
133
151
|
}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
this.
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
nodecommons_cli_1.CommonsOutput.debug('debug position 6');
|
|
145
|
-
console.log(ex);
|
|
146
|
-
}
|
|
147
|
-
finally {
|
|
148
|
-
this.crawlers.delete(domain); // doesn't get called until after doCrawl() is called, so have to do it here if aborted
|
|
149
|
-
this.strategies.delete(domain);
|
|
150
|
-
}
|
|
151
|
-
})();
|
|
152
|
+
catch (ex) {
|
|
153
|
+
commonsOutputDebug('debug position 6');
|
|
154
|
+
console.log(ex);
|
|
155
|
+
}
|
|
156
|
+
finally {
|
|
157
|
+
this.crawlers.delete(domain); // doesn't get called until after doCrawl() is called, so have to do it here if aborted
|
|
158
|
+
this.strategies.delete(domain);
|
|
159
|
+
}
|
|
160
|
+
}))();
|
|
161
|
+
}
|
|
152
162
|
}
|
|
153
163
|
}
|
|
154
164
|
}
|
|
165
|
+
try {
|
|
166
|
+
yield commonsAsyncTimeout(this.crawlConfig.findNewAvailableDelay, 'find-new-available');
|
|
167
|
+
}
|
|
168
|
+
catch (ex) {
|
|
169
|
+
// ignore
|
|
170
|
+
}
|
|
171
|
+
if (this.isAborted) {
|
|
172
|
+
commonsOutputAlert('SIGINT abort is set. Aborting new head loop.');
|
|
173
|
+
break;
|
|
174
|
+
}
|
|
155
175
|
}
|
|
156
|
-
|
|
157
|
-
await tscommons_async_1.CommonsAsync.timeout(this.crawlConfig.findNewAvailableDelay, 'find-new-available');
|
|
158
|
-
}
|
|
159
|
-
catch (ex) {
|
|
160
|
-
// ignore
|
|
161
|
-
}
|
|
162
|
-
if (this.isAborted) {
|
|
163
|
-
nodecommons_cli_1.CommonsOutput.alert(`SIGINT abort is set. Aborting new head loop.`);
|
|
164
|
-
break;
|
|
165
|
-
}
|
|
166
|
-
}
|
|
176
|
+
});
|
|
167
177
|
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
178
|
+
shutdown() {
|
|
179
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
180
|
+
commonsOutputDoing('Waiting for all crawlers to abort');
|
|
181
|
+
while (this.crawlers.size > 0) {
|
|
182
|
+
commonsOutputProgress(this.crawlers.size);
|
|
183
|
+
try {
|
|
184
|
+
yield commonsAsyncTimeout(1000);
|
|
185
|
+
}
|
|
186
|
+
catch (ex) {
|
|
187
|
+
// ignore
|
|
188
|
+
}
|
|
177
189
|
}
|
|
178
|
-
|
|
179
|
-
|
|
190
|
+
commonsOutputSuccess();
|
|
191
|
+
});
|
|
180
192
|
}
|
|
181
193
|
}
|
|
182
|
-
|
|
194
|
+
//# sourceMappingURL=crawl.server.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawl.server.js","sourceRoot":"","sources":["../../src/servers/crawl.server.ts"],"names":[],"mappings":";;;;;;;;;AACA,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAEnF,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC5L,OAAO,EAAE,+BAA+B,EAAE,MAAM,wBAAwB,CAAC;AAEzE,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAa7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAElE,MAAM,OAAO,WAAW;IAUvB,YACU,QAAyB,EACzB,WAAyB,EACzB,WAAyB,EACzB,aAAwC,EACxC,YAA2B,EAC3B,KAAY,EACZ,OAAgB;QANhB,aAAQ,GAAR,QAAQ,CAAiB;QACzB,gBAAW,GAAX,WAAW,CAAc;QACzB,gBAAW,GAAX,WAAW,CAAc;QACzB,kBAAa,GAAb,aAAa,CAA2B;QACxC,iBAAY,GAAZ,YAAY,CAAe;QAC3B,UAAK,GAAL,KAAK,CAAO;QACZ,YAAO,GAAP,OAAO,CAAS;QAhBlB,YAAO,GAAkB,EAAE,CAAC;QAG5B,cAAS,GAAY,KAAK,CAAC;QAE3B,gBAAW,GAAa,EAAE,CAAC;QAE3B,aAAQ,GAAY,KAAK,CAAC;QAWjC,IAAI,CAAC,QAAQ,GAAG,IAAI,GAAG,EAAmB,CAAC;QAC3C,IAAI,CAAC,UAAU,GAAG,IAAI,GAAG,EAA8B,CAAC;QAExD,+BAA+B,CAAC,GAAS,EAAE;YAC1C,kBAAkB,CAAC,kDAAkD,CAAC,CAAC;YACvE,IAAI,CAAC,KAAK,EAAE,CAAC;YAEb,KAAK,MAAM,UAAU,IAAI,IAAI,CAAC,WAAW;gBAAE,wBAAwB,CAAC,UAAU,CAAC,CAAC;QACjF,CAAC,CAAC,CAAC;IACJ,CAAC;IAEM,SAAS,CAAC,MAAmB;QACnC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3B,CAAC;IAEM,WAAW;QACjB,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IAC7B,CAAC;IAEM,KAAK;QACX,kBAAkB,CAAC,sBAAsB,CAAC,CAAC;QAE3C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QAErB,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE;YAC1C,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;SACxB;IACF,CAAC;IAEM,MAAM;QACZ,sBAAsB,CAAC,uBAAuB,CAAC,CAAC;QAEhD,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;QAEtB,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE;YAC1C,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;SACzB;IACF,CAAC;IAEO,KAAK;QACZ,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,wBAAwB,CAAC,oBAAoB,CAAC,CAAC;IAChD,CAAC;IAEY,SAAS;;YACrB,IAAI,CAAC,KAAK,EAAE,CAAC;YAEb,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;YAC5B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjB,CAAC;KAAA;IAEO,UAAU,CAAC,MAAc;QAChC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC;YAAE,OAAO;QAEvC,kBAAkB,CAAC,uBAAuB,MAAM,EAAE,CAAC,CAAC;QAEpD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,KAAK,EAAE,CAAC;IACpC,CAAC;IAEO,WAAW,CAAC,MAAc;QACjC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC;YAAE,OAAO;QAEvC,sBAAsB,CAAC,wBAAwB,MAAM,EAAE,CAAC,CAAC;QAEzD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC,MAAM,EAAE,CAAC;IACrC,CAAC;IAEY,KAAK;;YACjB,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAElC,MAAM,KAAK,GAAY,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;YAC5E,IAAI,KAAK;gBAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YAEjD,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE;gBACvB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;oBACnB,MAAM,QAAQ,GAAa,CAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAE,CAAC;oBAEvD,MAAM,KAAK,GAAW,IAAI,CAAC,WAAW,CAAC,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC;oBAErE,IAAI,KAAK,GAAG,CAAC,EAAE;wBACd,kBAAkB,CAAC,aAAa,KAAK,uBAAuB,CAAC,CAAC;wBAE9D,MAAM,sBAAsB,GAAa,QAAQ;6BAC9C,MAAM,CAAC,CAAC,MAAc,EAAW,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,kBAAkB,CAAC,OAAO,CAAC,CAAC;wBAEnG,MAAM,uBAAuB,GAAa,QAAQ;6BAC/C,MAAM,CAAC,CAAC,MAAc,EAAW,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,kBAAkB,CAAC,QAAQ,CAAC,CAAC;wBAEpG,kBAAkB,CAAC,6BAA6B,sBAAsB,CAAC,MAAM,cAAc,uBAAuB,CAAC,MAAM,EAAE,CAAC,CAAC;wBAE7H,IAAI,sBAAsB,GAAW,CAAC,CAAC;wBACvC,IAAI,uBAAuB,GAAW,KAAK,CAAC;wBAE5C,IAAI,sBAAsB,CAAC,MAAM,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,WAAW,GAAG,CAAC,CAAC,EAAE;4BAClF,0EAA0E;yBAC1E;6BAAM;4BACN,2CAA2C;4BAC3C,IAAI,KAAK,GAAG,CAAC,EAAE;gCACd,sBAAsB,GAAG,CAAC,CAAC;gCAC3B,uBAAuB,EAAE,CAAC;6BAC1B;yBACD;wBAED,kBAAkB,CAAC,qBAAqB,sBAAsB,cAAc,uBAAuB,EAAE,CAAC,CAAC;wBAEvG,MAAM,iBAAiB,GAAa,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAC/D,kBAAkB,CAAC,OAAO,EAC1B,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAC3C,sBAAsB,EACtB,QAAQ,CACT,CAAC;wBAEF,QAAQ,CAAC,IAAI,CAAC,GAAG,iBAAiB,CAAC,CAAC;wBAEpC,MAAM,kBAAkB,GAAa,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAChE,kBAAkB,CAAC,QAAQ,EAC3B,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAC3C,uBAAuB,EACvB,QAAQ,CACT,CAAC;wBAEF,MAAM,UAAU,GAAa;4BAC3B,GAAG,kBAAkB;4BACrB,GAAG,iBAAiB;yBACrB,CAAC;wBAEF,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE;4BACpB,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE;gCAChC,qBAAqB,CAAC,+BAA+B,MAAM,EAAE,CAAC,CAAC;gCAE/D,MAAM,OAAO,GAAY,IAAI,OAAO,CAClC,MAAM,EACN,IAAI,CAAC,QAAQ,EACb,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,aAAa,EAClB,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,OAAO,EACZ,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,OAAO,CACb,CAAC;gCAEF,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;gCAEnC,IAAI,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;oCACvC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,EAAE,kBAAkB,CAAC,OAAO,CAAC,CAAC;iCACxD;qCAAM;oCACN,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,EAAE,kBAAkB,CAAC,QAAQ,CAAC,CAAC;iCACzD;gCAED,sDAAsD;gCACtD,KAAK,CAAC,GAAwB,EAAE;oCAC/B,IAAI;wCACH,0DAA0D;wCAC1D,MAAM,UAAU,GAAW,YAAY,MAAM,EAAE,CAAC;wCAChD,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;wCAClC,IAAI;4CACH,MAAM,mBAAmB,CACvB,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,IAAI,CAAC,EAC/D,UAAU,CACX,CAAC;yCACF;wCAAC,OAAO,CAAC,EAAE;4CACX,IAAK,CAAW,CAAC,OAAO,KAAK,qBAAqB;gDAAE,OAAO;4CAC3D,MAAM,CAAC,CAAC;yCACR;wCAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW;6CAChC,MAAM,CAAC,CAAC,GAAW,EAAW,EAAE,CAAC,GAAG,KAAK,UAAU,CAAC,CAAC;wCAExD,IAAI,IAAI,CAAC,SAAS;4CAAE,OAAO;wCAC3B,IAAI,IAAI,CAAC,QAAQ;4CAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;wCAE3C,qBAAqB,CAAC,wBAAwB,MAAM,EAAE,CAAC,CAAC;wCACxD,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;qCACtB;oCAAC,OAAO,EAAE,EAAE;wCACZ,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;wCACvC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;qCAChB;4CAAS;wCACT,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,uFAAuF;wCACrH,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;qCAC/B;gCACF,CAAC,CAAA,CAAC,EAAE,CAAC;6BACL;yBACD;qBACD;iBACD;gBAED,IAAI;oBACH,MAAM,mBAAmB,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,EAAE,oBAAoB,CAAC,CAAC;iBACxF;gBAAC,OAAO,EAAE,EAAE;oBACZ,SAAS;iBACT;gBAED,IAAI,IAAI,CAAC,SAAS,EAAE;oBACnB,kBAAkB,CAAC,8CAA8C,CAAC,CAAC;oBACnE,MAAM;iBACN;aACD;QACF,CAAC;KAAA;IAEY,QAAQ;;YACpB,kBAAkB,CAAC,mCAAmC,CAAC,CAAC;YACxD,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,EAAE;gBAC9B,qBAAqB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBAE1C,IAAI;oBACH,MAAM,mBAAmB,CAAC,IAAI,CAAC,CAAC;iBAChC;gBAAC,OAAO,EAAE,EAAE;oBACZ,SAAS;iBACT;aACD;YACD,oBAAoB,EAAE,CAAC;QACxB,CAAC;KAAA;CACD"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
import * as http from 'http';
|
|
3
|
+
import * as express from 'express';
|
|
4
|
+
import { CommonsStrictExpressServer } from 'nodecommons-es-express';
|
|
5
|
+
import { ICommonsExpressConfig } from 'nodecommons-es-express';
|
|
6
|
+
export declare class ExpressServer extends CommonsStrictExpressServer {
|
|
7
|
+
constructor(ex: express.Express, server: http.Server, config: ICommonsExpressConfig);
|
|
8
|
+
}
|
|
@@ -1,16 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const nodecommons_process_1 = require("nodecommons-process");
|
|
6
|
-
const nodecommons_cli_1 = require("nodecommons-cli");
|
|
7
|
-
class ExpressServer extends nodecommons_express_1.CommonsExpressServer {
|
|
1
|
+
import { CommonsStrictExpressServer } from 'nodecommons-es-express';
|
|
2
|
+
import { commonsGracefulAbortAddCallback } from 'nodecommons-es-process';
|
|
3
|
+
import { commonsOutputAlert } from 'nodecommons-es-cli';
|
|
4
|
+
export class ExpressServer extends CommonsStrictExpressServer {
|
|
8
5
|
constructor(ex, server, config) {
|
|
9
6
|
super(ex, server, config.port);
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
commonsGracefulAbortAddCallback(() => {
|
|
8
|
+
commonsOutputAlert('SIGINT abort flag is set. Aborting Express server.');
|
|
12
9
|
super.close();
|
|
13
10
|
});
|
|
14
11
|
}
|
|
15
12
|
}
|
|
16
|
-
|
|
13
|
+
//# sourceMappingURL=express.server.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"express.server.js","sourceRoot":"","sources":["../../src/servers/express.server.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,0BAA0B,EAAE,MAAM,wBAAwB,CAAC;AAEpE,OAAO,EAAE,+BAA+B,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAExD,MAAM,OAAO,aAAc,SAAQ,0BAA0B;IAC5D,YACE,EAAmB,EACnB,MAAmB,EACnB,MAA6B;QAE9B,KAAK,CAAC,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;QAE/B,+BAA+B,CAAC,GAAS,EAAE;YAC1C,kBAAkB,CAAC,oDAAoD,CAAC,CAAC;YACzE,KAAK,CAAC,KAAK,EAAE,CAAC;QACf,CAAC,CAAC,CAAC;IACJ,CAAC;CACD"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { TCommonsScheduleTime } from 'tscommons-es-async';
|
|
2
|
+
import { Expiry } from '../classes/expiry';
|
|
3
|
+
import { Lists } from '../classes/lists';
|
|
4
|
+
import { CrawlServer } from '../servers/crawl.server';
|
|
5
|
+
import { DatabaseService } from '../services/database.service';
|
|
6
|
+
declare enum EAction {
|
|
7
|
+
EXPIRE = "expire",
|
|
8
|
+
PURGE_EMPTY_DOMAINS = "purge-empty-domains",
|
|
9
|
+
PURGE_ORPHAN_URLS = "purge-orphan-urls"
|
|
10
|
+
}
|
|
11
|
+
export declare function toEAction(value: string): EAction | undefined;
|
|
12
|
+
export declare class MaintenanceServer {
|
|
13
|
+
private crawl;
|
|
14
|
+
private schedule;
|
|
15
|
+
private expirer;
|
|
16
|
+
private cleaner;
|
|
17
|
+
private isPaused;
|
|
18
|
+
constructor(times: TCommonsScheduleTime[], expiry: Expiry, lists: Lists, database: DatabaseService, crawl: CrawlServer);
|
|
19
|
+
private perform;
|
|
20
|
+
start(): void;
|
|
21
|
+
}
|
|
22
|
+
export {};
|
|
@@ -1,19 +1,24 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { commonsAsyncTimeout, CommonsSchedule } from 'tscommons-es-async';
|
|
11
|
+
import { commonsOutputAlert } from 'nodecommons-es-cli';
|
|
12
|
+
import { commonsGracefulAbortAddCallback } from 'nodecommons-es-process';
|
|
13
|
+
import { Expirer } from '../classes/expirer';
|
|
14
|
+
import { Cleaner } from '../classes/cleaner';
|
|
10
15
|
var EAction;
|
|
11
16
|
(function (EAction) {
|
|
12
17
|
EAction["EXPIRE"] = "expire";
|
|
13
18
|
EAction["PURGE_EMPTY_DOMAINS"] = "purge-empty-domains";
|
|
14
19
|
EAction["PURGE_ORPHAN_URLS"] = "purge-orphan-urls";
|
|
15
20
|
})(EAction || (EAction = {}));
|
|
16
|
-
function toEAction(value) {
|
|
21
|
+
export function toEAction(value) {
|
|
17
22
|
switch (value) {
|
|
18
23
|
case EAction.EXPIRE.toString():
|
|
19
24
|
return EAction.EXPIRE;
|
|
@@ -24,57 +29,58 @@ function toEAction(value) {
|
|
|
24
29
|
}
|
|
25
30
|
return undefined;
|
|
26
31
|
}
|
|
27
|
-
|
|
28
|
-
class MaintenanceServer {
|
|
32
|
+
export class MaintenanceServer {
|
|
29
33
|
constructor(times, expiry, lists, database, crawl) {
|
|
30
34
|
this.crawl = crawl;
|
|
31
35
|
this.isPaused = false;
|
|
32
|
-
this.schedule = new
|
|
33
|
-
this.schedule.parse(times, (action) => toEAction(action),
|
|
36
|
+
this.schedule = new CommonsSchedule('hydra-maintenance');
|
|
37
|
+
this.schedule.parse(times, (action) => toEAction(action), (action) => __awaiter(this, void 0, void 0, function* () {
|
|
34
38
|
let claimedPause = false;
|
|
35
39
|
if (!this.isPaused) {
|
|
36
40
|
this.isPaused = true;
|
|
37
41
|
claimedPause = true;
|
|
38
42
|
this.crawl.pause();
|
|
39
43
|
for (let i = 5; i-- > 0;) {
|
|
40
|
-
|
|
44
|
+
commonsOutputAlert(`Going down for maintenance ... ${i}`);
|
|
41
45
|
try {
|
|
42
|
-
|
|
46
|
+
yield commonsAsyncTimeout(1000);
|
|
43
47
|
}
|
|
44
48
|
catch (ex) {
|
|
45
49
|
/* do nothing */
|
|
46
50
|
}
|
|
47
51
|
}
|
|
48
52
|
}
|
|
49
|
-
|
|
53
|
+
yield this.perform(action);
|
|
50
54
|
if (claimedPause) {
|
|
51
|
-
|
|
55
|
+
commonsOutputAlert('Resuming from maintenance');
|
|
52
56
|
this.isPaused = false;
|
|
53
57
|
this.crawl.resume();
|
|
54
58
|
}
|
|
55
|
-
});
|
|
56
|
-
this.expirer = new
|
|
57
|
-
this.cleaner = new
|
|
58
|
-
|
|
59
|
-
|
|
59
|
+
}));
|
|
60
|
+
this.expirer = new Expirer(expiry, database);
|
|
61
|
+
this.cleaner = new Cleaner(lists, database);
|
|
62
|
+
commonsGracefulAbortAddCallback(() => {
|
|
63
|
+
commonsOutputAlert('SIGINT abort flag is set. Aborting maintenance server.');
|
|
60
64
|
this.schedule.stop();
|
|
61
65
|
});
|
|
62
66
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
67
|
+
perform(action) {
|
|
68
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
69
|
+
switch (action) {
|
|
70
|
+
case EAction.EXPIRE:
|
|
71
|
+
yield this.expirer.expire();
|
|
72
|
+
break;
|
|
73
|
+
case EAction.PURGE_EMPTY_DOMAINS:
|
|
74
|
+
yield this.cleaner.purgeEmptyDomains();
|
|
75
|
+
break;
|
|
76
|
+
case EAction.PURGE_ORPHAN_URLS:
|
|
77
|
+
yield this.cleaner.purgeOrphanUrls();
|
|
78
|
+
break;
|
|
79
|
+
}
|
|
80
|
+
});
|
|
75
81
|
}
|
|
76
82
|
start() {
|
|
77
83
|
this.schedule.start();
|
|
78
84
|
}
|
|
79
85
|
}
|
|
80
|
-
|
|
86
|
+
//# sourceMappingURL=maintenance.server.js.map
|