hydra-crawler 2.8.4 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/apis/{autocomplete.api.d.ts → autocomplete.api.d.mts} +4 -4
- package/dist/apis/autocomplete.api.mjs +14 -0
- package/dist/apis/autocomplete.api.mjs.map +1 -0
- package/dist/apis/{bugs.api.d.ts → bugs.api.d.mts} +4 -4
- package/dist/apis/bugs.api.mjs +18 -0
- package/dist/apis/bugs.api.mjs.map +1 -0
- package/dist/apis/{crawl.api.d.ts → crawl.api.d.mts} +4 -4
- package/dist/apis/crawl.api.mjs +17 -0
- package/dist/apis/crawl.api.mjs.map +1 -0
- package/dist/apis/{domains.api.d.ts → domains.api.d.mts} +4 -4
- package/dist/apis/{domains.api.js → domains.api.mjs} +10 -18
- package/dist/apis/domains.api.mjs.map +1 -0
- package/dist/apis/{images.api.d.ts → images.api.d.mts} +4 -4
- package/dist/apis/images.api.mjs +19 -0
- package/dist/apis/images.api.mjs.map +1 -0
- package/dist/apis/{statistics.api.d.ts → statistics.api.d.mts} +4 -4
- package/dist/apis/statistics.api.mjs +33 -0
- package/dist/apis/statistics.api.mjs.map +1 -0
- package/dist/apis/{test.api.d.ts → test.api.d.mts} +3 -3
- package/dist/apis/test.api.mjs +10 -0
- package/dist/apis/test.api.mjs.map +1 -0
- package/dist/apis/{urls.api.d.ts → urls.api.d.mts} +4 -4
- package/dist/apis/urls.api.mjs +21 -0
- package/dist/apis/urls.api.mjs.map +1 -0
- package/dist/apps/{cleanup.app.d.ts → cleanup.app.d.mts} +5 -5
- package/dist/apps/cleanup.app.mjs +129 -0
- package/dist/apps/cleanup.app.mjs.map +1 -0
- package/dist/apps/{cross-populate-export.app.d.ts → cross-populate-export.app.d.mts} +3 -3
- package/dist/apps/cross-populate-export.app.mjs +61 -0
- package/dist/apps/cross-populate-export.app.mjs.map +1 -0
- package/dist/apps/{cross-populate-import.app.d.ts → cross-populate-import.app.d.mts} +3 -3
- package/dist/apps/cross-populate-import.app.mjs +61 -0
- package/dist/apps/cross-populate-import.app.mjs.map +1 -0
- package/dist/apps/{denylist.app.d.ts → denylist.app.d.mts} +5 -5
- package/dist/apps/denylist.app.mjs +122 -0
- package/dist/apps/denylist.app.mjs.map +1 -0
- package/dist/apps/{expire.app.d.ts → expire.app.d.mts} +8 -7
- package/dist/apps/expire.app.mjs +51 -0
- package/dist/apps/expire.app.mjs.map +1 -0
- package/dist/apps/{export-domain-urls.d.ts → export-domain-urls.d.mts} +3 -3
- package/dist/apps/export-domain-urls.mjs +85 -0
- package/dist/apps/export-domain-urls.mjs.map +1 -0
- package/dist/apps/{extract-text.app.d.ts → extract-text.app.d.mts} +2 -2
- package/dist/apps/extract-text.app.mjs +43 -0
- package/dist/apps/extract-text.app.mjs.map +1 -0
- package/dist/apps/{hydra.app.d.ts → hydra.app.d.mts} +10 -10
- package/dist/apps/hydra.app.mjs +222 -0
- package/dist/apps/hydra.app.mjs.map +1 -0
- package/dist/apps/{import.app.d.ts → import.app.d.mts} +3 -3
- package/dist/apps/import.app.mjs +48 -0
- package/dist/apps/import.app.mjs.map +1 -0
- package/dist/apps/{internal-hydra-common.app.d.ts → internal-hydra-common.app.d.mts} +6 -6
- package/dist/apps/{internal-hydra-common.app.js → internal-hydra-common.app.mjs} +1 -1
- package/dist/apps/internal-hydra-common.app.mjs.map +1 -0
- package/dist/apps/{move-to-archive.app.d.ts → move-to-archive.app.d.mts} +2 -2
- package/dist/apps/move-to-archive.app.mjs +31 -0
- package/dist/apps/move-to-archive.app.mjs.map +1 -0
- package/dist/apps/{prune-archive.app.d.ts → prune-archive.app.d.mts} +2 -2
- package/dist/apps/prune-archive.app.mjs +40 -0
- package/dist/apps/prune-archive.app.mjs.map +1 -0
- package/dist/apps/{query.app.d.ts → query.app.d.mts} +6 -6
- package/dist/apps/query.app.mjs +69 -0
- package/dist/apps/query.app.mjs.map +1 -0
- package/dist/apps/{reattempt.app.d.ts → reattempt.app.d.mts} +6 -6
- package/dist/apps/reattempt.app.mjs +74 -0
- package/dist/apps/reattempt.app.mjs.map +1 -0
- package/dist/apps/{requeue-domain.app.d.ts → requeue-domain.app.d.mts} +3 -3
- package/dist/apps/requeue-domain.app.mjs +52 -0
- package/dist/apps/requeue-domain.app.mjs.map +1 -0
- package/dist/apps/{seed.app.d.ts → seed.app.d.mts} +5 -5
- package/dist/apps/seed.app.mjs +59 -0
- package/dist/apps/seed.app.mjs.map +1 -0
- package/dist/apps/{startup.app.d.ts → startup.app.d.mts} +3 -3
- package/dist/apps/startup.app.mjs +55 -0
- package/dist/apps/startup.app.mjs.map +1 -0
- package/dist/apps/{unarchive-urls.app.d.ts → unarchive-urls.app.d.mts} +5 -5
- package/dist/apps/unarchive-urls.app.mjs +71 -0
- package/dist/apps/unarchive-urls.app.mjs.map +1 -0
- package/dist/classes/{cleaner.d.ts → cleaner.d.mts} +2 -2
- package/dist/classes/cleaner.mjs +257 -0
- package/dist/classes/cleaner.mjs.map +1 -0
- package/dist/classes/{crawler.d.ts → crawler.d.mts} +10 -12
- package/dist/classes/crawler.mjs +418 -0
- package/dist/classes/crawler.mjs.map +1 -0
- package/dist/classes/{dns.js → dns.mjs} +4 -4
- package/dist/classes/dns.mjs.map +1 -0
- package/dist/classes/{expirer.d.ts → expirer.d.mts} +2 -2
- package/dist/classes/expirer.mjs +117 -0
- package/dist/classes/expirer.mjs.map +1 -0
- package/dist/classes/{expiry.d.ts → expiry.d.mts} +1 -1
- package/dist/classes/{expiry.js → expiry.mjs} +7 -9
- package/dist/classes/expiry.mjs.map +1 -0
- package/dist/classes/{lists.d.ts → lists.d.mts} +2 -2
- package/dist/classes/{lists.js → lists.mjs} +4 -4
- package/dist/classes/lists.mjs.map +1 -0
- package/dist/classes/{robot.d.ts → robot.d.mts} +3 -3
- package/dist/classes/robot.mjs +74 -0
- package/dist/classes/robot.mjs.map +1 -0
- package/dist/classes/{tracker.d.ts → tracker.d.mts} +3 -3
- package/dist/classes/tracker.mjs +101 -0
- package/dist/classes/tracker.mjs.map +1 -0
- package/dist/{cli.js → cli.mjs} +46 -58
- package/dist/cli.mjs.map +1 -0
- package/dist/enums/{eavailable-strategy.js → eavailable-strategy.mjs} +1 -1
- package/dist/enums/eavailable-strategy.mjs.map +1 -0
- package/dist/enums/{elist.js → elist.mjs} +1 -1
- package/dist/enums/elist.mjs.map +1 -0
- package/dist/enums/{eserver.js → eserver.mjs} +1 -1
- package/dist/enums/eserver.mjs.map +1 -0
- package/dist/enums/{ex-powered-by.js → ex-powered-by.mjs} +1 -1
- package/dist/enums/ex-powered-by.mjs.map +1 -0
- package/dist/helpers/{matcher.d.ts → matcher.d.mts} +1 -1
- package/dist/helpers/{matcher.js → matcher.mjs} +1 -1
- package/dist/helpers/matcher.mjs.map +1 -0
- package/dist/helpers/{random.d.ts → random.d.mts} +1 -1
- package/dist/helpers/{random.js → random.mjs} +1 -1
- package/dist/helpers/random.mjs.map +1 -0
- package/dist/helpers/{utf-decoder.d.ts → utf-decoder.d.mts} +0 -1
- package/dist/helpers/{utf-decoder.js → utf-decoder.mjs} +3 -3
- package/dist/helpers/utf-decoder.mjs.map +1 -0
- package/dist/interfaces/{iexpiry.d.ts → iexpiry.d.mts} +1 -1
- package/dist/interfaces/{iexpiry.js → iexpiry.mjs} +3 -3
- package/dist/interfaces/iexpiry.mjs.map +1 -0
- package/dist/interfaces/{imatch.js → imatch.mjs} +2 -2
- package/dist/interfaces/imatch.mjs.map +1 -0
- package/dist/interfaces/{iparser-config.js → iparser-config.mjs} +2 -2
- package/dist/interfaces/iparser-config.mjs.map +1 -0
- package/dist/interfaces/{iparser.d.ts → iparser.d.mts} +1 -1
- package/dist/interfaces/iparser.mjs +2 -0
- package/dist/interfaces/iparser.mjs.map +1 -0
- package/dist/interfaces/{irequest-outcome.d.ts → irequest-outcome.d.mts} +0 -2
- package/dist/interfaces/irequest-outcome.mjs +2 -0
- package/dist/interfaces/irequest-outcome.mjs.map +1 -0
- package/dist/interfaces/iserver.mjs +2 -0
- package/dist/interfaces/iserver.mjs.map +1 -0
- package/dist/parsers/accessibility-metrics.parser.d.mts +11 -0
- package/dist/parsers/accessibility-metrics.parser.mjs +30 -0
- package/dist/parsers/accessibility-metrics.parser.mjs.map +1 -0
- package/dist/parsers/asp-error.parser.d.mts +12 -0
- package/dist/parsers/asp-error.parser.mjs +38 -0
- package/dist/parsers/asp-error.parser.mjs.map +1 -0
- package/dist/parsers/{bad-words.parser.d.ts → bad-words.parser.d.mts} +6 -6
- package/dist/parsers/bad-words.parser.mjs +17 -0
- package/dist/parsers/bad-words.parser.mjs.map +1 -0
- package/dist/parsers/complex-english.parser.d.mts +15 -0
- package/dist/parsers/complex-english.parser.mjs +52 -0
- package/dist/parsers/complex-english.parser.mjs.map +1 -0
- package/dist/parsers/data.parser.d.mts +13 -0
- package/dist/parsers/{data.parser.js → data.parser.mjs} +8 -7
- package/dist/parsers/data.parser.mjs.map +1 -0
- package/dist/parsers/{dictionary.parser.d.ts → dictionary.parser.d.mts} +6 -6
- package/dist/parsers/dictionary.parser.mjs +63 -0
- package/dist/parsers/dictionary.parser.mjs.map +1 -0
- package/dist/parsers/html.parser.d.mts +13 -0
- package/dist/parsers/{html.parser.js → html.parser.mjs} +4 -3
- package/dist/parsers/html.parser.mjs.map +1 -0
- package/dist/parsers/hyperlinks.parser.d.mts +20 -0
- package/dist/parsers/hyperlinks.parser.mjs +104 -0
- package/dist/parsers/hyperlinks.parser.mjs.map +1 -0
- package/dist/parsers/image-tags.parser.d.mts +19 -0
- package/dist/parsers/image-tags.parser.mjs +42 -0
- package/dist/parsers/image-tags.parser.mjs.map +1 -0
- package/dist/parsers/{interest.parser.d.ts → interest.parser.d.mts} +7 -7
- package/dist/parsers/interest.parser.mjs +60 -0
- package/dist/parsers/interest.parser.mjs.map +1 -0
- package/dist/parsers/jpeg.parser.d.mts +11 -0
- package/dist/parsers/jpeg.parser.mjs +29 -0
- package/dist/parsers/jpeg.parser.mjs.map +1 -0
- package/dist/parsers/{llama-guard.parser.d.ts → llama-guard.parser.d.mts} +7 -7
- package/dist/parsers/llama-guard.parser.mjs +56 -0
- package/dist/parsers/llama-guard.parser.mjs.map +1 -0
- package/dist/parsers/{offence.parser.d.ts → offence.parser.d.mts} +7 -7
- package/dist/parsers/offence.parser.mjs +60 -0
- package/dist/parsers/offence.parser.mjs.map +1 -0
- package/dist/parsers/{ollama.parser.d.ts → ollama.parser.d.mts} +6 -6
- package/dist/parsers/ollama.parser.mjs +43 -0
- package/dist/parsers/ollama.parser.mjs.map +1 -0
- package/dist/parsers/{paragraphs.parser.d.ts → paragraphs.parser.d.mts} +5 -5
- package/dist/parsers/paragraphs.parser.mjs +38 -0
- package/dist/parsers/paragraphs.parser.mjs.map +1 -0
- package/dist/parsers/{parser.d.ts → parser.d.mts} +6 -6
- package/dist/parsers/parser.mjs +45 -0
- package/dist/parsers/parser.mjs.map +1 -0
- package/dist/parsers/php-error.parser.d.mts +12 -0
- package/dist/parsers/php-error.parser.mjs +42 -0
- package/dist/parsers/php-error.parser.mjs.map +1 -0
- package/dist/parsers/{phrase.parser.d.ts → phrase.parser.d.mts} +3 -3
- package/dist/parsers/phrase.parser.mjs +15 -0
- package/dist/parsers/phrase.parser.mjs.map +1 -0
- package/dist/parsers/{regex.parser.d.ts → regex.parser.d.mts} +3 -3
- package/dist/parsers/regex.parser.mjs +29 -0
- package/dist/parsers/regex.parser.mjs.map +1 -0
- package/dist/parsers/server.parser.d.mts +11 -0
- package/dist/parsers/server.parser.mjs +57 -0
- package/dist/parsers/server.parser.mjs.map +1 -0
- package/dist/parsers/{spelling.parser.d.ts → spelling.parser.d.mts} +6 -6
- package/dist/parsers/spelling.parser.mjs +17 -0
- package/dist/parsers/spelling.parser.mjs.map +1 -0
- package/dist/parsers/string.parser.d.mts +8 -0
- package/dist/parsers/{string.parser.js → string.parser.mjs} +5 -4
- package/dist/parsers/string.parser.mjs.map +1 -0
- package/dist/parsers/{text.parser.d.ts → text.parser.d.mts} +3 -3
- package/dist/parsers/text.parser.mjs +30 -0
- package/dist/parsers/text.parser.mjs.map +1 -0
- package/dist/parsers/{words.parser.d.ts → words.parser.d.mts} +3 -3
- package/dist/parsers/words.parser.mjs +29 -0
- package/dist/parsers/words.parser.mjs.map +1 -0
- package/dist/queries/complex-english.query.d.mts +2 -0
- package/dist/queries/{complex-english.query.js → complex-english.query.mjs} +18 -27
- package/dist/queries/complex-english.query.mjs.map +1 -0
- package/dist/queries/flash-content.query.d.mts +2 -0
- package/dist/queries/flash-content.query.mjs +80 -0
- package/dist/queries/flash-content.query.mjs.map +1 -0
- package/dist/queries/linking-to-domains.query.d.mts +2 -0
- package/dist/queries/linking-to-domains.query.mjs +128 -0
- package/dist/queries/linking-to-domains.query.mjs.map +1 -0
- package/dist/queries/llamaguard-unsafe-content.query.d.mts +2 -0
- package/dist/queries/llamaguard-unsafe-content.query.mjs +90 -0
- package/dist/queries/llamaguard-unsafe-content.query.mjs.map +1 -0
- package/dist/queries/readability-score.query.d.mts +2 -0
- package/dist/queries/{readability-score.query.js → readability-score.query.mjs} +11 -20
- package/dist/queries/readability-score.query.mjs.map +1 -0
- package/dist/servers/{crawl.server.d.ts → crawl.server.d.mts} +10 -10
- package/dist/servers/crawl.server.mjs +192 -0
- package/dist/servers/crawl.server.mjs.map +1 -0
- package/dist/servers/{express.server.d.ts → express.server.d.mts} +2 -3
- package/dist/servers/express.server.mjs +13 -0
- package/dist/servers/express.server.mjs.map +1 -0
- package/dist/servers/{maintenance.server.d.ts → maintenance.server.d.mts} +5 -5
- package/dist/servers/maintenance.server.mjs +97 -0
- package/dist/servers/maintenance.server.mjs.map +1 -0
- package/dist/servers/{rest.server.d.ts → rest.server.d.mts} +4 -4
- package/dist/servers/rest.server.mjs +62 -0
- package/dist/servers/rest.server.mjs.map +1 -0
- package/dist/servers/{socket-io.server.d.ts → socket-io.server.d.mts} +4 -4
- package/dist/servers/socket-io.server.mjs +22 -0
- package/dist/servers/socket-io.server.mjs.map +1 -0
- package/dist/services/{database.service.d.ts → database.service.d.mts} +12 -12
- package/dist/services/database.service.mjs +645 -0
- package/dist/services/database.service.mjs.map +1 -0
- package/dist/services/{ollama-rest.service.d.ts → ollama-rest.service.d.mts} +2 -2
- package/dist/services/ollama-rest.service.mjs +27 -0
- package/dist/services/ollama-rest.service.mjs.map +1 -0
- package/dist/services/{rig-llama-guard.service.d.ts → rig-llama-guard.service.d.mts} +2 -2
- package/dist/services/{rig-llama-guard.service.js → rig-llama-guard.service.mjs} +33 -43
- package/dist/services/rig-llama-guard.service.mjs.map +1 -0
- package/dist/services/{rig-queue-length.service.d.ts → rig-queue-length.service.d.mts} +1 -1
- package/dist/services/rig-queue-length.service.mjs +12 -0
- package/dist/services/rig-queue-length.service.mjs.map +1 -0
- package/dist/services/{rig-subjectivity-scale.service.d.ts → rig-subjectivity-scale.service.d.mts} +2 -2
- package/dist/services/rig-subjectivity-scale.service.mjs +82 -0
- package/dist/services/rig-subjectivity-scale.service.mjs.map +1 -0
- package/dist/services/{rig-ticket-rest.service.d.ts → rig-ticket-rest.service.d.mts} +3 -3
- package/dist/services/rig-ticket-rest.service.mjs +15 -0
- package/dist/services/rig-ticket-rest.service.mjs.map +1 -0
- package/dist/services/{rig-ticketed-promise.service.d.ts → rig-ticketed-promise.service.d.mts} +4 -6
- package/dist/services/{rig-ticketed-promise.service.js → rig-ticketed-promise.service.mjs} +7 -6
- package/dist/services/rig-ticketed-promise.service.mjs.map +1 -0
- package/dist/types/{tcrawl-config.js → tcrawl-config.mjs} +2 -2
- package/dist/types/tcrawl-config.mjs.map +1 -0
- package/dist/types/{thydra-config.js → thydra-config.mjs} +2 -2
- package/dist/types/thydra-config.mjs.map +1 -0
- package/dist/types/tparser-ctor.d.mts +7 -0
- package/dist/types/tparser-ctor.mjs +2 -0
- package/dist/types/tparser-ctor.mjs.map +1 -0
- package/dist/types/tquery.d.mts +7 -0
- package/dist/types/tquery.mjs +2 -0
- package/dist/types/tquery.mjs.map +1 -0
- package/dist/types/tqueue-length.mjs +2 -0
- package/dist/types/tqueue-length.mjs.map +1 -0
- package/dist/types/{trobots-config.js → trobots-config.mjs} +2 -2
- package/dist/types/trobots-config.mjs.map +1 -0
- package/package.json +37 -43
- package/dist/apis/autocomplete.api.js +0 -22
- package/dist/apis/autocomplete.api.js.map +0 -1
- package/dist/apis/bugs.api.js +0 -26
- package/dist/apis/bugs.api.js.map +0 -1
- package/dist/apis/crawl.api.js +0 -25
- package/dist/apis/crawl.api.js.map +0 -1
- package/dist/apis/domains.api.js.map +0 -1
- package/dist/apis/images.api.js +0 -27
- package/dist/apis/images.api.js.map +0 -1
- package/dist/apis/statistics.api.js +0 -41
- package/dist/apis/statistics.api.js.map +0 -1
- package/dist/apis/test.api.js +0 -19
- package/dist/apis/test.api.js.map +0 -1
- package/dist/apis/urls.api.js +0 -29
- package/dist/apis/urls.api.js.map +0 -1
- package/dist/apps/cleanup.app.js +0 -151
- package/dist/apps/cleanup.app.js.map +0 -1
- package/dist/apps/cross-populate-export.app.js +0 -75
- package/dist/apps/cross-populate-export.app.js.map +0 -1
- package/dist/apps/cross-populate-import.app.js +0 -100
- package/dist/apps/cross-populate-import.app.js.map +0 -1
- package/dist/apps/denylist.app.js +0 -132
- package/dist/apps/denylist.app.js.map +0 -1
- package/dist/apps/expire.app.js +0 -63
- package/dist/apps/expire.app.js.map +0 -1
- package/dist/apps/export-domain-urls.js +0 -99
- package/dist/apps/export-domain-urls.js.map +0 -1
- package/dist/apps/extract-text.app.js +0 -55
- package/dist/apps/extract-text.app.js.map +0 -1
- package/dist/apps/hydra.app.js +0 -218
- package/dist/apps/hydra.app.js.map +0 -1
- package/dist/apps/import.app.js +0 -57
- package/dist/apps/import.app.js.map +0 -1
- package/dist/apps/internal-hydra-common.app.js.map +0 -1
- package/dist/apps/move-to-archive.app.js +0 -46
- package/dist/apps/move-to-archive.app.js.map +0 -1
- package/dist/apps/prune-archive.app.js +0 -55
- package/dist/apps/prune-archive.app.js.map +0 -1
- package/dist/apps/query.app.js +0 -80
- package/dist/apps/query.app.js.map +0 -1
- package/dist/apps/reattempt.app.js +0 -83
- package/dist/apps/reattempt.app.js.map +0 -1
- package/dist/apps/requeue-domain.app.js +0 -64
- package/dist/apps/requeue-domain.app.js.map +0 -1
- package/dist/apps/seed.app.js +0 -69
- package/dist/apps/seed.app.js.map +0 -1
- package/dist/apps/startup.app.js +0 -64
- package/dist/apps/startup.app.js.map +0 -1
- package/dist/apps/unarchive-urls.app.js +0 -83
- package/dist/apps/unarchive-urls.app.js.map +0 -1
- package/dist/classes/cleaner.js +0 -266
- package/dist/classes/cleaner.js.map +0 -1
- package/dist/classes/crawler.js +0 -422
- package/dist/classes/crawler.js.map +0 -1
- package/dist/classes/dns.js.map +0 -1
- package/dist/classes/expirer.js +0 -121
- package/dist/classes/expirer.js.map +0 -1
- package/dist/classes/expiry.js.map +0 -1
- package/dist/classes/lists.js.map +0 -1
- package/dist/classes/robot.js +0 -82
- package/dist/classes/robot.js.map +0 -1
- package/dist/classes/tracker.js +0 -120
- package/dist/classes/tracker.js.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/enums/eavailable-strategy.js.map +0 -1
- package/dist/enums/elist.js.map +0 -1
- package/dist/enums/eserver.js.map +0 -1
- package/dist/enums/ex-powered-by.js.map +0 -1
- package/dist/helpers/matcher.js.map +0 -1
- package/dist/helpers/random.js.map +0 -1
- package/dist/helpers/utf-decoder.js.map +0 -1
- package/dist/interfaces/iexpiry.js.map +0 -1
- package/dist/interfaces/imatch.js.map +0 -1
- package/dist/interfaces/iparser-config.js.map +0 -1
- package/dist/interfaces/iparser.js +0 -2
- package/dist/interfaces/iparser.js.map +0 -1
- package/dist/interfaces/irequest-outcome.js +0 -2
- package/dist/interfaces/irequest-outcome.js.map +0 -1
- package/dist/interfaces/iserver.js +0 -2
- package/dist/interfaces/iserver.js.map +0 -1
- package/dist/parsers/accessibility-metrics.parser.d.ts +0 -11
- package/dist/parsers/accessibility-metrics.parser.js +0 -40
- package/dist/parsers/accessibility-metrics.parser.js.map +0 -1
- package/dist/parsers/asp-error.parser.d.ts +0 -12
- package/dist/parsers/asp-error.parser.js +0 -48
- package/dist/parsers/asp-error.parser.js.map +0 -1
- package/dist/parsers/bad-words.parser.js +0 -27
- package/dist/parsers/bad-words.parser.js.map +0 -1
- package/dist/parsers/complex-english.parser.d.ts +0 -15
- package/dist/parsers/complex-english.parser.js +0 -61
- package/dist/parsers/complex-english.parser.js.map +0 -1
- package/dist/parsers/data.parser.d.ts +0 -14
- package/dist/parsers/data.parser.js.map +0 -1
- package/dist/parsers/dictionary.parser.js +0 -73
- package/dist/parsers/dictionary.parser.js.map +0 -1
- package/dist/parsers/html.parser.d.ts +0 -13
- package/dist/parsers/html.parser.js.map +0 -1
- package/dist/parsers/hyperlinks.parser.d.ts +0 -20
- package/dist/parsers/hyperlinks.parser.js +0 -115
- package/dist/parsers/hyperlinks.parser.js.map +0 -1
- package/dist/parsers/image-tags.parser.d.ts +0 -19
- package/dist/parsers/image-tags.parser.js +0 -52
- package/dist/parsers/image-tags.parser.js.map +0 -1
- package/dist/parsers/interest.parser.js +0 -69
- package/dist/parsers/interest.parser.js.map +0 -1
- package/dist/parsers/jpeg.parser.d.ts +0 -11
- package/dist/parsers/jpeg.parser.js +0 -39
- package/dist/parsers/jpeg.parser.js.map +0 -1
- package/dist/parsers/llama-guard.parser.js +0 -65
- package/dist/parsers/llama-guard.parser.js.map +0 -1
- package/dist/parsers/offence.parser.js +0 -69
- package/dist/parsers/offence.parser.js.map +0 -1
- package/dist/parsers/ollama.parser.js +0 -51
- package/dist/parsers/ollama.parser.js.map +0 -1
- package/dist/parsers/paragraphs.parser.js +0 -49
- package/dist/parsers/paragraphs.parser.js.map +0 -1
- package/dist/parsers/parser.js +0 -57
- package/dist/parsers/parser.js.map +0 -1
- package/dist/parsers/php-error.parser.d.ts +0 -12
- package/dist/parsers/php-error.parser.js +0 -52
- package/dist/parsers/php-error.parser.js.map +0 -1
- package/dist/parsers/phrase.parser.js +0 -26
- package/dist/parsers/phrase.parser.js.map +0 -1
- package/dist/parsers/regex.parser.js +0 -43
- package/dist/parsers/regex.parser.js.map +0 -1
- package/dist/parsers/server.parser.d.ts +0 -11
- package/dist/parsers/server.parser.js +0 -67
- package/dist/parsers/server.parser.js.map +0 -1
- package/dist/parsers/spelling.parser.js +0 -27
- package/dist/parsers/spelling.parser.js.map +0 -1
- package/dist/parsers/string.parser.d.ts +0 -8
- package/dist/parsers/string.parser.js.map +0 -1
- package/dist/parsers/text.parser.js +0 -41
- package/dist/parsers/text.parser.js.map +0 -1
- package/dist/parsers/words.parser.js +0 -40
- package/dist/parsers/words.parser.js.map +0 -1
- package/dist/queries/complex-english.query.d.ts +0 -2
- package/dist/queries/complex-english.query.js.map +0 -1
- package/dist/queries/flash-content.query.d.ts +0 -2
- package/dist/queries/flash-content.query.js +0 -82
- package/dist/queries/flash-content.query.js.map +0 -1
- package/dist/queries/linking-to-domains.query.d.ts +0 -2
- package/dist/queries/linking-to-domains.query.js +0 -130
- package/dist/queries/linking-to-domains.query.js.map +0 -1
- package/dist/queries/llamaguard-unsafe-content.query.d.ts +0 -2
- package/dist/queries/llamaguard-unsafe-content.query.js +0 -92
- package/dist/queries/llamaguard-unsafe-content.query.js.map +0 -1
- package/dist/queries/readability-score.query.d.ts +0 -2
- package/dist/queries/readability-score.query.js.map +0 -1
- package/dist/servers/crawl.server.js +0 -198
- package/dist/servers/crawl.server.js.map +0 -1
- package/dist/servers/express.server.js +0 -13
- package/dist/servers/express.server.js.map +0 -1
- package/dist/servers/maintenance.server.js +0 -103
- package/dist/servers/maintenance.server.js.map +0 -1
- package/dist/servers/rest.server.js +0 -46
- package/dist/servers/rest.server.js.map +0 -1
- package/dist/servers/socket-io.server.js +0 -61
- package/dist/servers/socket-io.server.js.map +0 -1
- package/dist/services/database.service.js +0 -795
- package/dist/services/database.service.js.map +0 -1
- package/dist/services/ollama-rest.service.js +0 -46
- package/dist/services/ollama-rest.service.js.map +0 -1
- package/dist/services/rig-llama-guard.service.js.map +0 -1
- package/dist/services/rig-queue-length.service.js +0 -22
- package/dist/services/rig-queue-length.service.js.map +0 -1
- package/dist/services/rig-subjectivity-scale.service.js +0 -96
- package/dist/services/rig-subjectivity-scale.service.js.map +0 -1
- package/dist/services/rig-ticket-rest.service.js +0 -29
- package/dist/services/rig-ticket-rest.service.js.map +0 -1
- package/dist/services/rig-ticketed-promise.service.js.map +0 -1
- package/dist/types/tcrawl-config.js.map +0 -1
- package/dist/types/thydra-config.js.map +0 -1
- package/dist/types/tparser-ctor.d.ts +0 -7
- package/dist/types/tparser-ctor.js +0 -2
- package/dist/types/tparser-ctor.js.map +0 -1
- package/dist/types/tquery.d.ts +0 -7
- package/dist/types/tquery.js +0 -2
- package/dist/types/tquery.js.map +0 -1
- package/dist/types/tqueue-length.js +0 -2
- package/dist/types/tqueue-length.js.map +0 -1
- package/dist/types/trobots-config.js.map +0 -1
- /package/dist/classes/{dns.d.ts → dns.d.mts} +0 -0
- /package/dist/{cli.d.ts → cli.d.mts} +0 -0
- /package/dist/enums/{eavailable-strategy.d.ts → eavailable-strategy.d.mts} +0 -0
- /package/dist/enums/{elist.d.ts → elist.d.mts} +0 -0
- /package/dist/enums/{eserver.d.ts → eserver.d.mts} +0 -0
- /package/dist/enums/{ex-powered-by.d.ts → ex-powered-by.d.mts} +0 -0
- /package/dist/interfaces/{imatch.d.ts → imatch.d.mts} +0 -0
- /package/dist/interfaces/{iparser-config.d.ts → iparser-config.d.mts} +0 -0
- /package/dist/interfaces/{iserver.d.ts → iserver.d.mts} +0 -0
- /package/dist/types/{tcrawl-config.d.ts → tcrawl-config.d.mts} +0 -0
- /package/dist/types/{thydra-config.d.ts → thydra-config.d.mts} +0 -0
- /package/dist/types/{tqueue-length.d.ts → tqueue-length.d.mts} +0 -0
- /package/dist/types/{trobots-config.d.ts → trobots-config.d.mts} +0 -0
package/dist/classes/crawler.js
DELETED
|
@@ -1,422 +0,0 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
|
-
import { URL } from 'url';
|
|
11
|
-
import * as http from 'http';
|
|
12
|
-
import * as https from 'https';
|
|
13
|
-
import { commonsMapToObject, commonsTypeHasPropertyString } from 'tscommons-es-core';
|
|
14
|
-
import { commonsAsyncAbortTimeout, commonsAsyncTimeout } from 'tscommons-es-async';
|
|
15
|
-
import { EStatus } from 'hydra-crawler-ts-assets';
|
|
16
|
-
import { commonsOutputAlert, commonsOutputCompleted, commonsOutputDebug, commonsOutputError, commonsOutputInfo } from 'nodecommons-es-cli';
|
|
17
|
-
import { commonsGracefulAbortAddCallback, commonsGracefulAbortRemoveCallback } from 'nodecommons-es-process';
|
|
18
|
-
import { commonsHashMd5 } from 'nodecommons-es-security';
|
|
19
|
-
import { Robot } from '../classes/robot';
|
|
20
|
-
import { Random } from '../helpers/random';
|
|
21
|
-
import { EList } from '../enums/elist';
|
|
22
|
-
import { Dns } from './dns';
|
|
23
|
-
export class Crawler {
|
|
24
|
-
static applyMasqueradeHeaders(request) {
|
|
25
|
-
request.setHeader('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8');
|
|
26
|
-
request.setHeader('Accept-Encoding', 'identity');
|
|
27
|
-
request.setHeader('Connection', 'close');
|
|
28
|
-
request.setHeader('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/68.0.3440.106 Chrome/68.0.3440.106 Safari/537.36 (UoM Hydra)');
|
|
29
|
-
}
|
|
30
|
-
static request(handler, url, connectTimeout, maxFileSize, tracker) {
|
|
31
|
-
return new Promise((resolve, reject) => {
|
|
32
|
-
const start = new Date().getTime();
|
|
33
|
-
let exceeded = false;
|
|
34
|
-
let completed = false;
|
|
35
|
-
let timedOut = false;
|
|
36
|
-
const timedOutMessage = `Timeout on crawl of ${url}`;
|
|
37
|
-
const secondaryTimeoutId = `crawler_${url}_secondary_timeout`;
|
|
38
|
-
commonsAsyncTimeout(connectTimeout + 60000, secondaryTimeoutId)
|
|
39
|
-
.then(() => {
|
|
40
|
-
if (timedOut || completed)
|
|
41
|
-
return;
|
|
42
|
-
commonsOutputDebug(`Secondary timeout for ${url}`);
|
|
43
|
-
timedOut = true;
|
|
44
|
-
reject(new Error(timedOutMessage));
|
|
45
|
-
return;
|
|
46
|
-
})
|
|
47
|
-
.catch((e) => {
|
|
48
|
-
if (e.message === 'abortTimeout called')
|
|
49
|
-
return;
|
|
50
|
-
commonsOutputDebug('debug position 10');
|
|
51
|
-
console.log(e);
|
|
52
|
-
throw e;
|
|
53
|
-
});
|
|
54
|
-
const req = handler.request(url, (res) => {
|
|
55
|
-
// likely unnecessary due to doing it below, but doesn't hurt to do it twice
|
|
56
|
-
req.setTimeout(connectTimeout);
|
|
57
|
-
res.setTimeout(connectTimeout);
|
|
58
|
-
const end = new Date().getTime();
|
|
59
|
-
const latency = end - start;
|
|
60
|
-
// always check for a body, even in 4xx 3xx etc.
|
|
61
|
-
let read = 0;
|
|
62
|
-
const data = [];
|
|
63
|
-
res.on('data', (chunk) => {
|
|
64
|
-
if (read > maxFileSize) {
|
|
65
|
-
if (!exceeded) {
|
|
66
|
-
exceeded = true;
|
|
67
|
-
res.destroy();
|
|
68
|
-
commonsOutputAlert(`Exceeded maxFileSize for URL: ${url}`);
|
|
69
|
-
}
|
|
70
|
-
return;
|
|
71
|
-
}
|
|
72
|
-
data.push(chunk);
|
|
73
|
-
read += chunk.length;
|
|
74
|
-
if (tracker)
|
|
75
|
-
tracker.bandwidth(chunk.length);
|
|
76
|
-
});
|
|
77
|
-
res.on('end', () => {
|
|
78
|
-
if (timedOut || completed)
|
|
79
|
-
return;
|
|
80
|
-
completed = true;
|
|
81
|
-
commonsAsyncAbortTimeout(secondaryTimeoutId);
|
|
82
|
-
resolve({
|
|
83
|
-
latency: latency,
|
|
84
|
-
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
|
|
85
|
-
ip: res['remoteIp'],
|
|
86
|
-
statusCode: res.statusCode || 599,
|
|
87
|
-
headers: res.headers,
|
|
88
|
-
data: Buffer.concat(data),
|
|
89
|
-
exceeded: exceeded
|
|
90
|
-
});
|
|
91
|
-
});
|
|
92
|
-
// not sure if this event actually occurs, as opposed to req error below
|
|
93
|
-
res.on('error', (err) => {
|
|
94
|
-
if (timedOut || completed)
|
|
95
|
-
return;
|
|
96
|
-
commonsAsyncAbortTimeout(secondaryTimeoutId);
|
|
97
|
-
reject(err);
|
|
98
|
-
});
|
|
99
|
-
});
|
|
100
|
-
req.setTimeout(connectTimeout);
|
|
101
|
-
Crawler.applyMasqueradeHeaders(req);
|
|
102
|
-
req.on('timeout', () => {
|
|
103
|
-
if (timedOut || completed)
|
|
104
|
-
return;
|
|
105
|
-
timedOut = true;
|
|
106
|
-
req.destroy();
|
|
107
|
-
commonsAsyncAbortTimeout(secondaryTimeoutId);
|
|
108
|
-
reject(new Error(timedOutMessage));
|
|
109
|
-
});
|
|
110
|
-
req.on('response', (res) => {
|
|
111
|
-
res['remoteIp'] = res.connection.remoteAddress;
|
|
112
|
-
});
|
|
113
|
-
req.on('error', (err) => {
|
|
114
|
-
if (timedOut || completed)
|
|
115
|
-
return;
|
|
116
|
-
commonsAsyncAbortTimeout(secondaryTimeoutId);
|
|
117
|
-
reject(err);
|
|
118
|
-
});
|
|
119
|
-
req.on('socket', (socket) => {
|
|
120
|
-
socket.setTimeout(connectTimeout);
|
|
121
|
-
socket.on('timeout', () => {
|
|
122
|
-
if (timedOut || completed)
|
|
123
|
-
return;
|
|
124
|
-
commonsAsyncAbortTimeout(secondaryTimeoutId);
|
|
125
|
-
timedOut = true;
|
|
126
|
-
req.destroy();
|
|
127
|
-
reject(new Error(timedOutMessage));
|
|
128
|
-
});
|
|
129
|
-
});
|
|
130
|
-
req.end();
|
|
131
|
-
});
|
|
132
|
-
}
|
|
133
|
-
static pruneHeaders(headers, keepHeaders) {
|
|
134
|
-
const map = new Map();
|
|
135
|
-
const keys = [];
|
|
136
|
-
for (const k of keepHeaders) {
|
|
137
|
-
const kt = k.split(':');
|
|
138
|
-
keys.push({ key: kt[0], type: kt[1] });
|
|
139
|
-
}
|
|
140
|
-
for (const key of keys) {
|
|
141
|
-
if (!Object.keys(headers).includes(key.key))
|
|
142
|
-
continue;
|
|
143
|
-
if (!commonsTypeHasPropertyString(headers, key.key))
|
|
144
|
-
continue;
|
|
145
|
-
const value = headers[key.key];
|
|
146
|
-
switch (key.type) {
|
|
147
|
-
case 'string':
|
|
148
|
-
map.set(key.key, value.toString());
|
|
149
|
-
break;
|
|
150
|
-
case 'number':
|
|
151
|
-
map.set(key.key, parseInt(value, 10));
|
|
152
|
-
break;
|
|
153
|
-
case 'date':
|
|
154
|
-
map.set(key.key, new Date(value));
|
|
155
|
-
break;
|
|
156
|
-
default: throw new Error('unknown header key type');
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
return map;
|
|
160
|
-
}
|
|
161
|
-
constructor(domain, database, crawlConfig, parsersConfig, robotsConfig, parsers, lists, tracker) {
|
|
162
|
-
this.domain = domain;
|
|
163
|
-
this.database = database;
|
|
164
|
-
this.crawlConfig = crawlConfig;
|
|
165
|
-
this.parsersConfig = parsersConfig;
|
|
166
|
-
this.robotsConfig = robotsConfig;
|
|
167
|
-
this.parsers = parsers;
|
|
168
|
-
this.lists = lists;
|
|
169
|
-
this.tracker = tracker;
|
|
170
|
-
this.isPaused = false;
|
|
171
|
-
this.isAborted = false;
|
|
172
|
-
}
|
|
173
|
-
abort() {
|
|
174
|
-
this.isAborted = true;
|
|
175
|
-
commonsAsyncAbortTimeout(`crawl_${this.domain}`);
|
|
176
|
-
}
|
|
177
|
-
pause() {
|
|
178
|
-
commonsOutputAlert(`Pausing crawler for ${this.domain}`);
|
|
179
|
-
this.isPaused = true;
|
|
180
|
-
}
|
|
181
|
-
resume() {
|
|
182
|
-
commonsOutputCompleted(`Resuming crawler for ${this.domain}`);
|
|
183
|
-
this.isPaused = false;
|
|
184
|
-
}
|
|
185
|
-
fetch(url, setDomainIp) {
|
|
186
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
187
|
-
commonsOutputInfo(`Fetching ${url}`);
|
|
188
|
-
const whatwg = new URL(url);
|
|
189
|
-
let handler;
|
|
190
|
-
switch (whatwg.protocol) {
|
|
191
|
-
case 'http:':
|
|
192
|
-
handler = http;
|
|
193
|
-
break;
|
|
194
|
-
case 'https:':
|
|
195
|
-
handler = https;
|
|
196
|
-
break;
|
|
197
|
-
default:
|
|
198
|
-
throw new Error(`unable to handle protocol ${whatwg.protocol}`);
|
|
199
|
-
}
|
|
200
|
-
let outcome;
|
|
201
|
-
try {
|
|
202
|
-
outcome = yield Crawler.request(handler, url, this.crawlConfig.connectTimeout, this.crawlConfig.maxFileSize, this.tracker);
|
|
203
|
-
}
|
|
204
|
-
catch (ex) {
|
|
205
|
-
//if (!/^Timeout on crawl of/.test(ex.message)) console.error(ex);
|
|
206
|
-
throw ex;
|
|
207
|
-
}
|
|
208
|
-
if (setDomainIp) {
|
|
209
|
-
yield this.database.domain(whatwg.hostname, outcome.ip);
|
|
210
|
-
if (this.tracker)
|
|
211
|
-
this.tracker.domain();
|
|
212
|
-
}
|
|
213
|
-
yield this.database.setStatusCode(url, outcome.statusCode);
|
|
214
|
-
const headers = Crawler.pruneHeaders(outcome.headers, this.crawlConfig.keepHeaders);
|
|
215
|
-
if (!headers.has('content-length') && outcome.data !== undefined)
|
|
216
|
-
headers.set('content-length', outcome.data.length);
|
|
217
|
-
yield this.database.setHeaders(url, commonsMapToObject(headers));
|
|
218
|
-
const outgoing = [];
|
|
219
|
-
if (outcome.statusCode >= 300
|
|
220
|
-
&& outcome.statusCode < 400
|
|
221
|
-
&& headers.has('location')
|
|
222
|
-
&& this.lists.match(EList.ALLOW, url) // only queue redirects for allowlist urls
|
|
223
|
-
) {
|
|
224
|
-
// redirect codes
|
|
225
|
-
const location = new URL(headers.get('location'), url);
|
|
226
|
-
if (location.protocol.match(/^http(s?):$/)) {
|
|
227
|
-
const added = yield this.database.queue(location.toString(), this.lists.match(EList.DENY, location.toString()));
|
|
228
|
-
if (added && this.tracker)
|
|
229
|
-
this.tracker.delta(this.lists.match(EList.DENY, location.toString()) ? EStatus.DENY : EStatus.QUEUED, 1);
|
|
230
|
-
if (location.toString() !== url)
|
|
231
|
-
outgoing.push(location.toString());
|
|
232
|
-
}
|
|
233
|
-
}
|
|
234
|
-
if (outcome.data) {
|
|
235
|
-
const hash = commonsHashMd5(outcome.data);
|
|
236
|
-
const existing = yield this.database.getHash(url);
|
|
237
|
-
if (hash !== existing)
|
|
238
|
-
yield this.database.setHash(url, hash);
|
|
239
|
-
}
|
|
240
|
-
if (outcome.data && outcome.data.length && headers.has('content-type')) {
|
|
241
|
-
const contentType = headers.get('content-type');
|
|
242
|
-
const isAllow = this.lists.match(EList.ALLOW, url);
|
|
243
|
-
for (const ctor of this.parsers) {
|
|
244
|
-
const parser = new ctor(url, outcome, this.parsersConfig);
|
|
245
|
-
if (!parser.isEnabled())
|
|
246
|
-
continue;
|
|
247
|
-
if (!parser.supports(contentType, isAllow))
|
|
248
|
-
continue;
|
|
249
|
-
try {
|
|
250
|
-
yield parser.parse(this.database);
|
|
251
|
-
}
|
|
252
|
-
catch (ex) {
|
|
253
|
-
commonsOutputDebug('debug position 3');
|
|
254
|
-
console.log(ex);
|
|
255
|
-
}
|
|
256
|
-
if (isAllow) {
|
|
257
|
-
try {
|
|
258
|
-
const links = yield parser.links();
|
|
259
|
-
if (links.length === 0)
|
|
260
|
-
continue;
|
|
261
|
-
for (const link of links) {
|
|
262
|
-
if (link === url)
|
|
263
|
-
continue;
|
|
264
|
-
try {
|
|
265
|
-
outgoing.push(link);
|
|
266
|
-
}
|
|
267
|
-
catch (ex) { /* do nothing */ }
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
catch (ex) {
|
|
271
|
-
commonsOutputDebug('debug position 2');
|
|
272
|
-
console.log(ex);
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
for (const link of outgoing) {
|
|
278
|
-
const added = yield this.database.queue(link, this.lists.match(EList.DENY, link));
|
|
279
|
-
if (added && this.tracker)
|
|
280
|
-
this.tracker.delta(this.lists.match(EList.DENY, link) ? EStatus.DENY : EStatus.QUEUED, 1);
|
|
281
|
-
}
|
|
282
|
-
yield this.database.link(url, outgoing);
|
|
283
|
-
if (this.tracker)
|
|
284
|
-
this.tracker.link(outgoing.length);
|
|
285
|
-
return outcome.statusCode;
|
|
286
|
-
});
|
|
287
|
-
}
|
|
288
|
-
crawl() {
|
|
289
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
290
|
-
const gracefulAbortId = commonsGracefulAbortAddCallback(() => {
|
|
291
|
-
commonsOutputAlert(`SIGINT abort flag is set. Aborting crawler ${this.domain}.`);
|
|
292
|
-
this.abort();
|
|
293
|
-
});
|
|
294
|
-
try {
|
|
295
|
-
try {
|
|
296
|
-
yield Dns.resolve(this.domain, this.crawlConfig.connectTimeout);
|
|
297
|
-
}
|
|
298
|
-
catch (ex) {
|
|
299
|
-
if (ex.message === 'DNS timeout' && !this.crawlConfig.treatTimeoutDnsAsDead) {
|
|
300
|
-
commonsOutputError(`DNS timed out for ${this.domain}. Will allow reattempts`);
|
|
301
|
-
return;
|
|
302
|
-
}
|
|
303
|
-
commonsOutputError(`Domain ${this.domain} is dead.`);
|
|
304
|
-
yield this.database.markDead(this.domain);
|
|
305
|
-
return;
|
|
306
|
-
}
|
|
307
|
-
const robots = new Robot(this.domain, this.crawlConfig, this.robotsConfig, this.tracker);
|
|
308
|
-
yield robots.load();
|
|
309
|
-
let tally = 0;
|
|
310
|
-
while (true) {
|
|
311
|
-
if (this.isAborted)
|
|
312
|
-
break;
|
|
313
|
-
if (this.isPaused) {
|
|
314
|
-
yield commonsAsyncTimeout(1000);
|
|
315
|
-
continue;
|
|
316
|
-
}
|
|
317
|
-
const url = yield this.database.next(this.domain);
|
|
318
|
-
if (!url) {
|
|
319
|
-
commonsOutputCompleted(`Crawl for ${this.domain} completed.`);
|
|
320
|
-
break;
|
|
321
|
-
}
|
|
322
|
-
yield this.database.unsetFailReason(url);
|
|
323
|
-
if (this.tracker)
|
|
324
|
-
this.tracker.delta(EStatus.QUEUED, -1);
|
|
325
|
-
if (this.lists.match(EList.DENY, url)) {
|
|
326
|
-
yield this.database.setStatus(url, EStatus.DENY);
|
|
327
|
-
yield this.database.unsetTtl(url);
|
|
328
|
-
if (this.tracker)
|
|
329
|
-
this.tracker.delta(EStatus.DENY, 1);
|
|
330
|
-
// To get here, a url was queued but is now denylisted.
|
|
331
|
-
// It may have had outgoings, so remove any that were there.
|
|
332
|
-
yield this.database.link(url, []);
|
|
333
|
-
continue;
|
|
334
|
-
}
|
|
335
|
-
if (robots.isDisallowed(url)) {
|
|
336
|
-
yield this.database.setStatus(url, EStatus.DISALLOWED);
|
|
337
|
-
yield this.database.unsetTtl(url);
|
|
338
|
-
if (this.tracker)
|
|
339
|
-
this.tracker.delta(EStatus.DISALLOWED, 1);
|
|
340
|
-
// To get here, a url was queued but is now disallowed.
|
|
341
|
-
// It may have had outgoings, so remove any that were there.
|
|
342
|
-
yield this.database.link(url, []);
|
|
343
|
-
continue;
|
|
344
|
-
}
|
|
345
|
-
if (!(yield this.database.setStatus(url, EStatus.ACTIVE))) {
|
|
346
|
-
commonsOutputError(`Unable to set status to active for ${url}. Aborting crawler.`);
|
|
347
|
-
break;
|
|
348
|
-
}
|
|
349
|
-
if (this.tracker)
|
|
350
|
-
this.tracker.delta(EStatus.ACTIVE, 1);
|
|
351
|
-
if (tally > 0) {
|
|
352
|
-
try {
|
|
353
|
-
const timeout = Random.randomise(this.crawlConfig.betweenFetchDelay, this.crawlConfig);
|
|
354
|
-
yield commonsAsyncTimeout(timeout, `crawl_${this.domain}`);
|
|
355
|
-
while (this.isPaused && !this.isAborted) {
|
|
356
|
-
yield commonsAsyncTimeout(1000);
|
|
357
|
-
}
|
|
358
|
-
}
|
|
359
|
-
catch (ex) { /* ignore */ }
|
|
360
|
-
}
|
|
361
|
-
if (this.isAborted)
|
|
362
|
-
break;
|
|
363
|
-
if (this.tracker)
|
|
364
|
-
yield this.tracker.fetching(url);
|
|
365
|
-
try {
|
|
366
|
-
const statusCode = yield this.fetch(url, tally === 0);
|
|
367
|
-
if (this.tracker)
|
|
368
|
-
yield this.tracker.done(url, statusCode);
|
|
369
|
-
yield this.database.setStatus(url, EStatus.DONE);
|
|
370
|
-
yield this.database.unsetTtl(url);
|
|
371
|
-
if (this.tracker)
|
|
372
|
-
this.tracker.delta(EStatus.DONE, 1);
|
|
373
|
-
}
|
|
374
|
-
catch (ex) {
|
|
375
|
-
if (this.tracker)
|
|
376
|
-
yield this.tracker.failed(url);
|
|
377
|
-
const ttl = (yield this.database.getTtl(url)) || 0;
|
|
378
|
-
if (ttl < this.crawlConfig.maxFailedTtl) {
|
|
379
|
-
yield this.database.setTtl(url, ttl + 1);
|
|
380
|
-
yield this.database.setStatus(url, EStatus.QUEUED);
|
|
381
|
-
if (this.tracker)
|
|
382
|
-
this.tracker.delta(EStatus.QUEUED, 1);
|
|
383
|
-
}
|
|
384
|
-
else {
|
|
385
|
-
yield this.database.setStatus(url, EStatus.FAILED);
|
|
386
|
-
if (this.tracker)
|
|
387
|
-
this.tracker.delta(EStatus.FAILED, 1);
|
|
388
|
-
if (commonsTypeHasPropertyString(ex, 'code')) {
|
|
389
|
-
yield this.database.setFailReason(url, ex.code);
|
|
390
|
-
}
|
|
391
|
-
else if (commonsTypeHasPropertyString(ex, 'message')) {
|
|
392
|
-
if (/^Timeout on crawl/.test(ex.message)) {
|
|
393
|
-
yield this.database.setFailReason(url, 'TIMEOUT');
|
|
394
|
-
}
|
|
395
|
-
else {
|
|
396
|
-
yield this.database.setFailReason(url, ex.message);
|
|
397
|
-
}
|
|
398
|
-
}
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
finally {
|
|
402
|
-
if (this.tracker)
|
|
403
|
-
this.tracker.delta(EStatus.ACTIVE, -1);
|
|
404
|
-
}
|
|
405
|
-
tally++;
|
|
406
|
-
if (tally >= this.crawlConfig.maxFetchesPerCrawl) {
|
|
407
|
-
commonsOutputAlert(`Crawl for ${this.domain} reached the maximum permitted fetches for this session.`);
|
|
408
|
-
break;
|
|
409
|
-
}
|
|
410
|
-
}
|
|
411
|
-
}
|
|
412
|
-
catch (e) {
|
|
413
|
-
commonsOutputDebug('debug position 1');
|
|
414
|
-
console.log(e);
|
|
415
|
-
}
|
|
416
|
-
finally {
|
|
417
|
-
commonsGracefulAbortRemoveCallback(gracefulAbortId);
|
|
418
|
-
}
|
|
419
|
-
});
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
//# sourceMappingURL=crawler.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"crawler.js","sourceRoot":"","sources":["../../src/classes/crawler.ts"],"names":[],"mappings":";;;;;;;;;AACA,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAC1B,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAG/B,OAAO,EAAE,kBAAkB,EAAE,4BAA4B,EAAE,MAAM,mBAAmB,CAAC;AAErF,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAEnF,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC3I,OAAO,EAAE,+BAA+B,EAAE,kCAAkC,EAAE,MAAM,wBAAwB,CAAC;AAC7G,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAGzD,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAKzC,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAU3C,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAEvC,OAAO,EAAE,GAAG,EAAE,MAAM,OAAO,CAAC;AAE5B,MAAM,OAAO,OAAO;IACZ,MAAM,CAAC,sBAAsB,CAAC,OAA2B;QAC/D,OAAO,CAAC,SAAS,CAAC,QAAQ,EAAE,uFAAuF,CAAC,CAAC;QACrH,OAAO,CAAC,SAAS,CAAC,iBAAiB,EAAE,UAAU,CAAC,CAAC;QACjD,OAAO,CAAC,SAAS,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QACzC,OAAO,CAAC,SAAS,CAAC,YAAY,EAAE,qJAAqJ,CAAC,CAAC;IACxL,CAAC;IAEM,MAAM,CAAC,OAAO,CACnB,OAAiC,EACjC,GAAW,EACX,cAAsB,EACtB,WAAmB,EACnB,OAAiB;QAElB,OAAO,IAAI,OAAO,CAAkB,CAAC,OAAqC,EAAE,MAA0B,EAAQ,EAAE;YAC/G,MAAM,KAAK,GAAW,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;YAC3C,IAAI,QAAQ,GAAY,KAAK,CAAC;YAE9B,IAAI,SAAS,GAAY,KAAK,CAAC;YAC/B,IAAI,QAAQ,GAAY,KAAK,CAAC;YAC9B,MAAM,eAAe,GAAW,uBAAuB,GAAG,EAAE,CAAC;YAE7D,MAAM,kBAAkB,GAAW,WAAW,GAAG,oBAAoB,CAAC;YACtE,mBAAmB,CACjB,cAAc,GAAG,KAAK,EACtB,kBAAkB,CACnB;iBACE,IAAI,CAAC,GAAS,EAAE;gBAChB,IAAI,QAAQ,IAAI,SAAS;oBAAE,OAAO;gBAElC,kBAAkB,CAAC,yBAAyB,GAAG,EAAE,CAAC,CAAC;gBAEnD,QAAQ,GAAG,IAAI,CAAC;gBAChB,MAAM,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC;gBACnC,OAAO;YACR,CAAC,CAAC;iBACD,KAAK,CAAC,CAAC,CAAQ,EAAQ,EAAE;gBACzB,IAAI,CAAC,CAAC,OAAO,KAAK,qBAAqB;oBAAE,OAAO;gBAChD,kBAAkB,CAAC,mBAAmB,CAAC,CAAC;gBACxC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBACf,MAAM,CAAC,CAAC;YACT,CAAC,CAAC,CAAC;YAEL,MAAM,GAAG,GAAuB,OAAO,CAAC,OAAO,CAC7C,GAAG,EACH,CAAC,GAAyB,EAAQ,EAAE;gBACnC,4EAA4E;gBAC5E,GAAG,CAAC,UAAU,CAAC,cAAc,CA
AC,CAAC;gBAE/B,GAAG,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;gBAE/B,MAAM,GAAG,GAAW,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;gBACzC,MAAM,OAAO,GAAW,GAAG,GAAG,KAAK,CAAC;gBAEpC,gDAAgD;gBAEhD,IAAI,IAAI,GAAW,CAAC,CAAC;gBACrB,MAAM,IAAI,GAAa,EAAE,CAAC;gBAC1B,GAAG,CAAC,EAAE,CACJ,MAAM,EACN,CAAC,KAAa,EAAQ,EAAE;oBACvB,IAAI,IAAI,GAAG,WAAW,EAAE;wBACvB,IAAI,CAAC,QAAQ,EAAE;4BACd,QAAQ,GAAG,IAAI,CAAC;4BAChB,GAAG,CAAC,OAAO,EAAE,CAAC;4BAEd,kBAAkB,CAAC,iCAAiC,GAAG,EAAE,CAAC,CAAC;yBAC3D;wBAED,OAAO;qBACP;oBACD,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACjB,IAAI,IAAI,KAAK,CAAC,MAAM,CAAC;oBACrB,IAAI,OAAO;wBAAE,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;gBAC9C,CAAC,CACF,CAAC;gBACF,GAAG,CAAC,EAAE,CACJ,KAAK,EACL,GAAS,EAAE;oBACV,IAAI,QAAQ,IAAI,SAAS;wBAAE,OAAO;oBAElC,SAAS,GAAG,IAAI,CAAC;oBAEjB,wBAAwB,CAAC,kBAAkB,CAAC,CAAC;oBAE7C,OAAO,CAAC;wBACN,OAAO,EAAE,OAAO;wBAChB,mEAAmE;wBACnE,EAAE,EAAE,GAAG,CAAC,UAAU,CAAC;wBACnB,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,GAAG;wBACjC,OAAO,EAAE,GAAG,CAAC,OAAO;wBACpB,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;wBACzB,QAAQ,EAAE,QAAQ;qBACnB,CAAC,CAAC;gBACJ,CAAC,CACF,CAAC;gBAEF,wEAAwE;gBACxE,GAAG,CAAC,EAAE,CACJ,OAAO,EACP,CAAC,GAAU,EAAQ,EAAE;oBACpB,IAAI,QAAQ,IAAI,SAAS;wBAAE,OAAO;oBAElC,wBAAwB,CAAC,kBAAkB,CAAC,CAAC;oBAE7C,MAAM,CAAC,GAAG,CAAC,CAAC;gBACb,CAAC,CACF,CAAC;YACH,CAAC,CACF,CAAC;YAEF,GAAG,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;YAE/B,OAAO,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;YAEpC,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,GAAS,EAAE;gBAC5B,IAAI,QAAQ,IAAI,SAAS;oBAAE,OAAO;gBAElC,QAAQ,GAAG,IAAI,CAAC;gBAChB,GAAG,CAAC,OAAO,EAAE,CAAC;gBAEd,wBAAwB,CAAC,kBAAkB,CAAC,CAAC;gBAE7C,MAAM,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC;YACpC,CAAC,CAAC,CAAC;YAEH,GAAG,CAAC,EAAE,CACJ,UAAU,EACV,CAAC,GAAyB,EAAQ,EAAE;gBACnC,GAAG,CAAC,UAAU,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,aAAa,CAAC;YAChD,CAAC,CACF,CAAC;YAEF,GAAG,CAAC,EAAE,CACJ,OAAO,EACP,CAAC,GAAU,EAAQ,EAAE;gBACpB,IAAI,QAAQ,IAAI,SAAS;oBAAE,OAAO;gBAElC,wBAAwB,CAAC,kBAAkB,CAAC,CAAC;gBAE7C,MAAM,CAAC,GAAG,CAAC,CAAC;YACb,CAAC,CACF,CAAC;YAEF,GAAG,CAAC,EAAE,CACJ,QAAQ,EACR,CAAC,MAAc,EAAQ,EAAE;gBACxB,MAAM,CAAC,UAA
U,CAAC,cAAc,CAAC,CAAC;gBAClC,MAAM,CAAC,EAAE,CACP,SAAS,EACT,GAAS,EAAE;oBACV,IAAI,QAAQ,IAAI,SAAS;wBAAE,OAAO;oBAElC,wBAAwB,CAAC,kBAAkB,CAAC,CAAC;oBAE7C,QAAQ,GAAG,IAAI,CAAC;oBAChB,GAAG,CAAC,OAAO,EAAE,CAAC;oBAEd,MAAM,CAAC,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC;gBACpC,CAAC,CACF,CAAC;YACH,CAAC,CACF,CAAC;YAEF,GAAG,CAAC,GAAG,EAAE,CAAC;QACX,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,MAAM,CAAC,YAAY,CAAC,OAA4B,EAAE,WAAqB;QAC9E,MAAM,GAAG,GAAoC,IAAI,GAAG,EAA8B,CAAC;QAEnF,MAAM,IAAI,GAAoC,EAAE,CAAC;QACjD,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE;YAC5B,MAAM,EAAE,GAAa,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;SACvC;QAED,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;YACvB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YACtD,IAAI,CAAC,4BAA4B,CAAC,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YAE9D,MAAM,KAAK,GAAW,OAAO,CAAC,GAAG,CAAC,GAAG,CAAW,CAAC;YACjD,QAAQ,GAAG,CAAC,IAAI,EAAE;gBACjB,KAAK,QAAQ;oBAAE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;oBAAC,MAAM;gBACzD,KAAK,QAAQ;oBAAE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;oBAAC,MAAM;gBAC5D,KAAK,MAAM;oBAAE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;oBAAC,MAAM;gBACtD,OAAO,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;aACpD;SACD;QAED,OAAO,GAAG,CAAC;IACZ,CAAC;IAKD,YACU,MAAc,EACd,QAAyB,EACzB,WAAyB,EACzB,aAAwC,EACxC,YAA2B,EAC3B,OAAsB,EACtB,KAAY,EACZ,OAAiB;QAPjB,WAAM,GAAN,MAAM,CAAQ;QACd,aAAQ,GAAR,QAAQ,CAAiB;QACzB,gBAAW,GAAX,WAAW,CAAc;QACzB,kBAAa,GAAb,aAAa,CAA2B;QACxC,iBAAY,GAAZ,YAAY,CAAe;QAC3B,YAAO,GAAP,OAAO,CAAe;QACtB,UAAK,GAAL,KAAK,CAAO;QACZ,YAAO,GAAP,OAAO,CAAU;QAXnB,aAAQ,GAAY,KAAK,CAAC;QAC1B,cAAS,GAAY,KAAK,CAAC;IAWhC,CAAC;IAEI,KAAK;QACZ,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,wBAAwB,CAAC,SAAS,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAClD,CAAC;IAEM,KAAK;QACX,kBAAkB,CAAC,uBAAuB,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAEzD,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;IACtB,CAAC;IAEM,MAAM;QACZ,sBAAs
B,CAAC,wBAAwB,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAE9D,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;IACvB,CAAC;IAEY,KAAK,CAAC,GAAW,EAAE,WAAoB;;YACnD,iBAAiB,CAAC,YAAY,GAAG,EAAE,CAAC,CAAC;YAErC,MAAM,MAAM,GAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAEjC,IAAI,OAA2C,CAAC;YAChD,QAAQ,MAAM,CAAC,QAAQ,EAAE;gBACxB,KAAK,OAAO;oBAAE,OAAO,GAAG,IAAI,CAAC;oBAAC,MAAM;gBACpC,KAAK,QAAQ;oBAAE,OAAO,GAAG,KAAK,CAAC;oBAAC,MAAM;gBACtC;oBACC,MAAM,IAAI,KAAK,CAAC,6BAA6B,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;aACjE;YAED,IAAI,OAAwB,CAAC;YAC7B,IAAI;gBACH,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,CAC7B,OAAO,EACP,GAAG,EACH,IAAI,CAAC,WAAW,CAAC,cAAc,EAC/B,IAAI,CAAC,WAAW,CAAC,WAAW,EAC5B,IAAI,CAAC,OAAO,CACb,CAAC;aACF;YAAC,OAAO,EAAE,EAAE;gBACZ,kEAAkE;gBAClE,MAAM,EAAE,CAAC;aACT;YAED,IAAI,WAAW,EAAE;gBAChB,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC;gBACxD,IAAI,IAAI,CAAC,OAAO;oBAAE,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;aACxC;YAED,MAAM,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;YAE3D,MAAM,OAAO,GAAoC,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;YACrH,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,SAAS;gBAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrH,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,EAAE,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC;YAEjE,MAAM,QAAQ,GAAa,EAAE,CAAC;YAE9B,IACC,OAAO,CAAC,UAAU,IAAI,GAAG;mBACtB,OAAO,CAAC,UAAU,GAAG,GAAG;mBACxB,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;mBACvB,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,0CAA0C;cAC/E;gBACD,iBAAiB;gBACjB,MAAM,QAAQ,GAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAY,EAAE,GAAG,CAAC,CAAC;gBAEvE,IAAI,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,EAAE;oBAC3C,MAAM,KAAK,GAAY,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;oBACzH,IAAI,KAAK,IAAI,IAAI,CAAC,OAAO;wBAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,
CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;oBAEpI,IAAI,QAAQ,CAAC,QAAQ,EAAE,KAAK,GAAG;wBAAE,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;iBACpE;aACD;YAED,IAAI,OAAO,CAAC,IAAI,EAAE;gBACjB,MAAM,IAAI,GAAW,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;gBAClD,MAAM,QAAQ,GAAqB,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBAEpE,IAAI,IAAI,KAAK,QAAQ;oBAAE,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;aAC9D;YAED,IAAI,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE;gBACvE,MAAM,WAAW,GAAW,OAAO,CAAC,GAAG,CAAC,cAAc,CAAY,CAAC;gBACnE,MAAM,OAAO,GAAY,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;gBAE5D,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,OAAO,EAAE;oBAChC,MAAM,MAAM,GAAY,IAAI,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;oBACnE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE;wBAAE,SAAS;oBAElC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC;wBAAE,SAAS;oBAErD,IAAI;wBACH,MAAM,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;qBAClC;oBAAC,OAAO,EAAE,EAAE;wBACZ,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;wBACvC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;qBAChB;oBAED,IAAI,OAAO,EAAE;wBACZ,IAAI;4BACH,MAAM,KAAK,GAAa,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;4BAC7C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gCAAE,SAAS;4BAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE;gCACzB,IAAI,IAAI,KAAK,GAAG;oCAAE,SAAS;gCAC3B,IAAI;oCACH,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iCACpB;gCAAC,OAAO,EAAE,EAAE,EAAE,gBAAgB,EAAE;6BACjC;yBACD;wBAAC,OAAO,EAAE,EAAE;4BACZ,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;4BACvC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;yBAChB;qBACD;iBACD;aACD;YAED,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE;gBAC5B,MAAM,KAAK,GAAY,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC3F,IAAI,KAAK,IAAI,IAAI,CAAC,OAAO;oBAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;aACrH;YACD,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;YAExC,IAAI,I
AAI,CAAC,OAAO;gBAAE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAErD,OAAO,OAAO,CAAC,UAAU,CAAC;QAC3B,CAAC;KAAA;IAEY,KAAK;;YACjB,MAAM,eAAe,GAAW,+BAA+B,CAAC,GAAS,EAAE;gBAC1E,kBAAkB,CAAC,8CAA8C,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;gBAEjF,IAAI,CAAC,KAAK,EAAE,CAAC;YACd,CAAC,CAAC,CAAC;YAEH,IAAI;gBACH,IAAI;oBACH,MAAM,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;iBAChE;gBAAC,OAAO,EAAE,EAAE;oBACZ,IAAK,EAAY,CAAC,OAAO,KAAK,aAAa,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,EAAE;wBACvF,kBAAkB,CAAC,qBAAqB,IAAI,CAAC,MAAM,yBAAyB,CAAC,CAAC;wBAC9E,OAAO;qBACP;oBAED,kBAAkB,CAAC,UAAU,IAAI,CAAC,MAAM,WAAW,CAAC,CAAC;oBACrD,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;oBAC1C,OAAO;iBACP;gBAED,MAAM,MAAM,GAAU,IAAI,KAAK,CAC7B,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,OAAO,CACb,CAAC;gBACF,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAEpB,IAAI,KAAK,GAAW,CAAC,CAAC;gBACtB,OAAO,IAAI,EAAE;oBACZ,IAAI,IAAI,CAAC,SAAS;wBAAE,MAAM;oBAE1B,IAAI,IAAI,CAAC,QAAQ,EAAE;wBAClB,MAAM,mBAAmB,CAAC,IAAI,CAAC,CAAC;wBAChC,SAAS;qBACT;oBAED,MAAM,GAAG,GAAqB,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;oBACpE,IAAI,CAAC,GAAG,EAAE;wBACT,sBAAsB,CAAC,aAAa,IAAI,CAAC,MAAM,aAAa,CAAC,CAAC;wBAC9D,MAAM;qBACN;oBAED,MAAM,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;oBAEzC,IAAI,IAAI,CAAC,OAAO;wBAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC;oBAEzD,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,EAAE;wBACtC,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;wBACjD,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;wBAClC,IAAI,IAAI,CAAC,OAAO;4BAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;wBAEtD,uDAAuD;wBACvD,4DAA4D;wBAC5D,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;wBAClC,SAAS;qBACT;oBAED,IAAI,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE;wBAC7B,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;wBACvD,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;wBAClC,I
AAI,IAAI,CAAC,OAAO;4BAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;wBAE5D,uDAAuD;wBACvD,4DAA4D;wBAC5D,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;wBAClC,SAAS;qBACT;oBAED,IAAI,CAAC,CAAA,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA,EAAE;wBACxD,kBAAkB,CAAC,sCAAsC,GAAG,qBAAqB,CAAC,CAAC;wBACnF,MAAM;qBACN;oBACD,IAAI,IAAI,CAAC,OAAO;wBAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;oBAExD,IAAI,KAAK,GAAG,CAAC,EAAE;wBACd,IAAI;4BACH,MAAM,OAAO,GAAW,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;4BAE/F,MAAM,mBAAmB,CAAC,OAAO,EAAE,SAAS,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;4BAE3D,OAAO,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE;gCACxC,MAAM,mBAAmB,CAAC,IAAI,CAAC,CAAC;6BAChC;yBACD;wBAAC,OAAO,EAAE,EAAE,EAAE,YAAY,EAAE;qBAC7B;oBACD,IAAI,IAAI,CAAC,SAAS;wBAAE,MAAM;oBAE1B,IAAI,IAAI,CAAC,OAAO;wBAAE,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;oBACnD,IAAI;wBACH,MAAM,UAAU,GAAW,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,KAAK,KAAK,CAAC,CAAC,CAAC;wBAE9D,IAAI,IAAI,CAAC,OAAO;4BAAE,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;wBAC3D,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;wBACjD,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;wBAClC,IAAI,IAAI,CAAC,OAAO;4BAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;qBACtD;oBAAC,OAAO,EAAE,EAAE;wBACZ,IAAI,IAAI,CAAC,OAAO;4BAAE,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;wBAEjD,MAAM,GAAG,GAAW,CAAA,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,KAAI,CAAC,CAAC;wBACzD,IAAI,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,YAAY,EAAE;4BACxC,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC;4BACzC,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;4BACnD,IAAI,IAAI,CAAC,OAAO;gCAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;yBACxD;6BAAM;4BACN,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;4BACnD,IAAI,IAAI,CAAC,OAAO;gCAAE,IAAI,CAAC,OAAO,
CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;4BAExD,IAAI,4BAA4B,CAAC,EAAE,EAAE,MAAM,CAAC,EAAE;gCAC7C,MAAM,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,GAAG,EAAG,EAAuB,CAAC,IAAI,CAAC,CAAC;6BACtE;iCAAM,IAAI,4BAA4B,CAAC,EAAE,EAAE,SAAS,CAAC,EAAE;gCACvD,IAAI,mBAAmB,CAAC,IAAI,CAAE,EAA0B,CAAC,OAAO,CAAC,EAAE;oCAClE,MAAM,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;iCAClD;qCAAM;oCACN,MAAM,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,GAAG,EAAG,EAA0B,CAAC,OAAO,CAAC,CAAC;iCAC5E;6BACD;yBACD;qBACD;4BAAS;wBACT,IAAI,IAAI,CAAC,OAAO;4BAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC;qBACzD;oBAED,KAAK,EAAE,CAAC;oBACR,IAAI,KAAK,IAAI,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE;wBACjD,kBAAkB,CAAC,aAAa,IAAI,CAAC,MAAM,0DAA0D,CAAC,CAAC;wBACvG,MAAM;qBACN;iBACD;aACD;YAAC,OAAO,CAAC,EAAE;gBACX,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;gBACvC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aACf;oBAAS;gBACT,kCAAkC,CAAC,eAAe,CAAC,CAAC;aACpD;QACF,CAAC;KAAA;CACD"}
|
package/dist/classes/dns.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"dns.js","sourceRoot":"","sources":["../../src/classes/dns.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,GAAG,MAAM,KAAK,CAAC;AAE3B,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAEnF,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAExD,MAAM,OAAgB,GAAG;IACjB,MAAM,CAAC,OAAO,CACnB,MAAc,EACd,cAAsB;QAEvB,0BAA0B;QAE1B,OAAO,IAAI,OAAO,CAAW,CAAC,OAA8B,EAAE,MAA0B,EAAQ,EAAE;YACjG,IAAI,QAAQ,GAAY,KAAK,CAAC;YAC9B,MAAM,SAAS,GAAW,OAAO,MAAM,UAAU,CAAC;YAElD,mBAAmB,CACjB,cAAc,EACd,SAAS,CACV;iBACE,IAAI,CAAC,GAAS,EAAE;gBAChB,QAAQ,GAAG,IAAI,CAAC;gBAChB,MAAM,CAAC,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC;YAClC,CAAC,CAAC;iBACD,KAAK,CAAC,CAAC,CAAQ,EAAQ,EAAE;gBACzB,IAAI,CAAC,CAAC,OAAO,KAAK,qBAAqB;oBAAE,OAAO;gBAChD,kBAAkB,CAAC,kBAAkB,CAAC,CAAC;gBACvC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBACf,MAAM,CAAC,CAAC,CAAC,CAAC;YACX,CAAC,CAAC,CAAC;YAEL,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,GAAe,EAAE,OAAiB,EAAQ,EAAE;gBACjE,IAAI,GAAG,EAAE;oBACR,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAgB,EAAE,QAAkB,EAAQ,EAAE;wBACnE,IAAI,IAAI,EAAE;4BACT,IAAI,CAAC,QAAQ;gCAAE,wBAAwB,CAAC,SAAS,CAAC,CAAC;4BACnD,MAAM,CAAC,IAAI,CAAC,CAAC;4BACb,OAAO;yBACP;wBACD,IAAI,CAAC,QAAQ,EAAE;4BACd,wBAAwB,CAAC,SAAS,CAAC,CAAC;4BACpC,OAAO,CAAC,QAAQ,CAAC,CAAC;yBAClB;wBACD,OAAO;oBACR,CAAC,CAAC,CAAC;oBACH,OAAO;iBACP;gBACD,IAAI,CAAC,QAAQ,EAAE;oBACd,wBAAwB,CAAC,SAAS,CAAC,CAAC;oBACpC,OAAO,CAAC,OAAO,CAAC,CAAC;iBACjB;YACF,CAAC,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;IACJ,CAAC;CACD"}
|
package/dist/classes/expirer.js
DELETED
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
/**
 * Compiler-emitted helper that drives a generator as an async function.
 *
 * Reuses `this.__awaiter` when one is already defined; otherwise runs the
 * generator, treating every yielded value as something to await, and returns
 * a promise for the generator's return value.
 *
 * @param thisArg - `this` value for the generator function.
 * @param _arguments - argument list for the generator function (optional).
 * @param P - promise constructor to use; defaults to the global Promise.
 * @param generator - the generator function to run.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wraps plain values so that everything yielded can be chained with then().
    function toPromise(value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Settles the outer promise when the generator finishes, otherwise
        // waits for the yielded value and feeds the result back in.
        function advance(result) {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        }
        function onFulfilled(value) {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        }
        function onRejected(value) {
            try {
                advance(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        }
        advance((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
10
|
-
import mongodb from 'mongodb';
|
|
11
|
-
import { commonsArrayChunk, commonsDateDateToYmdHis, commonsTypeHasPropertyDate } from 'tscommons-es-core';
|
|
12
|
-
import { EStatus } from 'hydra-crawler-ts-assets';
|
|
13
|
-
import { commonsOutputAlert, commonsOutputDoing, commonsOutputProgress, commonsOutputSuccess } from 'nodecommons-es-cli';
|
|
14
|
-
import { isTMongoIdRow } from '../services/database.service';
|
|
15
|
-
/**
 * Re-queues URLs whose last crawl attempt is considered out of date.
 *
 * Domains that currently have queued or active URLs are always left alone.
 */
export class Expirer {
    /**
     * @param expiry - provides `getBestExpiry(url)`, the expiry interval for a
     *   URL (compared against timestamps in seconds).
     * @param database - database service exposing `getUrls()` (the URL
     *   collection) and `listQueryResults()`.
     */
    constructor(expiry, database) {
        this.expiry = expiry;
        this.database = database;
    }

    /**
     * Lists every domain that has at least one URL in QUEUED or ACTIVE status.
     *
     * @returns {Promise<Array>} the distinct `domain` values.
     */
    listQueuedAndActive() {
        return __awaiter(this, void 0, void 0, function* () {
            commonsOutputDoing('Enumerating queued and active domains');
            const results = this.database.getUrls().aggregate([
                { $match: { status: { $in: [EStatus.QUEUED, EStatus.ACTIVE] } } },
                { $group: { _id: '$domain' } }
            ], { allowDiskUse: true });
            const queued = yield this.database.listQueryResults(results, isTMongoIdRow);
            commonsOutputSuccess(queued.length);
            // The $group stage puts the domain into _id.
            // eslint-disable-next-line no-underscore-dangle
            return queued.map((q) => q._id);
        });
    }

    /**
     * Scans attempted URLs on idle domains and re-queues those whose
     * `attempted` time plus their expiry interval lies in the past.
     *
     * @param {number} [limit] - maximum number of rows to examine; unlimited
     *   when undefined.
     * @returns {Promise<boolean>} true when at least one URL was re-queued.
     */
    expire(limit) {
        return __awaiter(this, void 0, void 0, function* () {
            commonsOutputAlert('Running expiry thread');
            const queued = yield this.listQueuedAndActive();
            commonsOutputDoing('Searching for expired URLs');
            const results = this.database.getUrls().find({ $and: [
                    { status: { $nin: [
                                EStatus.ARCHIVED,
                                EStatus.QUEUED,
                                EStatus.ACTIVE
                            ] } },
                    { domain: { $nin: queued } },
                    { attempted: { $exists: true } }
                ] }, {});
            const now = new Date().getTime() / 1000;
            const expired = [];
            let tally = 0;
            let found = 0;
            try {
                while (true) {
                    tally++;
                    if ((tally % 1000) === 0)
                        commonsOutputProgress(`${tally}, ${found}`);
                    if (limit !== undefined && tally > limit)
                        break;
                    const row = yield results.next();
                    if (row === null)
                        break;
                    const interval = this.expiry.getBestExpiry(row.url);
                    if (!commonsTypeHasPropertyDate(row, 'attempted'))
                        continue;
                    const attempted = row['attempted'].getTime() / 1000;
                    if ((attempted + interval) < now) {
                        // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
                        expired.push(row['_id']);
                        found++;
                    }
                }
            }
            finally {
                // Stopping early on `limit` (or on an error) would otherwise
                // leave the cursor open until it times out.
                if (typeof results.close === 'function')
                    yield results.close();
            }
            commonsOutputSuccess(found);
            if (found === 0)
                return false;
            const batches = commonsArrayChunk(expired, 100);
            commonsOutputDoing('Re-queuing expired');
            tally = 0;
            for (const batch of batches) {
                const batchIds = batch
                    .map((id) => new mongodb.ObjectId(id));
                yield this.database.getUrls().updateMany({ _id: { $in: batchIds } }, {
                    $set: { status: EStatus.QUEUED },
                    $unset: { ttl: true }
                });
                // Count what was actually sent: the last batch may be short.
                tally += batch.length;
                commonsOutputProgress(tally);
            }
            commonsOutputSuccess();
            return true;
        });
    }

    /**
     * Re-queues every non-archived URL on an idle domain that was last
     * attempted before `threshold`.
     *
     * @param {Date} threshold - cut-off for the `attempted` timestamp.
     * @returns {Promise<boolean>} true when at least one URL matched.
     */
    expireFixed(threshold) {
        return __awaiter(this, void 0, void 0, function* () {
            commonsOutputAlert('Running fixed date expiry thread');
            const queued = yield this.listQueuedAndActive();
            commonsOutputDoing(`Searching attempted before date ${commonsDateDateToYmdHis(threshold)}`);
            // Built once so the count and the update always use the same
            // selection.
            const filter = { $and: [
                    { status: { $ne: EStatus.ARCHIVED } },
                    { domain: { $nin: queued } },
                    { attempted: { $exists: true } },
                    { attempted: { $lt: threshold } }
                ] };
            const count = yield this.database.getUrls().find(filter, {}).count();
            commonsOutputSuccess(count);
            if (count === 0)
                return false;
            commonsOutputDoing('Re-queuing expired');
            yield this.database.getUrls().updateMany(filter, {
                $set: { status: EStatus.QUEUED },
                $unset: { ttl: true }
            });
            commonsOutputSuccess();
            // NOTE(review): this opens a "doing" message that is never followed
            // by a success/failure output - looks like a leftover; kept so the
            // console output is unchanged. Confirm and remove.
            commonsOutputDoing('Searching for expired URLs');
            return true;
        });
    }
}
|
|
121
|
-
//# sourceMappingURL=expirer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"expirer.js","sourceRoot":"","sources":["../../src/classes/expirer.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,iBAAiB,EAAE,uBAAuB,EAAE,0BAA0B,EAAE,MAAM,mBAAmB,CAAC;AAE3G,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAGlD,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAIzH,OAAO,EAAgC,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAE3F,MAAM,OAAO,OAAO;IACnB,YACU,MAAc,EACd,QAAyB;QADzB,WAAM,GAAN,MAAM,CAAQ;QACd,aAAQ,GAAR,QAAQ,CAAiB;IAChC,CAAC;IAEU,mBAAmB;;YAChC,kBAAkB,CAAC,qCAAqC,CAAC,CAAC;YAC1D,MAAM,OAAO,GAA2C,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,SAAS,CAAc;gBACrG,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,CAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAE,EAAE,EAAE,EAAE;gBACnE,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE;aAC/B,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;YAE3B,MAAM,MAAM,GAAkB,MAAM,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;YAE3F,oBAAoB,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAEpC,OAAO,MAAM;gBACX,gDAAgD;iBAC/C,GAAG,CAAC,CAAC,CAAc,EAAU,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3C,CAAC;KAAA;IAEY,MAAM,CAAC,KAAc;;YACjC,kBAAkB,CAAC,uBAAuB,CAAC,CAAC;YAE5C,MAAM,MAAM,GAAa,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAE1D,kBAAkB,CAAC,4BAA4B,CAAC,CAAC;YAEjD,MAAM,OAAO,GAA6B,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,IAAI,CACpE,EAAE,IAAI,EAAE;oBACN,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE;gCAChB,OAAO,CAAC,QAAQ;gCAChB,OAAO,CAAC,MAAM;gCACd,OAAO,CAAC,MAAM;6BACf,EAAE,EAAE;oBACL,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE;oBAC5B,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE;iBACjC,EAAC,EACF,EAAE,CACH,CAAC;YAEF,MAAM,GAAG,GAAW,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC;YAEhD,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,OAAO,IAAI,EAAE;gBACZ,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;oBAAE,qBAAqB,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBACtE,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,KAAK;oBAAE,MAAM;gBAEhD,MAAM,GAAG,GAAc,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,M
AAM,QAAQ,GAAW,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAE5D,IAAI,CAAC,0BAA0B,CAAC,GAAG,EAAE,WAAW,CAAC;oBAAE,SAAS;gBAE5D,MAAM,SAAS,GAAY,GAAG,CAAC,WAAW,CAAU,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC;gBACtE,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAC,GAAG,GAAG,EAAE;oBACjC,iEAAiE;oBACjE,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;oBACzB,KAAK,EAAE,CAAC;iBACR;aACD;YACD,oBAAoB,CAAC,KAAK,CAAC,CAAC;YAE5B,IAAI,KAAK,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YAE9B,MAAM,OAAO,GAAe,iBAAiB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAE5D,kBAAkB,CAAC,oBAAoB,CAAC,CAAC;YACzC,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;gBAC5B,MAAM,QAAQ,GAAuB,KAAK;qBACvC,GAAG,CAAC,CAAC,EAAU,EAAoB,EAAE,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;gBAEnE,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,UAAU,CACtC,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,EAC1B;oBACE,IAAI,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;oBAChC,MAAM,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE;iBACtB,CACF,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,qBAAqB,CAAC,KAAK,CAAC,CAAC;aAC7B;YACD,oBAAoB,EAAE,CAAC;YAEvB,OAAO,IAAI,CAAC;QACb,CAAC;KAAA;IAEY,WAAW,CAAC,SAAe;;YACvC,kBAAkB,CAAC,kCAAkC,CAAC,CAAC;YAEvD,MAAM,MAAM,GAAa,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAE1D,kBAAkB,CAAC,mCAAmC,uBAAuB,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;YAC5F,MAAM,KAAK,GAAW,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,IAAI,CACtD,EAAE,IAAI,EAAE;oBACN,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,QAAQ,EAAE,EAAE;oBACrC,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE;oBAC5B,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE;oBAChC,EAAE,SAAS,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE;iBAClC,EAAC,EACF,EAAE,CACH,CAAC,KAAK,EAAE,CAAC;YACV,oBAAoB,CAAC,KAAK,CAAC,CAAC;YAE5B,IAAI,KAAK,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YAE9B,kBAAkB,CAAC,oBAAoB,CAAC,CAAC;YACzC,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC,UAAU,CACtC,EAAE,IAAI,EAAE;oBACN,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,QAAQ,EAAE,EAAE;oBACrC,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE;oBAC5B,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE;oBAChC,EAAE,SAAS,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE;iBAClC,EAAC,EACF;gBACE,IAAI
,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;gBAChC,MAAM,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE;aACtB,CACF,CAAC;YACF,oBAAoB,EAAE,CAAC;YAEvB,kBAAkB,CAAC,4BAA4B,CAAC,CAAC;YAEjD,OAAO,IAAI,CAAC;QACb,CAAC;KAAA;CACD"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"expiry.js","sourceRoot":"","sources":["../../src/classes/expiry.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAE1B,OAAO,EAAE,4BAA4B,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAE1G,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAE9D,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAE7C,OAAO,EAAW,SAAS,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAEtE,MAAM,OAAO,MAAM;IAAnB;QAmCS,aAAQ,GAAc,EAAE,CAAC;IA6BlC,CAAC;IA/DO,MAAM,CAAC,YAAY,CAAC,IAAY;QACtC,MAAM,IAAI,GAAY,uBAAuB,CAAC,IAAI,CAAC,CAAC;QACpD,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAEtE,MAAM,QAAQ,GAA0B,IAAI;aACzC,GAAG,CAAC,CAAC,KAAc,EAAqB,EAAE;YAC1C,IAAI;gBACH,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;aACxB;YAAC,OAAO,CAAC,EAAE;gBACX,OAAO,SAAS,CAAC;aACjB;QACF,CAAC,CAAC,CAAC;QAEL,IAAI,CAAC,mBAAmB,CAAU,QAAQ,EAAE,SAAS,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,EAAE,CAAC,CAAC;QAExG,OAAO,QAAQ,CAAC;IACjB,CAAC;IAEO,MAAM,CAAC,QAAQ,CAAC,MAAe;QACtC,IAAI,KAAK,GAAW,CAAC,CAAC;QAEtB,IAAI,4BAA4B,CAAC,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE;YACzD,KAAK,IAAI,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,MAAO,CAAC,MAAM,CAAC;SAC5C;QACD,IAAI,4BAA4B,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,EAAE;YAC3D,KAAK,IAAI,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,QAAS,CAAC,MAAM,CAAC;SAC9C;QACD,IAAI,4BAA4B,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,EAAE;YAC3D,KAAK,IAAI,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,QAAS,CAAC,MAAM,CAAC;SAC9C;QAED,OAAO,KAAK,CAAC;IACd,CAAC;IAIM,GAAG,CAAC,QAAmB;QAC7B,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE;YAC9B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;SAC3B;IACF,CAAC;IAEM,aAAa,CAAC,GAAW;QAC/B,MAAM,MAAM,GAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjC,IAAI,IAAuB,CAAC;QAC5B,IAAI,SAA2B,CAAC;QAEhC,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE;YACnC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC;gBAAE,SAAS;YAErD,MAAM,KAAK,GAAW,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAE9C,IAAI,SAAS,KAAK,SAAS,IAAI,KAAK,GAAG,SAAS,EAAE;gBACjD,IAAI,GAAG,MAAM,CAAC;gBACd,SAAS,GAAG,KAAK,CAAC;aAClB;SACD;QAED,IAAI,IAAI,KAAK,SAAS;YAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAEtE,OAAO,IAAI,CAA
C,MAAM,CAAC;IACpB,CAAC;CACD"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"lists.js","sourceRoot":"","sources":["../../src/classes/lists.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAI7C,OAAO,EAAS,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAE/C,MAAM,OAAO,KAAK;IAGjB;QAFQ,UAAK,GAAyB,IAAI,GAAG,EAAmB,CAAC;QAGhE,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE;YAC1B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;SACzB;IACF,CAAC;IAEM,GAAG,CAAC,IAAW,EAAE,OAAiB;QACxC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;YAC5B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SAClC;IACF,CAAC;IAEM,KAAK,CAAC,IAAW,EAAE,GAAW;QACpC,MAAM,OAAO,GAAuB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACzD,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,cAAc,CAAC,CAAC;QAE9C,OAAO,OAAO,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IAEM,aAAa,CAAC,IAAW;QAC/B,MAAM,OAAO,GAAuB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACzD,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,cAAc,CAAC,CAAC;QAE9C,OAAO,OAAO;aACX,MAAM,CAAC,CAAC,IAAY,EAAW,EAAE,CAAC,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC;aAC9D,GAAG,CAAC,CAAC,IAAY,EAAU,EAAE,CAAC,IAAI,CAAC,QAAS,CAAC;aAC7C,GAAG,CAAC,CAAC,QAAgB,EAAU,EAAE,CAAC,QAAQ;aACxC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;aAC3B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;aACpB,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;aAC5B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;aACnB,IAAI,EAAE,CACR;aACA,MAAM,CAAC,CAAC,QAAgB,EAAW,EAAE,CAAC,QAAQ,KAAK,EAAE,CAAC,CAAC;IAC3D,CAAC;CACD"}
|