hydra-crawler 3.0.10 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/apps/cleanup.app.mjs +2 -1
  2. package/dist/apps/cleanup.app.mjs.map +1 -1
  3. package/dist/apps/denylist.app.mjs +4 -17
  4. package/dist/apps/denylist.app.mjs.map +1 -1
  5. package/dist/apps/extract-text.app.mjs +4 -1
  6. package/dist/apps/extract-text.app.mjs.map +1 -1
  7. package/dist/apps/hydra.app.mjs +4 -1
  8. package/dist/apps/hydra.app.mjs.map +1 -1
  9. package/dist/apps/invalid.app.d.mts +10 -0
  10. package/dist/apps/invalid.app.mjs +67 -0
  11. package/dist/apps/invalid.app.mjs.map +1 -0
  12. package/dist/apps/reattempt.app.mjs +1 -8
  13. package/dist/apps/reattempt.app.mjs.map +1 -1
  14. package/dist/apps/unarchive-urls.app.mjs +1 -8
  15. package/dist/apps/unarchive-urls.app.mjs.map +1 -1
  16. package/dist/classes/cleaner.d.mts +5 -1
  17. package/dist/classes/cleaner.mjs +79 -137
  18. package/dist/classes/cleaner.mjs.map +1 -1
  19. package/dist/classes/crawler.mjs +102 -115
  20. package/dist/classes/crawler.mjs.map +1 -1
  21. package/dist/classes/expirer.mjs +11 -6
  22. package/dist/classes/expirer.mjs.map +1 -1
  23. package/dist/classes/robot.mjs +4 -2
  24. package/dist/classes/robot.mjs.map +1 -1
  25. package/dist/cli.mjs +4 -0
  26. package/dist/cli.mjs.map +1 -1
  27. package/dist/helpers/matcher.mjs +7 -9
  28. package/dist/helpers/matcher.mjs.map +1 -1
  29. package/dist/helpers/url.d.mts +1 -0
  30. package/dist/helpers/url.mjs +9 -0
  31. package/dist/helpers/url.mjs.map +1 -0
  32. package/dist/parsers/hyperlinks.parser.mjs +4 -1
  33. package/dist/parsers/hyperlinks.parser.mjs.map +1 -1
  34. package/dist/parsers/interest.parser.mjs +5 -1
  35. package/dist/parsers/interest.parser.mjs.map +1 -1
  36. package/dist/parsers/llama-guard.parser.mjs +5 -1
  37. package/dist/parsers/llama-guard.parser.mjs.map +1 -1
  38. package/dist/parsers/offence.parser.mjs +5 -1
  39. package/dist/parsers/offence.parser.mjs.map +1 -1
  40. package/dist/queries/complex-english.query.mjs +1 -8
  41. package/dist/queries/complex-english.query.mjs.map +1 -1
  42. package/dist/services/database.service.mjs +3 -3
  43. package/dist/services/database.service.mjs.map +1 -1
  44. package/package.json +2 -2
@@ -42,7 +42,8 @@ export class CleanupApp extends CommonsApp {
42
42
  EStatus.DENY,
43
43
  EStatus.FAILED,
44
44
  EStatus.DEAD,
45
- EStatus.DISALLOWED
45
+ EStatus.DISALLOWED,
46
+ EStatus.INVALID
46
47
  ];
47
48
  if (this.includeArchiveds)
48
49
  statuses.push(EStatus.ARCHIVED);
@@ -1 +1 @@
1
- {"version":3,"file":"cleanup.app.mjs","sourceRoot":"","sources":["../../src/apps/cleanup.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,4BAA4B,EAAE,MAAM,oBAAoB,CAAC;AAElE,OAAO,EAAQ,MAAM,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAEnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAC7C,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAUjD,MAAM,OAAO,UAAW,SAAQ,UAAU;IAM/B;IACA;IACA;IAPF,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IAErB,YACU,eAAwB,KAAK,EAC7B,eAAwB,KAAK,EAC7B,gBAA0B;QAEnC,KAAK,CAAC,eAAe,CAAC,CAAC;QAJd,iBAAY,GAAZ,YAAY,CAAiB;QAC7B,iBAAY,GAAZ,YAAY,CAAiB;QAC7B,qBAAgB,GAAhB,gBAAgB,CAAU;QAInC,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,iBAAiB,CAAC;IAC1B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,wBAAwB,CAAC,CAAC;QACrF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEO,KAAK,CAAC,KAAK;QAClB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,MAAM,QAAQ,GAAc;YAC1B,OAAO,CAAC,IAAI;YACZ,OAAO,CAAC,MAAM;YACd,OAAO,CAAC,IAAI;YACZ,OAAO,CAAC,UAAU;SACnB,CAAC;QACF,IAAI,IAAI,CAAC,gBAAgB;YAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAE3D,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,qBAAqB,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAU,EAAU,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEpJ,IAAI,KAAK,GAAW,CAAC,CAAC;QACtB,OAAO,IAAI,EAAE,CAAC;YACb,MAAM,MAAM,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACpE,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,CAAC;iBACnC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;iBAChB,IAAI,CAAC,KAAK,CAAC,CAAC;YAEf,IAAI,CAAC;gBACJ,OAAO,IAAI,EAAE,CAAC;oBACb,KAAK,EAAE,CAAC;oBACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;wBAAE,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;oBAE9C,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;oBAC3C,IAAI,GAAG,KAAK,IAAI;wBAAE,MAAM;oBAExB,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC;gBACpE,CAAC;gBAED,MAAM;YACP,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACd,IAAI,CAAC,4BAA4B,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,GAAG,CAAC,IAAI,KAAK,EAAE;oBAAE,MAAM,GAAG,CAAC;YAC9E,CAAC;QACF,CAAC;QAED,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACnB,CAAC;IAEO,KAAK,CAAC,KAAK;QAClB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,4BAA4B,CAAC,CAAC;YACzF,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,MAAM,MAAM,GAA8B,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAQ,EAAE,EAAE,EAAE,CAAC,CAAC;YAE9F,IAAI,KAAK,GAAa,EAAE,CAAC;YACzB,OAAO,IAAI,EAAE,CAAC;gBACb,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;gBAEtE,MAAM,IAAI,GAAe,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC7C,IAAI,IAAI,KAAK,IAAI;oBAAE,MAAM;gBAEzB,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpD,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAE9D,IAAI,KAAK,CAAC,MAAM,GAAG,KAAK;oBAAE,SAAS;gBAEnC,MAAM,OAAO,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,IAAI,CAC3E,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,CACxB,CAAC;gBAEF,MAAM,OAAO,GAAa,CAAC,MAAM,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;qBACrF,GAAG,CAAC,CAAC,CAAO,EAAU,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAEnC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;oBACvB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;wBAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAC3C,CAAC;gBAED,KAAK,GAAG,EAAE,CAAC;YACZ,CAAC;YACD,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC5B,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,uBAAuB,CAAC,CAAC;YACpF,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC9B,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,EAAE,CAAC,CAAC;gBAClD,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;gBAClE,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC;YACxE,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,IAAI,IAAI,CAAC,YAAY;YAAE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAC1C,IAAI,IAAI,CAAC,YAAY;YAAE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAE1C,MAAM,SAAS,GAAqB,IAAI,CAAC,IAAI,CAAC,yBAAyB,CAAC,YAAY,CAAC,CAAC;QAEtF,MAAM,OAAO,GAAY,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC;QACvE,MAAM,OAAO,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACzC,MAAM,OAAO,CAAC,iBAAiB,EAAE,CAAC;IACnC,CAAC;CACD"}
1
+ {"version":3,"file":"cleanup.app.mjs","sourceRoot":"","sources":["../../src/apps/cleanup.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,4BAA4B,EAAE,MAAM,oBAAoB,CAAC;AAElE,OAAO,EAAQ,MAAM,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAEnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAC7C,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAUjD,MAAM,OAAO,UAAW,SAAQ,UAAU;IAM/B;IACA;IACA;IAPF,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IAErB,YACU,eAAwB,KAAK,EAC7B,eAAwB,KAAK,EAC7B,gBAA0B;QAEnC,KAAK,CAAC,eAAe,CAAC,CAAC;QAJd,iBAAY,GAAZ,YAAY,CAAiB;QAC7B,iBAAY,GAAZ,YAAY,CAAiB;QAC7B,qBAAgB,GAAhB,gBAAgB,CAAU;QAInC,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,iBAAiB,CAAC;IAC1B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,wBAAwB,CAAC,CAAC;QACrF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEO,KAAK,CAAC,KAAK;QAClB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,MAAM,QAAQ,GAAc;YAC1B,OAAO,CAAC,IAAI;YACZ,OAAO,CAAC,MAAM;YACd,OAAO,CAAC,IAAI;YACZ,OAAO,CAAC,UAAU;YAClB,OAAO,CAAC,OAAO;SAChB,CAAC;QACF,IAAI,IAAI,CAAC,gBAAgB;YAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAE3D,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,qBAAqB,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAU,EAAU,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEpJ,IAAI,KAAK,GAAW,CAAC,CAAC;QACtB,OAAO,IAAI,EAAE,CAAC;YACb,MAAM,MAAM,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACpE,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,CAAC;iBACnC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;iBAChB,IAAI,CAAC,KAAK,CAAC,CAAC;YAEf,IAAI,CAAC;gBACJ,OAAO,IAAI,EAAE,CAAC;oBACb,KAAK,EAAE,CAAC;oBACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;wBAAE,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;oBAE9C,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;oBAC3C,IAAI,GAAG,KAAK,IAAI;wBAAE,MAAM;oBAExB,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC;gBACpE,CAAC;gBAED,MAAM;YACP,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACd,IAAI,CAAC,4BAA4B,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,GAAG,CAAC,IAAI,KAAK,EAAE;oBAAE,MAAM,GAAG,CAAC;YAC9E,CAAC;QACF,CAAC;QAED,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACnB,CAAC;IAEO,KAAK,CAAC,KAAK;QAClB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,4BAA4B,CAAC,CAAC;YACzF,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,MAAM,MAAM,GAA8B,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAQ,EAAE,EAAE,EAAE,CAAC,CAAC;YAE9F,IAAI,KAAK,GAAa,EAAE,CAAC;YACzB,OAAO,IAAI,EAAE,CAAC;gBACb,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;gBAEtE,MAAM,IAAI,GAAe,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC7C,IAAI,IAAI,KAAK,IAAI;oBAAE,MAAM;gBAEzB,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpD,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC;oBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAE9D,IAAI,KAAK,CAAC,MAAM,GAAG,KAAK;oBAAE,SAAS;gBAEnC,MAAM,OAAO,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,IAAI,CAC3E,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,CACxB,CAAC;gBAEF,MAAM,OAAO,GAAa,CAAC,MAAM,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;qBACrF,GAAG,CAAC,CAAC,CAAO,EAAU,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAEnC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;oBACvB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;wBAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBAC3C,CAAC;gBAED,KAAK,GAAG,EAAE,CAAC;YACZ,CAAC;YACD,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC5B,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,uBAAuB,CAAC,CAAC;YACpF,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC9B,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,EAAE,CAAC,CAAC;gBAClD,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;gBAClE,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC;YACxE,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,IAAI,IAAI,CAAC,YAAY;YAAE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAC1C,IAAI,IAAI,CAAC,YAAY;YAAE,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAE1C,MAAM,SAAS,GAAqB,IAAI,CAAC,IAAI,CAAC,yBAAyB,CAAC,YAAY,CAAC,CAAC;QAEtF,MAAM,OAAO,GAAY,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC;QACvE,MAAM,OAAO,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACzC,MAAM,OAAO,CAAC,iBAAiB,EAAE,CAAC;IACnC,CAAC;CACD"}
@@ -38,7 +38,7 @@ export class DenylistApp extends CommonsApp {
38
38
  { // scope
39
39
  const log = commonsLogDoing('denylist', 'Searching for non-DENY urls on the denylist');
40
40
  const result = this.databaseService.getUrls()
41
- .find({ status: { $nin: [EStatus.DENY, EStatus.ARCHIVED] } }, {});
41
+ .find({ status: { $nin: [EStatus.DENY, EStatus.ARCHIVED, EStatus.INVALID] } }, {});
42
42
  while (true) {
43
43
  tally++;
44
44
  if ((tally % 100) === 0)
@@ -48,14 +48,8 @@ export class DenylistApp extends CommonsApp {
48
48
  const row = await result.next();
49
49
  if (row === null)
50
50
  break;
51
- try {
52
- if (!this.lists.match(EList.DENY, row.url))
53
- continue;
54
- }
55
- catch (_e) {
56
- // probably an invalid URL
57
- // so mark it as denied
58
- }
51
+ if (!this.lists.match(EList.DENY, row.url))
52
+ continue;
59
53
  found++;
60
54
  urls.push(row.url);
61
55
  }
@@ -96,15 +90,8 @@ export class DenylistApp extends CommonsApp {
96
90
  const row = await result.next();
97
91
  if (row === null)
98
92
  break;
99
- try {
100
- if (this.lists.match(EList.DENY, row.url))
101
- continue;
102
- }
103
- catch (_e) {
104
- // probably an invalid URL
105
- // so don't re-enable it
93
+ if (this.lists.match(EList.DENY, row.url))
106
94
  continue;
107
- }
108
95
  found++;
109
96
  urls.push(row.url);
110
97
  }
@@ -1 +1 @@
1
- {"version":3,"file":"denylist.app.mjs","sourceRoot":"","sources":["../../src/apps/denylist.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAEvD,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAGnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAM7C,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAI3C,MAAM,OAAO,WAAY,SAAQ,UAAU;IAClC,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IAErB;QACC,KAAK,CAAC,eAAe,CAAC,CAAC;QAEvB,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,kBAAkB,CAAC;IAC3B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,wBAAwB,CAAC,CAAC;QACtF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEO,KAAK,CAAC,OAAO,CAAC,KAAc;QACnC,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,IAAI,KAAK,GAAW,CAAC,CAAC;QACtB,IAAI,KAAK,GAAW,CAAC,CAAC;QAEtB,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,6CAA6C,CAAC,CAAC;YAE3G,MAAM,MAAM,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACpE,IAAI,CACH,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,QAAQ,CAAE,EAAE,EAAE,EACxD,EAAE,CACH,CAAC;YAEJ,OAAO,IAAI,EAAE,CAAC;gBACb,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBAC5D,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,KAAK;oBAAE,MAAM;gBAEhD,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,CAAC;oBACJ,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC;wBAAE,SAAS;gBACtD,CAAC;gBAAC,OAAO,EAAE,EAAE,CAAC;oBACb,0BAA0B;oBAC1B,uBAAuB;gBACxB,CAAC;gBAED,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,CAAC;YAED,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,0BAA0B,CAAC,CAAC;YAExF,MAAM,OAAO,GAAe,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEzD,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB,EAAE,IAAI,EAAE;wBACN,MAAM,EAAE,OAAO,CAAC,IAAI;qBACrB,EAAE,MAAM,EAAE;wBACT,GAAG,EAAE,IAAI;qBACV,EAAE,CACJ,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;IAEO,KAAK,CAAC,OAAO,CAAC,KAAc;QACnC,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,IAAI,KAAK,GAAW,CAAC,CAAC;QACtB,IAAI,KAAK,GAAW,CAAC,CAAC;QAEtB,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,6CAA6C,CAAC,CAAC;YAE3G,MAAM,MAAM,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACpE,IAAI,CACH,EAAE,MAAM,EAAE,OAAO,CAAC,IAAI,EAAE,EACxB,EAAE,CACH,CAAC;YAEJ,OAAO,IAAI,EAAE,CAAC;gBACb,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBAC5D,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,KAAK;oBAAE,MAAM;gBAEhD,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,CAAC;oBACJ,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC;wBAAE,SAAS;gBACrD,CAAC;gBAAC,OAAO,EAAE,EAAE,CAAC;oBACb,0BAA0B;oBAC1B,wBAAwB;oBACxB,SAAS;gBACV,CAAC;gBAED,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,CAAC;YAED,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,4BAA4B,CAAC,CAAC;YAE1F,MAAM,OAAO,GAAe,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEzD,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB,EAAE,IAAI,EAAE;wBACN,MAAM,EAAE,OAAO,CAAC,MAAM;qBACvB,EAAE,MAAM,EAAE;wBACT,GAAG,EAAE,IAAI;qBACV,EAAE,CACJ,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,MAAM,IAAI,GAAgB,IAAI,WAAW,EAAE,CAAC;QAE5C,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,yBAAyB,CAAC,OAAO,CAAC,CAAC,CAAC;QAC5D,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,yBAAyB,CAAC,OAAO,CAAC,CAAC,CAAC;IAC7D,CAAC;CACD"}
1
+ {"version":3,"file":"denylist.app.mjs","sourceRoot":"","sources":["../../src/apps/denylist.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAEvD,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAGnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAM7C,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAI3C,MAAM,OAAO,WAAY,SAAQ,UAAU;IAClC,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IAErB;QACC,KAAK,CAAC,eAAe,CAAC,CAAC;QAEvB,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,kBAAkB,CAAC;IAC3B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,wBAAwB,CAAC,CAAC;QACtF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEO,KAAK,CAAC,OAAO,CAAC,KAAc;QACnC,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,IAAI,KAAK,GAAW,CAAC,CAAC;QACtB,IAAI,KAAK,GAAW,CAAC,CAAC;QAEtB,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,6CAA6C,CAAC,CAAC;YAE3G,MAAM,MAAM,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACpE,IAAI,CACH,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,CAAE,EAAE,EAAE,EACzE,EAAE,CACH,CAAC;YAEJ,OAAO,IAAI,EAAE,CAAC;gBACb,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBAC5D,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,KAAK;oBAAE,MAAM;gBAEhD,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAErD,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,CAAC;YAED,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,0BAA0B,CAAC,CAAC;YAExF,MAAM,OAAO,GAAe,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEzD,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB,EAAE,IAAI,EAAE;wBACN,MAAM,EAAE,OAAO,CAAC,IAAI;qBACrB,EAAE,MAAM,EAAE;wBACT,GAAG,EAAE,IAAI;qBACV,EAAE,CACJ,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;IAEO,KAAK,CAAC,OAAO,CAAC,KAAc;QACnC,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,IAAI,KAAK,GAAW,CAAC,CAAC;QACtB,IAAI,KAAK,GAAW,CAAC,CAAC;QAEtB,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,6CAA6C,CAAC,CAAC;YAE3G,MAAM,MAAM,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACpE,IAAI,CACH,EAAE,MAAM,EAAE,OAAO,CAAC,IAAI,EAAE,EACxB,EAAE,CACH,CAAC;YAEJ,OAAO,IAAI,EAAE,CAAC;gBACb,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBAC5D,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,GAAG,KAAK;oBAAE,MAAM;gBAEhD,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAEpD,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,CAAC;YAED,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,UAAU,EAAE,4BAA4B,CAAC,CAAC;YAE1F,MAAM,OAAO,GAAe,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEzD,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB,EAAE,IAAI,EAAE;wBACN,MAAM,EAAE,OAAO,CAAC,MAAM;qBACvB,EAAE,MAAM,EAAE;wBACT,GAAG,EAAE,IAAI;qBACV,EAAE,CACJ,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,MAAM,IAAI,GAAgB,IAAI,WAAW,EAAE,CAAC;QAE5C,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,yBAAyB,CAAC,OAAO,CAAC,CAAC,CAAC;QAC5D,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,yBAAyB,CAAC,OAAO,CAAC,CAAC,CAAC;IAC7D,CAAC;CACD"}
@@ -4,6 +4,7 @@ import { commonsLogDoing } from 'nodecommons-esm-log';
4
4
  import { CommonsApp } from 'nodecommons-esm-app';
5
5
  import { Crawler } from '../classes/crawler.mjs';
6
6
  import { TextParser } from '../parsers/text.parser.mjs';
7
+ import { attemptValidUrl } from '../helpers/url.mjs';
7
8
  // extract the text from the HTML of a URL
8
9
  class TextParserInstance extends TextParser {
9
10
  // eslint-disable-next-line @typescript-eslint/require-await
@@ -22,7 +23,9 @@ export class ExtractTextApp extends CommonsApp {
22
23
  }
23
24
  async run() {
24
25
  const log = commonsLogDoing('extract-text', `Downloading from URL at ${this.url}`);
25
- const whatwg = new URL(this.url);
26
+ const whatwg = attemptValidUrl(this.url);
27
+ if (whatwg === false)
28
+ return;
26
29
  let handler;
27
30
  switch (whatwg.protocol) {
28
31
  case 'http:':
@@ -1 +1 @@
1
- {"version":3,"file":"extract-text.app.mjs","sourceRoot":"","sources":["../../src/apps/extract-text.app.mts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAE/B,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAIjD,OAAO,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAOxD,0CAA0C;AAE1C,MAAM,kBAAmB,SAAQ,UAAyB;IACzD,4DAA4D;IAClD,KAAK,CAAC,SAAS,CACvB,SAA0B,EAC1B,IAAY;QAEb,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC;CACD;AAED,MAAM,OAAO,cAAe,SAAQ,UAAU;IAEnC;IADV,YACU,GAAW;QAEpB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,QAAG,GAAH,GAAG,CAAQ;IAGrB,CAAC;IAEM,UAAU;QAChB,OAAO,sBAAsB,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,MAAM,GAAG,GAAuB,eAAe,CAAC,cAAc,EAAE,2BAA2B,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;QAEvG,MAAM,MAAM,GAAQ,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEtC,IAAI,OAA2C,CAAC;QAChD,QAAQ,MAAM,CAAC,QAAQ,EAAE,CAAC;YACzB,KAAK,OAAO;gBAAE,OAAO,GAAG,IAAI,CAAC;gBAAC,MAAM;YACpC,KAAK,QAAQ;gBAAE,OAAO,GAAG,KAAK,CAAC;gBAAC,MAAM;YACtC;gBACC,MAAM,IAAI,KAAK,CAAC,6BAA6B,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,MAAM,OAAO,GAAoB,MAAM,OAAO,CAAC,OAAO,CACpD,OAAO,EACP,IAAI,CAAC,GAAG,EACR,KAAK,EACL,OAAO,CACR,CAAC;QACF,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,MAAM,GAAuB,IAAI,kBAAkB,CACvD,OAAO,CACR,CAAC;QACF,MAAM,MAAM,CAAC,KAAK,CAAC,SAAuC,CAAC,CAAC;IAC7D,CAAC;CACD"}
1
+ {"version":3,"file":"extract-text.app.mjs","sourceRoot":"","sources":["../../src/apps/extract-text.app.mts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAE/B,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAIjD,OAAO,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAIxD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAIrD,0CAA0C;AAE1C,MAAM,kBAAmB,SAAQ,UAAyB;IACzD,4DAA4D;IAClD,KAAK,CAAC,SAAS,CACvB,SAA0B,EAC1B,IAAY;QAEb,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC;CACD;AAED,MAAM,OAAO,cAAe,SAAQ,UAAU;IAEnC;IADV,YACU,GAAW;QAEpB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,QAAG,GAAH,GAAG,CAAQ;IAGrB,CAAC;IAEM,UAAU;QAChB,OAAO,sBAAsB,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,MAAM,GAAG,GAAuB,eAAe,CAAC,cAAc,EAAE,2BAA2B,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;QAEvG,MAAM,MAAM,GAAc,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpD,IAAI,MAAM,KAAK,KAAK;YAAE,OAAO;QAE7B,IAAI,OAA2C,CAAC;QAChD,QAAQ,MAAM,CAAC,QAAQ,EAAE,CAAC;YACzB,KAAK,OAAO;gBAAE,OAAO,GAAG,IAAI,CAAC;gBAAC,MAAM;YACpC,KAAK,QAAQ;gBAAE,OAAO,GAAG,KAAK,CAAC;gBAAC,MAAM;YACtC;gBACC,MAAM,IAAI,KAAK,CAAC,6BAA6B,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,MAAM,OAAO,GAAoB,MAAM,OAAO,CAAC,OAAO,CACpD,OAAO,EACP,IAAI,CAAC,GAAG,EACR,KAAK,EACL,OAAO,CACR,CAAC;QACF,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,MAAM,GAAuB,IAAI,kBAAkB,CACvD,OAAO,CACR,CAAC;QACF,MAAM,MAAM,CAAC,KAAK,CAAC,SAAuC,CAAC,CAAC;IAC7D,CAAC;CACD"}
@@ -34,6 +34,7 @@ import { isIParserConfig } from '../interfaces/iparser-config.mjs';
34
34
  import { isTHydraConfig } from '../types/thydra-config.mjs';
35
35
  import { isTCrawlConfig } from '../types/tcrawl-config.mjs';
36
36
  import { isTRobotsConfig } from '../types/trobots-config.mjs';
37
+ import { attemptValidUrl } from '../helpers/url.mjs';
37
38
  export class HydraApp extends CommonsSocketIoApp {
38
39
  databaseService;
39
40
  lists;
@@ -189,7 +190,9 @@ export class HydraApp extends CommonsSocketIoApp {
189
190
  throw new Error('Database service has not been set yet');
190
191
  if (!this.crawlServer)
191
192
  throw new Error('Crawl server has not been set yet');
192
- const whatwg = new URL(singleUrl);
193
+ const whatwg = attemptValidUrl(singleUrl);
194
+ if (whatwg === false)
195
+ throw new Error('Single URL is invalid');
193
196
  const crawler = new Crawler(whatwg.hostname, this.databaseService, this.crawlConfig, this.parsersConfig, this.robotsConfig, this.crawlServer.listParsers(), this.lists);
194
197
  commonsLogInfo(`Running single crawl of: ${singleUrl}`);
195
198
  const outcome = await crawler.fetch(singleUrl, false);
@@ -1 +1 @@
1
- {"version":3,"file":"hydra.app.mjs","sourceRoot":"","sources":["../../src/apps/hydra.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,wBAAwB,EAAE,6BAA6B,EAAmB,MAAM,oBAAoB,CAAC;AAG9G,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,cAAc,EAAsB,MAAM,qBAAqB,CAAC;AAC3G,OAAO,EAAE,gCAAgC,EAA0D,uBAAuB,EAAE,MAAM,yBAAyB,CAAC;AAG5J,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AAEnE,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAE/C,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC/C,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAEjD,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,WAAW,EAAE,MAAM,6BAA6B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,mCAAmC,CAAC;AAItE,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,kCAAkC,CAAC;AACnE,OAAO,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,oBAAoB,EAAE,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAE,0BAA0B,EAAE,MAAM,6CAA6C,CAAC;AACzF,iEAAiE;AACjE,mEAAmE;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,mCAAmC,CAAC;AAErE,OAAO,EAAiB,eAAe,EAAE,MAAM,kCAAkC,CAAC;AAIlF,OAAO,EAAgB,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,EAAgB,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,EAAiB,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAU7E,MAAM,OAAO,QAAS,SAAQ,kBAAkC;IACvD,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IACb,MAAM,CAAS;IAEf,WAAW,CAAe;IAC1B,WAAW,CAAe;IAC1B,aAAa,CAAiC;IAC9C,YAAY,CAAgB;IAE5B,WAAW,CAAwB;IAEnC,gBAAgB,GAA2B,EAAE,CAAC;IAC9C,iBAAiB,CAA8B;IAE/C,OAAO,CAAoB;IAEnC;QACC,KAAK,CAAC,eAAe,CAAC,CAAC;QAEvB,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAE3B,MAAM,WAAW,GAAY,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QACzD,IAAI,CAAC,cAAc,CAAC,WAAW,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC/E,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAE/B,MAAM,WAAW,GAAY,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QACzD,IAAI,CAAC,cAAc,CAAC,WAAW,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC/E,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAE/B,MAAM,aAAa,GAAY,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAC7D,IAAI,CAAC,6BAA6B,CAAgB,aAAa,EAAE,eAAe,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAClI,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QAEnC,MAAM,YAAY,GAAY,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC3D,IAAI,CAAC,eAAe,CAAC,YAAY,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAClF,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IAClC,CAAC;IAEkB,eAAe;QACjC,MAAM,aAAa,GAAY,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAC7D,IAAI,CAAC,uBAAuB,CAAC,aAAa,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAEvF,OAAO,gCAAgC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,aAAa,CAAC,eAAe,CAAC,CAAC;IAC9F,CAAC;IAES,mBAAmB,CAC3B,aAAyC,EACzC,aAAoC;QAErC,OAAO,IAAI,cAAc,CACvB,aAAa,EACb,aAAa,CACd,CAAC;IACH,CAAC;IAEM,UAAU;QAChB,OAAO,OAAO,CAAC;IAChB,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEM,WAAW,CAChB,QAAmB;QAEpB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC3B,CAAC;IAEM,sBAAsB,CAC3B,KAA6B;QAE9B,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,IAAI,CAAC,OAAO,CACV,CAAC,UAAgF,EAAE,IAAY,EAAQ,EAAE;YACxG,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAExF,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,8BAA8B,CAAC,CAAC;gBACzF,IAAI,eAAe,CACjB,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAC;gBAChF,IAAI,OAAO,CACT,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC;gBACjF,IAAI,QAAQ,CACV,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC;gBACnF,IAAI,UAAU,CACZ,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,uBAAuB,CAAC,CAAC;gBAClF,IAAI,SAAS,CACX,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,2BAA2B,CAAC,CAAC;gBACtF,IAAI,aAAa,CACf,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAC;gBAChF,IAAI,OAAO,CACT,UAAU,EACV,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAC;gBAChF,IAAI,OAAO,CACT,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;QACF,CAAC,CACF,CAAC;QAEF,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;QAEnB,MAAM,cAAc,GAA6B,IAAI,CAAC,cAAc,CAAC;QACrE,IAAI,CAAC,cAAc;YAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAE7E,IAAI,CAAC,OAAO,GAAG,IAAI,OAAO,CACxB,IAAI,CAAC,eAAe,EACpB,cAAc,CACf,CAAC;QAEF,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAChC,IAAI,CAAC,eAAe,EACpB,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,aAAa,EAClB,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,OAAO,CACb,CAAC;QAEF,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC;YACnF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;QAED,KAAK,MAAM,MAAM,IAAI;YACnB,YAAY;YACZ,gBAAgB;YAChB,eAAe;YACf,UAAU;YACV,cAAc;YACd,cAAc;YACd,cAAc;YACd,cAAc;YACd,oBAAoB;YACpB,0BAA0B;YAC1B,iBAAiB;YACjB,kBAAkB;YAClB,gBAAgB;SACjB,EAAE,CAAC;YACH,eAAe,CAAC,qBAAqB,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;YACpD,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YACnC,MAAM,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;QAC/C,CAAC;QAED,MAAM,oBAAoB,GAAqB,IAAI,CAAC,IAAI,CAAC,yBAAyB,CAAC,wBAAwB,CAAC,CAAC;QAC7G,IAAI,oBAAoB;YAAE,eAAe,CAAC,qCAAqC,oBAAoB,EAAE,CAAC,CAAC;QAEvG,IAAI,CAAC,iBAAiB,GAAG,IAAI,iBAAiB,CAC5C,IAAI,CAAC,gBAAgB,EACrB,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CAAC,WAAW,EAChB,oBAAoB,CAAC,2DAA2D;SACjF,CAAC;IACH,CAAC;IAEkB,SAAS;QAC3B,KAAK,CAAC,KAAK,IAAmB,EAAE;YAC/B,IAAI,CAAC,IAAI,CAAC,WAAW;gBAAE,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;YAEpG,MAAM,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;YAE/B,IAAI,CAAC,KAAK,EAAE,CAAC;QACd,CAAC,CAAC,EAAE,CAAC;IACN,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,MAAM,SAAS,GAAqB,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,YAAY,CAAC,CAAC;QACrF,IAAI,SAAS,EAAE,CAAC;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YACpF,IAAI,CAAC,IAAI,CAAC,WAAW;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAE5E,MAAM,MAAM,GAAQ,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;YAEvC,MAAM,OAAO,GAAY,IAAI,OAAO,CAClC,MAAM,CAAC,QAAQ,EACf,IAAI,CAAC,eAAe,EACpB,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,aAAa,EAClB,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,EAC9B,IAAI,CAAC,KAAK,CACX,CAAC;YAEF,cAAc,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;YACxD,MAAM,OAAO,GAAW,MAAM,OAAO,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;YAC9D,cAAc,CAAC,gCAAgC,OAAO,EAAE,CAAC,CAAC;YAE1D,OAAO;QACR,CAAC;QAED,MAAM,gBAAgB,GAAqB,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,oBAAoB,CAAC,CAAC;QACpG,IAAI,gBAAgB,EAAE,CAAC;YACtB,MAAM,OAAO,GAAa,wBAAwB,CAAC,gBAAgB,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;YACtF,eAAe,CAAC,iCAAiC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAEtE,IAAI,CAAC,IAAI,CAAC,WAAW;gBAAE,MAAM,IAAI,KAAK,CAAC,+DAA+D,CAAC,CAAC;YACxG,IAAI,CAAC,WAAW,CAAC,UAAU,GAAG,OAAO,CAAC;QACvC,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAChG,IAAI,CAAC,IAAI,CAAC,iBAAiB;YAAE,MAAM,IAAI,KAAK,CAAC,sEAAsE,CAAC,CAAC;QAErH,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,KAAK,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAE1B,MAAM,KAAK,CAAC,GAAG,EAAE,CAAC;IACnB,CAAC;IAEkB,KAAK,CAAC,QAAQ;QAChC,IAAI,CAAC,IAAI,CAAC,WAAW;YAAE,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAEpG,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,CAAC;QAElC,MAAM,KAAK,CAAC,QAAQ,EAAE,CAAC;IACxB,CAAC;CACD"}
1
+ {"version":3,"file":"hydra.app.mjs","sourceRoot":"","sources":["../../src/apps/hydra.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,wBAAwB,EAAE,6BAA6B,EAAmB,MAAM,oBAAoB,CAAC;AAG9G,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,cAAc,EAAsB,MAAM,qBAAqB,CAAC;AAC3G,OAAO,EAAE,gCAAgC,EAA0D,uBAAuB,EAAE,MAAM,yBAAyB,CAAC;AAG5J,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AAEnE,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAE/C,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC/C,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAEjD,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,WAAW,EAAE,MAAM,6BAA6B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,mCAAmC,CAAC;AAItE,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,kCAAkC,CAAC;AACnE,OAAO,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,oBAAoB,EAAE,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAE,0BAA0B,EAAE,MAAM,6CAA6C,CAAC;AACzF,iEAAiE;AACjE,mEAAmE;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,mCAAmC,CAAC;AAErE,OAAO,EAAiB,eAAe,EAAE,MAAM,kCAAkC,CAAC;AAIlF,OAAO,EAAgB,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,EAAgB,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,EAAiB,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAI7E,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAQrD,MAAM,OAAO,QAAS,SAAQ,kBAAkC;IACvD,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IACb,MAAM,CAAS;IAEf,WAAW,CAAe;IAC1B,WAAW,CAAe;IAC1B,aAAa,CAAiC;IAC9C,YAAY,CAAgB;IAE5B,WAAW,CAAwB;IAEnC,gBAAgB,GAA2B,EAAE,CAAC;IAC9C,iBAAiB,CAA8B;IAE/C,OAAO,CAAoB;IAEnC;QACC,KAAK,CAAC,eAAe,CAAC,CAAC;QAEvB,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAE3B,MAAM,WAAW,GAAY,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QACzD,IAAI,CAAC,cAAc,CAAC,WAAW,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC/E,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAE/B,MAAM,WAAW,GAAY,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QACzD,IAAI,CAAC,cAAc,CAAC,WAAW,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC/E,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAE/B,MAAM,aAAa,GAAY,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAC7D,IAAI,CAAC,6BAA6B,CAAgB,aAAa,EAAE,eAAe,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAClI,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QAEnC,MAAM,YAAY,GAAY,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC3D,IAAI,CAAC,eAAe,CAAC,YAAY,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAClF,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IAClC,CAAC;IAEkB,eAAe;QACjC,MAAM,aAAa,GAAY,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAC7D,IAAI,CAAC,uBAAuB,CAAC,aAAa,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAEvF,OAAO,gCAAgC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,aAAa,CAAC,eAAe,CAAC,CAAC;IAC9F,CAAC;IAES,mBAAmB,CAC3B,aAAyC,EACzC,aAAoC;QAErC,OAAO,IAAI,cAAc,CACvB,aAAa,EACb,aAAa,CACd,CAAC;IACH,CAAC;IAEM,UAAU;QAChB,OAAO,OAAO,CAAC;IAChB,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEM,WAAW,CAChB,QAAmB;QAEpB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC3B,CAAC;IAEM,sBAAsB,CAC3B,KAA6B;QAE9B,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,IAAI,CAAC,OAAO,CACV,CAAC,UAAgF,EAAE,IAAY,EAAQ,EAAE;YACxG,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAExF,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,8BAA8B,CAAC,CAAC;gBACzF,IAAI,eAAe,CACjB,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAC;gBAChF,IAAI,OAAO,CACT,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAC;gBACjF,IAAI,QAAQ,CACV,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC;gBACnF,IAAI,UAAU,CACZ,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,uBAAuB,CAAC,CAAC;gBAClF,IAAI,SAAS,CACX,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,2BAA2B,CAAC,CAAC;gBACtF,IAAI,aAAa,CACf,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAC;gBAChF,IAAI,OAAO,CACT,UAAU,EACV,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;YAED,CAAC,CAAC,QAAQ;gBACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAC;gBAChF,IAAI,OAAO,CACT,UAAU,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CACL,CAAC;gBACF,GAAG,CAAC,OAAO,EAAE,CAAC;YACf,CAAC;QACF,CAAC,CACF,CAAC;QAEF,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;QAEnB,MAAM,cAAc,GAA6B,IAAI,CAAC,cAAc,CAAC;QACrE,IAAI,CAAC,cAAc;YAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAE7E,IAAI,CAAC,OAAO,GAAG,IAAI,OAAO,CACxB,IAAI,CAAC,eAAe,EACpB,cAAc,CACf,CAAC;QAEF,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAChC,IAAI,CAAC,eAAe,EACpB,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,aAAa,EAClB,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,OAAO,CACb,CAAC;QAEF,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,OAAO,EAAE,wBAAwB,CAAC,CAAC;YACnF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;QAED,KAAK,MAAM,MAAM,IAAI;YACnB,YAAY;YACZ,gBAAgB;YAChB,eAAe;YACf,UAAU;YACV,cAAc;YACd,cAAc;YACd,cAAc;YACd,cAAc;YACd,oBAAoB;YACpB,0BAA0B;YAC1B,iBAAiB;YACjB,kBAAkB;YAClB,gBAAgB;SACjB,EAAE,CAAC;YACH,eAAe,CAAC,qBAAqB,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;YACpD,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YACnC,MAAM,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;QAC/C,CAAC;QAED,MAAM,oBAAoB,GAAqB,IAAI,CAAC,IAAI,CAAC,yBAAyB,CAAC,wBAAwB,CAAC,CAAC;QAC7G,IAAI,oBAAoB;YAAE,eAAe,CAAC,qCAAqC,oBAAoB,EAAE,CAAC,CAAC;QAEvG,IAAI,CAAC,iBAAiB,GAAG,IAAI,iBAAiB,CAC5C,IAAI,CAAC,gBAAgB,EACrB,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,eAAe,EACpB,IAAI,CAAC,WAAW,EAChB,oBAAoB,CAAC,2DAA2D;SACjF,CAAC;IACH,CAAC;IAEkB,SAAS;QAC3B,KAAK,CAAC,KAAK,IAAmB,EAAE;YAC/B,IAAI,CAAC,IAAI,CAAC,WAAW;gBAAE,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;YAEpG,MAAM,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;YAE/B,IAAI,CAAC,KAAK,EAAE,CAAC;QACd,CAAC,CAAC,EAAE,CAAC;IACN,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,MAAM,SAAS,GAAqB,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,YAAY,CAAC,CAAC;QACrF,IAAI,SAAS,EAAE,CAAC;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YACpF,IAAI,CAAC,IAAI,CAAC,WAAW;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAE5E,MAAM,MAAM,GAAc,eAAe,CAAC,SAAS,CAAC,CAAC;YACrD,IAAI,MAAM,KAAK,KAAK;gBAAE,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;YAE/D,MAAM,OAAO,GAAY,IAAI,OAAO,CAClC,MAAM,CAAC,QAAQ,EACf,IAAI,CAAC,eAAe,EACpB,IAAI,CAAC,WAAW,EAChB,IAAI,CAAC,aAAa,EAClB,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,EAC9B,IAAI,CAAC,KAAK,CACX,CAAC;YAEF,cAAc,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;YACxD,MAAM,OAAO,GAAW,MAAM,OAAO,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;YAC9D,cAAc,CAAC,gCAAgC,OAAO,EAAE,CAAC,CAAC;YAE1D,OAAO;QACR,CAAC;QAED,MAAM,gBAAgB,GAAqB,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,oBAAoB,CAAC,CAAC;QACpG,IAAI,gBAAgB,EAAE,CAAC;YACtB,MAAM,OAAO,GAAa,wBAAwB,CAAC,gBAAgB,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;YACtF,eAAe,CAAC,iCAAiC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAEtE,IAAI,CAAC,IAAI,CAAC,WAAW;gBAAE,MAAM,IAAI,KAAK,CAAC,+DAA+D,CAAC,CAAC;YACxG,IAAI,CAAC,WAAW,CAAC,UAAU,GAAG,OAAO,CAAC;QACvC,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAChG,IAAI,CAAC,IAAI,CAAC,iBAAiB;YAAE,MAAM,IAAI,KAAK,CAAC,sEAAsE,CAAC,CAAC;QAErH,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,KAAK,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAE1B,MAAM,KAAK,CAAC,GAAG,EAAE,CAAC;IACnB,CAAC;IAEkB,KAAK,CAAC,QAAQ;QAChC,IAAI,CAAC,IAAI,CAAC,WAAW;YAAE,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAEpG,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,CAAC;QAElC,MAAM,KAAK,CAAC,QAAQ,EAAE,CAAC;IACxB,CAAC;CACD"}
@@ -0,0 +1,10 @@
1
+ import { CommonsApp } from 'nodecommons-esm-app';
2
+ import { DatabaseService } from '../services/database.service.mjs';
3
+ export declare class InvalidApp extends CommonsApp {
4
+ private databaseService;
5
+ constructor();
6
+ getAppName(): string;
7
+ setDatabaseService(databaseService: DatabaseService): void;
8
+ init(): Promise<void>;
9
+ run(): Promise<void>;
10
+ }
@@ -0,0 +1,67 @@
1
+ import { commonsArrayChunk } from 'tscommons-esm-core';
2
+ import { EStatus } from 'hydra-crawler-esm-assets';
3
+ import { commonsLogDoing } from 'nodecommons-esm-log';
4
+ import { CommonsApp } from 'nodecommons-esm-app';
5
+ import { attemptValidUrl } from '../helpers/url.mjs';
6
+ export class InvalidApp extends CommonsApp {
7
+ databaseService;
8
+ constructor() {
9
+ super('hydra-crawler');
10
+ }
11
+ getAppName() {
12
+ return 'Hydra - Invalid';
13
+ }
14
+ setDatabaseService(databaseService) {
15
+ this.databaseService = databaseService;
16
+ }
17
+ async init() {
18
+ if (!this.databaseService)
19
+ throw new Error('Database service has not been set yet');
20
+ const log = commonsLogDoing('invalid', 'Connecting to database');
21
+ await this.databaseService.init();
22
+ log.success();
23
+ await super.init();
24
+ }
25
+ async run() {
26
+ if (!this.databaseService)
27
+ throw new Error('Database service has not been set');
28
+ let tally = 0;
29
+ let found = 0;
30
+ const urls = [];
31
+ { // scope
32
+ const log = commonsLogDoing('invalid', 'Searching for invalid urls not marked INVALID');
33
+ const result = this.databaseService.getUrls()
34
+ .find({ status: { $ne: EStatus.INVALID } }, {});
35
+ while (true) {
36
+ tally++;
37
+ if ((tally % 100) === 0)
38
+ log.progress(`${tally}, ${found}`);
39
+ const row = await result.next();
40
+ if (row === null)
41
+ break;
42
+ const attempt = attemptValidUrl(row.url);
43
+ if (attempt !== false)
44
+ continue;
45
+ found++;
46
+ urls.push(row.url);
47
+ }
48
+ log.result(found);
49
+ }
50
+ { // scope
51
+ const log = commonsLogDoing('invalid', 'Marking detected as INVALID');
52
+ const batches = commonsArrayChunk(urls, 100);
53
+ tally = 0;
54
+ for (const batch of batches) {
55
+ await this.databaseService.getUrls().updateMany({ url: { $in: batch } }, { $set: {
56
+ status: EStatus.INVALID
57
+ }, $unset: {
58
+ ttl: true
59
+ } });
60
+ tally += 100;
61
+ log.progress(tally);
62
+ }
63
+ log.success();
64
+ }
65
+ }
66
+ }
67
+ //# sourceMappingURL=invalid.app.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"invalid.app.mjs","sourceRoot":"","sources":["../../src/apps/invalid.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAEvD,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAGnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAIrD,MAAM,OAAO,UAAW,SAAQ,UAAU;IACjC,eAAe,CAA4B;IAEnD;QACC,KAAK,CAAC,eAAe,CAAC,CAAC;IACxB,CAAC;IAEM,UAAU;QAChB,OAAO,iBAAiB,CAAC;IAC1B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,wBAAwB,CAAC,CAAC;QACrF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,IAAI,KAAK,GAAW,CAAC,CAAC;QACtB,IAAI,KAAK,GAAW,CAAC,CAAC;QAEtB,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,+CAA+C,CAAC,CAAC;YAE5G,MAAM,MAAM,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACpE,IAAI,CACH,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,OAAO,CAAC,OAAO,EAAE,EAAE,EACpC,EAAE,CACH,CAAC;YAEJ,OAAO,IAAI,EAAE,CAAC;gBACb,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,KAAK,KAAK,EAAE,CAAC,CAAC;gBAE5D,MAAM,GAAG,GAAc,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC3C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,MAAM,OAAO,GAAc,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACpD,IAAI,OAAO,KAAK,KAAK;oBAAE,SAAS;gBAEhC,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,CAAC;YAED,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,SAAS,EAAE,6BAA6B,CAAC,CAAC;YAE1F,MAAM,OAAO,GAAe,iBAAiB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEzD,KAAK,GAAG,CAAC,CAAC;YACV,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC7B,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB,EAAE,IAAI,EAAE;wBACN,MAAM,EAAE,OAAO,CAAC,OAAO;qBACxB,EAAE,MAAM,EAAE;wBACT,GAAG,EAAE,IAAI;qBACV,EAAE,CACJ,CAAC;gBAEF,KAAK,IAAI,GAAG,CAAC;gBACb,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;CACD"}
@@ -47,15 +47,8 @@ export class ReattemptApp extends CommonsApp {
47
47
  const row = await results.next();
48
48
  if (row === null)
49
49
  break;
50
- try {
51
- if (!this.lists.match(EList.ALLOW, row.url))
52
- continue;
53
- }
54
- catch (_e) {
55
- // probably an invalid URL
56
- // so ignore
50
+ if (!this.lists.match(EList.ALLOW, row.url))
57
51
  continue;
58
- }
59
52
  urls.push(row.url);
60
53
  tally++;
61
54
  }
@@ -1 +1 @@
1
- {"version":3,"file":"reattempt.app.mjs","sourceRoot":"","sources":["../../src/apps/reattempt.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAGnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAM7C,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAI3C,MAAM,OAAO,YAAa,SAAQ,UAAU;IAMjC;IALF,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IAErB,YACU,OAAkB;QAE3B,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,YAAO,GAAP,OAAO,CAAW;QAI3B,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,mBAAmB,CAAC;IAC5B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,WAAW,EAAE,wBAAwB,CAAC,CAAC;QACvF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,MAAM,GAAG,GAAuB,eAAe,CAAC,WAAW,EAAE,6BAA6B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YAExH,MAAM,OAAO,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACrE,IAAI,CACH;gBACE,MAAM,EAAE,EAAE,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE;aAC9B,EACD,EAAE,CACH,CAAC;YAEJ,KAAK,GAAG,CAAC,CAAC;YAEV,OAAO,IAAI,EAAE,CAAC;gBACb,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAE7C,MAAM,GAAG,GAAc,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,CAAC;oBACJ,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC;wBAAE,SAAS;gBACvD,CAAC;gBAAC,OAAO,EAAE,EAAE,CAAC;oBACb,0BAA0B;oBAC1B,YAAY;oBACZ,SAAS;gBACV,CAAC;gBAED,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAEnB,KAAK,EAAE,CAAC;YACT,CAAC;YACD,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,WAAW,EAAE,mBAAmB,CAAC,CAAC;YAClF,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;gBACxB,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAE5C,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;qBACjC,SAAS,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;oBACvB,IAAI,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;oBAChC,MAAM,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE;iBACnB,CAAC,CAAC;YACN,CAAC;YACD,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;IACF,CAAC;CACD"}
1
+ {"version":3,"file":"reattempt.app.mjs","sourceRoot":"","sources":["../../src/apps/reattempt.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAGnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAM7C,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAI3C,MAAM,OAAO,YAAa,SAAQ,UAAU;IAMjC;IALF,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IAErB,YACU,OAAkB;QAE3B,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,YAAO,GAAP,OAAO,CAAW;QAI3B,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,mBAAmB,CAAC;IAC5B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,WAAW,EAAE,wBAAwB,CAAC,CAAC;QACvF,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,MAAM,GAAG,GAAuB,eAAe,CAAC,WAAW,EAAE,6BAA6B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YAExH,MAAM,OAAO,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACrE,IAAI,CACH;gBACE,MAAM,EAAE,EAAE,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE;aAC9B,EACD,EAAE,CACH,CAAC;YAEJ,KAAK,GAAG,CAAC,CAAC;YAEV,OAAO,IAAI,EAAE,CAAC;gBACb,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAE7C,MAAM,GAAG,GAAc,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAEtD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAEnB,KAAK,EAAE,CAAC;YACT,CAAC;YACD,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,WAAW,EAAE,mBAAmB,CAAC,CAAC;YAClF,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;gBACxB,KAAK,EAAE,CAAC;gBACR,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAE5C,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;qBACjC,SAAS,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;oBACvB,IAAI,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;oBAChC,MAAM,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE;iBACnB,CAAC,CAAC;YACN,CAAC;YACD,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;IACF,CAAC;CACD"}
@@ -45,15 +45,8 @@ export class UnarchiveUrlsApp extends CommonsApp {
45
45
  const row = await results.next();
46
46
  if (row === null)
47
47
  break;
48
- try {
49
- if (!this.lists.match(EList.ALLOW, row.url))
50
- continue;
51
- }
52
- catch (_e) {
53
- // probably an invalid URL
54
- // so ignore
48
+ if (!this.lists.match(EList.ALLOW, row.url))
55
49
  continue;
56
- }
57
50
  urls.push(row.url);
58
51
  found++;
59
52
  }
@@ -1 +1 @@
1
- {"version":3,"file":"unarchive-urls.app.mjs","sourceRoot":"","sources":["../../src/apps/unarchive-urls.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAGnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAM7C,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAI3C,4HAA4H;AAE5H,MAAM,OAAO,gBAAiB,SAAQ,UAAU;IACvC,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IAErB;QACC,KAAK,CAAC,eAAe,CAAC,CAAC;QAEvB,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,mBAAmB,CAAC;IAC5B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,gBAAgB,EAAE,wBAAwB,CAAC,CAAC;QAC5F,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,gBAAgB,EAAE,2BAA2B,CAAC,CAAC;YAE/F,MAAM,OAAO,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACrE,IAAI,CACH,EAAE,MAAM,EAAE,OAAO,CAAC,QAAQ,EAAE,EAC5B,EAAE,CACH,CAAC;YAEJ,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,OAAO,IAAI,EAAE,CAAC;gBACb,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,IAAI,KAAK,EAAE,CAAC,CAAC;gBAC3D,KAAK,EAAE,CAAC;gBAER,MAAM,GAAG,GAAc,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,CAAC;oBACJ,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC;wBAAE,SAAS;gBACvD,CAAC;gBAAC,OAAO,EAAE,EAAE,CAAC;oBACb,0BAA0B;oBAC1B,YAAY;oBACZ,SAAS;gBACV,CAAC;gBAED,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAEnB,KAAK,EAAE,CAAC;YACT,CAAC;YACD,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,gBAAgB,EAAE,0BAA0B,CAAC,CAAC;YAC9F,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBACpB,KAAK,IAAI,GAAG,CAAC;gBAEb,MAAM,KAAK,GAAa,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAE3C,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB;oBACE,IAAI,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;iBACjC,CACF,CAAC;gBAEF,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YACrB,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;CACD"}
1
+ {"version":3,"file":"unarchive-urls.app.mjs","sourceRoot":"","sources":["../../src/apps/unarchive-urls.app.mts"],"names":[],"mappings":"AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAGnD,OAAO,EAAsB,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,OAAO,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAM7C,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAI3C,4HAA4H;AAE5H,MAAM,OAAO,gBAAiB,SAAQ,UAAU;IACvC,eAAe,CAA4B;IAE3C,KAAK,CAAQ;IAErB;QACC,KAAK,CAAC,eAAe,CAAC,CAAC;QAEvB,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEM,UAAU;QAChB,OAAO,mBAAmB,CAAC;IAC5B,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEM,SAAS,CACd,IAAW,EACX,OAAiB;QAElB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;IAEe,KAAK,CAAC,IAAI;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEpF,MAAM,GAAG,GAAuB,eAAe,CAAC,gBAAgB,EAAE,wBAAwB,CAAC,CAAC;QAC5F,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAClC,GAAG,CAAC,OAAO,EAAE,CAAC;QAEd,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEe,KAAK,CAAC,GAAG;QACxB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QAEhF,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,gBAAgB,EAAE,2BAA2B,CAAC,CAAC;YAE/F,MAAM,OAAO,GAA6B,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACrE,IAAI,CACH,EAAE,MAAM,EAAE,OAAO,CAAC,QAAQ,EAAE,EAC5B,EAAE,CACH,CAAC;YAEJ,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,IAAI,KAAK,GAAW,CAAC,CAAC;YAEtB,OAAO,IAAI,EAAE,CAAC;gBACb,IAAI,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;oBAAE,GAAG,CAAC,QAAQ,CAAC,GAAG,KAAK,IAAI,KAAK,EAAE,CAAC,CAAC;gBAC3D,KAAK,EAAE,CAAC;gBAER,MAAM,GAAG,GAAc,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC;oBAAE,SAAS;gBAEtD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAEnB,KAAK,EAAE,CAAC;YACT,CAAC;YACD,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnB,CAAC;QAED,CAAC,CAAC,QAAQ;YACT,MAAM,GAAG,GAAuB,eAAe,CAAC,gBAAgB,EAAE,0BAA0B,CAAC,CAAC;YAC9F,IAAI,KAAK,GAAW,CAAC,CAAC;YACtB,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBACpB,KAAK,IAAI,GAAG,CAAC;gBAEb,MAAM,KAAK,GAAa,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAE3C,MAAM,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC,UAAU,CAC7C,EAAE,GAAG,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,EACvB;oBACE,IAAI,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE;iBACjC,CACF,CAAC;gBAEF,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YACrB,CAAC;YACD,GAAG,CAAC,OAAO,EAAE,CAAC;QACf,CAAC;IACF,CAAC;CACD"}
@@ -4,7 +4,11 @@ export declare class Cleaner {
4
4
  private lists;
5
5
  private databaseService;
6
6
  constructor(lists: Lists, databaseService: DatabaseService);
7
- private detectStatusOrphans;
7
+ private detectOrphans;
8
+ private detectAndDeleteOrphans;
9
+ private detectAndFlagOrphans;
10
+ private detectAndDeleteStatusOrphans;
11
+ private detectAndFlagStatusOrphans;
8
12
  private detectNonAllowlistOrphans;
9
13
  private detectStatusCodeOrphans;
10
14
  purgeOrphanUrls(hardLimit?: number): Promise<void>;
@@ -9,156 +9,98 @@ export class Cleaner {
9
9
  this.lists = lists;
10
10
  this.databaseService = databaseService;
11
11
  }
12
- async detectStatusOrphans(statuses, hardLimit) {
12
+ async detectOrphans(detectingCaption, clauses, matcher, actionCaption, action, hardLimit) {
13
13
  if (!this.databaseService)
14
14
  throw new Error('Database service has not been set yet');
15
15
  let tally = 0;
16
16
  let found = 0;
17
- const log = commonsLogDoing('cleaner', `Detecting ${statuses.join(', ')} orphan urls`);
18
- while (true) {
19
- const urls = this.databaseService.getUrls()
20
- .find({
21
- $and: [
22
- { status: { $in: statuses } },
23
- { $or: [
24
- { orphan: null },
25
- { orphan: false }
26
- ] }
27
- ]
28
- }, {})
29
- .sort({ _id: 1 })
30
- .skip(tally);
31
- try {
32
- while (true) {
33
- tally++;
34
- if ((tally % 100) === 0)
35
- log.progress(`${tally} urls, ${found} orphans`);
36
- if (hardLimit && tally >= hardLimit)
37
- break;
38
- const row = await urls.next();
39
- if (row === null)
40
- break;
41
- const incoming = await this.databaseService.getLinks().findOne({ outgoing: row.url }, { limit: 1 });
42
- if (incoming)
43
- continue;
44
- // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
45
- await this.databaseService.getUrls().updateOne({ _id: row['_id'] }, { $set: { orphan: true } });
46
- found++;
47
- }
48
- break;
49
- }
50
- catch (err) {
51
- if (!commonsTypeHasPropertyNumber(err, 'code') || err.code !== 43)
52
- throw err;
53
- commonsLogDebug('Code 43 encountered. Continuing');
54
- }
55
- }
56
- log.result(`${tally} urls, ${found} orphans`);
57
- }
58
- async detectNonAllowlistOrphans(statuses, hardLimit) {
59
- let tally = 0;
60
- let found = 0;
61
- const log = commonsLogDoing('cleaner', `Detecting non-allowlist ${statuses.join(', ')} orphan urls`);
62
- while (true) {
63
- const urls = this.databaseService.getUrls()
64
- .find({
65
- $and: [
66
- { status: { $in: statuses } },
67
- { $or: [
68
- { orphan: null },
69
- { orphan: false }
70
- ] }
71
- ]
72
- }, {})
73
- .sort({ _id: 1 })
74
- .skip(tally);
75
- try {
76
- while (true) {
77
- tally++;
78
- if ((tally % 100) === 0)
79
- log.progress(`${tally} urls, ${found} orphans`);
80
- if (hardLimit && tally >= hardLimit)
81
- break;
82
- const row = await urls.next();
83
- if (row === null)
84
- break;
85
- let invalid = false;
86
- try {
87
- if (this.lists.match(EList.ALLOW, row.url))
17
+ const orphanUrls = [];
18
+ { // scope
19
+ const log = commonsLogDoing('cleaner', detectingCaption);
20
+ while (true) {
21
+ const urls = this.databaseService.getUrls()
22
+ .find({
23
+ $and: [
24
+ clauses,
25
+ { $or: [
26
+ { orphan: null },
27
+ { orphan: false }
28
+ ] }
29
+ ]
30
+ }, {})
31
+ .sort({ _id: 1 })
32
+ .skip(tally);
33
+ try {
34
+ while (true) {
35
+ tally++;
36
+ if ((tally % 100) === 0)
37
+ log.progress(`${tally} urls, ${found} orphans`);
38
+ if (hardLimit && tally >= hardLimit)
39
+ break;
40
+ const row = await urls.next();
41
+ if (row === null)
42
+ break;
43
+ if (!matcher(row))
88
44
  continue;
89
- }
90
- catch (_e) {
91
- // probably an invalid URL
92
- // so get rid of it
93
- invalid = true;
94
- }
95
- if (!invalid) {
96
45
  const incoming = await this.databaseService.getLinks().findOne({ outgoing: row.url }, { limit: 1 });
97
46
  if (incoming)
98
47
  continue;
48
+ orphanUrls.push(row.url);
49
+ found++;
99
50
  }
100
- // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
101
- await this.databaseService.getUrls().updateOne({ _id: row['_id'] }, { $set: { orphan: true } });
102
- found++;
51
+ break;
52
+ }
53
+ catch (err) {
54
+ if (!commonsTypeHasPropertyNumber(err, 'code') || err.code !== 43)
55
+ throw err;
56
+ commonsLogDebug('Code 43 encountered. Continuing');
103
57
  }
104
- break;
105
58
  }
106
- catch (err) {
107
- if (!commonsTypeHasPropertyNumber(err, 'code') || err.code !== 43)
108
- throw err;
109
- commonsLogDebug('Code 43 encountered. Continuing');
59
+ log.result(`${tally} urls, ${found} orphans`);
60
+ }
61
+ { // scope
62
+ const log = commonsLogDoing('cleaner', actionCaption);
63
+ let i = 0;
64
+ for (const url of orphanUrls) {
65
+ i++;
66
+ if ((i % 100) === 0)
67
+ log.percent(i, orphanUrls.length);
68
+ await action(url);
110
69
  }
70
+ log.success();
111
71
  }
112
- log.result(`${tally} urls, ${found} orphans`);
72
+ }
73
+ async detectAndDeleteOrphans(detectingCaption, clauses, matcher, hardLimit) {
74
+ return this.detectOrphans(detectingCaption, clauses, matcher, 'Deleting orphan urls', async (url) => {
75
+ await this.databaseService.getUrls().deleteOne({ url: url });
76
+ }, hardLimit);
77
+ }
78
+ async detectAndFlagOrphans(detectingCaption, clauses, matcher, hardLimit) {
79
+ return this.detectOrphans(detectingCaption, clauses, matcher, 'Flagging urls as orphans', async (url) => {
80
+ await this.databaseService.getUrls().updateOne({ url: url }, {
81
+ $set: { orphan: true }
82
+ });
83
+ }, hardLimit);
84
+ }
85
+ async detectAndDeleteStatusOrphans(statuses, hardLimit) {
86
+ return this.detectAndDeleteOrphans(`Setting orphan flag for ${statuses.join(', ')} orphan urls`, statuses, (_url) => true, hardLimit);
87
+ }
88
+ async detectAndFlagStatusOrphans(statuses, hardLimit) {
89
+ return this.detectAndFlagOrphans(`Setting orphan flag for ${statuses.join(', ')} orphan urls`, statuses, (_url) => true, hardLimit);
90
+ }
91
+ async detectNonAllowlistOrphans(statuses, hardLimit) {
92
+ return this.detectAndFlagOrphans(`Detecting non-allowlist ${statuses.join(', ')} orphan urls`, { status: { $in: statuses } }, (url) => !this.lists.match(EList.ALLOW, url.url), hardLimit);
113
93
  }
114
94
  async detectStatusCodeOrphans(gt, lt, hardLimit) {
115
- let tally = 0;
116
- let found = 0;
117
- const log = commonsLogDoing('cleaner', `Detecting DONE status code between ${gt}<${lt} orphan urls`);
118
- while (true) {
119
- const urls = this.databaseService.getUrls()
120
- .find({
121
- $and: [
122
- { status: EStatus.DONE },
123
- { statusCode: { $gt: gt } },
124
- { statusCode: { $lt: lt } },
125
- { $or: [
126
- { orphan: null },
127
- { orphan: false }
128
- ] }
129
- ]
130
- }, {})
131
- .sort({ _id: 1 })
132
- .skip(tally);
133
- try {
134
- while (true) {
135
- tally++;
136
- if ((tally % 100) === 0)
137
- log.progress(`${tally} urls, ${found} orphans`);
138
- if (hardLimit && tally >= hardLimit)
139
- break;
140
- const row = await urls.next();
141
- if (row === null)
142
- break;
143
- const incoming = await this.databaseService.getLinks().findOne({ outgoing: row.url }, { limit: 1 });
144
- if (incoming)
145
- continue;
146
- // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
147
- await this.databaseService.getUrls().updateOne({ _id: row['_id'] }, { $set: { orphan: true } });
148
- found++;
149
- }
150
- break;
151
- }
152
- catch (err) {
153
- if (!commonsTypeHasPropertyNumber(err, 'code') || err.code !== 43)
154
- throw err;
155
- commonsLogDebug('Code 43 encountered. Continuing');
156
- }
157
- }
158
- log.result(`${tally} urls, ${found} orphans`);
95
+ return this.detectAndFlagOrphans(`Detecting DONE status code between ${gt}<${lt} orphan urls`, { $and: [
96
+ { status: EStatus.DONE },
97
+ { statusCode: { $gt: gt } },
98
+ { statusCode: { $lt: lt } }
99
+ ] }, (_url) => true, hardLimit);
159
100
  }
160
101
  async purgeOrphanUrls(hardLimit) {
161
- await this.detectStatusOrphans([
102
+ await this.detectAndDeleteStatusOrphans([EStatus.INVALID], hardLimit);
103
+ await this.detectAndFlagStatusOrphans([
162
104
  EStatus.DENY,
163
105
  EStatus.FAILED,
164
106
  EStatus.DEAD,
@@ -172,7 +114,7 @@ export class Cleaner {
172
114
  await this.detectStatusCodeOrphans(400, 500, hardLimit);
173
115
  let tally = 0;
174
116
  { // scope
175
- const log = commonsLogDoing('clealer', 'Removing orphan outgoing links');
117
+ const log = commonsLogDoing('cleaner', 'Removing orphan outgoing links');
176
118
  while (true) {
177
119
  const urls2 = this.databaseService.getUrls()
178
120
  .find({ orphan: true }, {})
@@ -199,7 +141,7 @@ export class Cleaner {
199
141
  log.result(tally);
200
142
  }
201
143
  { // scope
202
- const log = commonsLogDoing('clealer', 'Archiving orphans');
144
+ const log = commonsLogDoing('cleaner', 'Archiving orphans');
203
145
  await this.databaseService.getUrls().updateMany({ orphan: true }, {
204
146
  $set: { status: EStatus.ARCHIVED },
205
147
  $unset: {
@@ -234,7 +176,7 @@ export class Cleaner {
234
176
  let found = 0;
235
177
  const domains = [];
236
178
  { // scope
237
- const log = commonsLogDoing('clealer', 'Enumerating domains');
179
+ const log = commonsLogDoing('cleaner', 'Enumerating domains');
238
180
  const results = this.databaseService.getDomains().find({}, {});
239
181
  try {
240
182
  tally = 0;
@@ -257,7 +199,7 @@ export class Cleaner {
257
199
  { // scope
258
200
  tally = 0;
259
201
  found = 0;
260
- const log = commonsLogDoing('clealer', 'Detecting and removing empty domains');
202
+ const log = commonsLogDoing('cleaner', 'Detecting and removing empty domains');
261
203
  for (const domain of domains) {
262
204
  tally++;
263
205
  if ((tally % 10) === 0)