hydra-crawler 2.3.0 → 2.3.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -41,6 +41,7 @@ export class ExportDomainUrlsApp extends CommonsApp {
41
41
  throw new Error('Database service has not been set');
42
42
  const validOnly = this.getArgs().hasAttribute('valid-only');
43
43
  const pagesOnly = this.getArgs().hasAttribute('pages-only');
44
+ const mergeHttp = this.getArgs().hasAttribute('merge-http');
44
45
  const matchPipeline = [
45
46
  { $match: { domain: this.domain } }
46
47
  ];
@@ -75,12 +76,15 @@ export class ExportDomainUrlsApp extends CommonsApp {
75
76
  const row = yield cursor.next();
76
77
  if (row === null)
77
78
  break;
78
- const url = row.url
79
+ let url = row.url
79
80
  .trim()
80
81
  .replace(/[?].*$/, '')
81
82
  .replace(/\/index\.(htm|html)$/i, '/')
82
83
  .replace(/\/$/, '')
83
84
  .trim();
85
+ if (mergeHttp) {
86
+ url = url.replace(/^http(s?):/, '');
87
+ }
84
88
  if (!urls.includes(url))
85
89
  urls.push(url);
86
90
  }
@@ -1 +1 @@
1
- {"version":3,"file":"export-domain-urls.js","sourceRoot":"","sources":["../../src/apps/export-domain-urls.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAMhD,8EAA8E;AAE9E,MAAM,OAAO,mBAAoB,SAAQ,UAAU;IAGlD,YACU,MAAc;QAEvB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,WAAM,GAAN,MAAM,CAAQ;IAGxB,CAAC;IAEM,UAAU;QAChB,OAAO,4BAA4B,CAAC;IACrC,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YAErE,MAAM,aAAa,GAA4B;gBAC7C,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE;aACpC,CAAC;YACF,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAChB,EAAE,MAAM,EAAE;wBACR,MAAM,EAAE,EAAE,GAAG,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAE,EAAE;wBACjE,UAAU,EAAE,EAAE,GAAG,EAAE,CAAE,GAAG,EAAE,GAAG,CAAE,EAAE;qBAClC,EAAE,CACJ,CAAC;aACF;YACD,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAAC,GAAG;oBACpB,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE;yBACrB,EAAE;oBACH,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,gCAAgC,EAAE;yBAChD,EAAE;iBACJ,CAAC,CAAC;aACH;YAED,MAAM,MAAM,GAA+C,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACtF,SAAS,CAAC;gBACT,GAAG,aAAa;gBAChB,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE;aACxC,CAAC,CAAC;YAEL,mEAAmE;YACnE,8BAA8B;YAE9B,kBAAkB,CAAC,6BAA6B,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAE/D,MAAM,IAAI,GAAa,EAAE,CAAC;YAC1B,OAAO,IAAI,EAAE;gBACZ,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,KAAK,CAAC;oBAAE,qBAAqB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEjE,MAAM,GAAG,
GAAyB,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBACtD,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,MAAM,GAAG,GAAW,GAAG,CAAC,GAAG;qBACxB,IAAI,EAAE;qBACN,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;qBACrB,OAAO,CAAC,uBAAuB,EAAE,GAAG,CAAC;qBACrC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;qBAClB,IAAI,EAAE,CAAC;gBACV,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;oBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;aACxC;YACD,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;aACjB;QACF,CAAC;KAAA;CACD"}
1
+ {"version":3,"file":"export-domain-urls.js","sourceRoot":"","sources":["../../src/apps/export-domain-urls.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAMhD,8EAA8E;AAE9E,MAAM,OAAO,mBAAoB,SAAQ,UAAU;IAGlD,YACU,MAAc;QAEvB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,WAAM,GAAN,MAAM,CAAQ;IAGxB,CAAC;IAEM,UAAU;QAChB,OAAO,4BAA4B,CAAC;IACrC,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YAErE,MAAM,aAAa,GAA4B;gBAC7C,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE;aACpC,CAAC;YACF,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAChB,EAAE,MAAM,EAAE;wBACR,MAAM,EAAE,EAAE,GAAG,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAE,EAAE;wBACjE,UAAU,EAAE,EAAE,GAAG,EAAE,CAAE,GAAG,EAAE,GAAG,CAAE,EAAE;qBAClC,EAAE,CACJ,CAAC;aACF;YACD,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAAC,GAAG;oBACpB,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE;yBACrB,EAAE;oBACH,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,gCAAgC,EAAE;yBAChD,EAAE;iBACJ,CAAC,CAAC;aACH;YAED,MAAM,MAAM,GAA+C,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACtF,SAAS,CAAC;gBACT,GAAG,aAAa;gBAChB,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE;aACxC,CAAC,CAAC;YAEL,mEAAmE;YACnE,8BAA8B;YAE9B,kBAAkB,CAAC,6BAA6B,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAE/D,MAAM,IAAI,GAAa,EAAE,CAAC;YAC1B,OAAO,IAAI,EAAE;gBACZ,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI
,KAAK,CAAC;oBAAE,qBAAqB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEjE,MAAM,GAAG,GAAyB,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBACtD,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,GAAG,GAAW,GAAG,CAAC,GAAG;qBACtB,IAAI,EAAE;qBACN,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;qBACrB,OAAO,CAAC,uBAAuB,EAAE,GAAG,CAAC;qBACrC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;qBAClB,IAAI,EAAE,CAAC;gBAEV,IAAI,SAAS,EAAE;oBACd,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;iBACpC;gBAED,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;oBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;aACxC;YACD,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;aACjB;QACF,CAAC;KAAA;CACD"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hydra-crawler",
3
- "version": "2.3.0",
3
+ "version": "2.3.1",
4
4
  "description": "Node.js Hydra web crawler",
5
5
  "author": "Pete Morris",
6
6
  "license": "ISC",