hydra-crawler 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -41,6 +41,7 @@ export class ExportDomainUrlsApp extends CommonsApp {
|
|
|
41
41
|
throw new Error('Database service has not been set');
|
|
42
42
|
const validOnly = this.getArgs().hasAttribute('valid-only');
|
|
43
43
|
const pagesOnly = this.getArgs().hasAttribute('pages-only');
|
|
44
|
+
const mergeHttp = this.getArgs().hasAttribute('merge-http');
|
|
44
45
|
const matchPipeline = [
|
|
45
46
|
{ $match: { domain: this.domain } }
|
|
46
47
|
];
|
|
@@ -75,12 +76,15 @@ export class ExportDomainUrlsApp extends CommonsApp {
|
|
|
75
76
|
const row = yield cursor.next();
|
|
76
77
|
if (row === null)
|
|
77
78
|
break;
|
|
78
|
-
|
|
79
|
+
let url = row.url
|
|
79
80
|
.trim()
|
|
80
81
|
.replace(/[?].*$/, '')
|
|
81
82
|
.replace(/\/index\.(htm|html)$/i, '/')
|
|
82
83
|
.replace(/\/$/, '')
|
|
83
84
|
.trim();
|
|
85
|
+
if (mergeHttp) {
|
|
86
|
+
url = url.replace(/^http(s?):/, '');
|
|
87
|
+
}
|
|
84
88
|
if (!urls.includes(url))
|
|
85
89
|
urls.push(url);
|
|
86
90
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"export-domain-urls.js","sourceRoot":"","sources":["../../src/apps/export-domain-urls.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAMhD,8EAA8E;AAE9E,MAAM,OAAO,mBAAoB,SAAQ,UAAU;IAGlD,YACU,MAAc;QAEvB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,WAAM,GAAN,MAAM,CAAQ;IAGxB,CAAC;IAEM,UAAU;QAChB,OAAO,4BAA4B,CAAC;IACrC,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YAErE,MAAM,aAAa,GAA4B;gBAC7C,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE;aACpC,CAAC;YACF,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAChB,EAAE,MAAM,EAAE;wBACR,MAAM,EAAE,EAAE,GAAG,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAE,EAAE;wBACjE,UAAU,EAAE,EAAE,GAAG,EAAE,CAAE,GAAG,EAAE,GAAG,CAAE,EAAE;qBAClC,EAAE,CACJ,CAAC;aACF;YACD,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAAC,GAAG;oBACpB,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE;yBACrB,EAAE;oBACH,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,gCAAgC,EAAE;yBAChD,EAAE;iBACJ,CAAC,CAAC;aACH;YAED,MAAM,MAAM,GAA+C,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACtF,SAAS,CAAC;gBACT,GAAG,aAAa;gBAChB,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE;aACxC,CAAC,CAAC;YAEL,mEAAmE;YACnE,8BAA8B;YAE9B,kBAAkB,CAAC,6BAA6B,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAE/D,MAAM,IAAI,GAAa,EAAE,CAAC;YAC1B,OAAO,IAAI,EAAE;gBACZ,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,KAAK,CAAC;oBAAE,qBAAqB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEjE,MAAM,GAAG,GAAyB,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBACtD,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,
|
|
1
|
+
{"version":3,"file":"export-domain-urls.js","sourceRoot":"","sources":["../../src/apps/export-domain-urls.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAMhD,8EAA8E;AAE9E,MAAM,OAAO,mBAAoB,SAAQ,UAAU;IAGlD,YACU,MAAc;QAEvB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,WAAM,GAAN,MAAM,CAAQ;IAGxB,CAAC;IAEM,UAAU;QAChB,OAAO,4BAA4B,CAAC;IACrC,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YAErE,MAAM,aAAa,GAA4B;gBAC7C,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE;aACpC,CAAC;YACF,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAChB,EAAE,MAAM,EAAE;wBACR,MAAM,EAAE,EAAE,GAAG,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAE,EAAE;wBACjE,UAAU,EAAE,EAAE,GAAG,EAAE,CAAE,GAAG,EAAE,GAAG,CAAE,EAAE;qBAClC,EAAE,CACJ,CAAC;aACF;YACD,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAAC,GAAG;oBACpB,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE;yBACrB,EAAE;oBACH,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,gCAAgC,EAAE;yBAChD,EAAE;iBACJ,CAAC,CAAC;aACH;YAED,MAAM,MAAM,GAA+C,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACtF,SAAS,CAAC;gBACT,GAAG,aAAa;gBAChB,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE;aACxC,CAAC,CAAC;YAEL,mEAAmE;YACnE,8BAA8B;YAE9B,kBAAkB,CAAC,6BAA6B,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAE/D,MAAM,IAAI,GAAa,EAAE,CAAC;YAC1B,OAAO,IAAI,EAAE;gBACZ,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,KAAK,CAAC;oBAAE,qBAAqB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEjE,MAAM,GAAG,GAAyB,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBACtD,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,GAAG,GAAW,GAAG,CAAC,GAAG;qBACtB,IAAI,EAAE;qBACN,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;qBACrB,OAAO,CAAC,uBAAuB,EAAE,GAAG,CAAC;qBACrC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;qBAClB,IAAI,EAAE,CAAC;gBAEV,IAAI,SAAS,EAAE;oBACd,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;iBACpC;gBAED,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;oBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;aACxC;YACD,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;aACjB;QACF,CAAC;KAAA;CACD"}
|