hydra-crawler 2.2.10 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { CommonsApp } from 'nodecommons-es-app';
|
|
2
|
+
import { DatabaseService } from '../services/database.service';
|
|
3
|
+
import { IInternalHydraCommonDbApp } from './internal-hydra-common.app';
|
|
4
|
+
/**
 * Type declaration for the "export domain URLs" command-line app.
 *
 * The implementation reads the URL records stored for one domain from the
 * database service and prints the normalised, de-duplicated, sorted URLs to
 * stdout.
 */
export declare class ExportDomainUrlsApp extends CommonsApp implements IInternalHydraCommonDbApp {
|
|
5
|
+
/** Domain whose URLs are exported; assigned from the constructor argument. */
private domain;
|
|
6
|
+
/** Database service assigned by setDatabaseService(); init() and run() throw if it is unset. */
private databaseService;
|
|
7
|
+
/**
 * @param domain Domain used to select the URL records to export.
 */
constructor(domain: string);
|
|
8
|
+
/** @returns The fixed display name 'Hydra - Export Domain Urls'. */
getAppName(): string;
|
|
9
|
+
/**
 * Stores the database service used by init() and run().
 *
 * @param databaseService Service to store on the instance.
 */
setDatabaseService(databaseService: DatabaseService): void;
|
|
10
|
+
/**
 * Initialises the database service, then runs the base-class init().
 *
 * @throws Error if no database service has been set.
 */
init(): Promise<void>;
|
|
11
|
+
/**
 * Queries the URLs for the domain and prints them to stdout, one per line.
 *
 * @throws Error if no database service has been set.
 */
run(): Promise<void>;
|
|
12
|
+
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// Compiler-emitted helper that drives a generator function as an async function.
// Reuses an existing `this.__awaiter` when one is present, otherwise defines it.
//   thisArg    - `this` value the generator is applied with
//   _arguments - arguments passed to the generator (an empty list when falsy)
//   P          - promise constructor to use; falls back to the global Promise
//   generator  - generator function whose yielded values are awaited
// Returns a promise that resolves with the generator's final value, or rejects
// with any error thrown while stepping it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
// Wrap a yielded value in a P unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
// Resume the generator with a fulfilled value; an exception thrown by the generator rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
// Throw a rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
// Finish when the generator is done; otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
// Instantiate the generator with the supplied `this`/arguments and take the first step.
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { EStatus } from 'hydra-crawler-ts-assets';
|
|
11
|
+
import { commonsOutputDoing, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
|
|
12
|
+
import { CommonsApp } from 'nodecommons-es-app';
|
|
13
|
+
/**
 * Command-line app that prints the URLs stored for a single domain to stdout,
 * one per line, normalised, de-duplicated and sorted.
 *
 * Optional command-line attributes read by run():
 *  - `valid-only`: keep only records whose status is DONE, QUEUED or ACTIVE
 *    and whose statusCode is 200 or 204.
 *  - `pages-only`: drop URLs that contain `?` and URLs that end in a common
 *    image extension.
 *  - `merge-http`: strip the leading `http:` / `https:` so both schemes
 *    collapse into a single entry.
 *
 * Without any of these attributes every URL record of the domain is exported.
 */
export class ExportDomainUrlsApp extends CommonsApp {
    /**
     * @param domain Domain whose URL records are exported; compared against
     *   the `domain` field of the stored records.
     */
    constructor(domain) {
        super('hydra-crawler');
        this.domain = domain;
    }

    /**
     * @returns The fixed display name of this app.
     */
    getAppName() {
        return 'Hydra - Export Domain Urls';
    }

    /**
     * Stores the database service used by init() and run().
     *
     * @param databaseService Service to store on the instance.
     */
    setDatabaseService(databaseService) {
        this.databaseService = databaseService;
    }

    /**
     * Initialises the database service and then runs the base-class init().
     *
     * @returns Promise that settles once both initialisations have finished.
     * @throws Error if setDatabaseService() has not been called.
     */
    init() {
        // Capture `super.init` here: `super` is not available inside the generator below.
        const _super = Object.create(null, {
            init: { get: () => super.init }
        });
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.databaseService)
                throw new Error('Database service has not been set yet');
            commonsOutputDoing('Connecting to database');
            yield this.databaseService.init();
            commonsOutputSuccess();
            yield _super.init.call(this);
        });
    }

    /**
     * Queries the URL records of the domain, normalises and de-duplicates the
     * URLs, and writes them sorted to stdout via console.log.
     *
     * @returns Promise that settles once every URL has been printed.
     * @throws Error if setDatabaseService() has not been called.
     */
    run() {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.databaseService)
                throw new Error('Database service has not been set');

            const validOnly = this.getArgs().hasAttribute('valid-only');
            const pagesOnly = this.getArgs().hasAttribute('pages-only');
            const mergeHttp = this.getArgs().hasAttribute('merge-http');

            // Always restrict to this domain; optional stages narrow it further.
            const matchPipeline = [
                { $match: { domain: this.domain } }
            ];
            if (validOnly) {
                matchPipeline.push({ $match: {
                        status: { $in: [EStatus.DONE, EStatus.QUEUED, EStatus.ACTIVE] },
                        statusCode: { $in: [200, 204] }
                    } });
            }
            if (pagesOnly) {
                matchPipeline.push(
                    // No query strings.
                    { $match: {
                            url: { $not: /[?]/ }
                        } },
                    // NOTE(review): there is no leading `\.` here, so a URL that merely
                    // ends in e.g. "png" without a dot is excluded too - confirm intent.
                    { $match: {
                            url: { $not: /(gif|jpeg|jpg|png|webp|avif)$/i }
                        } }
                );
            }

            const cursor = this.databaseService.getUrls()
                .aggregate([
                    ...matchPipeline,
                    { $project: { _id: false, url: true } }
                ]);

            // safer to do this directly rather than a call to listQueryResults
            // ditto not using CommonsFile
            commonsOutputDoing(`Exporting URLs for domain ${this.domain}`);

            // A Set gives constant-time duplicate detection; scanning an array with
            // includes() for every row made the export quadratic in the number of URLs.
            const seen = new Set();
            while (true) {
                if (seen.size % 1000 === 0)
                    commonsOutputProgress(seen.size);

                const row = yield cursor.next();
                if (row === null)
                    break;

                // Normalise so trivially different spellings of the same page collapse:
                // drop the query string, map /index.htm(l) to its directory, and remove
                // a single trailing slash.
                let url = row.url
                    .trim()
                    .replace(/[?].*$/, '')
                    .replace(/\/index\.(htm|html)$/i, '/')
                    .replace(/\/$/, '')
                    .trim();
                if (mergeHttp) {
                    // Leaves a scheme-less `//host/path` form shared by http and https.
                    url = url.replace(/^http(s?):/, '');
                }

                seen.add(url);
            }
            commonsOutputResult(seen.size);

            const urls = [...seen].sort();
            for (const url of urls) {
                console.log(url);
            }
        });
    }
}
|
|
99
|
+
//# sourceMappingURL=export-domain-urls.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"export-domain-urls.js","sourceRoot":"","sources":["../../src/apps/export-domain-urls.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAMhD,8EAA8E;AAE9E,MAAM,OAAO,mBAAoB,SAAQ,UAAU;IAGlD,YACU,MAAc;QAEvB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,WAAM,GAAN,MAAM,CAAQ;IAGxB,CAAC;IAEM,UAAU;QAChB,OAAO,4BAA4B,CAAC;IACrC,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YAErE,MAAM,aAAa,GAA4B;gBAC7C,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE;aACpC,CAAC;YACF,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAChB,EAAE,MAAM,EAAE;wBACR,MAAM,EAAE,EAAE,GAAG,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAE,EAAE;wBACjE,UAAU,EAAE,EAAE,GAAG,EAAE,CAAE,GAAG,EAAE,GAAG,CAAE,EAAE;qBAClC,EAAE,CACJ,CAAC;aACF;YACD,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAAC,GAAG;oBACpB,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE;yBACrB,EAAE;oBACH,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,gCAAgC,EAAE;yBAChD,EAAE;iBACJ,CAAC,CAAC;aACH;YAED,MAAM,MAAM,GAA+C,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACtF,SAAS,CAAC;gBACT,GAAG,aAAa;gBAChB,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE;aACxC,CAAC,CAAC;YAEL,mEAAmE;YACnE,8BAA8B;YAE9B,kBAAkB,CAAC,6BAA6B,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAE/D,MAAM,IAAI,GAAa,EAAE,CAAC;YAC1B,OAAO,IAAI,EAAE;gBACZ,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,K
AAK,CAAC;oBAAE,qBAAqB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEjE,MAAM,GAAG,GAAyB,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBACtD,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,IAAI,GAAG,GAAW,GAAG,CAAC,GAAG;qBACtB,IAAI,EAAE;qBACN,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;qBACrB,OAAO,CAAC,uBAAuB,EAAE,GAAG,CAAC;qBACrC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;qBAClB,IAAI,EAAE,CAAC;gBAEV,IAAI,SAAS,EAAE;oBACd,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;iBACpC;gBAED,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;oBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;aACxC;YACD,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;aACjB;QACF,CAAC;KAAA;CACD"}
|
package/dist/cli.js
CHANGED
|
@@ -30,6 +30,7 @@ import { QueryApp } from './apps/query.app';
|
|
|
30
30
|
import { CrossPopulateExportApp } from './apps/cross-populate-export.app';
|
|
31
31
|
import { CrossPopulateImportApp } from './apps/cross-populate-import.app';
|
|
32
32
|
import { ExtractTextApp } from './apps/extract-text.app';
|
|
33
|
+
import { ExportDomainUrlsApp } from './apps/export-domain-urls';
|
|
33
34
|
import { DatabaseService } from './services/database.service';
|
|
34
35
|
import { isIMatch } from './interfaces/imatch';
|
|
35
36
|
import { isIExpiry, toIExpiry } from './interfaces/iexpiry';
|
|
@@ -107,6 +108,10 @@ else if (args.hasAttribute('extract-text')) {
|
|
|
107
108
|
const url = args.getString('url');
|
|
108
109
|
app = new ExtractTextApp(url);
|
|
109
110
|
}
|
|
111
|
+
else if (args.hasAttribute('export-domain-urls')) {
|
|
112
|
+
const domain = args.getString('domain');
|
|
113
|
+
app = new ExportDomainUrlsApp(domain);
|
|
114
|
+
}
|
|
110
115
|
else {
|
|
111
116
|
app = new HydraApp();
|
|
112
117
|
}
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":"AAAA,6CAA6C;;;;;;;;;;AAE7C,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAElF,OAAO,EAAwB,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AAGlF,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,WAAW,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACvH,OAAO,EAAE,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AAEhE,OAAO,EAEL,2BAA2B,EAC3B,6BAA6B,EAC7B,+BAA+B,EAC/B,oCAAoC,EACrC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAC;AAC1E,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":"AAAA,6CAA6C;;;;;;;;;;AAE7C,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAElF,OAAO,EAAwB,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AAGlF,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,WAAW,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACvH,OAAO,EAAE,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AAEhE,OAAO,EAEL,2BAA2B,EAC3B,6BAA6B,EAC7B,+BAA+B,EAC/B,oCAAoC,EACrC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAC;AAC1E,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAEhE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAE9D,OAAO,EAAU,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,EAAW,SAAS,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAErE,OAAO,EAAE,iBAAiB,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE1D,MAAM,IAAI,GAAgB,IAAI,WAAW,EAAE,CAAC;AAC5C,IAAI,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;IAAE,yBAAyB,CAAC,IAAI,CAAC,CAAC;AAChE,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC;IAAE,sBAAsB,CAAC,IAAI,CAAC,CAAC;AAE9D,IAAI,GAAsC,CAAC;AAE3C,IAAI,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,EAAE;IAClC,GAAG,GAAG,IAAI,WAAW,EAAE,CAAC;CACxB;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE;IACxC,MAAM,YAAY,GAAY,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;IACzD,MAAM,YAAY,GAAY,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;IAEzD,GAAG,GAAG,IAAI,UAAU,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;CACjD;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE;IACvC,IAAI,SAAyB,CAAC;IAE9B,IAA
I,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE;QAC9B,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;KAChE;IACD,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE;QAC9B,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;KACjC;IAED,GAAG,GAAG,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;CAC/B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE;IACvC,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC;CACtB;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,EAAE;IAC1C,MAAM,OAAO,GAAc,EAAE,CAAC;IAC9B,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC;QAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC9D,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC;QAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1D,GAAG,GAAG,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;CAChC;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE;IACrC,GAAG,GAAG,IAAI,OAAO,EAAE,CAAC;CACpB;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE;IACxC,GAAG,GAAG,IAAI,UAAU,EAAE,CAAC;CACvB;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,EAAE;IAC/C,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,QAAQ,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;IACjF,MAAM,MAAM,GAAW,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAEhD,GAAG,GAAG,IAAI,gBAAgB,CACxB,MAAM,EACN,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC,CAClC,CAAC;CACF;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,EAAE;IAC1C,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;CAC7B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,iBAAiB,CAAC,EAAE;IAChD,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;CAC7B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,EAAE;IAC9C,GAAG,GAAG,IAAI,eAAe,EAAE,CAAC;CAC5B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,uBAAuB,CAAC,EAAE;IACtD,MAAM,QAAQ,GAAW,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAEpD,GAAG,GAAG,IAAI,sBAAsB,CAAC,QAAQ,CAAC,CAAC;CAC3C;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,uBAAuB,CAAC,EAAE;IACtD,MAAM,QAAQ,GAAW,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAEpD,GAAG,GAAG,IAAI,sBAAsB,CAAC,QAAQ,CAAC,CAAC;CAC3C;KAAM,IAAI,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,QAAQ,CAAC,EAAE;IAC/C,MAAM,KAAK,GAAW,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAE9C,GAAG,GAAG,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC;CAC1B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,EAAE;IAC7C,MAAM,GAAG,GAAW,IAAI,
CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAE1C,GAAG,GAAG,IAAI,cAAc,CAAC,GAAG,CAAC,CAAC;CAC9B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,EAAE;IACnD,MAAM,MAAM,GAAW,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAEhD,GAAG,GAAG,IAAI,mBAAmB,CAAC,MAAM,CAAC,CAAC;CACtC;KAAM;IACN,GAAG,GAAG,IAAI,QAAQ,EAAE,CAAC;CACrB;AAED,IAAI,CAAC,GAAG;IAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;AAEzE,MAAM,UAAU,GAAkB,GAAG,CAAC,cAAc,CAAC,iBAAiB,CAAC,CAAC;AACxE,MAAM,WAAW,GAAoB,UAAU,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;AACtE,IAAI,CAAC,qBAAqB,CAAC,WAAW,CAAC;IAAE,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;AAE/F,IAAI,2BAA2B,CAAC,GAAG,CAAC,EAAE;IACrC,MAAM,eAAe,GAAoB,IAAI,eAAe,CAAC,WAAW,CAAC,CAAC;IAC1E,GAAG,CAAC,kBAAkB,CAAC,eAAe,CAAC,CAAC;CACxC;AAED,IAAI,6BAA6B,CAAC,GAAG,CAAC,EAAE;IACvC,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE;QAC1B,MAAM,QAAQ,GAAW,iBAAiB,CAAC,IAAI,CAAC,CAAC;QAEjD,MAAM,OAAO,GAAY,GAAG,CAAC,qBAAqB,CAAC,QAAQ,CAAC,CAAC;QAC7D,IAAI,CAAC,mBAAmB,CAAS,OAAO,EAAE,QAAQ,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC;QACtG,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;KAC7B;CACD;AAED,IAAI,+BAA+B,CAAC,GAAG,CAAC,EAAE;IACzC,MAAM,IAAI,GAAY,GAAG,CAAC,qBAAqB,CAAC,kBAAkB,CAAC,CAAC;IACpE,IAAI,CAAC,wBAAwB,CAAC,IAAI,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IAE5E,MAAM,QAAQ,GAAc,IAAI;SAC7B,GAAG,CAAC,CAAC,MAAoC,EAAW,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;IAE7E,IAAI,CAAC,mBAAmB,CAAU,QAAQ,EAAE,SAAS,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;IACxG,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;CAC1B;AAED,IAAI,oCAAoC,CAAC,GAAG,CAAC,EAAE;IAC9C,MAAM,IAAI,GAAY,GAAG,CAAC,qBAAqB,CAAC,eAAe,CAAC,CAAC;IACjE,IAAI,CAAC,mBAAmB,CAAuB,IAAI,EAAE,sBAAsB,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;IAEvH,GAAG,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;CACjC;AAED,KAAK,CAAC,GAAwB,EAAE;IAC/B,iBAAiB,CAAC,yBAAyB,GAAG,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAC/D,MAAM,GAAG,CAAC,KAAK,EAAE,CAAC;IAClB,iBAAiB,CAAC,uBAAuB,CAAC,CAAC;IAE3C,UAAU,CAAC,GAAS,EAAE;QACrB,gEAAgE;QAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC,EAAE,GAAG,CAAC,CAAC;AACT,CAAC,CAAA,CAAC,EAAE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hydra-crawler",
|
|
3
|
-
"version": "2.2.10",
|
|
3
|
+
"version": "2.3.1",
|
|
4
4
|
"description": "Node.js Hydra web crawler",
|
|
5
5
|
"author": "Pete Morris",
|
|
6
6
|
"license": "ISC",
|
|
@@ -24,23 +24,23 @@
|
|
|
24
24
|
"jpeg-js": "^0.4.4",
|
|
25
25
|
"memcached-promisify": "^2.0.0",
|
|
26
26
|
"mongodb": "^5.0.1",
|
|
27
|
-
"nodecommons-es-app": "^1.0.
|
|
28
|
-
"nodecommons-es-app-socket-io": "2.0.
|
|
29
|
-
"nodecommons-es-cli": "^2.0.
|
|
30
|
-
"nodecommons-es-config": "^1.0.
|
|
31
|
-
"nodecommons-es-database": "^2.
|
|
32
|
-
"nodecommons-es-database-mongodb": "^1.0.
|
|
33
|
-
"nodecommons-es-express": "^3.0.
|
|
34
|
-
"nodecommons-es-file": "^1.1.
|
|
35
|
-
"nodecommons-es-http": "^2.4.
|
|
36
|
-
"nodecommons-es-process": "^1.0.
|
|
37
|
-
"nodecommons-es-rest": "^2.0.
|
|
38
|
-
"nodecommons-es-security": "^1.0.
|
|
39
|
-
"nodecommons-es-socket-io": "3.0.
|
|
40
|
-
"tscommons-es-async": "^1.1.
|
|
41
|
-
"tscommons-es-config": "^1.0.
|
|
42
|
-
"tscommons-es-core": "^1.
|
|
43
|
-
"tscommons-es-format": "1.0.
|
|
27
|
+
"nodecommons-es-app": "^1.0.9",
|
|
28
|
+
"nodecommons-es-app-socket-io": "2.0.25",
|
|
29
|
+
"nodecommons-es-cli": "^2.0.6",
|
|
30
|
+
"nodecommons-es-config": "^1.0.7",
|
|
31
|
+
"nodecommons-es-database": "^2.2.7",
|
|
32
|
+
"nodecommons-es-database-mongodb": "^1.0.8",
|
|
33
|
+
"nodecommons-es-express": "^3.0.6",
|
|
34
|
+
"nodecommons-es-file": "^1.1.2",
|
|
35
|
+
"nodecommons-es-http": "^2.4.6",
|
|
36
|
+
"nodecommons-es-process": "^1.0.5",
|
|
37
|
+
"nodecommons-es-rest": "^2.0.11",
|
|
38
|
+
"nodecommons-es-security": "^1.0.9",
|
|
39
|
+
"nodecommons-es-socket-io": "3.0.5",
|
|
40
|
+
"tscommons-es-async": "^1.1.3",
|
|
41
|
+
"tscommons-es-config": "^1.0.5",
|
|
42
|
+
"tscommons-es-core": "^1.16.3",
|
|
43
|
+
"tscommons-es-format": "1.0.5",
|
|
44
44
|
"unfluff": "^3.2.0"
|
|
45
45
|
},
|
|
46
46
|
"devDependencies": {
|