hydra-crawler 2.2.9 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/apps/export-domain-urls.d.ts +12 -0
- package/dist/apps/export-domain-urls.js +95 -0
- package/dist/apps/export-domain-urls.js.map +1 -0
- package/dist/cli.js +5 -0
- package/dist/cli.js.map +1 -1
- package/dist/servers/maintenance.server.d.ts +1 -0
- package/dist/servers/maintenance.server.js +34 -18
- package/dist/servers/maintenance.server.js.map +1 -1
- package/package.json +18 -18
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { CommonsApp } from 'nodecommons-es-app';
|
|
2
|
+
import { DatabaseService } from '../services/database.service';
|
|
3
|
+
import { IInternalHydraCommonDbApp } from './internal-hydra-common.app';
|
|
4
|
+
/**
 * Type declaration for the "export domain URLs" app.
 *
 * The app is constructed with the domain to export, receives its
 * DatabaseService through setDatabaseService() (the IInternalHydraCommonDbApp
 * contract), and exposes asynchronous init() and run() steps.
 */
export declare class ExportDomainUrlsApp extends CommonsApp implements IInternalHydraCommonDbApp {
|
|
5
|
+
private domain;
|
|
6
|
+
private databaseService;
|
|
7
|
+
constructor(domain: string);
|
|
8
|
+
getAppName(): string;
|
|
9
|
+
setDatabaseService(databaseService: DatabaseService): void;
|
|
10
|
+
init(): Promise<void>;
|
|
11
|
+
run(): Promise<void>;
|
|
12
|
+
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/*
 * Compiler-emitted async helper (reuses an existing this.__awaiter when one is
 * already defined). It applies `generator` with `thisArg` / `_arguments` and
 * returns a promise of type P (defaulting to the global Promise):
 *  - every yielded value is wrapped into a P by adopt() and, once settled, fed
 *    back into the generator through next() (fulfilled) or throw() (rejected);
 *  - when the generator reports done, the returned promise resolves with its value;
 *  - any exception escaping the generator rejects the returned promise.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { EStatus } from 'hydra-crawler-ts-assets';
|
|
11
|
+
import { commonsOutputDoing, commonsOutputProgress, commonsOutputResult, commonsOutputSuccess } from 'nodecommons-es-cli';
|
|
12
|
+
import { CommonsApp } from 'nodecommons-es-app';
|
|
13
|
+
/**
 * Canonicalises a stored URL before it is exported.
 *
 * Strips surrounding whitespace, removes any query string, collapses a
 * trailing `/index.htm` or `/index.html` to its directory, and drops one
 * trailing slash. The final trim removes whitespace that the earlier
 * replacements may have exposed at the end of the string.
 *
 * @param rawUrl URL string as read from a database row.
 * @returns the canonical form used for de-duplication and output.
 */
function normalizeExportedUrl(rawUrl) {
    return rawUrl
        .trim()
        .replace(/[?].*$/, '')
        .replace(/\/index\.(htm|html)$/i, '/')
        .replace(/\/$/, '')
        .trim();
}
/**
 * Prints the de-duplicated, sorted list of URLs recorded for one domain,
 * one URL per line on stdout, so they can be imported into another instance.
 *
 * Optional command-line attributes read in run():
 *  - `valid-only`: only rows whose status is DONE, QUEUED or ACTIVE and whose
 *    statusCode is 200 or 204;
 *  - `pages-only`: skip URLs containing a query string and URLs ending in a
 *    common image file extension.
 * Without `valid-only`, rows of every status are exported.
 */
export class ExportDomainUrlsApp extends CommonsApp {
    /**
     * @param domain domain whose URLs are exported (matched against the
     *   `domain` field of each row).
     */
    constructor(domain) {
        super('hydra-crawler');
        this.domain = domain;
    }
    /** @returns the display name of this app. */
    getAppName() {
        return 'Hydra - Export Domain Urls';
    }
    /**
     * Supplies the database service; must be called before init() and run().
     *
     * @param databaseService service used to connect and to query the URLs.
     */
    setDatabaseService(databaseService) {
        this.databaseService = databaseService;
    }
    /**
     * Connects to the database, then runs the base-class initialisation.
     *
     * @throws Error when no database service has been set.
     */
    init() {
        const _super = Object.create(null, {
            init: { get: () => super.init }
        });
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.databaseService)
                throw new Error('Database service has not been set yet');
            commonsOutputDoing('Connecting to database');
            yield this.databaseService.init();
            commonsOutputSuccess();
            yield _super.init.call(this);
        });
    }
    /**
     * Streams the matching rows, normalises and de-duplicates their URLs, and
     * prints them sorted.
     *
     * @throws Error when no database service has been set.
     */
    run() {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.databaseService)
                throw new Error('Database service has not been set');
            const args = this.getArgs();
            const validOnly = args.hasAttribute('valid-only');
            const pagesOnly = args.hasAttribute('pages-only');
            const matchPipeline = [
                { $match: { domain: this.domain } }
            ];
            if (validOnly) {
                matchPipeline.push({ $match: {
                        status: { $in: [EStatus.DONE, EStatus.QUEUED, EStatus.ACTIVE] },
                        statusCode: { $in: [200, 204] }
                    } });
            }
            if (pagesOnly) {
                matchPipeline.push(
                // no query strings
                { $match: {
                        url: { $not: /[?]/ }
                    } }, 
                // no image files; the leading dot keeps pages such as
                // "/blog/funny-gif" from being mistaken for images
                { $match: {
                        url: { $not: /\.(gif|jpeg|jpg|png|webp|avif)$/i }
                    } });
            }
            const cursor = this.databaseService.getUrls()
                .aggregate([
                ...matchPipeline,
                { $project: { _id: false, url: true } }
            ]);
            // safer to do this directly rather than a call to listQueryResults
            // ditto not using CommonsFile
            commonsOutputDoing(`Exporting URLs for domain ${this.domain}`);
            // A Set keeps the membership test constant-time; an array scan per
            // row would be quadratic over a large crawl.
            const unique = new Set();
            commonsOutputProgress(0);
            while (true) {
                const row = yield cursor.next();
                if (row === null)
                    break;
                const url = normalizeExportedUrl(row.url);
                if (unique.has(url))
                    continue;
                unique.add(url);
                // report only when the count actually grows, so a run of
                // duplicate rows does not repeat the same progress line
                if (unique.size % 1000 === 0)
                    commonsOutputProgress(unique.size);
            }
            commonsOutputResult(unique.size);
            const urls = Array.from(unique).sort();
            for (const url of urls) {
                console.log(url);
            }
        });
    }
}
|
|
95
|
+
//# sourceMappingURL=export-domain-urls.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"export-domain-urls.js","sourceRoot":"","sources":["../../src/apps/export-domain-urls.ts"],"names":[],"mappings":";;;;;;;;;AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,kBAAkB,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1H,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAMhD,8EAA8E;AAE9E,MAAM,OAAO,mBAAoB,SAAQ,UAAU;IAGlD,YACU,MAAc;QAEvB,KAAK,CAAC,eAAe,CAAC,CAAC;QAFd,WAAM,GAAN,MAAM,CAAQ;IAGxB,CAAC;IAEM,UAAU;QAChB,OAAO,4BAA4B,CAAC;IACrC,CAAC;IAEM,kBAAkB,CACvB,eAAgC;QAEjC,IAAI,CAAC,eAAe,GAAG,eAAe,CAAC;IACxC,CAAC;IAEY,IAAI;;;;;YAChB,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAEpF,kBAAkB,CAAC,wBAAwB,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;YAClC,oBAAoB,EAAE,CAAC;YAEvB,MAAM,OAAM,IAAI,WAAE,CAAC;QACpB,CAAC;KAAA;IAEY,GAAG;;YACf,IAAI,CAAC,IAAI,CAAC,eAAe;gBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YAEhF,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YACrE,MAAM,SAAS,GAAY,IAAI,CAAC,OAAO,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YAErE,MAAM,aAAa,GAA4B;gBAC7C,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE;aACpC,CAAC;YACF,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAChB,EAAE,MAAM,EAAE;wBACR,MAAM,EAAE,EAAE,GAAG,EAAE,CAAE,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAE,EAAE;wBACjE,UAAU,EAAE,EAAE,GAAG,EAAE,CAAE,GAAG,EAAE,GAAG,CAAE,EAAE;qBAClC,EAAE,CACJ,CAAC;aACF;YACD,IAAI,SAAS,EAAE;gBACd,aAAa,CAAC,IAAI,CAAC,GAAG;oBACpB,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE;yBACrB,EAAE;oBACH,EAAE,MAAM,EAAE;4BACR,GAAG,EAAE,EAAE,IAAI,EAAE,gCAAgC,EAAE;yBAChD,EAAE;iBACJ,CAAC,CAAC;aACH;YAED,MAAM,MAAM,GAA+C,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE;iBACtF,SAAS,CAAC;gBACT,GAAG,aAAa;gBAChB,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE;aACxC,CAAC,CAAC;YAEL,mEAAmE;YACnE,8BAA8B;YAE9B,kBAAkB,CAAC,6BAA6B,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;YAE/D,MAAM,IAAI,GAAa,EAAE,CAAC;YAC1B,OAAO,IAAI,EAAE;gBACZ,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,KAAK,CAAC;oBAAE,qBAAqB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEjE,MAAM,GAAG,GA
AyB,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBACtD,IAAI,GAAG,KAAK,IAAI;oBAAE,MAAM;gBAExB,MAAM,GAAG,GAAW,GAAG,CAAC,GAAG;qBACxB,IAAI,EAAE;qBACN,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;qBACrB,OAAO,CAAC,uBAAuB,EAAE,GAAG,CAAC;qBACrC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;qBAClB,IAAI,EAAE,CAAC;gBACV,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;oBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;aACxC;YACD,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;aACjB;QACF,CAAC;KAAA;CACD"}
|
package/dist/cli.js
CHANGED
|
@@ -30,6 +30,7 @@ import { QueryApp } from './apps/query.app';
|
|
|
30
30
|
import { CrossPopulateExportApp } from './apps/cross-populate-export.app';
|
|
31
31
|
import { CrossPopulateImportApp } from './apps/cross-populate-import.app';
|
|
32
32
|
import { ExtractTextApp } from './apps/extract-text.app';
|
|
33
|
+
import { ExportDomainUrlsApp } from './apps/export-domain-urls';
|
|
33
34
|
import { DatabaseService } from './services/database.service';
|
|
34
35
|
import { isIMatch } from './interfaces/imatch';
|
|
35
36
|
import { isIExpiry, toIExpiry } from './interfaces/iexpiry';
|
|
@@ -107,6 +108,10 @@ else if (args.hasAttribute('extract-text')) {
|
|
|
107
108
|
const url = args.getString('url');
|
|
108
109
|
app = new ExtractTextApp(url);
|
|
109
110
|
}
|
|
111
|
+
else if (args.hasAttribute('export-domain-urls')) {
|
|
112
|
+
const domain = args.getString('domain');
|
|
113
|
+
app = new ExportDomainUrlsApp(domain);
|
|
114
|
+
}
|
|
110
115
|
else {
|
|
111
116
|
app = new HydraApp();
|
|
112
117
|
}
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":"AAAA,6CAA6C;;;;;;;;;;AAE7C,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAElF,OAAO,EAAwB,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AAGlF,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,WAAW,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACvH,OAAO,EAAE,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AAEhE,OAAO,EAEL,2BAA2B,EAC3B,6BAA6B,EAC7B,+BAA+B,EAC/B,oCAAoC,EACrC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAC;AAC1E,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":"AAAA,6CAA6C;;;;;;;;;;AAE7C,OAAO,EAAE,wBAAwB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAElF,OAAO,EAAwB,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AAGlF,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAElD,OAAO,EAAE,WAAW,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACvH,OAAO,EAAE,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AAEhE,OAAO,EAEL,2BAA2B,EAC3B,6BAA6B,EAC7B,+BAA+B,EAC/B,oCAAoC,EACrC,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAC3D,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAC;AAC1E,OAAO,EAAE,sBAAsB,EAAE,MAAM,kCAAkC,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAEhE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAE9D,OAAO,EAAU,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,EAAW,SAAS,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAErE,OAAO,EAAE,iBAAiB,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE1D,MAAM,IAAI,GAAgB,IAAI,WAAW,EAAE,CAAC;AAC5C,IAAI,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC;IAAE,yBAAyB,CAAC,IAAI,CAAC,CAAC;AAChE,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC;IAAE,sBAAsB,CAAC,IAAI,CAAC,CAAC;AAE9D,IAAI,GAAsC,CAAC;AAE3C,IAAI,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,EAAE;IAClC,GAAG,GAAG,IAAI,WAAW,EAAE,CAAC;CACxB;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE;IACxC,MAAM,YAAY,GAAY,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;IACzD,MAAM,YAAY,GAAY,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;IAEzD,GAAG,GAAG,IAAI,UAAU,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;CACjD;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE;IACvC,IAAI,SAAyB,CAAC;IAE9B,IAA
I,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE;QAC9B,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;KAChE;IACD,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE;QAC9B,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;KACjC;IAED,GAAG,GAAG,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;CAC/B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE;IACvC,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC;CACtB;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,EAAE;IAC1C,MAAM,OAAO,GAAc,EAAE,CAAC;IAC9B,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC;QAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAC9D,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC;QAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1D,GAAG,GAAG,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;CAChC;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE;IACrC,GAAG,GAAG,IAAI,OAAO,EAAE,CAAC;CACpB;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,EAAE;IACxC,GAAG,GAAG,IAAI,UAAU,EAAE,CAAC;CACvB;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,EAAE;IAC/C,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,QAAQ,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;IACjF,MAAM,MAAM,GAAW,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAEhD,GAAG,GAAG,IAAI,gBAAgB,CACxB,MAAM,EACN,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC,CAClC,CAAC;CACF;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,EAAE;IAC1C,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;CAC7B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,iBAAiB,CAAC,EAAE;IAChD,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;CAC7B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,EAAE;IAC9C,GAAG,GAAG,IAAI,eAAe,EAAE,CAAC;CAC5B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,uBAAuB,CAAC,EAAE;IACtD,MAAM,QAAQ,GAAW,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAEpD,GAAG,GAAG,IAAI,sBAAsB,CAAC,QAAQ,CAAC,CAAC;CAC3C;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,uBAAuB,CAAC,EAAE;IACtD,MAAM,QAAQ,GAAW,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAEpD,GAAG,GAAG,IAAI,sBAAsB,CAAC,QAAQ,CAAC,CAAC;CAC3C;KAAM,IAAI,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,QAAQ,CAAC,EAAE;IAC/C,MAAM,KAAK,GAAW,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAE9C,GAAG,GAAG,IAAI,QAAQ,CAAC,KAAK,CAAC,CAAC;CAC1B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,EAAE;IAC7C,MAAM,GAAG,GAAW,IAAI,
CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAE1C,GAAG,GAAG,IAAI,cAAc,CAAC,GAAG,CAAC,CAAC;CAC9B;KAAM,IAAI,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,EAAE;IACnD,MAAM,MAAM,GAAW,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAEhD,GAAG,GAAG,IAAI,mBAAmB,CAAC,MAAM,CAAC,CAAC;CACtC;KAAM;IACN,GAAG,GAAG,IAAI,QAAQ,EAAE,CAAC;CACrB;AAED,IAAI,CAAC,GAAG;IAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;AAEzE,MAAM,UAAU,GAAkB,GAAG,CAAC,cAAc,CAAC,iBAAiB,CAAC,CAAC;AACxE,MAAM,WAAW,GAAoB,UAAU,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;AACtE,IAAI,CAAC,qBAAqB,CAAC,WAAW,CAAC;IAAE,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;AAE/F,IAAI,2BAA2B,CAAC,GAAG,CAAC,EAAE;IACrC,MAAM,eAAe,GAAoB,IAAI,eAAe,CAAC,WAAW,CAAC,CAAC;IAC1E,GAAG,CAAC,kBAAkB,CAAC,eAAe,CAAC,CAAC;CACxC;AAED,IAAI,6BAA6B,CAAC,GAAG,CAAC,EAAE;IACvC,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE;QAC1B,MAAM,QAAQ,GAAW,iBAAiB,CAAC,IAAI,CAAC,CAAC;QAEjD,MAAM,OAAO,GAAY,GAAG,CAAC,qBAAqB,CAAC,QAAQ,CAAC,CAAC;QAC7D,IAAI,CAAC,mBAAmB,CAAS,OAAO,EAAE,QAAQ,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC;QACtG,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;KAC7B;CACD;AAED,IAAI,+BAA+B,CAAC,GAAG,CAAC,EAAE;IACzC,MAAM,IAAI,GAAY,GAAG,CAAC,qBAAqB,CAAC,kBAAkB,CAAC,CAAC;IACpE,IAAI,CAAC,wBAAwB,CAAC,IAAI,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IAE5E,MAAM,QAAQ,GAAc,IAAI;SAC7B,GAAG,CAAC,CAAC,MAAoC,EAAW,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;IAE7E,IAAI,CAAC,mBAAmB,CAAU,QAAQ,EAAE,SAAS,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;IACxG,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;CAC1B;AAED,IAAI,oCAAoC,CAAC,GAAG,CAAC,EAAE;IAC9C,MAAM,IAAI,GAAY,GAAG,CAAC,qBAAqB,CAAC,eAAe,CAAC,CAAC;IACjE,IAAI,CAAC,mBAAmB,CAAuB,IAAI,EAAE,sBAAsB,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;IAEvH,GAAG,CAAC,sBAAsB,CAAC,IAAI,CAAC,CAAC;CACjC;AAED,KAAK,CAAC,GAAwB,EAAE;IAC/B,iBAAiB,CAAC,yBAAyB,GAAG,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAC/D,MAAM,GAAG,CAAC,KAAK,EAAE,CAAC;IAClB,iBAAiB,CAAC,uBAAuB,CAAC,CAAC;IAE3C,UAAU,CAAC,GAAS,EAAE;QACrB,gEAAgE;QAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC,EAAE,GAAG,CAAC,CAAC;AACT,CAAC,CAAA,CAAC,EAAE,CAAC"}
|
|
@@ -16,6 +16,7 @@ export declare class MaintenanceServer {
|
|
|
16
16
|
private expirer;
|
|
17
17
|
private cleaner;
|
|
18
18
|
private isPaused;
|
|
19
|
+
private isRunning;
|
|
19
20
|
constructor(times: TCommonsScheduleTime[], expiry: Expiry, lists: Lists, database: DatabaseService, crawl: CrawlServer, hardLimit?: number | undefined);
|
|
20
21
|
private perform;
|
|
21
22
|
start(): void;
|
|
@@ -8,7 +8,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
10
|
import { commonsAsyncTimeout, CommonsSchedule } from 'tscommons-es-async';
|
|
11
|
-
import { commonsOutputAlert } from 'nodecommons-es-cli';
|
|
11
|
+
import { commonsOutputAlert, commonsOutputDebug, commonsOutputError } from 'nodecommons-es-cli';
|
|
12
12
|
import { commonsGracefulAbortAddCallback } from 'nodecommons-es-process';
|
|
13
13
|
import { Expirer } from '../classes/expirer';
|
|
14
14
|
import { Cleaner } from '../classes/cleaner';
|
|
@@ -34,28 +34,44 @@ export class MaintenanceServer {
|
|
|
34
34
|
this.crawl = crawl;
|
|
35
35
|
this.hardLimit = hardLimit;
|
|
36
36
|
this.isPaused = false;
|
|
37
|
+
this.isRunning = false;
|
|
37
38
|
this.schedule = new CommonsSchedule('hydra-maintenance');
|
|
38
39
|
this.schedule.parse(times, (action) => toEAction(action), (action) => __awaiter(this, void 0, void 0, function* () {
|
|
39
|
-
|
|
40
|
-
if (
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
40
|
+
// prevent duplicate runs
|
|
41
|
+
if (this.isRunning) {
|
|
42
|
+
commonsOutputDebug('Maintenance is already running. Not re-running');
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
this.isRunning = true;
|
|
46
|
+
try {
|
|
47
|
+
let claimedPause = false;
|
|
48
|
+
if (!this.isPaused) {
|
|
49
|
+
this.isPaused = true;
|
|
50
|
+
claimedPause = true;
|
|
51
|
+
this.crawl.pause();
|
|
52
|
+
for (let i = 30; i-- > 0;) {
|
|
53
|
+
commonsOutputAlert(`Going down for maintenance ... ${i}`);
|
|
54
|
+
try {
|
|
55
|
+
yield commonsAsyncTimeout(1000);
|
|
56
|
+
}
|
|
57
|
+
catch (ex) {
|
|
58
|
+
/* do nothing */
|
|
59
|
+
}
|
|
51
60
|
}
|
|
52
61
|
}
|
|
62
|
+
yield this.perform(action);
|
|
63
|
+
if (claimedPause) {
|
|
64
|
+
commonsOutputAlert('Resuming from maintenance');
|
|
65
|
+
this.isPaused = false;
|
|
66
|
+
this.crawl.resume();
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
catch (e) {
|
|
70
|
+
console.log(e);
|
|
71
|
+
commonsOutputError(e.message);
|
|
53
72
|
}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
commonsOutputAlert('Resuming from maintenance');
|
|
57
|
-
this.isPaused = false;
|
|
58
|
-
this.crawl.resume();
|
|
73
|
+
finally {
|
|
74
|
+
this.isRunning = false;
|
|
59
75
|
}
|
|
60
76
|
}));
|
|
61
77
|
this.expirer = new Expirer(expiry, database);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"maintenance.server.js","sourceRoot":"","sources":["../../src/servers/maintenance.server.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,mBAAmB,EAAE,eAAe,EAAwB,MAAM,oBAAoB,CAAC;AAEhG,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"maintenance.server.js","sourceRoot":"","sources":["../../src/servers/maintenance.server.ts"],"names":[],"mappings":";;;;;;;;;AAAA,OAAO,EAAE,mBAAmB,EAAE,eAAe,EAAwB,MAAM,oBAAoB,CAAC;AAEhG,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAChG,OAAO,EAAE,+BAA+B,EAAE,MAAM,wBAAwB,CAAC;AAIzE,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAM7C,IAAK,OAIJ;AAJD,WAAK,OAAO;IACX,4BAAiB,CAAA;IACjB,sDAA2C,CAAA;IAC3C,kDAAuC,CAAA;AACxC,CAAC,EAJI,OAAO,KAAP,OAAO,QAIX;AAED,MAAM,UAAU,SAAS,CAAC,KAAa;IACtC,QAAQ,KAAK,EAAE;QACd,KAAK,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE;YAC7B,OAAO,OAAO,CAAC,MAAM,CAAC;QACvB,KAAK,OAAO,CAAC,mBAAmB,CAAC,QAAQ,EAAE;YAC1C,OAAO,OAAO,CAAC,mBAAmB,CAAC;QACpC,KAAK,OAAO,CAAC,iBAAiB,CAAC,QAAQ,EAAE;YACxC,OAAO,OAAO,CAAC,iBAAiB,CAAC;KAClC;IAED,OAAO,SAAS,CAAC;AAClB,CAAC;AAED,MAAM,OAAO,iBAAiB;IAQ7B,YACE,KAA6B,EAC7B,MAAc,EACd,KAAY,EACZ,QAAyB,EACjB,KAAkB,EAClB,SAAkB;QADlB,UAAK,GAAL,KAAK,CAAa;QAClB,cAAS,GAAT,SAAS,CAAS;QATpB,aAAQ,GAAY,KAAK,CAAC;QAC1B,cAAS,GAAY,KAAK,CAAC;QAUlC,IAAI,CAAC,QAAQ,GAAG,IAAI,eAAe,CAAC,mBAAmB,CAAC,CAAC;QAEzD,IAAI,CAAC,QAAQ,CAAC,KAAK,CACjB,KAAK,EACL,CAAC,MAAc,EAAqB,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,EACxD,CAAO,MAAe,EAAiB,EAAE;YACxC,yBAAyB;YACzB,IAAI,IAAI,CAAC,SAAS,EAAE;gBACnB,kBAAkB,CAAC,gDAAgD,CAAC,CAAC;gBACrE,OAAO;aACP;YACD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YAEtB,IAAI;gBACH,IAAI,YAAY,GAAY,KAAK,CAAC;gBAElC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;oBACnB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;oBACrB,YAAY,GAAG,IAAI,CAAC;oBAEpB,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;oBAEnB,KAAK,IAAI,CAAC,GAAW,EAAE,EAAE,CAAC,EAAE,GAAG,CAAC,GAAG;wBAClC,kBAAkB,CAAC,kCAAkC,CAAC,EAAE,CAAC,CAAC;wBAE1D,IAAI;4BACH,MAAM,mBAAmB,CAAC,IAAI,CAAC,CAAC;yBAChC;wBAAC,OAAO,EAAE,EAAE;4BACZ,gBAAgB;yBAChB;qBACD;iBACD;gBAED,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;gBAE3B,IAAI,YAAY,EAAE;oBACjB,kBAAkB,CAAC,2BAA2B,CAAC,CAAC;oBAEhD,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;oBAEtB,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;iBACpB;aACD;YAAC,OAAO,CAAC,EAAE;gBACX,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;
gBACf,kBAAkB,CAAE,CAAW,CAAC,OAAO,CAAC,CAAC;aACzC;oBAAS;gBACT,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;aACvB;QACF,CAAC,CAAA,CACF,CAAC;QAEF,IAAI,CAAC,OAAO,GAAG,IAAI,OAAO,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,OAAO,GAAG,IAAI,OAAO,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QAE5C,+BAA+B,CAAC,GAAS,EAAE;YAC1C,kBAAkB,CAAC,wDAAwD,CAAC,CAAC;YAC7E,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QACtB,CAAC,CAAC,CAAC;IACJ,CAAC;IAEa,OAAO,CAAC,MAAe;;YACpC,QAAQ,MAAM,EAAE;gBACf,KAAK,OAAO,CAAC,MAAM;oBAClB,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;oBAC5B,MAAM;gBACP,KAAK,OAAO,CAAC,mBAAmB;oBAC/B,MAAM,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,CAAC;oBACvC,MAAM;gBACP,KAAK,OAAO,CAAC,iBAAiB;oBAC7B,MAAM,IAAI,CAAC,OAAO,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBACnD,MAAM;aACP;QACF,CAAC;KAAA;IAEM,KAAK;QACX,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;CACD"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hydra-crawler",
|
|
3
|
-
"version": "2.2.9",
|
|
3
|
+
"version": "2.3.0",
|
|
4
4
|
"description": "Node.js Hydra web crawler",
|
|
5
5
|
"author": "Pete Morris",
|
|
6
6
|
"license": "ISC",
|
|
@@ -24,23 +24,23 @@
|
|
|
24
24
|
"jpeg-js": "^0.4.4",
|
|
25
25
|
"memcached-promisify": "^2.0.0",
|
|
26
26
|
"mongodb": "^5.0.1",
|
|
27
|
-
"nodecommons-es-app": "^1.0.
|
|
28
|
-
"nodecommons-es-app-socket-io": "2.0.
|
|
29
|
-
"nodecommons-es-cli": "^2.0.
|
|
30
|
-
"nodecommons-es-config": "^1.0.
|
|
31
|
-
"nodecommons-es-database": "^2.
|
|
32
|
-
"nodecommons-es-database-mongodb": "^1.0.
|
|
33
|
-
"nodecommons-es-express": "^3.0.
|
|
34
|
-
"nodecommons-es-file": "^1.1.
|
|
35
|
-
"nodecommons-es-http": "^2.4.
|
|
36
|
-
"nodecommons-es-process": "^1.0.
|
|
37
|
-
"nodecommons-es-rest": "^2.0.
|
|
38
|
-
"nodecommons-es-security": "^1.0.
|
|
39
|
-
"nodecommons-es-socket-io": "3.0.
|
|
40
|
-
"tscommons-es-async": "^1.1.
|
|
41
|
-
"tscommons-es-config": "^1.0.
|
|
42
|
-
"tscommons-es-core": "^1.
|
|
43
|
-
"tscommons-es-format": "1.0.
|
|
27
|
+
"nodecommons-es-app": "^1.0.9",
|
|
28
|
+
"nodecommons-es-app-socket-io": "2.0.25",
|
|
29
|
+
"nodecommons-es-cli": "^2.0.6",
|
|
30
|
+
"nodecommons-es-config": "^1.0.7",
|
|
31
|
+
"nodecommons-es-database": "^2.2.7",
|
|
32
|
+
"nodecommons-es-database-mongodb": "^1.0.8",
|
|
33
|
+
"nodecommons-es-express": "^3.0.6",
|
|
34
|
+
"nodecommons-es-file": "^1.1.2",
|
|
35
|
+
"nodecommons-es-http": "^2.4.6",
|
|
36
|
+
"nodecommons-es-process": "^1.0.5",
|
|
37
|
+
"nodecommons-es-rest": "^2.0.11",
|
|
38
|
+
"nodecommons-es-security": "^1.0.9",
|
|
39
|
+
"nodecommons-es-socket-io": "3.0.5",
|
|
40
|
+
"tscommons-es-async": "^1.1.3",
|
|
41
|
+
"tscommons-es-config": "^1.0.5",
|
|
42
|
+
"tscommons-es-core": "^1.16.3",
|
|
43
|
+
"tscommons-es-format": "1.0.5",
|
|
44
44
|
"unfluff": "^3.2.0"
|
|
45
45
|
},
|
|
46
46
|
"devDependencies": {
|