website-scrap-engine 0.8.7 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +245 -39
- package/lib/downloader/main.d.ts.map +1 -1
- package/lib/downloader/main.js +15 -16
- package/lib/downloader/main.js.map +1 -1
- package/lib/downloader/multi.d.ts.map +1 -1
- package/lib/downloader/multi.js +3 -4
- package/lib/downloader/multi.js.map +1 -1
- package/lib/downloader/pipeline-executor-impl.d.ts +6 -3
- package/lib/downloader/pipeline-executor-impl.d.ts.map +1 -1
- package/lib/downloader/pipeline-executor-impl.js +86 -2
- package/lib/downloader/pipeline-executor-impl.js.map +1 -1
- package/lib/downloader/single.d.ts.map +1 -1
- package/lib/downloader/single.js +4 -5
- package/lib/downloader/single.js.map +1 -1
- package/lib/downloader/worker-pool.d.ts.map +1 -1
- package/lib/downloader/worker-pool.js +12 -7
- package/lib/downloader/worker-pool.js.map +1 -1
- package/lib/downloader/worker-type.d.ts +2 -2
- package/lib/downloader/worker-type.d.ts.map +1 -1
- package/lib/downloader/worker.js +2 -4
- package/lib/downloader/worker.js.map +1 -1
- package/lib/index.d.ts +2 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +1 -0
- package/lib/index.js.map +1 -1
- package/lib/life-cycle/adapters.d.ts +7 -1
- package/lib/life-cycle/adapters.d.ts.map +1 -1
- package/lib/life-cycle/adapters.js +6 -0
- package/lib/life-cycle/adapters.js.map +1 -1
- package/lib/life-cycle/default-life-cycle.d.ts.map +1 -1
- package/lib/life-cycle/default-life-cycle.js +3 -1
- package/lib/life-cycle/default-life-cycle.js.map +1 -1
- package/lib/life-cycle/default-status-listener.d.ts +4 -0
- package/lib/life-cycle/default-status-listener.d.ts.map +1 -0
- package/lib/life-cycle/default-status-listener.js +38 -0
- package/lib/life-cycle/default-status-listener.js.map +1 -0
- package/lib/life-cycle/download-resource.d.ts.map +1 -1
- package/lib/life-cycle/download-resource.js +13 -3
- package/lib/life-cycle/download-resource.js.map +1 -1
- package/lib/life-cycle/index.d.ts +1 -0
- package/lib/life-cycle/index.d.ts.map +1 -1
- package/lib/life-cycle/index.js +1 -0
- package/lib/life-cycle/index.js.map +1 -1
- package/lib/life-cycle/pipeline-executor.d.ts +10 -3
- package/lib/life-cycle/pipeline-executor.d.ts.map +1 -1
- package/lib/life-cycle/types.d.ts +56 -4
- package/lib/life-cycle/types.d.ts.map +1 -1
- package/lib/logger/default-logger.d.ts +3 -0
- package/lib/logger/default-logger.d.ts.map +1 -0
- package/lib/logger/default-logger.js +11 -0
- package/lib/logger/default-logger.js.map +1 -0
- package/lib/logger/log4js-adapter.d.ts +3 -0
- package/lib/logger/log4js-adapter.d.ts.map +1 -0
- package/lib/logger/log4js-adapter.js +143 -0
- package/lib/logger/log4js-adapter.js.map +1 -0
- package/lib/logger/logger-worker.d.ts +3 -4
- package/lib/logger/logger-worker.d.ts.map +1 -1
- package/lib/logger/logger-worker.js +21 -20
- package/lib/logger/logger-worker.js.map +1 -1
- package/lib/logger/logger.d.ts +13 -11
- package/lib/logger/logger.d.ts.map +1 -1
- package/lib/logger/logger.js +32 -14
- package/lib/logger/logger.js.map +1 -1
- package/lib/logger/types.d.ts +23 -0
- package/lib/logger/types.d.ts.map +1 -0
- package/lib/logger/types.js +2 -0
- package/lib/logger/types.js.map +1 -0
- package/lib/options.d.ts +5 -4
- package/lib/options.d.ts.map +1 -1
- package/lib/options.js +6 -4
- package/lib/options.js.map +1 -1
- package/package.json +7 -5
- package/src/downloader/main.ts +15 -14
- package/src/downloader/multi.ts +3 -5
- package/src/downloader/pipeline-executor-impl.ts +98 -2
- package/src/downloader/single.ts +4 -5
- package/src/downloader/worker-pool.ts +12 -6
- package/src/downloader/worker-type.ts +2 -2
- package/src/downloader/worker.ts +2 -7
- package/src/index.ts +2 -0
- package/src/life-cycle/adapters.ts +13 -0
- package/src/life-cycle/default-life-cycle.ts +3 -1
- package/src/life-cycle/default-status-listener.ts +40 -0
- package/src/life-cycle/download-resource.ts +13 -4
- package/src/life-cycle/index.ts +1 -0
- package/src/life-cycle/pipeline-executor.ts +16 -2
- package/src/life-cycle/types.ts +79 -3
- package/src/logger/default-logger.ts +12 -0
- package/src/logger/log4js-adapter.ts +147 -0
- package/src/logger/logger-worker.ts +24 -23
- package/src/logger/logger.ts +36 -16
- package/src/logger/types.ts +35 -0
- package/src/options.ts +11 -7
- package/lib/logger/config-logger.d.ts +0 -3
- package/lib/logger/config-logger.d.ts.map +0 -1
- package/lib/logger/config-logger.js +0 -92
- package/lib/logger/config-logger.js.map +0 -1
- package/src/logger/config-logger.ts +0 -95
package/src/life-cycle/types.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import type {OptionsInit as GotOptions} from 'got';
|
|
2
|
+
import type {Stats} from 'node:fs';
|
|
2
3
|
import type {
|
|
3
4
|
createResource,
|
|
4
5
|
GenerateSavePathFn,
|
|
6
|
+
RawResource,
|
|
5
7
|
Resource,
|
|
6
8
|
ResourceBody,
|
|
7
9
|
ResourceType
|
|
@@ -14,15 +16,43 @@ import type {WorkerInfo} from '../downloader/worker-pool.js';
|
|
|
14
16
|
|
|
15
17
|
export type AsyncResult<T> = T | Promise<T>;
|
|
16
18
|
|
|
19
|
+
export type ResourceStatus =
|
|
20
|
+
| 'createResource'
|
|
21
|
+
| 'processBeforeDownload'
|
|
22
|
+
| 'download'
|
|
23
|
+
| 'processAfterDownload'
|
|
24
|
+
| 'saveToDisk'
|
|
25
|
+
| 'error'
|
|
26
|
+
| 'dispose';
|
|
27
|
+
|
|
28
|
+
export interface StatusChangeFunc {
|
|
29
|
+
(res: Resource | RawResource,
|
|
30
|
+
status: ResourceStatus,
|
|
31
|
+
options: StaticDownloadOptions,
|
|
32
|
+
pipeline: PipelineExecutor): void | Promise<void>;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export interface InitSubmitFunc {
|
|
36
|
+
/**
|
|
37
|
+
* Submit a raw URL to be processed through the full pipeline
|
|
38
|
+
* after all init hooks complete.
|
|
39
|
+
*
|
|
40
|
+
* This is fire-and-forget: the URL is appended to the initial URL list
|
|
41
|
+
* and processed identically to entries in {@link DownloadOptions.initialUrl}.
|
|
42
|
+
* No pipeline stages run during the call.
|
|
43
|
+
*/
|
|
44
|
+
(url: string): void;
|
|
45
|
+
}
|
|
46
|
+
|
|
17
47
|
export interface InitLifeCycleFunc {
|
|
18
48
|
/**
|
|
19
49
|
* The init life cycle would be called:
|
|
20
50
|
*
|
|
21
51
|
* Each time a downloader initialized,
|
|
22
|
-
* after
|
|
52
|
+
* after logger initialization but before addInitialResource.
|
|
23
53
|
*
|
|
24
54
|
* Each time a worker initialized,
|
|
25
|
-
*
|
|
55
|
+
* before parentPort?.addListener.
|
|
26
56
|
*
|
|
27
57
|
* The init life cycle could be async,
|
|
28
58
|
* in main thread the addInitialResource should wait for init completed,
|
|
@@ -31,8 +61,12 @@ export interface InitLifeCycleFunc {
|
|
|
31
61
|
*
|
|
32
62
|
* @param pipeline the PipelineExecutor
|
|
33
63
|
* @param downloader the DownloaderWithMeta when in main thread
|
|
64
|
+
* @param submit function to submit URLs for processing after init;
|
|
65
|
+
* undefined in worker threads
|
|
34
66
|
*/
|
|
35
|
-
(pipeline: PipelineExecutor,
|
|
67
|
+
(pipeline: PipelineExecutor,
|
|
68
|
+
downloader?: DownloaderWithMeta,
|
|
69
|
+
submit?: InitSubmitFunc): AsyncResult<void>;
|
|
36
70
|
}
|
|
37
71
|
|
|
38
72
|
export interface LinkRedirectFunc {
|
|
@@ -190,5 +224,47 @@ export interface ProcessingLifeCycle {
|
|
|
190
224
|
processAfterDownload: ProcessResourceAfterDownloadFunc[];
|
|
191
225
|
saveToDisk: SaveToDiskFunc[];
|
|
192
226
|
dispose: DisposeLifeCycle[];
|
|
227
|
+
/**
|
|
228
|
+
* Status change listeners.
|
|
229
|
+
*
|
|
230
|
+
* Unlike other life cycle hooks, all listeners always run
|
|
231
|
+
* (returning void does not short-circuit), and thrown errors are swallowed.
|
|
232
|
+
*
|
|
233
|
+
* Listeners should treat the resource and all parameters as readonly.
|
|
234
|
+
* Mutating them is undefined behavior.
|
|
235
|
+
*/
|
|
236
|
+
statusChange: StatusChangeFunc[];
|
|
237
|
+
/**
|
|
238
|
+
* Optional callback to decide what to do when a local file already exists
|
|
239
|
+
* for a resource. Called at most twice per resource: once before download
|
|
240
|
+
* and once before save.
|
|
241
|
+
*
|
|
242
|
+
* Unlike other life cycle hooks, this is a single function (not an array)
|
|
243
|
+
* because the decision is mutually exclusive — there is no meaningful way
|
|
244
|
+
* to compose multiple strategies.
|
|
245
|
+
*
|
|
246
|
+
* If absent, the pipeline behaves as before (always download and overwrite).
|
|
247
|
+
*/
|
|
248
|
+
existingResource?: ExistingResourceFunc;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
export type ExistingResourceAction =
|
|
252
|
+
| 'skip'
|
|
253
|
+
| 'overwrite'
|
|
254
|
+
| 'ifModifiedSince'
|
|
255
|
+
| 'skipSave';
|
|
256
|
+
|
|
257
|
+
export type ExistingResourceStage = 'download' | 'saveToDisk';
|
|
258
|
+
|
|
259
|
+
export interface ExistingResourceContext {
|
|
260
|
+
res: Resource;
|
|
261
|
+
stage: ExistingResourceStage;
|
|
262
|
+
localPath: string;
|
|
263
|
+
stat: Stats;
|
|
264
|
+
options: StaticDownloadOptions;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
export interface ExistingResourceFunc {
|
|
268
|
+
(ctx: ExistingResourceContext): ExistingResourceAction;
|
|
193
269
|
}
|
|
194
270
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type {Logger} from './types.js';
|
|
2
|
+
|
|
3
|
+
export function createDefaultLogger(): Logger {
|
|
4
|
+
return {
|
|
5
|
+
trace() { /* no-op */ },
|
|
6
|
+
debug(type, ...contents) { console.debug(`[${type}]`, ...contents); },
|
|
7
|
+
info(type, ...contents) { console.info(`[${type}]`, ...contents); },
|
|
8
|
+
warn(type, ...contents) { console.warn(`[${type}]`, ...contents); },
|
|
9
|
+
error(type, ...contents) { console.error(`[${type}]`, ...contents); },
|
|
10
|
+
isTraceEnabled() { return false; },
|
|
11
|
+
};
|
|
12
|
+
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import * as path from 'node:path';
|
|
2
|
+
import log4js from 'log4js';
|
|
3
|
+
import type {Logger, LogType} from './types.js';
|
|
4
|
+
|
|
5
|
+
const typeToCategory: Partial<Record<LogType, string>> = {
|
|
6
|
+
'io.http.request': 'request',
|
|
7
|
+
'io.http.response': 'response',
|
|
8
|
+
'io.http.notFound': 'notFound',
|
|
9
|
+
'io.http.retry': 'retry',
|
|
10
|
+
'io.disk.mkdir': 'mkdir',
|
|
11
|
+
'system.skip': 'skip',
|
|
12
|
+
'system.skipExternal': 'skipExternal',
|
|
13
|
+
'system.complete': 'complete',
|
|
14
|
+
'system.adjustConcurrency': 'adjustConcurrency',
|
|
15
|
+
'system.error': 'error',
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
export function createLog4jsLogger(
|
|
19
|
+
localRoot: string,
|
|
20
|
+
logSubDir?: string
|
|
21
|
+
): Logger {
|
|
22
|
+
const logDir = path.join(localRoot, logSubDir || '', 'logs');
|
|
23
|
+
log4js.configure({
|
|
24
|
+
appenders: {
|
|
25
|
+
'retry': {
|
|
26
|
+
type: 'file',
|
|
27
|
+
filename: path.join(logDir, 'retry.log')
|
|
28
|
+
},
|
|
29
|
+
'mkdir': {
|
|
30
|
+
type: 'file',
|
|
31
|
+
filename: path.join(logDir, 'mkdir.log')
|
|
32
|
+
},
|
|
33
|
+
'error': {
|
|
34
|
+
type: 'file',
|
|
35
|
+
filename: path.join(logDir, 'error.log')
|
|
36
|
+
},
|
|
37
|
+
'skip': {
|
|
38
|
+
type: 'file',
|
|
39
|
+
filename: path.join(logDir, 'skip.log')
|
|
40
|
+
},
|
|
41
|
+
'404': {
|
|
42
|
+
type: 'file',
|
|
43
|
+
filename: path.join(logDir, '404.log')
|
|
44
|
+
},
|
|
45
|
+
'complete': {
|
|
46
|
+
type: 'file',
|
|
47
|
+
filename: path.join(logDir, 'complete.log')
|
|
48
|
+
},
|
|
49
|
+
'request': {
|
|
50
|
+
type: 'file',
|
|
51
|
+
filename: path.join(logDir, 'request.log')
|
|
52
|
+
},
|
|
53
|
+
'response': {
|
|
54
|
+
type: 'file',
|
|
55
|
+
filename: path.join(logDir, 'response.log')
|
|
56
|
+
},
|
|
57
|
+
'stdout': {
|
|
58
|
+
type: 'stdout'
|
|
59
|
+
},
|
|
60
|
+
'stderr': {
|
|
61
|
+
type: 'stderr'
|
|
62
|
+
}
|
|
63
|
+
},
|
|
64
|
+
categories: {
|
|
65
|
+
'retry': {
|
|
66
|
+
appenders: ['stdout', 'retry'],
|
|
67
|
+
level: 'debug'
|
|
68
|
+
},
|
|
69
|
+
'mkdir': {
|
|
70
|
+
appenders: ['mkdir'],
|
|
71
|
+
level: 'debug'
|
|
72
|
+
},
|
|
73
|
+
'error': {
|
|
74
|
+
appenders: ['stderr', 'error'],
|
|
75
|
+
level: 'debug'
|
|
76
|
+
},
|
|
77
|
+
'skip': {
|
|
78
|
+
appenders: ['stdout', 'skip'],
|
|
79
|
+
level: 'debug'
|
|
80
|
+
},
|
|
81
|
+
'skipExternal': {
|
|
82
|
+
appenders: ['skip'],
|
|
83
|
+
level: 'debug'
|
|
84
|
+
},
|
|
85
|
+
'notFound': {
|
|
86
|
+
appenders: ['404'],
|
|
87
|
+
level: 'debug'
|
|
88
|
+
},
|
|
89
|
+
'complete': {
|
|
90
|
+
appenders: ['complete'],
|
|
91
|
+
level: 'debug'
|
|
92
|
+
},
|
|
93
|
+
'request': {
|
|
94
|
+
appenders: ['request'],
|
|
95
|
+
level: 'debug'
|
|
96
|
+
},
|
|
97
|
+
'response': {
|
|
98
|
+
appenders: ['response'],
|
|
99
|
+
level: 'debug'
|
|
100
|
+
},
|
|
101
|
+
'adjustConcurrency': {
|
|
102
|
+
appenders: ['stdout', 'complete'],
|
|
103
|
+
level: 'debug'
|
|
104
|
+
},
|
|
105
|
+
'default': {
|
|
106
|
+
appenders: ['stdout', 'complete'],
|
|
107
|
+
level: 'debug'
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
const loggers = new Map<string, log4js.Logger>();
|
|
113
|
+
const getOrCreate = (category: string): log4js.Logger => {
|
|
114
|
+
let l = loggers.get(category);
|
|
115
|
+
if (!l) {
|
|
116
|
+
l = log4js.getLogger(category);
|
|
117
|
+
loggers.set(category, l);
|
|
118
|
+
}
|
|
119
|
+
return l;
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
123
|
+
const spread = (contents: unknown[]): [any, ...any[]] =>
|
|
124
|
+
contents as [any, ...any[]];
|
|
125
|
+
/* eslint-enable @typescript-eslint/no-explicit-any */
|
|
126
|
+
|
|
127
|
+
return {
|
|
128
|
+
trace(type, ...contents) {
|
|
129
|
+
getOrCreate(typeToCategory[type] ?? 'default').trace(...spread(contents));
|
|
130
|
+
},
|
|
131
|
+
debug(type, ...contents) {
|
|
132
|
+
getOrCreate(typeToCategory[type] ?? 'default').debug(...spread(contents));
|
|
133
|
+
},
|
|
134
|
+
info(type, ...contents) {
|
|
135
|
+
getOrCreate(typeToCategory[type] ?? 'default').info(...spread(contents));
|
|
136
|
+
},
|
|
137
|
+
warn(type, ...contents) {
|
|
138
|
+
getOrCreate(typeToCategory[type] ?? 'default').warn(...spread(contents));
|
|
139
|
+
},
|
|
140
|
+
error(type, ...contents) {
|
|
141
|
+
getOrCreate(typeToCategory[type] ?? 'default').error(...spread(contents));
|
|
142
|
+
},
|
|
143
|
+
isTraceEnabled() {
|
|
144
|
+
return log4js.getLogger().isTraceEnabled();
|
|
145
|
+
},
|
|
146
|
+
};
|
|
147
|
+
}
|
|
@@ -1,31 +1,32 @@
|
|
|
1
|
-
import type {Logger} from 'log4js';
|
|
2
|
-
// https://github.com/jestjs/jest/issues/11563
|
|
3
|
-
import log4js from 'log4js';
|
|
4
1
|
import {parentPort} from 'node:worker_threads';
|
|
5
|
-
import type {
|
|
2
|
+
import type {CategoryLogger, LogType} from './types.js';
|
|
3
|
+
import type {LogWorkerMessage} from '../downloader/worker-type.js';
|
|
6
4
|
import {WorkerMessageType} from '../downloader/types.js';
|
|
7
5
|
|
|
8
|
-
const getLogger = log4js.getLogger;
|
|
9
|
-
|
|
10
6
|
export const logLevels = [
|
|
11
|
-
'trace', 'debug', 'info', 'warn', 'error'
|
|
7
|
+
'trace', 'debug', 'info', 'warn', 'error'
|
|
12
8
|
] as const;
|
|
13
9
|
|
|
14
|
-
export function
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
content
|
|
25
|
-
}
|
|
26
|
-
};
|
|
27
|
-
parentPort?.postMessage(msg);
|
|
10
|
+
export function createWorkerCategoryLogger(type: LogType): CategoryLogger {
|
|
11
|
+
function send<T>(level: typeof logLevels[number], content: T[]): void {
|
|
12
|
+
const msg: LogWorkerMessage<T> = {
|
|
13
|
+
taskId: -1,
|
|
14
|
+
type: WorkerMessageType.Log,
|
|
15
|
+
body: {
|
|
16
|
+
logType: type,
|
|
17
|
+
level,
|
|
18
|
+
content
|
|
19
|
+
}
|
|
28
20
|
};
|
|
29
|
-
|
|
30
|
-
|
|
21
|
+
parentPort?.postMessage(msg);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
return {
|
|
25
|
+
trace(...content: unknown[]) { send('trace', content); },
|
|
26
|
+
debug(...content: unknown[]) { send('debug', content); },
|
|
27
|
+
info(...content: unknown[]) { send('info', content); },
|
|
28
|
+
warn(...content: unknown[]) { send('warn', content); },
|
|
29
|
+
error(...content: unknown[]) { send('error', content); },
|
|
30
|
+
isTraceEnabled() { return false; },
|
|
31
|
+
};
|
|
31
32
|
}
|
package/src/logger/logger.ts
CHANGED
|
@@ -1,19 +1,39 @@
|
|
|
1
|
-
import type {Logger} from 'log4js';
|
|
2
|
-
// https://github.com/jestjs/jest/issues/11563
|
|
3
|
-
import log4js from 'log4js';
|
|
4
1
|
import {isMainThread} from 'node:worker_threads';
|
|
5
|
-
import {
|
|
2
|
+
import type {CategoryLogger, Logger, LogType} from './types.js';
|
|
3
|
+
import {createDefaultLogger} from './default-logger.js';
|
|
4
|
+
import {createWorkerCategoryLogger} from './logger-worker.js';
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
isMainThread ? log4js.getLogger : getWorkerLogger;
|
|
6
|
+
let _logger: Logger = createDefaultLogger();
|
|
9
7
|
|
|
10
|
-
export
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
export
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
8
|
+
export function setLogger(logger: Logger): void {
|
|
9
|
+
_logger = logger;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export function getLogger(): Logger {
|
|
13
|
+
return _logger;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
function createCategoryProxy(type: LogType): CategoryLogger {
|
|
17
|
+
if (!isMainThread) {
|
|
18
|
+
return createWorkerCategoryLogger(type);
|
|
19
|
+
}
|
|
20
|
+
return {
|
|
21
|
+
trace(...contents: unknown[]) { _logger.trace(type, ...contents); },
|
|
22
|
+
debug(...contents: unknown[]) { _logger.debug(type, ...contents); },
|
|
23
|
+
info(...contents: unknown[]) { _logger.info(type, ...contents); },
|
|
24
|
+
warn(...contents: unknown[]) { _logger.warn(type, ...contents); },
|
|
25
|
+
error(...contents: unknown[]) { _logger.error(type, ...contents); },
|
|
26
|
+
isTraceEnabled() { return _logger.isTraceEnabled(); },
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
export const notFound: CategoryLogger = createCategoryProxy('io.http.notFound');
|
|
31
|
+
export const retry: CategoryLogger = createCategoryProxy('io.http.retry');
|
|
32
|
+
export const mkdir: CategoryLogger = createCategoryProxy('io.disk.mkdir');
|
|
33
|
+
export const request: CategoryLogger = createCategoryProxy('io.http.request');
|
|
34
|
+
export const response: CategoryLogger = createCategoryProxy('io.http.response');
|
|
35
|
+
export const error: CategoryLogger = createCategoryProxy('system.error');
|
|
36
|
+
export const complete: CategoryLogger = createCategoryProxy('system.complete');
|
|
37
|
+
export const skip: CategoryLogger = createCategoryProxy('system.skip');
|
|
38
|
+
export const skipExternal: CategoryLogger = createCategoryProxy('system.skipExternal');
|
|
39
|
+
export const adjustConcurrency: CategoryLogger = createCategoryProxy('system.adjustConcurrency');
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
export type LogType =
|
|
2
|
+
| 'io.http.request'
|
|
3
|
+
| 'io.http.response'
|
|
4
|
+
| 'io.http.notFound'
|
|
5
|
+
| 'io.http.retry'
|
|
6
|
+
| 'io.disk.mkdir'
|
|
7
|
+
| 'system.skip'
|
|
8
|
+
| 'system.skipExternal'
|
|
9
|
+
| 'system.complete'
|
|
10
|
+
| 'system.adjustConcurrency'
|
|
11
|
+
| 'system.error'
|
|
12
|
+
| `custom.${string}`;
|
|
13
|
+
|
|
14
|
+
export interface Logger {
|
|
15
|
+
trace(type: LogType, ...contents: unknown[]): void;
|
|
16
|
+
debug(type: LogType, ...contents: unknown[]): void;
|
|
17
|
+
info(type: LogType, ...contents: unknown[]): void;
|
|
18
|
+
warn(type: LogType, ...contents: unknown[]): void;
|
|
19
|
+
error(type: LogType, ...contents: unknown[]): void;
|
|
20
|
+
isTraceEnabled(): boolean;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Logger proxy for a specific category.
|
|
25
|
+
* Consumer code calls methods without a LogType argument;
|
|
26
|
+
* the proxy prepends the appropriate LogType automatically.
|
|
27
|
+
*/
|
|
28
|
+
export interface CategoryLogger {
|
|
29
|
+
trace(...contents: unknown[]): void;
|
|
30
|
+
debug(...contents: unknown[]): void;
|
|
31
|
+
info(...contents: unknown[]): void;
|
|
32
|
+
warn(...contents: unknown[]): void;
|
|
33
|
+
error(...contents: unknown[]): void;
|
|
34
|
+
isTraceEnabled(): boolean;
|
|
35
|
+
}
|
package/src/options.ts
CHANGED
|
@@ -8,7 +8,8 @@ import {beforeRetryHook} from './life-cycle/download-resource.js';
|
|
|
8
8
|
import {error} from './logger/logger.js';
|
|
9
9
|
// noinspection ES6PreferShortImport
|
|
10
10
|
import {adjust} from './downloader/adjust-concurrency.js';
|
|
11
|
-
import {
|
|
11
|
+
import type {Logger} from './logger/types.js';
|
|
12
|
+
import {createDefaultLogger} from './logger/default-logger.js';
|
|
12
13
|
import type {DownloaderWithMeta} from './downloader/types.js';
|
|
13
14
|
import {weakAssign} from './util.js';
|
|
14
15
|
import type {SourceDefinition} from './sources.js';
|
|
@@ -112,7 +113,7 @@ export interface StaticDownloadOptions {
|
|
|
112
113
|
initialUrl?: string[];
|
|
113
114
|
|
|
114
115
|
/**
|
|
115
|
-
* @see DownloadOptions.
|
|
116
|
+
* @see DownloadOptions.createLogger
|
|
116
117
|
*/
|
|
117
118
|
logSubDir?: string;
|
|
118
119
|
|
|
@@ -162,9 +163,10 @@ export interface DownloadOptions extends StaticDownloadOptions, ProcessingLifeCy
|
|
|
162
163
|
adjustConcurrencyFunc?: (downloader: DownloaderWithMeta) => void;
|
|
163
164
|
|
|
164
165
|
/**
|
|
165
|
-
* Use a custom function to
|
|
166
|
+
* Use a custom function to create a logger instance.
|
|
167
|
+
* Defaults to {@link createDefaultLogger} (console-based).
|
|
166
168
|
*/
|
|
167
|
-
|
|
169
|
+
createLogger?: (options: StaticDownloadOptions) => Logger;
|
|
168
170
|
}
|
|
169
171
|
|
|
170
172
|
export type ExtendedError = (TimeoutError | RequestError) & {
|
|
@@ -275,7 +277,7 @@ const defaultOptions: DownloadOptions = {
|
|
|
275
277
|
init: [],
|
|
276
278
|
dispose: [],
|
|
277
279
|
concurrency: 12,
|
|
278
|
-
|
|
280
|
+
createLogger: createDefaultLogger,
|
|
279
281
|
createResource,
|
|
280
282
|
detectResourceType: [],
|
|
281
283
|
download: [],
|
|
@@ -285,13 +287,15 @@ const defaultOptions: DownloadOptions = {
|
|
|
285
287
|
localRoot: '',
|
|
286
288
|
maxDepth: 1,
|
|
287
289
|
meta: {
|
|
288
|
-
detectIncompleteHtml: '</html>'
|
|
290
|
+
detectIncompleteHtml: '</html>',
|
|
291
|
+
warnForNonHtml: true
|
|
289
292
|
},
|
|
290
293
|
processAfterDownload: [],
|
|
291
294
|
processBeforeDownload: [],
|
|
292
295
|
req: {},
|
|
293
296
|
saveToDisk: [],
|
|
294
|
-
deduplicateStripSearch: true
|
|
297
|
+
deduplicateStripSearch: true,
|
|
298
|
+
statusChange: []
|
|
295
299
|
};
|
|
296
300
|
|
|
297
301
|
export function defaultDownloadOptions(
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"config-logger.d.ts","sourceRoot":"","sources":["../../src/logger/config-logger.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,QAAQ,CAAC;AAKnC,eAAO,MAAM,eAAe,GAAI,WAAW,MAAM,EAAE,QAAQ,MAAM,KAAG,MAyFhE,CAAC"}
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
// https://github.com/jestjs/jest/issues/11563
|
|
2
|
-
import log4js from 'log4js';
|
|
3
|
-
import * as path from 'node:path';
|
|
4
|
-
export const configureLogger = (localRoot, subDir) => log4js.configure({
|
|
5
|
-
appenders: {
|
|
6
|
-
'retry': {
|
|
7
|
-
type: 'file',
|
|
8
|
-
filename: path.join(localRoot, subDir, 'logs', 'retry.log')
|
|
9
|
-
},
|
|
10
|
-
'mkdir': {
|
|
11
|
-
type: 'file',
|
|
12
|
-
filename: path.join(localRoot, subDir, 'logs', 'mkdir.log')
|
|
13
|
-
},
|
|
14
|
-
'error': {
|
|
15
|
-
type: 'file',
|
|
16
|
-
filename: path.join(localRoot, subDir, 'logs', 'error.log')
|
|
17
|
-
},
|
|
18
|
-
'skip': {
|
|
19
|
-
type: 'file',
|
|
20
|
-
filename: path.join(localRoot, subDir, 'logs', 'skip.log')
|
|
21
|
-
},
|
|
22
|
-
'404': {
|
|
23
|
-
type: 'file',
|
|
24
|
-
filename: path.join(localRoot, subDir, 'logs', '404.log')
|
|
25
|
-
},
|
|
26
|
-
'complete': {
|
|
27
|
-
type: 'file',
|
|
28
|
-
filename: path.join(localRoot, subDir, 'logs', 'complete.log')
|
|
29
|
-
},
|
|
30
|
-
'request': {
|
|
31
|
-
type: 'file',
|
|
32
|
-
filename: path.join(localRoot, subDir, 'logs', 'request.log')
|
|
33
|
-
},
|
|
34
|
-
'response': {
|
|
35
|
-
type: 'file',
|
|
36
|
-
filename: path.join(localRoot, subDir, 'logs', 'response.log')
|
|
37
|
-
},
|
|
38
|
-
'stdout': {
|
|
39
|
-
type: 'stdout'
|
|
40
|
-
},
|
|
41
|
-
'stderr': {
|
|
42
|
-
type: 'stderr'
|
|
43
|
-
}
|
|
44
|
-
},
|
|
45
|
-
categories: {
|
|
46
|
-
'retry': {
|
|
47
|
-
appenders: ['stdout', 'retry'],
|
|
48
|
-
level: 'debug'
|
|
49
|
-
},
|
|
50
|
-
'mkdir': {
|
|
51
|
-
appenders: ['mkdir'],
|
|
52
|
-
level: 'debug'
|
|
53
|
-
},
|
|
54
|
-
'error': {
|
|
55
|
-
appenders: ['stderr', 'error'],
|
|
56
|
-
level: 'debug'
|
|
57
|
-
},
|
|
58
|
-
'skip': {
|
|
59
|
-
appenders: ['stdout', 'skip'],
|
|
60
|
-
level: 'debug'
|
|
61
|
-
},
|
|
62
|
-
'skipExternal': {
|
|
63
|
-
appenders: ['skip'],
|
|
64
|
-
level: 'debug'
|
|
65
|
-
},
|
|
66
|
-
'notFound': {
|
|
67
|
-
appenders: ['404'],
|
|
68
|
-
level: 'debug'
|
|
69
|
-
},
|
|
70
|
-
'complete': {
|
|
71
|
-
appenders: ['complete'],
|
|
72
|
-
level: 'debug'
|
|
73
|
-
},
|
|
74
|
-
'request': {
|
|
75
|
-
appenders: ['request'],
|
|
76
|
-
level: 'debug'
|
|
77
|
-
},
|
|
78
|
-
'response': {
|
|
79
|
-
appenders: ['response'],
|
|
80
|
-
level: 'debug'
|
|
81
|
-
},
|
|
82
|
-
'adjustConcurrency': {
|
|
83
|
-
appenders: ['stdout', 'complete'],
|
|
84
|
-
level: 'debug'
|
|
85
|
-
},
|
|
86
|
-
'default': {
|
|
87
|
-
appenders: ['stdout', 'complete'],
|
|
88
|
-
level: 'debug'
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
});
|
|
92
|
-
//# sourceMappingURL=config-logger.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"config-logger.js","sourceRoot":"","sources":["../../src/logger/config-logger.ts"],"names":[],"mappings":"AACA,8CAA8C;AAC9C,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,SAAiB,EAAE,MAAc,EAAU,EAAE,CAC3E,MAAM,CAAC,SAAS,CAAC;IACf,SAAS,EAAE;QACT,OAAO,EAAE;YACP,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC;SAC5D;QACD,OAAO,EAAE;YACP,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC;SAC5D;QACD,OAAO,EAAE;YACP,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC;SAC5D;QACD,MAAM,EAAE;YACN,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC;SAC3D;QACD,KAAK,EAAE;YACL,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,CAAC;SAC1D;QACD,UAAU,EAAE;YACV,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC;SAC/D;QACD,SAAS,EAAE;YACT,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,CAAC;SAC9D;QACD,UAAU,EAAE;YACV,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC;SAC/D;QACD,QAAQ,EAAE;YACR,IAAI,EAAE,QAAQ;SACf;QACD,QAAQ,EAAE;YACR,IAAI,EAAE,QAAQ;SACf;KACF;IAED,UAAU,EAAE;QACV,OAAO,EAAE;YACP,SAAS,EAAE,CAAC,QAAQ,EAAE,OAAO,CAAC;YAC9B,KAAK,EAAE,OAAO;SACf;QACD,OAAO,EAAE;YACP,SAAS,EAAE,CAAC,OAAO,CAAC;YACpB,KAAK,EAAE,OAAO;SACf;QACD,OAAO,EAAE;YACP,SAAS,EAAE,CAAC,QAAQ,EAAE,OAAO,CAAC;YAC9B,KAAK,EAAE,OAAO;SACf;QACD,MAAM,EAAE;YACN,SAAS,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC;YAC7B,KAAK,EAAE,OAAO;SACf;QACD,cAAc,EAAE;YACd,SAAS,EAAE,CAAC,MAAM,CAAC;YACnB,KAAK,EAAE,OAAO;SACf;QACD,UAAU,EAAE;YACV,SAAS,EAAE,CAAC,KAAK,CAAC;YAClB,KAAK,EAAE,OAAO;SACf;QACD,UAAU,EAAE;YACV,SAAS,EAAE,CAAC,UAAU,CAAC;YACvB,KAAK,EAAE,OAAO;SACf;QACD,SAAS,EAAE;YACT,SAAS,EAAE,CAAC,SAAS,CAAC;YACtB,KAAK,EAAE,OAAO;SACf;QACD,UAAU,EAAE;YACV,SAAS,EAAE,CAAC,UAAU,CAAC;YACvB,KAAK,EAAE,OAAO;SACf;QACD,mBAAmB,EAAE;YACnB,SAAS,EAAE,CAAC,QAAQ,EAAE,UAAU,CAAC;YACjC,KAAK,EAAE,OAAO;SACf;QACD,SAAS,EAAE;YACT,SAAS,EAAE,CAAC,QAAQ,EAAE,UAAU,CAAC;YACjC,KAAK,EAAE,OAAO;SACf;KACF;CACF,CAAC,CAAC"}
|