website-scrap-engine 0.8.7 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/README.md +245 -39
  2. package/lib/downloader/main.d.ts.map +1 -1
  3. package/lib/downloader/main.js +15 -16
  4. package/lib/downloader/main.js.map +1 -1
  5. package/lib/downloader/multi.d.ts.map +1 -1
  6. package/lib/downloader/multi.js +3 -4
  7. package/lib/downloader/multi.js.map +1 -1
  8. package/lib/downloader/pipeline-executor-impl.d.ts +6 -3
  9. package/lib/downloader/pipeline-executor-impl.d.ts.map +1 -1
  10. package/lib/downloader/pipeline-executor-impl.js +86 -2
  11. package/lib/downloader/pipeline-executor-impl.js.map +1 -1
  12. package/lib/downloader/single.d.ts.map +1 -1
  13. package/lib/downloader/single.js +4 -5
  14. package/lib/downloader/single.js.map +1 -1
  15. package/lib/downloader/worker-pool.d.ts.map +1 -1
  16. package/lib/downloader/worker-pool.js +12 -7
  17. package/lib/downloader/worker-pool.js.map +1 -1
  18. package/lib/downloader/worker-type.d.ts +2 -2
  19. package/lib/downloader/worker-type.d.ts.map +1 -1
  20. package/lib/downloader/worker.js +2 -4
  21. package/lib/downloader/worker.js.map +1 -1
  22. package/lib/index.d.ts +2 -0
  23. package/lib/index.d.ts.map +1 -1
  24. package/lib/index.js +1 -0
  25. package/lib/index.js.map +1 -1
  26. package/lib/life-cycle/adapters.d.ts +7 -1
  27. package/lib/life-cycle/adapters.d.ts.map +1 -1
  28. package/lib/life-cycle/adapters.js +6 -0
  29. package/lib/life-cycle/adapters.js.map +1 -1
  30. package/lib/life-cycle/default-life-cycle.d.ts.map +1 -1
  31. package/lib/life-cycle/default-life-cycle.js +3 -1
  32. package/lib/life-cycle/default-life-cycle.js.map +1 -1
  33. package/lib/life-cycle/default-status-listener.d.ts +4 -0
  34. package/lib/life-cycle/default-status-listener.d.ts.map +1 -0
  35. package/lib/life-cycle/default-status-listener.js +38 -0
  36. package/lib/life-cycle/default-status-listener.js.map +1 -0
  37. package/lib/life-cycle/download-resource.d.ts.map +1 -1
  38. package/lib/life-cycle/download-resource.js +13 -3
  39. package/lib/life-cycle/download-resource.js.map +1 -1
  40. package/lib/life-cycle/index.d.ts +1 -0
  41. package/lib/life-cycle/index.d.ts.map +1 -1
  42. package/lib/life-cycle/index.js +1 -0
  43. package/lib/life-cycle/index.js.map +1 -1
  44. package/lib/life-cycle/pipeline-executor.d.ts +10 -3
  45. package/lib/life-cycle/pipeline-executor.d.ts.map +1 -1
  46. package/lib/life-cycle/types.d.ts +56 -4
  47. package/lib/life-cycle/types.d.ts.map +1 -1
  48. package/lib/logger/default-logger.d.ts +3 -0
  49. package/lib/logger/default-logger.d.ts.map +1 -0
  50. package/lib/logger/default-logger.js +11 -0
  51. package/lib/logger/default-logger.js.map +1 -0
  52. package/lib/logger/log4js-adapter.d.ts +3 -0
  53. package/lib/logger/log4js-adapter.d.ts.map +1 -0
  54. package/lib/logger/log4js-adapter.js +143 -0
  55. package/lib/logger/log4js-adapter.js.map +1 -0
  56. package/lib/logger/logger-worker.d.ts +3 -4
  57. package/lib/logger/logger-worker.d.ts.map +1 -1
  58. package/lib/logger/logger-worker.js +21 -20
  59. package/lib/logger/logger-worker.js.map +1 -1
  60. package/lib/logger/logger.d.ts +13 -11
  61. package/lib/logger/logger.d.ts.map +1 -1
  62. package/lib/logger/logger.js +32 -14
  63. package/lib/logger/logger.js.map +1 -1
  64. package/lib/logger/types.d.ts +23 -0
  65. package/lib/logger/types.d.ts.map +1 -0
  66. package/lib/logger/types.js +2 -0
  67. package/lib/logger/types.js.map +1 -0
  68. package/lib/options.d.ts +5 -4
  69. package/lib/options.d.ts.map +1 -1
  70. package/lib/options.js +6 -4
  71. package/lib/options.js.map +1 -1
  72. package/package.json +7 -5
  73. package/src/downloader/main.ts +15 -14
  74. package/src/downloader/multi.ts +3 -5
  75. package/src/downloader/pipeline-executor-impl.ts +98 -2
  76. package/src/downloader/single.ts +4 -5
  77. package/src/downloader/worker-pool.ts +12 -6
  78. package/src/downloader/worker-type.ts +2 -2
  79. package/src/downloader/worker.ts +2 -7
  80. package/src/index.ts +2 -0
  81. package/src/life-cycle/adapters.ts +13 -0
  82. package/src/life-cycle/default-life-cycle.ts +3 -1
  83. package/src/life-cycle/default-status-listener.ts +40 -0
  84. package/src/life-cycle/download-resource.ts +13 -4
  85. package/src/life-cycle/index.ts +1 -0
  86. package/src/life-cycle/pipeline-executor.ts +16 -2
  87. package/src/life-cycle/types.ts +79 -3
  88. package/src/logger/default-logger.ts +12 -0
  89. package/src/logger/log4js-adapter.ts +147 -0
  90. package/src/logger/logger-worker.ts +24 -23
  91. package/src/logger/logger.ts +36 -16
  92. package/src/logger/types.ts +35 -0
  93. package/src/options.ts +11 -7
  94. package/lib/logger/config-logger.d.ts +0 -3
  95. package/lib/logger/config-logger.d.ts.map +0 -1
  96. package/lib/logger/config-logger.js +0 -92
  97. package/lib/logger/config-logger.js.map +0 -1
  98. package/src/logger/config-logger.ts +0 -95
@@ -1,7 +1,9 @@
1
1
  import type {OptionsInit as GotOptions} from 'got';
2
+ import type {Stats} from 'node:fs';
2
3
  import type {
3
4
  createResource,
4
5
  GenerateSavePathFn,
6
+ RawResource,
5
7
  Resource,
6
8
  ResourceBody,
7
9
  ResourceType
@@ -14,15 +16,43 @@ import type {WorkerInfo} from '../downloader/worker-pool.js';
14
16
 
15
17
  export type AsyncResult<T> = T | Promise<T>;
16
18
 
19
+ export type ResourceStatus =
20
+ | 'createResource'
21
+ | 'processBeforeDownload'
22
+ | 'download'
23
+ | 'processAfterDownload'
24
+ | 'saveToDisk'
25
+ | 'error'
26
+ | 'dispose';
27
+
28
+ export interface StatusChangeFunc {
29
+ (res: Resource | RawResource,
30
+ status: ResourceStatus,
31
+ options: StaticDownloadOptions,
32
+ pipeline: PipelineExecutor): void | Promise<void>;
33
+ }
34
+
35
+ export interface InitSubmitFunc {
36
+ /**
37
+ * Submit a raw URL to be processed through the full pipeline
38
+ * after all init hooks complete.
39
+ *
40
+ * This is fire-and-forget: the URL is appended to the initial URL list
41
+ * and processed identically to entries in {@link DownloadOptions.initialUrl}.
42
+ * No pipeline stages run during the call.
43
+ */
44
+ (url: string): void;
45
+ }
46
+
17
47
  export interface InitLifeCycleFunc {
18
48
  /**
19
49
  * The init life cycle would be called:
20
50
  *
21
51
  * Each time a downloader initialized,
22
- * after configureLogger but before addInitialResource.
52
+ * after logger initialization but before addInitialResource.
23
53
  *
24
54
  * Each time a worker initialized,
25
- * after configureLogger but before parentPort?.addListener.
55
+ * before parentPort?.addListener.
26
56
  *
27
57
  * The init life cycle could be async,
28
58
  * in main thread the addInitialResource should wait for init completed,
@@ -31,8 +61,12 @@ export interface InitLifeCycleFunc {
31
61
  *
32
62
  * @param pipeline the PipelineExecutor
33
63
  * @param downloader the DownloaderWithMeta when in main thread
64
+ * @param submit function to submit URLs for processing after init;
65
+ * undefined in worker threads
34
66
  */
35
- (pipeline: PipelineExecutor, downloader?: DownloaderWithMeta): AsyncResult<void>;
67
+ (pipeline: PipelineExecutor,
68
+ downloader?: DownloaderWithMeta,
69
+ submit?: InitSubmitFunc): AsyncResult<void>;
36
70
  }
37
71
 
38
72
  export interface LinkRedirectFunc {
@@ -190,5 +224,47 @@ export interface ProcessingLifeCycle {
190
224
  processAfterDownload: ProcessResourceAfterDownloadFunc[];
191
225
  saveToDisk: SaveToDiskFunc[];
192
226
  dispose: DisposeLifeCycle[];
227
+ /**
228
+ * Status change listeners.
229
+ *
230
+ * Unlike other life cycle hooks, all listeners always run
231
+ * (returning void does not short-circuit), and thrown errors are swallowed.
232
+ *
233
+ * Listeners should treat the resource and all parameters as readonly.
234
+ * Mutating them is undefined behavior.
235
+ */
236
+ statusChange: StatusChangeFunc[];
237
+ /**
238
+ * Optional callback to decide what to do when a local file already exists
239
+ * for a resource. Called at most twice per resource: once before download
240
+ * and once before save.
241
+ *
242
+ * Unlike other life cycle hooks, this is a single function (not an array)
243
+ * because the decision is mutually exclusive — there is no meaningful way
244
+ * to compose multiple strategies.
245
+ *
246
+ * If absent, the pipeline behaves as before (always download and overwrite).
247
+ */
248
+ existingResource?: ExistingResourceFunc;
249
+ }
250
+
251
+ export type ExistingResourceAction =
252
+ | 'skip'
253
+ | 'overwrite'
254
+ | 'ifModifiedSince'
255
+ | 'skipSave';
256
+
257
+ export type ExistingResourceStage = 'download' | 'saveToDisk';
258
+
259
+ export interface ExistingResourceContext {
260
+ res: Resource;
261
+ stage: ExistingResourceStage;
262
+ localPath: string;
263
+ stat: Stats;
264
+ options: StaticDownloadOptions;
265
+ }
266
+
267
+ export interface ExistingResourceFunc {
268
+ (ctx: ExistingResourceContext): ExistingResourceAction;
193
269
  }
194
270
 
@@ -0,0 +1,12 @@
1
+ import type {Logger} from './types.js';
2
+
3
+ export function createDefaultLogger(): Logger {
4
+ return {
5
+ trace() { /* no-op */ },
6
+ debug(type, ...contents) { console.debug(`[${type}]`, ...contents); },
7
+ info(type, ...contents) { console.info(`[${type}]`, ...contents); },
8
+ warn(type, ...contents) { console.warn(`[${type}]`, ...contents); },
9
+ error(type, ...contents) { console.error(`[${type}]`, ...contents); },
10
+ isTraceEnabled() { return false; },
11
+ };
12
+ }
@@ -0,0 +1,147 @@
1
+ import * as path from 'node:path';
2
+ import log4js from 'log4js';
3
+ import type {Logger, LogType} from './types.js';
4
+
5
+ const typeToCategory: Partial<Record<LogType, string>> = {
6
+ 'io.http.request': 'request',
7
+ 'io.http.response': 'response',
8
+ 'io.http.notFound': 'notFound',
9
+ 'io.http.retry': 'retry',
10
+ 'io.disk.mkdir': 'mkdir',
11
+ 'system.skip': 'skip',
12
+ 'system.skipExternal': 'skipExternal',
13
+ 'system.complete': 'complete',
14
+ 'system.adjustConcurrency': 'adjustConcurrency',
15
+ 'system.error': 'error',
16
+ };
17
+
18
+ export function createLog4jsLogger(
19
+ localRoot: string,
20
+ logSubDir?: string
21
+ ): Logger {
22
+ const logDir = path.join(localRoot, logSubDir || '', 'logs');
23
+ log4js.configure({
24
+ appenders: {
25
+ 'retry': {
26
+ type: 'file',
27
+ filename: path.join(logDir, 'retry.log')
28
+ },
29
+ 'mkdir': {
30
+ type: 'file',
31
+ filename: path.join(logDir, 'mkdir.log')
32
+ },
33
+ 'error': {
34
+ type: 'file',
35
+ filename: path.join(logDir, 'error.log')
36
+ },
37
+ 'skip': {
38
+ type: 'file',
39
+ filename: path.join(logDir, 'skip.log')
40
+ },
41
+ '404': {
42
+ type: 'file',
43
+ filename: path.join(logDir, '404.log')
44
+ },
45
+ 'complete': {
46
+ type: 'file',
47
+ filename: path.join(logDir, 'complete.log')
48
+ },
49
+ 'request': {
50
+ type: 'file',
51
+ filename: path.join(logDir, 'request.log')
52
+ },
53
+ 'response': {
54
+ type: 'file',
55
+ filename: path.join(logDir, 'response.log')
56
+ },
57
+ 'stdout': {
58
+ type: 'stdout'
59
+ },
60
+ 'stderr': {
61
+ type: 'stderr'
62
+ }
63
+ },
64
+ categories: {
65
+ 'retry': {
66
+ appenders: ['stdout', 'retry'],
67
+ level: 'debug'
68
+ },
69
+ 'mkdir': {
70
+ appenders: ['mkdir'],
71
+ level: 'debug'
72
+ },
73
+ 'error': {
74
+ appenders: ['stderr', 'error'],
75
+ level: 'debug'
76
+ },
77
+ 'skip': {
78
+ appenders: ['stdout', 'skip'],
79
+ level: 'debug'
80
+ },
81
+ 'skipExternal': {
82
+ appenders: ['skip'],
83
+ level: 'debug'
84
+ },
85
+ 'notFound': {
86
+ appenders: ['404'],
87
+ level: 'debug'
88
+ },
89
+ 'complete': {
90
+ appenders: ['complete'],
91
+ level: 'debug'
92
+ },
93
+ 'request': {
94
+ appenders: ['request'],
95
+ level: 'debug'
96
+ },
97
+ 'response': {
98
+ appenders: ['response'],
99
+ level: 'debug'
100
+ },
101
+ 'adjustConcurrency': {
102
+ appenders: ['stdout', 'complete'],
103
+ level: 'debug'
104
+ },
105
+ 'default': {
106
+ appenders: ['stdout', 'complete'],
107
+ level: 'debug'
108
+ }
109
+ }
110
+ });
111
+
112
+ const loggers = new Map<string, log4js.Logger>();
113
+ const getOrCreate = (category: string): log4js.Logger => {
114
+ let l = loggers.get(category);
115
+ if (!l) {
116
+ l = log4js.getLogger(category);
117
+ loggers.set(category, l);
118
+ }
119
+ return l;
120
+ };
121
+
122
+ /* eslint-disable @typescript-eslint/no-explicit-any */
123
+ const spread = (contents: unknown[]): [any, ...any[]] =>
124
+ contents as [any, ...any[]];
125
+ /* eslint-enable @typescript-eslint/no-explicit-any */
126
+
127
+ return {
128
+ trace(type, ...contents) {
129
+ getOrCreate(typeToCategory[type] ?? 'default').trace(...spread(contents));
130
+ },
131
+ debug(type, ...contents) {
132
+ getOrCreate(typeToCategory[type] ?? 'default').debug(...spread(contents));
133
+ },
134
+ info(type, ...contents) {
135
+ getOrCreate(typeToCategory[type] ?? 'default').info(...spread(contents));
136
+ },
137
+ warn(type, ...contents) {
138
+ getOrCreate(typeToCategory[type] ?? 'default').warn(...spread(contents));
139
+ },
140
+ error(type, ...contents) {
141
+ getOrCreate(typeToCategory[type] ?? 'default').error(...spread(contents));
142
+ },
143
+ isTraceEnabled() {
144
+ return log4js.getLogger().isTraceEnabled();
145
+ },
146
+ };
147
+ }
@@ -1,31 +1,32 @@
1
- import type {Logger} from 'log4js';
2
- // https://github.com/jestjs/jest/issues/11563
3
- import log4js from 'log4js';
4
1
  import {parentPort} from 'node:worker_threads';
5
- import type {LogWorkerMessage, WorkerLog} from '../downloader/worker-type.js';
2
+ import type {CategoryLogger, LogType} from './types.js';
3
+ import type {LogWorkerMessage} from '../downloader/worker-type.js';
6
4
  import {WorkerMessageType} from '../downloader/types.js';
7
5
 
8
- const getLogger = log4js.getLogger;
9
-
10
6
  export const logLevels = [
11
- 'trace', 'debug', 'info', 'warn', 'error', 'fatal', 'mark'
7
+ 'trace', 'debug', 'info', 'warn', 'error'
12
8
  ] as const;
13
9
 
14
- export function getWorkerLogger(category: WorkerLog['logger']): Logger {
15
- const logger: Logger = getLogger(category);
16
- logLevels.forEach((level: typeof logLevels[number]) => {
17
- logger[level] = <T>(...content: T[]) => {
18
- const msg: LogWorkerMessage<T> = {
19
- taskId: -1,
20
- type: WorkerMessageType.Log,
21
- body: {
22
- level,
23
- logger: category,
24
- content
25
- }
26
- };
27
- parentPort?.postMessage(msg);
10
+ export function createWorkerCategoryLogger(type: LogType): CategoryLogger {
11
+ function send<T>(level: typeof logLevels[number], content: T[]): void {
12
+ const msg: LogWorkerMessage<T> = {
13
+ taskId: -1,
14
+ type: WorkerMessageType.Log,
15
+ body: {
16
+ logType: type,
17
+ level,
18
+ content
19
+ }
28
20
  };
29
- });
30
- return logger;
21
+ parentPort?.postMessage(msg);
22
+ }
23
+
24
+ return {
25
+ trace(...content: unknown[]) { send('trace', content); },
26
+ debug(...content: unknown[]) { send('debug', content); },
27
+ info(...content: unknown[]) { send('info', content); },
28
+ warn(...content: unknown[]) { send('warn', content); },
29
+ error(...content: unknown[]) { send('error', content); },
30
+ isTraceEnabled() { return false; },
31
+ };
31
32
  }
@@ -1,19 +1,39 @@
1
- import type {Logger} from 'log4js';
2
- // https://github.com/jestjs/jest/issues/11563
3
- import log4js from 'log4js';
4
1
  import {isMainThread} from 'node:worker_threads';
5
- import {getWorkerLogger} from './logger-worker.js';
2
+ import type {CategoryLogger, Logger, LogType} from './types.js';
3
+ import {createDefaultLogger} from './default-logger.js';
4
+ import {createWorkerCategoryLogger} from './logger-worker.js';
6
5
 
7
- const getLogger: typeof getWorkerLogger =
8
- isMainThread ? log4js.getLogger : getWorkerLogger;
6
+ let _logger: Logger = createDefaultLogger();
9
7
 
10
- export const notFound: Logger = getLogger('notFound');
11
- export const retry: Logger = getLogger('retry');
12
- export const mkdir: Logger = getLogger('mkdir');
13
- export const request: Logger = getLogger('request');
14
- export const response: Logger = getLogger('response');
15
- export const error: Logger = getLogger('error');
16
- export const complete: Logger = getLogger('complete');
17
- export const skip: Logger = getLogger('skip');
18
- export const skipExternal: Logger = getLogger('skipExternal');
19
- export const adjustConcurrency: Logger = getLogger('adjustConcurrency');
8
+ export function setLogger(logger: Logger): void {
9
+ _logger = logger;
10
+ }
11
+
12
+ export function getLogger(): Logger {
13
+ return _logger;
14
+ }
15
+
16
+ function createCategoryProxy(type: LogType): CategoryLogger {
17
+ if (!isMainThread) {
18
+ return createWorkerCategoryLogger(type);
19
+ }
20
+ return {
21
+ trace(...contents: unknown[]) { _logger.trace(type, ...contents); },
22
+ debug(...contents: unknown[]) { _logger.debug(type, ...contents); },
23
+ info(...contents: unknown[]) { _logger.info(type, ...contents); },
24
+ warn(...contents: unknown[]) { _logger.warn(type, ...contents); },
25
+ error(...contents: unknown[]) { _logger.error(type, ...contents); },
26
+ isTraceEnabled() { return _logger.isTraceEnabled(); },
27
+ };
28
+ }
29
+
30
+ export const notFound: CategoryLogger = createCategoryProxy('io.http.notFound');
31
+ export const retry: CategoryLogger = createCategoryProxy('io.http.retry');
32
+ export const mkdir: CategoryLogger = createCategoryProxy('io.disk.mkdir');
33
+ export const request: CategoryLogger = createCategoryProxy('io.http.request');
34
+ export const response: CategoryLogger = createCategoryProxy('io.http.response');
35
+ export const error: CategoryLogger = createCategoryProxy('system.error');
36
+ export const complete: CategoryLogger = createCategoryProxy('system.complete');
37
+ export const skip: CategoryLogger = createCategoryProxy('system.skip');
38
+ export const skipExternal: CategoryLogger = createCategoryProxy('system.skipExternal');
39
+ export const adjustConcurrency: CategoryLogger = createCategoryProxy('system.adjustConcurrency');
@@ -0,0 +1,35 @@
1
+ export type LogType =
2
+ | 'io.http.request'
3
+ | 'io.http.response'
4
+ | 'io.http.notFound'
5
+ | 'io.http.retry'
6
+ | 'io.disk.mkdir'
7
+ | 'system.skip'
8
+ | 'system.skipExternal'
9
+ | 'system.complete'
10
+ | 'system.adjustConcurrency'
11
+ | 'system.error'
12
+ | `custom.${string}`;
13
+
14
+ export interface Logger {
15
+ trace(type: LogType, ...contents: unknown[]): void;
16
+ debug(type: LogType, ...contents: unknown[]): void;
17
+ info(type: LogType, ...contents: unknown[]): void;
18
+ warn(type: LogType, ...contents: unknown[]): void;
19
+ error(type: LogType, ...contents: unknown[]): void;
20
+ isTraceEnabled(): boolean;
21
+ }
22
+
23
+ /**
24
+ * Logger proxy for a specific category.
25
+ * Consumer code calls methods without a LogType argument;
26
+ * the proxy prepends the appropriate LogType automatically.
27
+ */
28
+ export interface CategoryLogger {
29
+ trace(...contents: unknown[]): void;
30
+ debug(...contents: unknown[]): void;
31
+ info(...contents: unknown[]): void;
32
+ warn(...contents: unknown[]): void;
33
+ error(...contents: unknown[]): void;
34
+ isTraceEnabled(): boolean;
35
+ }
package/src/options.ts CHANGED
@@ -8,7 +8,8 @@ import {beforeRetryHook} from './life-cycle/download-resource.js';
8
8
  import {error} from './logger/logger.js';
9
9
  // noinspection ES6PreferShortImport
10
10
  import {adjust} from './downloader/adjust-concurrency.js';
11
- import {configureLogger} from './logger/config-logger.js';
11
+ import type {Logger} from './logger/types.js';
12
+ import {createDefaultLogger} from './logger/default-logger.js';
12
13
  import type {DownloaderWithMeta} from './downloader/types.js';
13
14
  import {weakAssign} from './util.js';
14
15
  import type {SourceDefinition} from './sources.js';
@@ -112,7 +113,7 @@ export interface StaticDownloadOptions {
112
113
  initialUrl?: string[];
113
114
 
114
115
  /**
115
- * @see DownloadOptions.configureLogger
116
+ * @see DownloadOptions.createLogger
116
117
  */
117
118
  logSubDir?: string;
118
119
 
@@ -162,9 +163,10 @@ export interface DownloadOptions extends StaticDownloadOptions, ProcessingLifeCy
162
163
  adjustConcurrencyFunc?: (downloader: DownloaderWithMeta) => void;
163
164
 
164
165
  /**
165
- * Use a custom function to configure logger.
166
+ * Use a custom function to create a logger instance.
167
+ * Defaults to {@link createDefaultLogger} (console-based).
166
168
  */
167
- configureLogger: typeof configureLogger;
169
+ createLogger?: (options: StaticDownloadOptions) => Logger;
168
170
  }
169
171
 
170
172
  export type ExtendedError = (TimeoutError | RequestError) & {
@@ -275,7 +277,7 @@ const defaultOptions: DownloadOptions = {
275
277
  init: [],
276
278
  dispose: [],
277
279
  concurrency: 12,
278
- configureLogger,
280
+ createLogger: createDefaultLogger,
279
281
  createResource,
280
282
  detectResourceType: [],
281
283
  download: [],
@@ -285,13 +287,15 @@ const defaultOptions: DownloadOptions = {
285
287
  localRoot: '',
286
288
  maxDepth: 1,
287
289
  meta: {
288
- detectIncompleteHtml: '</html>'
290
+ detectIncompleteHtml: '</html>',
291
+ warnForNonHtml: true
289
292
  },
290
293
  processAfterDownload: [],
291
294
  processBeforeDownload: [],
292
295
  req: {},
293
296
  saveToDisk: [],
294
- deduplicateStripSearch: true
297
+ deduplicateStripSearch: true,
298
+ statusChange: []
295
299
  };
296
300
 
297
301
  export function defaultDownloadOptions(
@@ -1,3 +0,0 @@
1
- import type { Log4js } from 'log4js';
2
- export declare const configureLogger: (localRoot: string, subDir: string) => Log4js;
3
- //# sourceMappingURL=config-logger.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"config-logger.d.ts","sourceRoot":"","sources":["../../src/logger/config-logger.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,QAAQ,CAAC;AAKnC,eAAO,MAAM,eAAe,GAAI,WAAW,MAAM,EAAE,QAAQ,MAAM,KAAG,MAyFhE,CAAC"}
@@ -1,92 +0,0 @@
1
- // https://github.com/jestjs/jest/issues/11563
2
- import log4js from 'log4js';
3
- import * as path from 'node:path';
4
- export const configureLogger = (localRoot, subDir) => log4js.configure({
5
- appenders: {
6
- 'retry': {
7
- type: 'file',
8
- filename: path.join(localRoot, subDir, 'logs', 'retry.log')
9
- },
10
- 'mkdir': {
11
- type: 'file',
12
- filename: path.join(localRoot, subDir, 'logs', 'mkdir.log')
13
- },
14
- 'error': {
15
- type: 'file',
16
- filename: path.join(localRoot, subDir, 'logs', 'error.log')
17
- },
18
- 'skip': {
19
- type: 'file',
20
- filename: path.join(localRoot, subDir, 'logs', 'skip.log')
21
- },
22
- '404': {
23
- type: 'file',
24
- filename: path.join(localRoot, subDir, 'logs', '404.log')
25
- },
26
- 'complete': {
27
- type: 'file',
28
- filename: path.join(localRoot, subDir, 'logs', 'complete.log')
29
- },
30
- 'request': {
31
- type: 'file',
32
- filename: path.join(localRoot, subDir, 'logs', 'request.log')
33
- },
34
- 'response': {
35
- type: 'file',
36
- filename: path.join(localRoot, subDir, 'logs', 'response.log')
37
- },
38
- 'stdout': {
39
- type: 'stdout'
40
- },
41
- 'stderr': {
42
- type: 'stderr'
43
- }
44
- },
45
- categories: {
46
- 'retry': {
47
- appenders: ['stdout', 'retry'],
48
- level: 'debug'
49
- },
50
- 'mkdir': {
51
- appenders: ['mkdir'],
52
- level: 'debug'
53
- },
54
- 'error': {
55
- appenders: ['stderr', 'error'],
56
- level: 'debug'
57
- },
58
- 'skip': {
59
- appenders: ['stdout', 'skip'],
60
- level: 'debug'
61
- },
62
- 'skipExternal': {
63
- appenders: ['skip'],
64
- level: 'debug'
65
- },
66
- 'notFound': {
67
- appenders: ['404'],
68
- level: 'debug'
69
- },
70
- 'complete': {
71
- appenders: ['complete'],
72
- level: 'debug'
73
- },
74
- 'request': {
75
- appenders: ['request'],
76
- level: 'debug'
77
- },
78
- 'response': {
79
- appenders: ['response'],
80
- level: 'debug'
81
- },
82
- 'adjustConcurrency': {
83
- appenders: ['stdout', 'complete'],
84
- level: 'debug'
85
- },
86
- 'default': {
87
- appenders: ['stdout', 'complete'],
88
- level: 'debug'
89
- }
90
- }
91
- });
92
- //# sourceMappingURL=config-logger.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"config-logger.js","sourceRoot":"","sources":["../../src/logger/config-logger.ts"],"names":[],"mappings":"AACA,8CAA8C;AAC9C,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,SAAiB,EAAE,MAAc,EAAU,EAAE,CAC3E,MAAM,CAAC,SAAS,CAAC;IACf,SAAS,EAAE;QACT,OAAO,EAAE;YACP,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC;SAC5D;QACD,OAAO,EAAE;YACP,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC;SAC5D;QACD,OAAO,EAAE;YACP,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,CAAC;SAC5D;QACD,MAAM,EAAE;YACN,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC;SAC3D;QACD,KAAK,EAAE;YACL,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,CAAC;SAC1D;QACD,UAAU,EAAE;YACV,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC;SAC/D;QACD,SAAS,EAAE;YACT,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,CAAC;SAC9D;QACD,UAAU,EAAE;YACV,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC;SAC/D;QACD,QAAQ,EAAE;YACR,IAAI,EAAE,QAAQ;SACf;QACD,QAAQ,EAAE;YACR,IAAI,EAAE,QAAQ;SACf;KACF;IAED,UAAU,EAAE;QACV,OAAO,EAAE;YACP,SAAS,EAAE,CAAC,QAAQ,EAAE,OAAO,CAAC;YAC9B,KAAK,EAAE,OAAO;SACf;QACD,OAAO,EAAE;YACP,SAAS,EAAE,CAAC,OAAO,CAAC;YACpB,KAAK,EAAE,OAAO;SACf;QACD,OAAO,EAAE;YACP,SAAS,EAAE,CAAC,QAAQ,EAAE,OAAO,CAAC;YAC9B,KAAK,EAAE,OAAO;SACf;QACD,MAAM,EAAE;YACN,SAAS,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC;YAC7B,KAAK,EAAE,OAAO;SACf;QACD,cAAc,EAAE;YACd,SAAS,EAAE,CAAC,MAAM,CAAC;YACnB,KAAK,EAAE,OAAO;SACf;QACD,UAAU,EAAE;YACV,SAAS,EAAE,CAAC,KAAK,CAAC;YAClB,KAAK,EAAE,OAAO;SACf;QACD,UAAU,EAAE;YACV,SAAS,EAAE,CAAC,UAAU,CAAC;YACvB,KAAK,EAAE,OAAO;SACf;QACD,SAAS,EAAE;YACT,SAAS,EAAE,CAAC,SAAS,CAAC;YACtB,KAAK,EAAE,OAAO;SACf;QACD,UAAU,EAAE;YACV,SAAS,EAAE,CAAC,UAAU,CAAC;YACvB,KAAK,EAAE,OAA
O;SACf;QACD,mBAAmB,EAAE;YACnB,SAAS,EAAE,CAAC,QAAQ,EAAE,UAAU,CAAC;YACjC,KAAK,EAAE,OAAO;SACf;QACD,SAAS,EAAE;YACT,SAAS,EAAE,CAAC,QAAQ,EAAE,UAAU,CAAC;YACjC,KAAK,EAAE,OAAO;SACf;KACF;CACF,CAAC,CAAC"}