website-scrap-engine 0.7.2 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/downloader/adjust-concurrency.d.ts +2 -1
- package/lib/downloader/adjust-concurrency.d.ts.map +1 -0
- package/lib/downloader/adjust-concurrency.js +4 -8
- package/lib/downloader/adjust-concurrency.js.map +1 -1
- package/lib/downloader/index.d.ts +9 -8
- package/lib/downloader/index.d.ts.map +1 -0
- package/lib/downloader/index.js +8 -40
- package/lib/downloader/index.js.map +1 -1
- package/lib/downloader/main.d.ts +15 -6
- package/lib/downloader/main.d.ts.map +1 -0
- package/lib/downloader/main.js +49 -32
- package/lib/downloader/main.js.map +1 -1
- package/lib/downloader/multi.d.ts +7 -5
- package/lib/downloader/multi.d.ts.map +1 -0
- package/lib/downloader/multi.js +10 -17
- package/lib/downloader/multi.js.map +1 -1
- package/lib/downloader/pipeline-executor-impl.d.ts +8 -7
- package/lib/downloader/pipeline-executor-impl.d.ts.map +1 -0
- package/lib/downloader/pipeline-executor-impl.js +1 -5
- package/lib/downloader/pipeline-executor-impl.js.map +1 -1
- package/lib/downloader/single.d.ts +4 -3
- package/lib/downloader/single.d.ts.map +1 -0
- package/lib/downloader/single.js +7 -11
- package/lib/downloader/single.js.map +1 -1
- package/lib/downloader/types.d.ts +4 -4
- package/lib/downloader/types.d.ts.map +1 -0
- package/lib/downloader/types.js +2 -5
- package/lib/downloader/types.js.map +1 -1
- package/lib/downloader/worker-pool.d.ts +6 -7
- package/lib/downloader/worker-pool.d.ts.map +1 -0
- package/lib/downloader/worker-pool.js +7 -35
- package/lib/downloader/worker-pool.js.map +1 -1
- package/lib/downloader/worker-type.d.ts +4 -3
- package/lib/downloader/worker-type.d.ts.map +1 -0
- package/lib/downloader/worker-type.js +1 -2
- package/lib/downloader/worker.d.ts +1 -0
- package/lib/downloader/worker.d.ts.map +1 -0
- package/lib/downloader/worker.js +52 -27
- package/lib/downloader/worker.js.map +1 -1
- package/lib/index.d.ts +9 -8
- package/lib/index.d.ts.map +1 -0
- package/lib/index.js +7 -33
- package/lib/index.js.map +1 -1
- package/lib/io.d.ts +2 -1
- package/lib/io.d.ts.map +1 -0
- package/lib/io.js +17 -25
- package/lib/io.js.map +1 -1
- package/lib/life-cycle/adapters.d.ts +7 -5
- package/lib/life-cycle/adapters.d.ts.map +1 -0
- package/lib/life-cycle/adapters.js +18 -30
- package/lib/life-cycle/adapters.js.map +1 -1
- package/lib/life-cycle/default-life-cycle.d.ts +2 -1
- package/lib/life-cycle/default-life-cycle.d.ts.map +1 -0
- package/lib/life-cycle/default-life-cycle.js +28 -32
- package/lib/life-cycle/default-life-cycle.js.map +1 -1
- package/lib/life-cycle/detect-resource-type.d.ts +2 -1
- package/lib/life-cycle/detect-resource-type.d.ts.map +1 -0
- package/lib/life-cycle/detect-resource-type.js +12 -17
- package/lib/life-cycle/detect-resource-type.js.map +1 -1
- package/lib/life-cycle/download-resource.d.ts +6 -7
- package/lib/life-cycle/download-resource.d.ts.map +1 -0
- package/lib/life-cycle/download-resource.js +49 -52
- package/lib/life-cycle/download-resource.js.map +1 -1
- package/lib/life-cycle/download-streaming-resource.d.ts +6 -5
- package/lib/life-cycle/download-streaming-resource.d.ts.map +1 -0
- package/lib/life-cycle/download-streaming-resource.js +39 -74
- package/lib/life-cycle/download-streaming-resource.js.map +1 -1
- package/lib/life-cycle/index.d.ts +16 -15
- package/lib/life-cycle/index.d.ts.map +1 -0
- package/lib/life-cycle/index.js +14 -59
- package/lib/life-cycle/index.js.map +1 -1
- package/lib/life-cycle/pipeline-executor.d.ts +7 -6
- package/lib/life-cycle/pipeline-executor.d.ts.map +1 -0
- package/lib/life-cycle/pipeline-executor.js +1 -2
- package/lib/life-cycle/process-css.d.ts +5 -4
- package/lib/life-cycle/process-css.d.ts.map +1 -0
- package/lib/life-cycle/process-css.js +10 -18
- package/lib/life-cycle/process-css.js.map +1 -1
- package/lib/life-cycle/process-html-meta.d.ts +4 -3
- package/lib/life-cycle/process-html-meta.d.ts.map +1 -0
- package/lib/life-cycle/process-html-meta.js +11 -15
- package/lib/life-cycle/process-html-meta.js.map +1 -1
- package/lib/life-cycle/process-html.d.ts +4 -3
- package/lib/life-cycle/process-html.d.ts.map +1 -0
- package/lib/life-cycle/process-html.js +61 -43
- package/lib/life-cycle/process-html.js.map +1 -1
- package/lib/life-cycle/process-site-map.d.ts +4 -3
- package/lib/life-cycle/process-site-map.d.ts.map +1 -0
- package/lib/life-cycle/process-site-map.js +7 -11
- package/lib/life-cycle/process-site-map.js.map +1 -1
- package/lib/life-cycle/process-source-map.d.ts +4 -4
- package/lib/life-cycle/process-source-map.d.ts.map +1 -0
- package/lib/life-cycle/process-source-map.js +16 -21
- package/lib/life-cycle/process-source-map.js.map +1 -1
- package/lib/life-cycle/process-svg.d.ts +4 -3
- package/lib/life-cycle/process-svg.d.ts.map +1 -0
- package/lib/life-cycle/process-svg.js +17 -21
- package/lib/life-cycle/process-svg.js.map +1 -1
- package/lib/life-cycle/read-or-copy-local-resource.d.ts +4 -3
- package/lib/life-cycle/read-or-copy-local-resource.d.ts.map +1 -0
- package/lib/life-cycle/read-or-copy-local-resource.js +15 -42
- package/lib/life-cycle/read-or-copy-local-resource.js.map +1 -1
- package/lib/life-cycle/save-html-to-disk.d.ts +6 -4
- package/lib/life-cycle/save-html-to-disk.d.ts.map +1 -0
- package/lib/life-cycle/save-html-to-disk.js +24 -33
- package/lib/life-cycle/save-html-to-disk.js.map +1 -1
- package/lib/life-cycle/save-resource-to-disk.d.ts +4 -3
- package/lib/life-cycle/save-resource-to-disk.d.ts.map +1 -0
- package/lib/life-cycle/save-resource-to-disk.js +10 -17
- package/lib/life-cycle/save-resource-to-disk.js.map +1 -1
- package/lib/life-cycle/skip-links.d.ts +1 -0
- package/lib/life-cycle/skip-links.d.ts.map +1 -0
- package/lib/life-cycle/skip-links.js +6 -10
- package/lib/life-cycle/skip-links.js.map +1 -1
- package/lib/life-cycle/types.d.ts +8 -7
- package/lib/life-cycle/types.d.ts.map +1 -0
- package/lib/life-cycle/types.js +1 -2
- package/lib/logger/config-logger.d.ts +2 -1
- package/lib/logger/config-logger.d.ts.map +1 -0
- package/lib/logger/config-logger.js +4 -30
- package/lib/logger/config-logger.js.map +1 -1
- package/lib/logger/logger-worker.d.ts +3 -2
- package/lib/logger/logger-worker.d.ts.map +1 -0
- package/lib/logger/logger-worker.js +11 -13
- package/lib/logger/logger-worker.js.map +1 -1
- package/lib/logger/logger.d.ts +2 -1
- package/lib/logger/logger.d.ts.map +1 -0
- package/lib/logger/logger.js +15 -17
- package/lib/logger/logger.js.map +1 -1
- package/lib/options.d.ts +9 -8
- package/lib/options.d.ts.map +1 -0
- package/lib/options.js +22 -32
- package/lib/options.js.map +1 -1
- package/lib/resource.d.ts +3 -4
- package/lib/resource.d.ts.map +1 -0
- package/lib/resource.js +34 -70
- package/lib/resource.js.map +1 -1
- package/lib/sources.d.ts +2 -1
- package/lib/sources.d.ts.map +1 -0
- package/lib/sources.js +9 -12
- package/lib/sources.js.map +1 -1
- package/lib/types.d.ts +1 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +1 -2
- package/lib/util.d.ts +4 -3
- package/lib/util.d.ts.map +1 -0
- package/lib/util.js +17 -34
- package/lib/util.js.map +1 -1
- package/package.json +18 -20
- package/src/downloader/adjust-concurrency.ts +2 -2
- package/src/downloader/index.ts +8 -8
- package/src/downloader/main.ts +50 -28
- package/src/downloader/multi.ts +11 -10
- package/src/downloader/pipeline-executor-impl.ts +7 -7
- package/src/downloader/single.ts +9 -6
- package/src/downloader/types.ts +3 -3
- package/src/downloader/worker-pool.ts +9 -9
- package/src/downloader/worker-type.ts +3 -3
- package/src/downloader/worker.ts +51 -29
- package/src/index.ts +8 -8
- package/src/io.ts +6 -6
- package/src/life-cycle/adapters.ts +7 -6
- package/src/life-cycle/css-url-parser.d.ts +1 -1
- package/src/life-cycle/default-life-cycle.ts +15 -15
- package/src/life-cycle/detect-resource-type.ts +2 -2
- package/src/life-cycle/download-resource.ts +45 -20
- package/src/life-cycle/download-streaming-resource.ts +20 -18
- package/src/life-cycle/index.ts +15 -15
- package/src/life-cycle/pipeline-executor.ts +6 -6
- package/src/life-cycle/process-css.ts +6 -5
- package/src/life-cycle/process-html-meta.ts +7 -6
- package/src/life-cycle/process-html.ts +74 -32
- package/src/life-cycle/process-site-map.ts +7 -6
- package/src/life-cycle/process-source-map.ts +5 -4
- package/src/life-cycle/process-svg.ts +10 -9
- package/src/life-cycle/read-or-copy-local-resource.ts +9 -7
- package/src/life-cycle/save-html-to-disk.ts +9 -13
- package/src/life-cycle/save-resource-to-disk.ts +6 -6
- package/src/life-cycle/types.ts +7 -7
- package/src/logger/config-logger.ts +5 -3
- package/src/logger/logger-worker.ts +8 -4
- package/src/logger/logger.ts +6 -4
- package/src/options.ts +16 -19
- package/src/resource.ts +10 -5
- package/src/sources.ts +1 -1
- package/src/util.ts +6 -10
- package/tsconfig.json +6 -2
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import type {DownloadResource, SubmitResourceFunc} from './types';
|
|
2
|
-
import type {StaticDownloadOptions} from '../options';
|
|
3
|
-
import {Resource, ResourceEncoding
|
|
4
|
-
import
|
|
1
|
+
import type {DownloadResource, SubmitResourceFunc} from './types.js';
|
|
2
|
+
import type {StaticDownloadOptions} from '../options.js';
|
|
3
|
+
import type {Resource, ResourceEncoding} from '../resource.js';
|
|
4
|
+
import {ResourceType} from '../resource.js';
|
|
5
|
+
import type {PipelineExecutor} from './pipeline-executor.js';
|
|
5
6
|
|
|
6
7
|
// https://developer.mozilla.org/docs/Web/HTTP/Headers/SourceMap
|
|
7
8
|
export const SOURCE_MAP_HEADER = 'SourceMap'.toLowerCase();
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import type {SourceDefinition} from '../sources';
|
|
2
|
-
import type {DownloadResource, SubmitResourceFunc} from './types';
|
|
3
|
-
import type {StaticDownloadOptions} from '../options';
|
|
4
|
-
import {Resource
|
|
5
|
-
import {
|
|
6
|
-
import
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import
|
|
1
|
+
import type {SourceDefinition} from '../sources.js';
|
|
2
|
+
import type {DownloadResource, SubmitResourceFunc} from './types.js';
|
|
3
|
+
import type {StaticDownloadOptions} from '../options.js';
|
|
4
|
+
import type {Resource} from '../resource.js';
|
|
5
|
+
import {ResourceType} from '../resource.js';
|
|
6
|
+
import {error, skip} from '../logger/logger.js';
|
|
7
|
+
import type {PipelineExecutor} from './pipeline-executor.js';
|
|
8
|
+
import {parseHtml} from './adapters.js';
|
|
9
|
+
import {getResourceBodyFromHtml} from './save-html-to-disk.js';
|
|
10
|
+
import type {Cheerio, CheerioStatic} from '../types.js';
|
|
10
11
|
|
|
11
12
|
const svgSelectors: SourceDefinition[] = [
|
|
12
13
|
{selector: '*[xlink\\:href]', attr: 'xlink:href', type: ResourceType.Binary},
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import * as path from 'path';
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import type {
|
|
5
|
-
import
|
|
6
|
-
import {
|
|
1
|
+
import * as path from 'node:path';
|
|
2
|
+
import type {Stats} from 'node:fs';
|
|
3
|
+
import {promises} from 'node:fs';
|
|
4
|
+
import type {Resource} from '../resource.js';
|
|
5
|
+
import {ResourceType} from '../resource.js';
|
|
6
|
+
import type {DownloadResource, RequestOptions} from './types.js';
|
|
7
|
+
import type {StaticDownloadOptions} from '../options.js';
|
|
8
|
+
import {error as errorLogger} from '../logger/logger.js';
|
|
7
9
|
|
|
8
10
|
const FILE_PREFIX = 'file://';
|
|
9
11
|
|
|
@@ -27,7 +29,7 @@ export async function readOrCopyLocalResource(
|
|
|
27
29
|
return;
|
|
28
30
|
}
|
|
29
31
|
// index.html handling
|
|
30
|
-
let stats: Stats | void;
|
|
32
|
+
let stats: Stats | void = void 0;
|
|
31
33
|
if (res.type === ResourceType.Html) {
|
|
32
34
|
stats = await promises.stat(fileSrcPath);
|
|
33
35
|
if (stats.isDirectory()) {
|
|
@@ -1,16 +1,12 @@
|
|
|
1
|
-
import path from 'path';
|
|
1
|
+
import path from 'node:path';
|
|
2
2
|
import URI from 'urijs';
|
|
3
|
-
import type {DownloadResource} from './types';
|
|
4
|
-
import type {StaticDownloadOptions} from '../options';
|
|
5
|
-
import {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
} from '../resource';
|
|
11
|
-
import {escapePath} from '../util';
|
|
12
|
-
import {writeFile} from '../io';
|
|
13
|
-
import type {PipelineExecutor} from './pipeline-executor';
|
|
3
|
+
import type {DownloadResource} from './types.js';
|
|
4
|
+
import type {StaticDownloadOptions} from '../options.js';
|
|
5
|
+
import type {ResourceBody, ResourceEncoding} from '../resource.js';
|
|
6
|
+
import {ResourceType, urlOfSavePath} from '../resource.js';
|
|
7
|
+
import {escapePath} from '../util.js';
|
|
8
|
+
import {writeFile} from '../io.js';
|
|
9
|
+
import type {PipelineExecutor} from './pipeline-executor.js';
|
|
14
10
|
|
|
15
11
|
export function getResourceBodyFromHtml(
|
|
16
12
|
res: DownloadResource & { type: ResourceType.Html },
|
|
@@ -46,7 +42,7 @@ export async function saveHtmlToDisk(
|
|
|
46
42
|
}
|
|
47
43
|
const localRoot: string = res.localRoot ?? options.localRoot;
|
|
48
44
|
// https://github.com/website-local/website-scrap-engine/issues/174
|
|
49
|
-
let mtime: number | void;
|
|
45
|
+
let mtime: number | void = void 0;
|
|
50
46
|
if (options.preferRemoteLastModifiedTime && res.meta?.headers?.['last-modified']) {
|
|
51
47
|
mtime = Date.parse(res.meta.headers?.['last-modified']);
|
|
52
48
|
}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import path from 'path';
|
|
2
|
-
import type {DownloadResource} from './types';
|
|
3
|
-
import type {StaticDownloadOptions} from '../options';
|
|
4
|
-
import {writeFile} from '../io';
|
|
5
|
-
import type {PipelineExecutor} from './pipeline-executor';
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import type {DownloadResource} from './types.js';
|
|
3
|
+
import type {StaticDownloadOptions} from '../options.js';
|
|
4
|
+
import {writeFile} from '../io.js';
|
|
5
|
+
import type {PipelineExecutor} from './pipeline-executor.js';
|
|
6
6
|
|
|
7
7
|
export async function saveResourceToDisk(
|
|
8
8
|
res: DownloadResource,
|
|
@@ -10,7 +10,7 @@ export async function saveResourceToDisk(
|
|
|
10
10
|
pipeline: PipelineExecutor): Promise<DownloadResource | void> {
|
|
11
11
|
const localRoot: string = res.localRoot ?? options.localRoot;
|
|
12
12
|
// https://github.com/website-local/website-scrap-engine/issues/174
|
|
13
|
-
let mtime: number | void;
|
|
13
|
+
let mtime: number | void = void 0;
|
|
14
14
|
if (options.preferRemoteLastModifiedTime && res.meta?.headers?.['last-modified']) {
|
|
15
15
|
mtime = Date.parse(res.meta.headers?.['last-modified']);
|
|
16
16
|
}
|
package/src/life-cycle/types.ts
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type {OptionsInit as GotOptions} from 'got';
|
|
2
2
|
import type {
|
|
3
3
|
createResource,
|
|
4
4
|
GenerateSavePathFn,
|
|
5
5
|
Resource,
|
|
6
6
|
ResourceBody,
|
|
7
7
|
ResourceType
|
|
8
|
-
} from '../resource';
|
|
9
|
-
import type {StaticDownloadOptions} from '../options';
|
|
10
|
-
import type {PipelineExecutor} from './pipeline-executor';
|
|
11
|
-
import type {Cheerio} from '../types';
|
|
12
|
-
import type {DownloaderWithMeta} from '../downloader/types';
|
|
13
|
-
import type {WorkerInfo} from '../downloader/worker-pool';
|
|
8
|
+
} from '../resource.js';
|
|
9
|
+
import type {StaticDownloadOptions} from '../options.js';
|
|
10
|
+
import type {PipelineExecutor} from './pipeline-executor.js';
|
|
11
|
+
import type {Cheerio} from '../types.js';
|
|
12
|
+
import type {DownloaderWithMeta} from '../downloader/types.js';
|
|
13
|
+
import type {WorkerInfo} from '../downloader/worker-pool.js';
|
|
14
14
|
|
|
15
15
|
export type AsyncResult<T> = T | Promise<T>;
|
|
16
16
|
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
1
|
+
import type {Log4js} from 'log4js';
|
|
2
|
+
// https://github.com/jestjs/jest/issues/11563
|
|
3
|
+
import log4js from 'log4js';
|
|
4
|
+
import * as path from 'node:path';
|
|
3
5
|
|
|
4
6
|
export const configureLogger = (localRoot: string, subDir: string): Log4js =>
|
|
5
|
-
configure({
|
|
7
|
+
log4js.configure({
|
|
6
8
|
appenders: {
|
|
7
9
|
'retry': {
|
|
8
10
|
type: 'file',
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
import
|
|
4
|
-
import {
|
|
1
|
+
import type {Logger} from 'log4js';
|
|
2
|
+
// https://github.com/jestjs/jest/issues/11563
|
|
3
|
+
import log4js from 'log4js';
|
|
4
|
+
import {parentPort} from 'node:worker_threads';
|
|
5
|
+
import type {LogWorkerMessage, WorkerLog} from '../downloader/worker-type.js';
|
|
6
|
+
import {WorkerMessageType} from '../downloader/types.js';
|
|
7
|
+
|
|
8
|
+
const getLogger = log4js.getLogger;
|
|
5
9
|
|
|
6
10
|
export const logLevels = [
|
|
7
11
|
'trace', 'debug', 'info', 'warn', 'error', 'fatal', 'mark'
|
package/src/logger/logger.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
import
|
|
1
|
+
import type {Logger} from 'log4js';
|
|
2
|
+
// https://github.com/jestjs/jest/issues/11563
|
|
3
|
+
import log4js from 'log4js';
|
|
4
|
+
import {isMainThread} from 'node:worker_threads';
|
|
5
|
+
import {getWorkerLogger} from './logger-worker.js';
|
|
4
6
|
|
|
5
7
|
const getLogger: typeof getWorkerLogger =
|
|
6
|
-
isMainThread ?
|
|
8
|
+
isMainThread ? log4js.getLogger : getWorkerLogger;
|
|
7
9
|
|
|
8
10
|
export const notFound: Logger = getLogger('notFound');
|
|
9
11
|
export const retry: Logger = getLogger('retry');
|
package/src/options.ts
CHANGED
|
@@ -1,22 +1,18 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
} from 'got/dist/source/as-promise/types';
|
|
7
|
-
import type {RequestError} from 'got/dist/source/core';
|
|
8
|
-
import {createResource, ResourceEncoding, ResourceType} from './resource';
|
|
9
|
-
import type {ProcessingLifeCycle, RequestOptions} from './life-cycle/types';
|
|
1
|
+
import type {RequestError, RetryFunction, RetryObject, TimeoutError} from 'got';
|
|
2
|
+
import got, {Options} from 'got';
|
|
3
|
+
import type {ResourceEncoding, ResourceType} from './resource.js';
|
|
4
|
+
import {createResource} from './resource.js';
|
|
5
|
+
import type {ProcessingLifeCycle, RequestOptions} from './life-cycle/types.js';
|
|
10
6
|
// noinspection ES6PreferShortImport
|
|
11
|
-
import {beforeRetryHook} from './life-cycle/download-resource';
|
|
12
|
-
import {error} from './logger/logger';
|
|
7
|
+
import {beforeRetryHook} from './life-cycle/download-resource.js';
|
|
8
|
+
import {error} from './logger/logger.js';
|
|
13
9
|
// noinspection ES6PreferShortImport
|
|
14
|
-
import {adjust} from './downloader/adjust-concurrency';
|
|
15
|
-
import {configureLogger} from './logger/config-logger';
|
|
16
|
-
import type {DownloaderWithMeta} from './downloader/types';
|
|
17
|
-
import {weakAssign} from './util';
|
|
18
|
-
import type {SourceDefinition} from './sources';
|
|
19
|
-
import type {CheerioOptionsInterface} from './types';
|
|
10
|
+
import {adjust} from './downloader/adjust-concurrency.js';
|
|
11
|
+
import {configureLogger} from './logger/config-logger.js';
|
|
12
|
+
import type {DownloaderWithMeta} from './downloader/types.js';
|
|
13
|
+
import {weakAssign} from './util.js';
|
|
14
|
+
import type {SourceDefinition} from './sources.js';
|
|
15
|
+
import type {CheerioOptionsInterface} from './types.js';
|
|
20
16
|
|
|
21
17
|
/**
|
|
22
18
|
* Extra options for custom life cycle
|
|
@@ -24,6 +20,7 @@ import type {CheerioOptionsInterface} from './types';
|
|
|
24
20
|
export interface StaticDownloadMeta
|
|
25
21
|
extends Record<string, string | number | boolean| void> {
|
|
26
22
|
detectIncompleteHtml?: '</html>' | '</body>' | string;
|
|
23
|
+
warnForNonHtml?: boolean;
|
|
27
24
|
}
|
|
28
25
|
|
|
29
26
|
/**
|
|
@@ -379,8 +376,8 @@ export function mergeOverrideOptions(
|
|
|
379
376
|
overrideOptions.meta = Object.assign(opt.meta, overrideOptions.meta);
|
|
380
377
|
}
|
|
381
378
|
if (opt.req && overrideOptions.req) {
|
|
382
|
-
|
|
383
|
-
|
|
379
|
+
const options = got.defaults.options;
|
|
380
|
+
overrideOptions.req = new Options(opt.req, overrideOptions.req, options);
|
|
384
381
|
}
|
|
385
382
|
return checkDownloadOptions(Object.assign(opt, overrideOptions));
|
|
386
383
|
}
|
package/src/resource.ts
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
import URI from 'urijs';
|
|
2
|
-
import type {IncomingHttpHeaders} from 'http';
|
|
3
|
-
import * as path from 'path';
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
import type {IncomingHttpHeaders} from 'node:http';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
import {
|
|
5
|
+
escapePath,
|
|
6
|
+
isUrlHttp,
|
|
7
|
+
orderUrlSearch,
|
|
8
|
+
simpleHashString
|
|
9
|
+
} from './util.js';
|
|
10
|
+
import type {CheerioStatic} from './types.js';
|
|
11
|
+
import {error as log} from './logger/logger.js';
|
|
7
12
|
|
|
8
13
|
export enum ResourceType {
|
|
9
14
|
/**
|
package/src/sources.ts
CHANGED
package/src/util.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {createHash} from 'crypto';
|
|
2
|
-
import type {ResourceBody, ResourceEncoding} from './resource';
|
|
1
|
+
import {createHash} from 'node:crypto';
|
|
2
|
+
import type {ResourceBody, ResourceEncoding} from './resource.js';
|
|
3
3
|
|
|
4
4
|
const forbiddenChar = /[:*?"<>|&]|%3A|%2A|%3F|%22|%3C|%3E|%7C|%26/ig;
|
|
5
5
|
|
|
@@ -39,14 +39,10 @@ export const toString = (body: ResourceBody, encoding: ResourceEncoding): string
|
|
|
39
39
|
return stringValue;
|
|
40
40
|
};
|
|
41
41
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
if (mod && mod.__esModule && mod.default) {
|
|
47
|
-
return mod.default;
|
|
48
|
-
}
|
|
49
|
-
return mod;
|
|
42
|
+
export const importDefaultFromPath = <T>(path: string): Promise<T> => {
|
|
43
|
+
return import(path).then(mod => {
|
|
44
|
+
return mod.default || mod;
|
|
45
|
+
});
|
|
50
46
|
};
|
|
51
47
|
|
|
52
48
|
export const orderUrlSearch = (search: string): string => {
|
package/tsconfig.json
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"compilerOptions": {
|
|
3
|
-
"module": "
|
|
3
|
+
"module": "node16",
|
|
4
|
+
"moduleResolution": "node16",
|
|
4
5
|
"target": "es2018",
|
|
5
6
|
"sourceMap": true,
|
|
6
7
|
"newLine": "lf",
|
|
@@ -8,7 +9,10 @@
|
|
|
8
9
|
"declaration": true,
|
|
9
10
|
"esModuleInterop": true,
|
|
10
11
|
"removeComments": false,
|
|
11
|
-
"strict": true
|
|
12
|
+
"strict": true,
|
|
13
|
+
"declarationMap": true,
|
|
14
|
+
"allowJs": true,
|
|
15
|
+
"verbatimModuleSyntax": true
|
|
12
16
|
},
|
|
13
17
|
"include": [
|
|
14
18
|
"src"
|