@d-zero/puppeteer-dealer 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/create-child-process.d.ts +6 -0
- package/dist/create-child-process.js +63 -0
- package/dist/create-main-process.d.ts +17 -0
- package/dist/create-main-process.js +33 -0
- package/dist/deal.d.ts +7 -5
- package/dist/deal.js +18 -89
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/types.d.ts +25 -1
- package/package.json +10 -7
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { ProcTalk } from '@d-zero/proc-talk';
|
|
2
|
+
import { launch } from 'puppeteer';
|
|
3
|
+
import { log } from './debug.js';
|
|
4
|
+
const childLog = log.extend(`child:${process.pid}`);
|
|
5
|
+
/**
|
|
6
|
+
*
|
|
7
|
+
* @param handler
|
|
8
|
+
*/
|
|
9
|
+
export function createChildProcess(handler) {
|
|
10
|
+
new ProcTalk({
|
|
11
|
+
type: 'child',
|
|
12
|
+
title: '@d-zero/puppeteer-dealer',
|
|
13
|
+
async process(options) {
|
|
14
|
+
const config = {
|
|
15
|
+
locale: 'ja-JP',
|
|
16
|
+
...options,
|
|
17
|
+
};
|
|
18
|
+
childLog('Process started: %O', config);
|
|
19
|
+
const params = await this.call('init');
|
|
20
|
+
childLog('Params: %O', params);
|
|
21
|
+
childLog('Needs auth: %s', params.needAuth);
|
|
22
|
+
const { eachPage } = await handler(params);
|
|
23
|
+
const launchOptions = {
|
|
24
|
+
headless: config.headless ?? (params.needAuth ? 'shell' : true),
|
|
25
|
+
args: [
|
|
26
|
+
//
|
|
27
|
+
`--lang=${config.locale}`,
|
|
28
|
+
'--no-zygote',
|
|
29
|
+
'--ignore-certificate-errors',
|
|
30
|
+
'--no-sandbox',
|
|
31
|
+
'--disable-web-security',
|
|
32
|
+
'--disable-features=SafeBrowsing',
|
|
33
|
+
],
|
|
34
|
+
...config,
|
|
35
|
+
};
|
|
36
|
+
childLog('Launch options: %O', launchOptions);
|
|
37
|
+
const browser = await launch(launchOptions);
|
|
38
|
+
const page = await browser?.newPage();
|
|
39
|
+
if (!page) {
|
|
40
|
+
throw new Error('Failed to create page');
|
|
41
|
+
}
|
|
42
|
+
page.setDefaultNavigationTimeout(0);
|
|
43
|
+
if (config.locale) {
|
|
44
|
+
await page.setExtraHTTPHeaders({
|
|
45
|
+
'Accept-Language': config.locale,
|
|
46
|
+
});
|
|
47
|
+
}
|
|
48
|
+
childLog('Page is ready');
|
|
49
|
+
this.bind('each', async (id, url, index) => {
|
|
50
|
+
const result = await eachPage({ page, id, url, index }, async (log) => {
|
|
51
|
+
await this.call('log', log);
|
|
52
|
+
});
|
|
53
|
+
return result;
|
|
54
|
+
});
|
|
55
|
+
return async () => {
|
|
56
|
+
childLog('Close page and browser');
|
|
57
|
+
await page.close();
|
|
58
|
+
await browser.close();
|
|
59
|
+
childLog('Cleanup done');
|
|
60
|
+
};
|
|
61
|
+
},
|
|
62
|
+
});
|
|
63
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { CommonParams, Logger, PuppeteerDealerOptions } from './types.js';
|
|
2
|
+
import type { LaunchOptions } from 'puppeteer';
|
|
3
|
+
/**
|
|
4
|
+
*
|
|
5
|
+
* @param subModulePath
|
|
6
|
+
* @param params
|
|
7
|
+
* @param options
|
|
8
|
+
*/
|
|
9
|
+
export declare function createProcess<P, R = void>(subModulePath: string, params: P, options?: PuppeteerDealerOptions & LaunchOptions): (needAuth: boolean) => ChildProcessManager<P, R>;
|
|
10
|
+
export declare class ChildProcessManager<P, R> {
|
|
11
|
+
#private;
|
|
12
|
+
constructor(subModulePath: string, params: P & CommonParams, options?: PuppeteerDealerOptions & LaunchOptions);
|
|
13
|
+
close(): Promise<void>;
|
|
14
|
+
each(id: string, url: string, index: number): Promise<R>;
|
|
15
|
+
log(logger: Logger): void;
|
|
16
|
+
ready(): Promise<void>;
|
|
17
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { ProcTalk } from '@d-zero/proc-talk';
|
|
2
|
+
/**
|
|
3
|
+
*
|
|
4
|
+
* @param subModulePath
|
|
5
|
+
* @param params
|
|
6
|
+
* @param options
|
|
7
|
+
*/
|
|
8
|
+
export function createProcess(subModulePath, params, options) {
|
|
9
|
+
return (needAuth) => new ChildProcessManager(subModulePath, { ...params, needAuth }, options);
|
|
10
|
+
}
|
|
11
|
+
export class ChildProcessManager {
|
|
12
|
+
#procTalk;
|
|
13
|
+
constructor(subModulePath, params, options) {
|
|
14
|
+
this.#procTalk = new ProcTalk({
|
|
15
|
+
type: 'main',
|
|
16
|
+
subModulePath,
|
|
17
|
+
options,
|
|
18
|
+
});
|
|
19
|
+
this.#procTalk.bind('init', () => Promise.resolve(params));
|
|
20
|
+
}
|
|
21
|
+
async close() {
|
|
22
|
+
await this.#procTalk.close();
|
|
23
|
+
}
|
|
24
|
+
async each(id, url, index) {
|
|
25
|
+
return await this.#procTalk.call('each', id, url, index);
|
|
26
|
+
}
|
|
27
|
+
log(logger) {
|
|
28
|
+
this.#procTalk.bind('log', logger);
|
|
29
|
+
}
|
|
30
|
+
async ready() {
|
|
31
|
+
await this.#procTalk.initialized();
|
|
32
|
+
}
|
|
33
|
+
}
|
package/dist/deal.d.ts
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import type {
|
|
3
|
-
import type {
|
|
1
|
+
import type { ChildProcessManager } from './create-main-process.js';
|
|
2
|
+
import type { URLInfo } from './types.js';
|
|
3
|
+
import type { DealHeader, DealOptions } from '@d-zero/dealer';
|
|
4
4
|
/**
|
|
5
5
|
*
|
|
6
6
|
* @param list
|
|
7
7
|
* @param header
|
|
8
|
-
* @param
|
|
8
|
+
* @param createProcess
|
|
9
9
|
* @param options
|
|
10
10
|
*/
|
|
11
|
-
export declare function deal(list: readonly URLInfo[], header: DealHeader,
|
|
11
|
+
export declare function deal<T, R = void>(list: readonly URLInfo[], header: DealHeader, createProcess: () => (needAuth: boolean) => ChildProcessManager<T, R>, options?: Omit<DealOptions, 'header'> & {
|
|
12
|
+
each?: (result: R) => void | Promise<void>;
|
|
13
|
+
}): Promise<void>;
|
package/dist/deal.js
CHANGED
|
@@ -1,107 +1,36 @@
|
|
|
1
1
|
import { deal as coreDeal } from '@d-zero/dealer';
|
|
2
|
-
import { createPage } from '@d-zero/puppeteer-page';
|
|
3
|
-
import { delay } from '@d-zero/shared/delay';
|
|
4
2
|
import c from 'ansi-colors';
|
|
5
|
-
import { log } from './debug.js';
|
|
6
3
|
/**
|
|
7
4
|
*
|
|
8
5
|
* @param list
|
|
9
6
|
* @param header
|
|
10
|
-
* @param
|
|
7
|
+
* @param createProcess
|
|
11
8
|
* @param options
|
|
12
9
|
*/
|
|
13
|
-
export
|
|
14
|
-
const
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
const cleanUp = () => {
|
|
20
|
-
log('child process IDs: %o', childPrecessIds);
|
|
21
|
-
for (const pid of childPrecessIds) {
|
|
22
|
-
try {
|
|
23
|
-
process.kill(pid);
|
|
24
|
-
log('killed %d', pid);
|
|
25
|
-
}
|
|
26
|
-
catch (error) {
|
|
27
|
-
log('Already dead: %d', pid);
|
|
28
|
-
if (error instanceof Error && 'code' in error && error.code === 'ESRCH') {
|
|
29
|
-
// ignore
|
|
30
|
-
continue;
|
|
31
|
-
}
|
|
32
|
-
throw error;
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
if (log.enabled) {
|
|
36
|
-
log('process.getActiveResourcesInfo(): %o', process.getActiveResourcesInfo());
|
|
37
|
-
}
|
|
38
|
-
};
|
|
39
|
-
process.on('exit', cleanUp);
|
|
40
|
-
await coreDeal(list, ({ id, url }, update, index) => {
|
|
10
|
+
export function deal(list, header, createProcess, options) {
|
|
11
|
+
const needAuth = list.some(({ url }) => {
|
|
12
|
+
const urlObj = new URL(url);
|
|
13
|
+
return !!(urlObj.username && urlObj.password);
|
|
14
|
+
});
|
|
15
|
+
return coreDeal(list, ({ id, url }, update, index) => {
|
|
41
16
|
const fileId = id || index.toString().padStart(3, '0');
|
|
42
17
|
const lineHeader = `%braille% ${c.bgWhite(` ${fileId} `)} ${c.gray(url.toString())}: `;
|
|
43
18
|
return async () => {
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
args: [
|
|
51
|
-
//
|
|
52
|
-
`--lang=${config.locale}`,
|
|
53
|
-
'--no-zygote',
|
|
54
|
-
'--ignore-certificate-errors',
|
|
55
|
-
],
|
|
56
|
-
...config,
|
|
19
|
+
update(`${lineHeader}Using ${needAuth ? 'auth' : 'no auth'}`);
|
|
20
|
+
const processManager = createProcess()(needAuth);
|
|
21
|
+
update(`${lineHeader}Booting ChildProcess%dots%`);
|
|
22
|
+
await processManager.ready();
|
|
23
|
+
processManager.log((log) => {
|
|
24
|
+
update(`${lineHeader}${log}`);
|
|
57
25
|
});
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
await page.setDefaultNavigationTimeout(0);
|
|
62
|
-
if (config.locale) {
|
|
63
|
-
await page.setExtraHTTPHeaders({
|
|
64
|
-
'Accept-Language': config.locale,
|
|
65
|
-
});
|
|
26
|
+
const result = await processManager.each(fileId, url.toString(), index);
|
|
27
|
+
if (options?.each) {
|
|
28
|
+
await options.each(result);
|
|
66
29
|
}
|
|
67
|
-
await
|
|
68
|
-
.deal(page, fileId, url.toString(), (log) => update(`${lineHeader}${log}`), index)
|
|
69
|
-
.catch(evaluationError(page, url.toString(), fileId, index));
|
|
70
|
-
update(`${lineHeader} ${c.blue('✓')} Closing page%dots%`);
|
|
71
|
-
await page.close();
|
|
72
|
-
update(`${lineHeader} ${c.greenBright('✓')} Page process completed!`);
|
|
73
|
-
await delay(600);
|
|
30
|
+
await processManager.close();
|
|
74
31
|
};
|
|
75
32
|
}, {
|
|
76
|
-
...
|
|
33
|
+
...options,
|
|
77
34
|
header,
|
|
78
35
|
});
|
|
79
|
-
log('PuppeteerDealer.deal() completed');
|
|
80
|
-
}
|
|
81
|
-
/**
|
|
82
|
-
*
|
|
83
|
-
* @param page
|
|
84
|
-
* @param url
|
|
85
|
-
* @param fileId
|
|
86
|
-
* @param index
|
|
87
|
-
*/
|
|
88
|
-
function evaluationError(page, url, fileId, index) {
|
|
89
|
-
return (error) => {
|
|
90
|
-
if (error instanceof Error &&
|
|
91
|
-
error.message.includes('Execution context was destroyed')) {
|
|
92
|
-
error.message +=
|
|
93
|
-
'\n' +
|
|
94
|
-
c.red([
|
|
95
|
-
`PuppeteerDealer.deal() failed:`,
|
|
96
|
-
` URL: ${url}`,
|
|
97
|
-
` ID: ${fileId}`,
|
|
98
|
-
` Index: ${index}`,
|
|
99
|
-
' Page:',
|
|
100
|
-
` url: ${page.url()}`,
|
|
101
|
-
` isClosed: ${page.isClosed()}`,
|
|
102
|
-
].join('\n'));
|
|
103
|
-
throw error;
|
|
104
|
-
}
|
|
105
|
-
throw error;
|
|
106
|
-
};
|
|
107
36
|
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { DealOptions } from '@d-zero/dealer';
|
|
2
|
-
import type { Page } from '
|
|
2
|
+
import type { Page } from 'puppeteer';
|
|
3
3
|
export type PuppeteerDealerOptions = {
|
|
4
4
|
readonly locale?: string;
|
|
5
5
|
} & DealOptions;
|
|
@@ -12,3 +12,27 @@ export type URLInfo = {
|
|
|
12
12
|
readonly url: string | URL;
|
|
13
13
|
};
|
|
14
14
|
export type Logger = (log: string) => void;
|
|
15
|
+
export type CommonParams = {
|
|
16
|
+
readonly needAuth: boolean;
|
|
17
|
+
};
|
|
18
|
+
export type ChildProcessMethods<R> = {
|
|
19
|
+
eachPage: (params: EachPageParams, logger: Logger) => Promise<R>;
|
|
20
|
+
};
|
|
21
|
+
type EachPageParams = {
|
|
22
|
+
readonly page: Page;
|
|
23
|
+
readonly id: string;
|
|
24
|
+
readonly url: string;
|
|
25
|
+
readonly index: number;
|
|
26
|
+
};
|
|
27
|
+
export type ChildProcessCommonParams = {
|
|
28
|
+
readonly id: string;
|
|
29
|
+
readonly url: string;
|
|
30
|
+
readonly logger: Logger;
|
|
31
|
+
};
|
|
32
|
+
export type ChildProcessHandler<P extends CommonParams, R> = (params: P) => Promise<ChildProcessMethods<R>> | ChildProcessMethods<R>;
|
|
33
|
+
export type ChildProcessCommands<P extends CommonParams, R> = {
|
|
34
|
+
init: () => Promise<P>;
|
|
35
|
+
each: (id: string, url: string, index: number) => Promise<R>;
|
|
36
|
+
log: Logger;
|
|
37
|
+
};
|
|
38
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/puppeteer-dealer",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Puppeteer handles each page",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,14 +24,17 @@
|
|
|
24
24
|
"clean": "tsc --build --clean"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@d-zero/dealer": "1.3.
|
|
28
|
-
"@d-zero/
|
|
29
|
-
"@d-zero/shared": "0.
|
|
27
|
+
"@d-zero/dealer": "1.3.1",
|
|
28
|
+
"@d-zero/proc-talk": "0.4.1",
|
|
29
|
+
"@d-zero/shared": "0.9.0",
|
|
30
30
|
"ansi-colors": "4.1.3",
|
|
31
|
-
"debug": "4.4.
|
|
31
|
+
"debug": "4.4.1"
|
|
32
32
|
},
|
|
33
33
|
"devDependencies": {
|
|
34
|
-
"puppeteer": "
|
|
34
|
+
"puppeteer": "24.10.1"
|
|
35
35
|
},
|
|
36
|
-
"
|
|
36
|
+
"peerDependencies": {
|
|
37
|
+
"puppeteer": "24.10.1"
|
|
38
|
+
},
|
|
39
|
+
"gitHead": "04c6969564182c36ee38ef41e78130936dfa4863"
|
|
37
40
|
}
|