@d-zero/puppeteer-dealer 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/create-child-process.d.ts +28 -0
- package/dist/create-child-process.js +59 -0
- package/dist/create-main-process.d.ts +17 -0
- package/dist/create-main-process.js +33 -0
- package/dist/deal.d.ts +5 -5
- package/dist/deal.js +13 -90
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/types.d.ts +1 -1
- package/package.json +10 -7
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { Logger } from './types.js';
|
|
2
|
+
import type { Page } from 'puppeteer';
|
|
3
|
+
/**
 * Methods a child-process handler returns to the dealer.
 *
 * @template R Value produced for each page.
 */
export type ChildProcessMethods<R> = {
	/**
	 * Called once per `each` request with the page parameters and a logger;
	 * its resolved value is returned as the result of that request.
	 */
	eachPage: (params: EachPageParams, logger: Logger) => Promise<R>;
};

/** Arguments handed to `eachPage` for a single request. */
type EachPageParams = {
	/** Puppeteer page the handler operates on. */
	readonly page: Page;
	/** Identifier received with the `each` request. */
	readonly id: string;
	/** URL received with the `each` request. */
	readonly url: string;
	/** Index received with the `each` request. */
	readonly index: number;
};

/** Fields shared by child-process parameter objects: an id, a URL and a logger. */
export type ChildProcessCommonParams = {
	readonly id: string;
	readonly url: string;
	readonly logger: Logger;
};

/**
 * Handler given to `createChildProcess`.
 *
 * Receives the parameters `P` and returns the per-page methods, either directly
 * or through a promise.
 */
export type ChildProcessHandler<P, R> = (
	params: P,
) => Promise<ChildProcessMethods<R>> | ChildProcessMethods<R>;

/** Signatures of the commands exchanged between the main and child processes. */
export type ChildProcessCommands<P, R> = {
	/** Resolves to the parameters for the child process. */
	init: () => Promise<P>;
	/** Processes one page and resolves to its result. */
	each: (id: string, url: string, index: number) => Promise<R>;
	/** Receives log messages. */
	log: Logger;
};
|
|
23
|
+
/**
 * Sets up the child-process side of the dealer around `handler`.
 *
 * @template P Parameters the handler receives.
 * @template R Value produced for each page (defaults to `void`).
 * @param handler Receives the parameters and returns, directly or via a promise,
 *   the methods to run for each page.
 */
export declare function createChildProcess<P, R = void>(
	handler: ChildProcessHandler<P, R>,
): void;
export {};
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { ProcTalk } from '@d-zero/proc-talk';
|
|
2
|
+
import puppeteer from 'puppeteer';
|
|
3
|
+
import { log } from './debug.js';
|
|
4
|
+
const childLog = log.extend(`child:${process.pid}`);
|
|
5
|
+
/**
 * Runs the child-process side of the dealer.
 *
 * Registers a `ProcTalk` child whose `process` callback:
 * 1. requests its parameters from the other side through the `init` command,
 * 2. passes them to `handler` to obtain `eachPage`,
 * 3. launches a headless browser and opens a single page, and
 * 4. binds the `each` command so that every request is served by `eachPage`
 *    on that page.
 *
 * @param handler Called with the parameters returned by `init`; must return
 *   (or resolve to) an object exposing an `eachPage` function.
 */
export function createChildProcess(handler) {
	new ProcTalk({
		type: 'child',
		title: '@d-zero/puppeteer-dealer',
		// Method shorthand (not an arrow function) so that `this` is whatever
		// ProcTalk invokes the callback with; `this.call` / `this.bind` rely on it.
		async process(options) {
			// Caller options win over the default locale.
			const config = {
				locale: 'ja-JP',
				...options,
			};
			childLog('Process started: %O', config);
			const params = await this.call('init');
			childLog('Params: %O', params);
			const { eachPage } = await handler(params);
			const launchOptions = {
				headless: true,
				args: [
					// Skip --lang when the locale was overridden with a falsy value,
					// instead of passing a literal "--lang=undefined".
					...(config.locale ? [`--lang=${config.locale}`] : []),
					'--no-zygote',
					'--ignore-certificate-errors',
				],
				// Spread last: any `headless` / `args` supplied by the caller
				// replaces the defaults above.
				...config,
			};
			childLog('Launch options: %O', launchOptions);
			const browser = await puppeteer.launch(launchOptions);
			let page;
			try {
				page = await browser.newPage();
				if (!page) {
					throw new Error('Failed to create page');
				}
				// 0 disables the navigation timeout.
				page.setDefaultNavigationTimeout(0);
				if (config.locale) {
					await page.setExtraHTTPHeaders({
						'Accept-Language': config.locale,
					});
				}
			} catch (error) {
				// The browser is already running at this point; close it so a failed
				// page setup does not leave it behind.
				try {
					await browser.close();
				} catch (closeError) {
					// Keep the original failure as the error that is reported.
					childLog('Failed to close browser: %O', closeError);
				}
				throw error;
			}
			childLog('Page is ready');
			this.bind('each', async (id, url, index) => {
				// Log messages from the handler are forwarded through the `log` command.
				const result = await eachPage({ page, id, url, index }, async (log) => {
					await this.call('log', log);
				});
				return result;
			});
			// Cleanup callback handed back to ProcTalk (presumably invoked when the
			// process is closed - confirm against @d-zero/proc-talk).
			return async () => {
				childLog('Close page and browser');
				try {
					await page.close();
				} finally {
					// Close the browser even when closing the page fails.
					await browser.close();
				}
				childLog('Cleanup done');
			};
		},
	});
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { Logger, PuppeteerDealerOptions } from './types.js';
|
|
2
|
+
import type { LaunchOptions } from 'puppeteer';
|
|
3
|
+
/**
 * Creates a {@link ChildProcessManager} for the given sub-module.
 *
 * @template P Parameters handed to the child process.
 * @template R Value produced for each page (defaults to `void`).
 * @param subModulePath Path of the sub-module, forwarded to the manager.
 * @param params Parameters forwarded to the manager.
 * @param options Dealer and Puppeteer launch options, forwarded to the manager.
 * @returns The newly constructed manager.
 */
export declare function createProcess<P, R = void>(
	subModulePath: string,
	params: P,
	options?: PuppeteerDealerOptions & LaunchOptions,
): ChildProcessManager<P, R>;
|
|
10
|
+
/**
 * Main-process handle for one child process.
 *
 * @template P Parameters handed to the child process.
 * @template R Value produced for each page.
 */
export declare class ChildProcessManager<P, R> {
	#private;

	/**
	 * @param subModulePath Path of the sub-module.
	 * @param params Parameters for the child process.
	 * @param options Dealer and Puppeteer launch options.
	 */
	constructor(
		subModulePath: string,
		params: P,
		options?: PuppeteerDealerOptions & LaunchOptions,
	);

	/** Closes the process; resolves once closing has finished. */
	close(): Promise<void>;

	/**
	 * Requests processing of one page.
	 *
	 * @returns The result produced for that page.
	 */
	each(id: string, url: string, index: number): Promise<R>;

	/** Registers the logger that receives log messages. */
	log(logger: Logger): void;

	/** Resolves once the process has been initialized. */
	ready(): Promise<void>;
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { ProcTalk } from '@d-zero/proc-talk';
|
|
2
|
+
/**
 * Factory for {@link ChildProcessManager}.
 *
 * @param subModulePath Path of the sub-module, passed through to the manager.
 * @param params Parameters passed through to the manager.
 * @param options Options passed through to the manager.
 * @returns A new `ChildProcessManager` built from the three arguments.
 */
export function createProcess(subModulePath, params, options) {
	const manager = new ChildProcessManager(subModulePath, params, options);
	return manager;
}
|
|
11
|
+
/**
 * Main-process wrapper around a `ProcTalk` instance of type `'main'`.
 */
export class ChildProcessManager {
	/** Underlying ProcTalk connection. */
	#procTalk;

	/**
	 * Creates the ProcTalk connection and answers the `init` command with `params`.
	 *
	 * @param subModulePath Passed to ProcTalk as `subModulePath`.
	 * @param params Value the `init` command resolves to.
	 * @param options Passed to ProcTalk as `options`.
	 */
	constructor(subModulePath, params, options) {
		const talk = new ProcTalk({
			type: 'main',
			subModulePath,
			options,
		});
		talk.bind('init', () => {
			return Promise.resolve(params);
		});
		this.#procTalk = talk;
	}

	/** Waits for the ProcTalk connection to close. */
	async close() {
		await this.#procTalk.close();
	}

	/**
	 * Issues the `each` command.
	 *
	 * @param id Identifier forwarded with the command.
	 * @param url URL forwarded with the command.
	 * @param index Index forwarded with the command.
	 * @returns Whatever the `each` command resolves to.
	 */
	async each(id, url, index) {
		const result = await this.#procTalk.call('each', id, url, index);
		return result;
	}

	/**
	 * Binds `logger` as the handler of the `log` command.
	 *
	 * @param logger Function to receive log messages.
	 */
	log(logger) {
		this.#procTalk.bind('log', logger);
	}

	/** Waits until ProcTalk reports that it is initialized. */
	async ready() {
		await this.#procTalk.initialized();
	}
}
|
package/dist/deal.d.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ChildProcessManager } from './create-main-process.js';
|
|
2
|
+
import type { URLInfo } from './types.js';
|
|
2
3
|
import type { DealHeader } from '@d-zero/dealer';
|
|
3
|
-
import type { PuppeteerLaunchOptions } from 'puppeteer';
|
|
4
4
|
/**
|
|
5
5
|
*
|
|
6
6
|
* @param list
|
|
7
7
|
* @param header
|
|
8
|
-
* @param
|
|
9
|
-
* @param
|
|
8
|
+
* @param createProcess
|
|
9
|
+
* @param each
|
|
10
10
|
*/
|
|
11
|
-
export declare function deal(list: readonly URLInfo[], header: DealHeader,
|
|
11
|
+
export declare function deal<T extends Record<string, unknown>, R = void>(list: readonly URLInfo[], header: DealHeader, createProcess: () => ChildProcessManager<T, R>, each?: (result: R) => void | Promise<void>): Promise<void>;
|
package/dist/deal.js
CHANGED
|
@@ -1,107 +1,30 @@
|
|
|
1
1
|
import { deal as coreDeal } from '@d-zero/dealer';
|
|
2
|
-
import { createPage } from '@d-zero/puppeteer-page';
|
|
3
|
-
import { delay } from '@d-zero/shared/delay';
|
|
4
2
|
import c from 'ansi-colors';
|
|
5
|
-
import { log } from './debug.js';
|
|
6
3
|
/**
|
|
7
4
|
*
|
|
8
5
|
* @param list
|
|
9
6
|
* @param header
|
|
10
|
-
* @param
|
|
11
|
-
* @param
|
|
7
|
+
* @param createProcess
|
|
8
|
+
* @param each
|
|
12
9
|
*/
|
|
13
|
-
export
|
|
14
|
-
|
|
15
|
-
locale: 'ja-JP',
|
|
16
|
-
...options,
|
|
17
|
-
};
|
|
18
|
-
const childPrecessIds = new Set();
|
|
19
|
-
const cleanUp = () => {
|
|
20
|
-
log('child process IDs: %o', childPrecessIds);
|
|
21
|
-
for (const pid of childPrecessIds) {
|
|
22
|
-
try {
|
|
23
|
-
process.kill(pid);
|
|
24
|
-
log('killed %d', pid);
|
|
25
|
-
}
|
|
26
|
-
catch (error) {
|
|
27
|
-
log('Already dead: %d', pid);
|
|
28
|
-
if (error instanceof Error && 'code' in error && error.code === 'ESRCH') {
|
|
29
|
-
// ignore
|
|
30
|
-
continue;
|
|
31
|
-
}
|
|
32
|
-
throw error;
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
if (log.enabled) {
|
|
36
|
-
log('process.getActiveResourcesInfo(): %o', process.getActiveResourcesInfo());
|
|
37
|
-
}
|
|
38
|
-
};
|
|
39
|
-
process.on('exit', cleanUp);
|
|
40
|
-
await coreDeal(list, ({ id, url }, update, index) => {
|
|
10
|
+
export function deal(list, header, createProcess, each) {
|
|
11
|
+
return coreDeal(list, ({ id, url }, update, index) => {
|
|
41
12
|
const fileId = id || index.toString().padStart(3, '0');
|
|
42
13
|
const lineHeader = `%braille% ${c.bgWhite(` ${fileId} `)} ${c.gray(url.toString())}: `;
|
|
43
14
|
return async () => {
|
|
44
|
-
const
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
headless: true,
|
|
50
|
-
args: [
|
|
51
|
-
//
|
|
52
|
-
`--lang=${config.locale}`,
|
|
53
|
-
'--no-zygote',
|
|
54
|
-
'--ignore-certificate-errors',
|
|
55
|
-
],
|
|
56
|
-
...config,
|
|
15
|
+
const processManager = createProcess();
|
|
16
|
+
update(`${lineHeader}Booting ChildProcess%dots%`);
|
|
17
|
+
await processManager.ready();
|
|
18
|
+
processManager.log((log) => {
|
|
19
|
+
update(`${lineHeader}${log}`);
|
|
57
20
|
});
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
await page.setDefaultNavigationTimeout(0);
|
|
62
|
-
if (config.locale) {
|
|
63
|
-
await page.setExtraHTTPHeaders({
|
|
64
|
-
'Accept-Language': config.locale,
|
|
65
|
-
});
|
|
21
|
+
const result = await processManager.each(fileId, url.toString(), index);
|
|
22
|
+
if (each) {
|
|
23
|
+
await each(result);
|
|
66
24
|
}
|
|
67
|
-
await
|
|
68
|
-
.deal(page, fileId, url.toString(), (log) => update(`${lineHeader}${log}`), index)
|
|
69
|
-
.catch(evaluationError(page, url.toString(), fileId, index));
|
|
70
|
-
update(`${lineHeader} ${c.blue('✓')} Closing page%dots%`);
|
|
71
|
-
await page.close();
|
|
72
|
-
update(`${lineHeader} ${c.greenBright('✓')} Page process completed!`);
|
|
73
|
-
await delay(600);
|
|
25
|
+
await processManager.close();
|
|
74
26
|
};
|
|
75
27
|
}, {
|
|
76
|
-
...config,
|
|
77
28
|
header,
|
|
78
29
|
});
|
|
79
|
-
log('PuppeteerDealer.deal() completed');
|
|
80
|
-
}
|
|
81
|
-
/**
|
|
82
|
-
*
|
|
83
|
-
* @param page
|
|
84
|
-
* @param url
|
|
85
|
-
* @param fileId
|
|
86
|
-
* @param index
|
|
87
|
-
*/
|
|
88
|
-
function evaluationError(page, url, fileId, index) {
|
|
89
|
-
return (error) => {
|
|
90
|
-
if (error instanceof Error &&
|
|
91
|
-
error.message.includes('Execution context was destroyed')) {
|
|
92
|
-
error.message +=
|
|
93
|
-
'\n' +
|
|
94
|
-
c.red([
|
|
95
|
-
`PuppeteerDealer.deal() failed:`,
|
|
96
|
-
` URL: ${url}`,
|
|
97
|
-
` ID: ${fileId}`,
|
|
98
|
-
` Index: ${index}`,
|
|
99
|
-
' Page:',
|
|
100
|
-
` url: ${page.url()}`,
|
|
101
|
-
` isClosed: ${page.isClosed()}`,
|
|
102
|
-
].join('\n'));
|
|
103
|
-
throw error;
|
|
104
|
-
}
|
|
105
|
-
throw error;
|
|
106
|
-
};
|
|
107
30
|
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/types.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/puppeteer-dealer",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Puppeteer handles each page",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,14 +24,17 @@
|
|
|
24
24
|
"clean": "tsc --build --clean"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@d-zero/dealer": "1.3.
|
|
28
|
-
"@d-zero/
|
|
29
|
-
"@d-zero/shared": "0.
|
|
27
|
+
"@d-zero/dealer": "1.3.1",
|
|
28
|
+
"@d-zero/proc-talk": "0.4.0",
|
|
29
|
+
"@d-zero/shared": "0.8.0",
|
|
30
30
|
"ansi-colors": "4.1.3",
|
|
31
|
-
"debug": "4.4.
|
|
31
|
+
"debug": "4.4.1"
|
|
32
32
|
},
|
|
33
33
|
"devDependencies": {
|
|
34
|
-
"puppeteer": "
|
|
34
|
+
"puppeteer": "24.9.0"
|
|
35
35
|
},
|
|
36
|
-
"
|
|
36
|
+
"peerDependencies": {
|
|
37
|
+
"puppeteer": "24.8.2"
|
|
38
|
+
},
|
|
39
|
+
"gitHead": "4e9cc7b87e0fef91b6f2d4edfb66ca9134b2491b"
|
|
37
40
|
}
|