@d-zero/puppeteer-dealer 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ import type { ChildProcessHandler, CommonParams } from './types.js';
2
+ /**
3
+ * Sets up the child-process side of a puppeteer-dealer worker; `handler` supplies the per-page work.
4
+ * @param handler Called with the params (`P & CommonParams`); returns, or resolves to, a `ChildProcessMethods<R>` object.
5
+ */
6
+ export declare function createChildProcess<P, R = void>(handler: ChildProcessHandler<P & CommonParams, R>): void;
@@ -0,0 +1,63 @@
1
+ import { ProcTalk } from '@d-zero/proc-talk';
2
+ import { launch } from 'puppeteer';
3
+ import { log } from './debug.js';
4
+ const childLog = log.extend(`child:${process.pid}`);
5
+ /**
6
+ * Creates a ProcTalk endpoint of type "child" that fetches params through the "init" call, launches Puppeteer, opens one page, and serves "each" calls with the eachPage function returned by the handler.
7
+ * @param handler Called with the params returned by the "init" call; must return (or resolve to) an object exposing eachPage.
8
+ */
9
+ export function createChildProcess(handler) {
10
+ new ProcTalk({
11
+ type: 'child',
12
+ title: '@d-zero/puppeteer-dealer',
13
+ async process(options) {
14
+ const config = {
15
+ locale: 'ja-JP', // default locale; an options.locale key overrides it via the spread below
16
+ ...options,
17
+ };
18
+ childLog('Process started: %O', config);
19
+ const params = await this.call('init'); // request the params over the channel
20
+ childLog('Params: %O', params);
21
+ childLog('Needs auth: %s', params.needAuth);
22
+ const { eachPage } = await handler(params);
23
+ const launchOptions = {
24
+ headless: config.headless ?? (params.needAuth ? 'shell' : true), // when unset: 'shell' if params.needAuth, otherwise true
25
+ args: [
26
+ //
27
+ `--lang=${config.locale}`,
28
+ '--no-zygote',
29
+ '--ignore-certificate-errors',
30
+ '--no-sandbox',
31
+ '--disable-web-security',
32
+ '--disable-features=SafeBrowsing',
33
+ ],
34
+ ...config, // NOTE(review): spread last, so keys present in config (e.g. args) replace the defaults above instead of merging
35
+ };
36
+ childLog('Launch options: %O', launchOptions);
37
+ const browser = await launch(launchOptions);
38
+ const page = await browser?.newPage();
39
+ if (!page) {
40
+ throw new Error('Failed to create page');
41
+ }
42
+ page.setDefaultNavigationTimeout(0); // presumably 0 disables the navigation timeout; confirm against the Puppeteer docs
43
+ if (config.locale) {
44
+ await page.setExtraHTTPHeaders({
45
+ 'Accept-Language': config.locale,
46
+ });
47
+ }
48
+ childLog('Page is ready');
49
+ this.bind('each', async (id, url, index) => { // every "each" call reuses the single page created above
50
+ const result = await eachPage({ page, id, url, index }, async (log) => {
51
+ await this.call('log', log); // forward log text over the channel through the "log" command
52
+ });
53
+ return result;
54
+ });
55
+ return async () => { // cleanup: closes the page, then the browser; NOTE(review): presumably invoked by ProcTalk on shutdown, confirm
56
+ childLog('Close page and browser');
57
+ await page.close();
58
+ await browser.close();
59
+ childLog('Cleanup done');
60
+ };
61
+ },
62
+ });
63
+ }
@@ -0,0 +1,17 @@
1
+ import type { CommonParams, Logger, PuppeteerDealerOptions } from './types.js';
2
+ import type { LaunchOptions } from 'puppeteer';
3
+ /**
4
+ * Builds a factory that, given `needAuth`, constructs a ChildProcessManager.
5
+ * @param subModulePath Forwarded to the ChildProcessManager constructor; presumably the module that runs as the child process (confirm against ProcTalk).
6
+ * @param params Caller-defined params; the factory adds `needAuth` before handing them to the manager.
7
+ * @param options Forwarded unchanged to the ChildProcessManager constructor.
8
+ */
9
+ export declare function createProcess<P, R = void>(subModulePath: string, params: P, options?: PuppeteerDealerOptions & LaunchOptions): (needAuth: boolean) => ChildProcessManager<P, R>;
10
+ export declare class ChildProcessManager<P, R> { // main-process handle around one ProcTalk channel of type "main"
11
+ #private;
12
+ constructor(subModulePath: string, params: P & CommonParams, options?: PuppeteerDealerOptions & LaunchOptions);
13
+ close(): Promise<void>; // awaits close() on the underlying ProcTalk instance
14
+ each(id: string, url: string, index: number): Promise<R>; // issues the "each" call and resolves with its result
15
+ log(logger: Logger): void; // binds logger as the handler of the "log" command
16
+ ready(): Promise<void>; // awaits initialized() on the underlying ProcTalk instance
17
+ }
@@ -0,0 +1,33 @@
1
+ import { ProcTalk } from '@d-zero/proc-talk';
2
+ /**
3
+ * Returns a factory function: calling it with needAuth creates a new ChildProcessManager whose params are the given params plus needAuth.
4
+ * @param subModulePath Passed through to the ChildProcessManager constructor.
5
+ * @param params Spread into a new object together with needAuth; must therefore be an object (or nullish).
6
+ * @param options Passed through to the ChildProcessManager constructor.
7
+ */
8
+ export function createProcess(subModulePath, params, options) {
9
+ return (needAuth) => new ChildProcessManager(subModulePath, { ...params, needAuth }, options);
10
+ }
11
+ export class ChildProcessManager { // wraps a ProcTalk instance of type "main"
12
+ #procTalk;
13
+ constructor(subModulePath, params, options) {
14
+ this.#procTalk = new ProcTalk({
15
+ type: 'main',
16
+ subModulePath,
17
+ options,
18
+ });
19
+ this.#procTalk.bind('init', () => Promise.resolve(params)); // answers the "init" call with the params given to this constructor
20
+ }
21
+ async close() {
22
+ await this.#procTalk.close();
23
+ }
24
+ async each(id, url, index) {
25
+ return await this.#procTalk.call('each', id, url, index); // resolves with whatever the "each" handler on the other side returns
26
+ }
27
+ log(logger) {
28
+ this.#procTalk.bind('log', logger); // logger becomes the handler of the "log" command
29
+ }
30
+ async ready() {
31
+ await this.#procTalk.initialized();
32
+ }
33
+ }
package/dist/deal.d.ts CHANGED
@@ -1,11 +1,13 @@
1
- import type { PuppeteerDealerOptions, PuppeteerDealHandler, URLInfo } from './types.js';
2
- import type { DealHeader } from '@d-zero/dealer';
3
- import type { PuppeteerLaunchOptions } from 'puppeteer';
1
+ import type { ChildProcessManager } from './create-main-process.js';
2
+ import type { URLInfo } from './types.js';
3
+ import type { DealHeader, DealOptions } from '@d-zero/dealer';
4
4
  /**
5
5
  * Processes every entry of `list` through the core deal() of @d-zero/dealer, using one ChildProcessManager per entry.
6
6
  * @param list Entries to process; each carries a `url` (string or URL).
7
7
  * @param header Header value handed to the core deal() options.
8
- * @param handler
8
+ * @param createProcess Called per entry; the function it returns receives `needAuth` and yields the ChildProcessManager.
9
9
  * @param options Core deal options without `header`, plus an optional `each` callback.
10
10
  */
11
- export declare function deal(list: readonly URLInfo[], header: DealHeader, handler: PuppeteerDealHandler, options?: PuppeteerDealerOptions & PuppeteerLaunchOptions): Promise<void>;
11
+ export declare function deal<T, R = void>(list: readonly URLInfo[], header: DealHeader, createProcess: () => (needAuth: boolean) => ChildProcessManager<T, R>, options?: Omit<DealOptions, 'header'> & {
12
+ each?: (result: R) => void | Promise<void>; // receives the result of each() for every entry
13
+ }): Promise<void>;
package/dist/deal.js CHANGED
@@ -1,107 +1,36 @@
1
1
  import { deal as coreDeal } from '@d-zero/dealer';
2
- import { createPage } from '@d-zero/puppeteer-page';
3
- import { delay } from '@d-zero/shared/delay';
4
2
  import c from 'ansi-colors';
5
- import { log } from './debug.js';
6
3
  /**
7
4
  * Computes needAuth from the list, then hands each entry to the core deal(): create a process manager, await ready(), run each(), pass the result to options.each when set, and close the manager.
8
5
  * @param list Entries to process; needAuth becomes true when any entry URL carries both a username and a password.
9
6
  * @param header Passed to the core deal() as the header option.
10
- * @param handler
7
+ * @param createProcess Called once per entry; its return value is invoked with needAuth to obtain the process manager.
11
8
  * @param options Spread into the core deal() options; an optional each callback receives every result.
12
9
  */
13
- export async function deal(list, header, handler, options) {
14
- const config = {
15
- locale: 'ja-JP',
16
- ...options,
17
- };
18
- const childPrecessIds = new Set();
19
- const cleanUp = () => {
20
- log('child process IDs: %o', childPrecessIds);
21
- for (const pid of childPrecessIds) {
22
- try {
23
- process.kill(pid);
24
- log('killed %d', pid);
25
- }
26
- catch (error) {
27
- log('Already dead: %d', pid);
28
- if (error instanceof Error && 'code' in error && error.code === 'ESRCH') {
29
- // ignore
30
- continue;
31
- }
32
- throw error;
33
- }
34
- }
35
- if (log.enabled) {
36
- log('process.getActiveResourcesInfo(): %o', process.getActiveResourcesInfo());
37
- }
38
- };
39
- process.on('exit', cleanUp);
40
- await coreDeal(list, ({ id, url }, update, index) => {
10
+ export function deal(list, header, createProcess, options) {
11
+ const needAuth = list.some(({ url }) => { // one flag for the whole list, not per entry
12
+ const urlObj = new URL(url);
13
+ return !!(urlObj.username && urlObj.password);
14
+ });
15
+ return coreDeal(list, ({ id, url }, update, index) => {
41
16
  const fileId = id || index.toString().padStart(3, '0');
42
17
  const lineHeader = `%braille% ${c.bgWhite(` ${fileId} `)} ${c.gray(url.toString())}: `;
43
18
  return async () => {
44
- const continued = await handler.beforeOpenPage?.(fileId, url.toString(), (log) => update(`${lineHeader}${log}`), index);
45
- if (continued === false) {
46
- return;
47
- }
48
- const page = await createPage({
49
- headless: true,
50
- args: [
51
- //
52
- `--lang=${config.locale}`,
53
- '--no-zygote',
54
- '--ignore-certificate-errors',
55
- ],
56
- ...config,
19
+ update(`${lineHeader}Using ${needAuth ? 'auth' : 'no auth'}`);
20
+ const processManager = createProcess()(needAuth); // a fresh process manager for every list entry
21
+ update(`${lineHeader}Booting ChildProcess%dots%`);
22
+ await processManager.ready();
23
+ processManager.log((log) => { // NOTE(review): the log handler is bound only after ready() resolves; confirm no "log" call can arrive earlier
24
+ update(`${lineHeader}${log}`);
57
25
  });
58
- if (page.pid !== null) {
59
- childPrecessIds.add(page.pid);
60
- }
61
- await page.setDefaultNavigationTimeout(0);
62
- if (config.locale) {
63
- await page.setExtraHTTPHeaders({
64
- 'Accept-Language': config.locale,
65
- });
26
+ const result = await processManager.each(fileId, url.toString(), index);
27
+ if (options?.each) {
28
+ await options.each(result);
66
29
  }
67
- await handler
68
- .deal(page, fileId, url.toString(), (log) => update(`${lineHeader}${log}`), index)
69
- .catch(evaluationError(page, url.toString(), fileId, index));
70
- update(`${lineHeader} ${c.blue('✓')} Closing page%dots%`);
71
- await page.close();
72
- update(`${lineHeader} ${c.greenBright('✓')} Page process completed!`);
73
- await delay(600);
30
+ await processManager.close(); // NOTE(review): not inside try/finally, so a rejection from each() or options.each skips this close()
74
31
  };
75
32
  }, {
76
- ...config,
33
+ ...options,
77
34
  header,
78
35
  });
79
- log('PuppeteerDealer.deal() completed');
80
- }
81
- /**
82
- *
83
- * @param page
84
- * @param url
85
- * @param fileId
86
- * @param index
87
- */
88
- function evaluationError(page, url, fileId, index) {
89
- return (error) => {
90
- if (error instanceof Error &&
91
- error.message.includes('Execution context was destroyed')) {
92
- error.message +=
93
- '\n' +
94
- c.red([
95
- `PuppeteerDealer.deal() failed:`,
96
- ` URL: ${url}`,
97
- ` ID: ${fileId}`,
98
- ` Index: ${index}`,
99
- ' Page:',
100
- ` url: ${page.url()}`,
101
- ` isClosed: ${page.isClosed()}`,
102
- ].join('\n'));
103
- throw error;
104
- }
105
- throw error;
106
- };
107
36
  }
package/dist/index.d.ts CHANGED
@@ -1,2 +1,4 @@
1
1
  export { deal } from './deal.js';
2
+ export { createChildProcess } from './create-child-process.js';
3
+ export { createProcess } from './create-main-process.js';
2
4
  export * from './types.js';
package/dist/index.js CHANGED
@@ -1,2 +1,4 @@
1
1
  export { deal } from './deal.js';
2
+ export { createChildProcess } from './create-child-process.js';
3
+ export { createProcess } from './create-main-process.js';
2
4
  export * from './types.js';
package/dist/types.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { DealOptions } from '@d-zero/dealer';
2
- import type { Page } from '@d-zero/puppeteer-page';
2
+ import type { Page } from 'puppeteer';
3
3
  export type PuppeteerDealerOptions = {
4
4
  readonly locale?: string; // used for the --lang launch argument and the Accept-Language header; createChildProcess defaults it to 'ja-JP'
5
5
  } & DealOptions;
@@ -12,3 +12,27 @@ export type URLInfo = {
12
12
  readonly url: string | URL;
13
13
  };
14
14
  export type Logger = (log: string) => void;
15
+ export type CommonParams = { // fields added to the caller-defined params of every child process
16
+ readonly needAuth: boolean; // added by createProcess; deal() derives it from username/password in the listed URLs
17
+ };
18
+ export type ChildProcessMethods<R> = { // what a ChildProcessHandler returns
19
+ eachPage: (params: EachPageParams, logger: Logger) => Promise<R>; // run for every "each" command; its resolved value is returned to the caller
20
+ };
21
+ type EachPageParams = { // first argument of eachPage
22
+ readonly page: Page; // the single Puppeteer page the child process opened before binding "each"
23
+ readonly id: string; // id passed to each(); deal() sends the entry id or the zero-padded index
24
+ readonly url: string; // URL passed to each(); deal() sends url.toString()
25
+ readonly index: number; // index passed to each(); deal() forwards the index from the core deal callback
26
+ };
27
+ export type ChildProcessCommonParams = { // NOTE(review): no visible code references this type; confirm it is still needed
28
+ readonly id: string;
29
+ readonly url: string;
30
+ readonly logger: Logger;
31
+ };
32
+ export type ChildProcessHandler<P extends CommonParams, R> = (params: P) => Promise<ChildProcessMethods<R>> | ChildProcessMethods<R>; // handler given to createChildProcess; may answer synchronously or with a promise
33
+ export type ChildProcessCommands<P extends CommonParams, R> = { // command names exchanged between the main and child sides
34
+ init: () => Promise<P>; // called by the child, bound on the main side: returns the params
35
+ each: (id: string, url: string, index: number) => Promise<R>; // called by the main side, bound in the child: processes one URL
36
+ log: Logger; // called by the child, bound on the main side: emits one log line
37
+ };
38
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d-zero/puppeteer-dealer",
3
- "version": "0.3.0",
3
+ "version": "0.5.0",
4
4
  "description": "Puppeteer handles each page",
5
5
  "author": "D-ZERO",
6
6
  "license": "MIT",
@@ -24,14 +24,17 @@
24
24
  "clean": "tsc --build --clean"
25
25
  },
26
26
  "dependencies": {
27
- "@d-zero/dealer": "1.3.0",
28
- "@d-zero/puppeteer-page": "0.3.0",
29
- "@d-zero/shared": "0.7.0",
27
+ "@d-zero/dealer": "1.3.1",
28
+ "@d-zero/proc-talk": "0.4.1",
29
+ "@d-zero/shared": "0.9.0",
30
30
  "ansi-colors": "4.1.3",
31
- "debug": "4.4.0"
31
+ "debug": "4.4.1"
32
32
  },
33
33
  "devDependencies": {
34
- "puppeteer": "23.7.1"
34
+ "puppeteer": "24.10.1"
35
35
  },
36
- "gitHead": "e4fd17857e31022d121527b00fd7f009dbdb2142"
36
+ "peerDependencies": {
37
+ "puppeteer": "24.10.1"
38
+ },
39
+ "gitHead": "04c6969564182c36ee38ef41e78130936dfa4863"
37
40
  }