@d-zero/replicator 0.8.7 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -26,6 +26,7 @@ npx @d-zero/replicator <url...> -o <output-directory> [options]
26
26
  - `--interval <ms>`: 並列実行間の間隔(デフォルト: なし)
27
27
  - 数値または"min-max"形式でランダム範囲を指定可能
28
28
  - `--only <type>`: ダウンロード対象を限定(`page` または `resource`)
29
+ - `-a, --auth <user:pass>`: Basic認証の認証情報(`ユーザー名:パスワード` 形式)
29
30
  - `-v, --verbose`: 詳細ログモード
30
31
 
31
32
  ##### `--only` オプション
@@ -67,6 +68,9 @@ npx @d-zero/replicator https://example.com -o ./output --only page
67
68
 
68
69
  # リソースのみダウンロード(HTMLを除外)
69
70
  npx @d-zero/replicator https://example.com -o ./output --only resource
71
+
72
+ # Basic認証が必要なページ
73
+ npx @d-zero/replicator https://example.com -o ./output -a username:password
70
74
  ```
71
75
 
72
76
  ### プログラマティック使用
@@ -116,10 +120,19 @@ await replicate({
116
120
  outputDir: './output',
117
121
  only: 'resource',
118
122
  });
123
+
124
+ // Basic認証が必要なページ
125
+ await replicate({
126
+ urls: ['https://example.com'],
127
+ outputDir: './output',
128
+ username: 'username',
129
+ password: 'password',
130
+ });
119
131
  ```
120
132
 
121
133
  ## 機能
122
134
 
135
+ - **Basic認証対応**: `--auth user:pass`オプションでBasic認証が必要なページにアクセス可能
123
136
  - **並列処理**: 複数のURLを並列で効率的に処理
124
137
  - **メモリ効率**: リソースを直接ディスクに保存してメモリ使用量を最小化
125
138
  - **選択的ダウンロード**: `--only`オプションでHTMLページのみまたはリソースのみをダウンロード可能
@@ -3,7 +3,7 @@ import { beforePageScan, devicePresets } from '@d-zero/puppeteer-page-scan';
3
3
  import { scrollAllOver } from '@d-zero/puppeteer-scroll';
4
4
  import { encodeResourcePath } from '@d-zero/shared/encode-resource-path';
5
5
  createChildProcess((param) => {
6
- const { devices, timeout } = param;
6
+ const { devices, timeout, username, password } = param;
7
7
  return {
8
8
  async eachPage({ page, url }, logger) {
9
9
  const resourcePaths = new Set();
@@ -38,6 +38,9 @@ createChildProcess((param) => {
38
38
  resourcePaths.add(encodeResourcePath(resourceUrlObj, mimeType));
39
39
  };
40
40
  page.on('response', responseHandler);
41
+ if (username && password) {
42
+ await page.authenticate({ username, password });
43
+ }
41
44
  const defaultSizes = {
42
45
  'desktop-compact': devicePresets['desktop-compact'],
43
46
  mobile: devicePresets.mobile,
package/dist/cli.js CHANGED
@@ -14,6 +14,7 @@ const { options, args } = createCLI({
14
14
  t: 'timeout',
15
15
  d: 'devices',
16
16
  l: 'limit',
17
+ a: 'auth',
17
18
  },
18
19
  usage: [
19
20
  'Usage: replicator <url1> [url2...] -o <output-directory> [options]',
@@ -26,6 +27,7 @@ const { options, args } = createCLI({
26
27
  ' --interval <ms> Interval between parallel executions (default: none)',
27
28
  ' Format: number or "min-max" for random range',
28
29
  ' --only <type> Download only specified type: page or resource',
30
+ ' -a, --auth <user:pass> Credentials for Basic authentication',
29
31
  ' -v, --verbose Enable verbose logging',
30
32
  '',
31
33
  'Available device presets:',
@@ -46,6 +48,7 @@ const { options, args } = createCLI({
46
48
  devices: cli.devices,
47
49
  limit: cli.limit ? Number(cli.limit) : undefined,
48
50
  only: cli.only,
51
+ auth: cli.auth,
49
52
  }),
50
53
  validateArgs: (options, cli) => {
51
54
  if (options.only && options.only !== 'page' && options.only !== 'resource') {
@@ -64,6 +67,9 @@ if (urls.length === 0) {
64
67
  try {
65
68
  const deviceNames = options.devices ? parseList(options.devices) : undefined;
66
69
  const devices = parseDevicesOption(deviceNames);
70
+ const colonIndex = options.auth ? options.auth.indexOf(':') : -1;
71
+ const username = colonIndex >= 0 ? options.auth.slice(0, colonIndex) : undefined;
72
+ const password = colonIndex >= 0 ? options.auth.slice(colonIndex + 1) : undefined;
67
73
  await replicate({
68
74
  urls,
69
75
  outputDir,
@@ -73,6 +79,8 @@ try {
73
79
  limit: options.limit,
74
80
  only: options.only,
75
81
  interval: options.interval,
82
+ username,
83
+ password,
76
84
  });
77
85
  }
78
86
  catch (error) {
package/dist/index.js CHANGED
@@ -33,9 +33,11 @@ function collectPageUrlsOnly(urls, progress) {
33
33
  * @param limit - Parallel execution limit
34
34
  * @param progress - Progress logger function
35
35
  * @param interval
36
+ * @param username - Username for Basic authentication, forwarded to the child process
37
+ * @param password - Password for Basic authentication, forwarded to the child process
36
38
  * @returns Set of encoded URLs
37
39
  */
38
- async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit, progress, interval) {
40
+ async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit, progress, interval, username, password) {
39
41
  progress(c.bold.yellow('📡 Phase 1: Collecting resource metadata...'));
40
42
  const results = [];
41
43
  await deal(urls.map((url) => ({ id: null, url })), (_, done, total) => {
@@ -44,6 +46,8 @@ async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit
44
46
  }, () => createProcess(path.resolve(import.meta.dirname, 'child-process.js'), {
45
47
  devices: targetSizes,
46
48
  timeout,
49
+ username,
50
+ password,
47
51
  }, {}), {
48
52
  verbose,
49
53
  limit,
@@ -99,7 +103,7 @@ async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit
99
103
  * @param options - Replication options
100
104
  */
101
105
  export async function replicate(options) {
102
- const { urls, outputDir, verbose = false, timeout = 30_000, devices, limit = 3, only, interval, } = options;
106
+ const { urls, outputDir, verbose = false, timeout = 30_000, devices, limit = 3, only, interval, username, password, } = options;
103
107
  if (urls.length === 0) {
104
108
  throw new Error('At least one URL is required');
105
109
  }
@@ -134,7 +138,7 @@ export async function replicate(options) {
134
138
  }
135
139
  case 'resource':
136
140
  case undefined: {
137
- allEncodedUrls = await collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit, progress, interval);
141
+ allEncodedUrls = await collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit, progress, interval, username, password);
138
142
  break;
139
143
  }
140
144
  default: {
@@ -148,7 +152,7 @@ export async function replicate(options) {
148
152
  // Use the first URL as base URL for constructing full URLs
149
153
  const baseUrl = urls[0];
150
154
  // Download all resources
151
- await downloadResources([...allEncodedUrls], baseUrl, outputDir, progress, verbose, only, interval);
155
+ await downloadResources([...allEncodedUrls], baseUrl, outputDir, progress, verbose, only, interval, username, password);
152
156
  progress('');
153
157
  progress(c.bold.green(`✅ Replication complete!`));
154
158
  progress(c.gray(` All resources saved to: ${outputDir}`));
@@ -17,5 +17,7 @@ import type { DelayOptions } from '@d-zero/shared/delay';
17
17
  * @param verbose
18
18
  * @param only
19
19
  * @param interval
20
+ * @param username - Username for Basic authentication (optional)
21
+ * @param password - Password for Basic authentication (optional)
20
22
  */
21
- export declare function downloadResources(encodedPaths: string[], baseUrl: string, outputDir: string, logger: (message: string) => void, verbose?: boolean, only?: 'page' | 'resource', interval?: number | DelayOptions): Promise<void>;
23
+ export declare function downloadResources(encodedPaths: string[], baseUrl: string, outputDir: string, logger: (message: string) => void, verbose?: boolean, only?: 'page' | 'resource', interval?: number | DelayOptions, username?: string, password?: string): Promise<void>;
@@ -21,8 +21,10 @@ import c from 'ansi-colors';
21
21
  * @param verbose
22
22
  * @param only
23
23
  * @param interval
24
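+ * @param username - Username for Basic authentication; the Authorization header is sent only when both username and password are non-empty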
25
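+ * @param password - Password for Basic authentication; the Authorization header is sent only when both username and password are non-empty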
24
26
  */
25
- export async function downloadResources(encodedPaths, baseUrl, outputDir, logger, verbose = false, only, interval) {
27
+ export async function downloadResources(encodedPaths, baseUrl, outputDir, logger, verbose = false, only, interval, username, password) {
26
28
  const uniqueResources = new Map();
27
29
  // Parse all encoded pathnames
28
30
  for (const encodedPath of encodedPaths) {
@@ -46,6 +48,9 @@ export async function downloadResources(encodedPaths, baseUrl, outputDir, logger
46
48
  }
47
49
  }
48
50
  const tasks = [...uniqueResources.values()];
51
+ const authHeader = username && password
52
+ ? `Basic ${Buffer.from(`${username}:${password}`).toString('base64')}`
53
+ : undefined;
49
54
  if (tasks.length === 0) {
50
55
  logger(c.yellow('⚠️ No resources to download'));
51
56
  return;
@@ -60,7 +65,9 @@ export async function downloadResources(encodedPaths, baseUrl, outputDir, logger
60
65
  setLineHeader(lineHeader);
61
66
  return async () => {
62
67
  update('Fetching%dots%');
63
- const response = await fetch(task.url).catch((error) => {
68
+ const response = await fetch(task.url, {
69
+ headers: authHeader ? { Authorization: authHeader } : {},
70
+ }).catch((error) => {
64
71
  update(c.red(`❌ Fetch failed: ${error.message}`));
65
72
  failed++;
66
73
  return null;
package/dist/types.d.ts CHANGED
@@ -11,6 +11,8 @@ export interface ReplicateOptions {
11
11
  limit?: number;
12
12
  only?: 'page' | 'resource';
13
13
  interval?: number | DelayOptions;
14
+ username?: string;
15
+ password?: string;
14
16
  }
15
17
  export interface Resource {
16
18
  url: string;
@@ -22,6 +24,8 @@ export interface ChildProcessInput {
22
24
  resolution?: number;
23
25
  }>;
24
26
  timeout?: number;
27
+ username?: string;
28
+ password?: string;
25
29
  }
26
30
  export interface ChildProcessResult {
27
31
  url: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d-zero/replicator",
3
- "version": "0.8.7",
3
+ "version": "0.9.0",
4
4
  "description": "Replicate web pages with all their resources to local directories",
5
5
  "author": "D-ZERO",
6
6
  "license": "MIT",
@@ -25,8 +25,8 @@
25
25
  },
26
26
  "dependencies": {
27
27
  "@d-zero/cli-core": "1.3.4",
28
- "@d-zero/dealer": "1.6.4",
29
- "@d-zero/puppeteer-dealer": "0.7.4",
28
+ "@d-zero/dealer": "1.6.5",
29
+ "@d-zero/puppeteer-dealer": "0.7.5",
30
30
  "@d-zero/puppeteer-page-scan": "4.4.5",
31
31
  "@d-zero/puppeteer-scroll": "3.1.13",
32
32
  "@d-zero/shared": "0.20.1",
@@ -37,5 +37,5 @@
37
37
  "devDependencies": {
38
38
  "@types/minimist": "1.2.5"
39
39
  },
40
- "gitHead": "a6b5eb0a0a327c003053f7c25be4c075ed319c76"
40
+ "gitHead": "31410767ae6beff5c5dbe21a824406a4e6716868"
41
41
  }