@d-zero/replicator 0.7.0 → 0.7.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,28 +1,7 @@
1
1
  import { createChildProcess } from '@d-zero/puppeteer-dealer';
2
2
  import { beforePageScan, devicePresets } from '@d-zero/puppeteer-page-scan';
3
3
  import { scrollAllOver } from '@d-zero/puppeteer-scroll';
4
- /**
5
- * Add resource path to the set with MIME type encoding if needed
6
- * @param pathname - Resource pathname
7
- * @param mimeType - MIME type from response headers (optional)
8
- * @returns Encoded resource path
9
- */
10
- function encodeResourcePath(pathname, mimeType) {
11
- // Normalize empty pathname to "/"
12
- if (pathname === '') {
13
- pathname = '/';
14
- }
15
- // Check if the last segment has an extension
16
- const lastSlashIndex = pathname.lastIndexOf('/');
17
- const lastSegment = lastSlashIndex === -1 ? pathname : pathname.slice(lastSlashIndex + 1);
18
- const hasExtension = lastSegment.includes('.');
19
- // For paths without extension, encode with MIME type if available
20
- if (!hasExtension && mimeType) {
21
- return `${pathname}:::${mimeType}`;
22
- }
23
- // For paths with extension or without MIME type, return as-is
24
- return pathname;
25
- }
4
+ import { encodeResourcePath } from '@d-zero/shared/encode-resource-path';
26
5
  createChildProcess((param) => {
27
6
  const { devices, timeout } = param;
28
7
  return {
@@ -31,8 +10,7 @@ createChildProcess((param) => {
31
10
  const pageHostname = new URL(url).hostname;
32
11
  // Add the page URL itself first (in case response event is missed)
33
12
  const pageUrlObj = new URL(url);
34
- const pagePathname = pageUrlObj.pathname;
35
- resourcePaths.add(encodeResourcePath(pagePathname, 'text/html'));
13
+ resourcePaths.add(encodeResourcePath(pageUrlObj, 'text/html'));
36
14
  // Listen to all network responses
37
15
  const responseHandler = (response) => {
38
16
  const responseUrl = response.url();
@@ -53,12 +31,11 @@ createChildProcess((param) => {
53
31
  if (resourceUrlObj.hostname !== pageHostname) {
54
32
  return;
55
33
  }
56
- // Get pathname and MIME type
57
- const resourcePath = resourceUrlObj.pathname;
34
+ // Get MIME type
58
35
  const contentType = response.headers()['content-type'];
59
36
  const mimeType = contentType?.split(';')[0]?.trim();
60
37
  // Add resource with MIME encoding if needed
61
- resourcePaths.add(encodeResourcePath(resourcePath, mimeType));
38
+ resourcePaths.add(encodeResourcePath(resourceUrlObj, mimeType));
62
39
  };
63
40
  page.on('response', responseHandler);
64
41
  const defaultSizes = {
package/dist/index.js CHANGED
@@ -1,31 +1,10 @@
1
1
  import path from 'node:path';
2
2
  import { deal, createProcess } from '@d-zero/puppeteer-dealer';
3
3
  import { devicePresets } from '@d-zero/puppeteer-page-scan';
4
+ import { encodeResourcePath } from '@d-zero/shared/encode-resource-path';
4
5
  import { validateSameHost } from '@d-zero/shared/validate-same-host';
5
6
  import c from 'ansi-colors';
6
7
  import { downloadResources } from './resource-downloader.js';
7
- /**
8
- * Encode resource path with MIME type if needed
9
- * @param pathname - Resource pathname
10
- * @param mimeType - MIME type (optional)
11
- * @returns Encoded resource path
12
- */
13
- function encodeResourcePath(pathname, mimeType) {
14
- // Normalize empty pathname to "/"
15
- if (pathname === '') {
16
- pathname = '/';
17
- }
18
- // Check if the last segment has an extension
19
- const lastSlashIndex = pathname.lastIndexOf('/');
20
- const lastSegment = lastSlashIndex === -1 ? pathname : pathname.slice(lastSlashIndex + 1);
21
- const hasExtension = lastSegment.includes('.');
22
- // For paths without extension, encode with MIME type if available
23
- if (!hasExtension && mimeType) {
24
- return `${pathname}:::${mimeType}`;
25
- }
26
- // For paths with extension or without MIME type, return as-is
27
- return pathname;
28
- }
29
8
  /**
30
9
  * Collect page URLs without resource scanning (page-only mode)
31
10
  * @param urls - Array of URLs to process
@@ -38,9 +17,8 @@ function collectPageUrlsOnly(urls, progress) {
38
17
  const encodedUrls = new Set();
39
18
  for (const url of urls) {
40
19
  const urlObj = new URL(url);
41
- const pathname = urlObj.pathname || '/';
42
20
  // Encode as HTML page
43
- const encodedPath = encodeResourcePath(pathname, 'text/html');
21
+ const encodedPath = encodeResourcePath(urlObj, 'text/html');
44
22
  encodedUrls.add(encodedPath);
45
23
  }
46
24
  progress(c.bold.green(`✅ Prepared ${encodedUrls.size} page(s) for download`));
@@ -1,31 +1,8 @@
1
1
  import { mkdir, writeFile } from 'node:fs/promises';
2
2
  import path from 'node:path';
3
3
  import { deal } from '@d-zero/dealer';
4
- import { mimeToExtension } from '@d-zero/shared/mime-to-extension';
5
- import { urlToLocalPath } from '@d-zero/shared/url-to-local-path';
4
+ import { parseEncodedPath } from '@d-zero/shared/encode-resource-path';
6
5
  import c from 'ansi-colors';
7
- /**
8
- * Parse encoded pathname and return the actual URL and local path
9
- * @param encodedPath - pathname or "pathname:::MIME/type" format
10
- * @param baseUrl - Base URL to construct full URL from pathname
11
- */
12
- function parseEncodedPath(encodedPath, baseUrl) {
13
- const parts = encodedPath.split(':::');
14
- if (parts.length === 2) {
15
- // Format: "pathname:::MIME/type"
16
- const pathname = parts[0];
17
- const mimeType = parts[1];
18
- const url = new URL(pathname, baseUrl).href;
19
- const extension = mimeToExtension(mimeType);
20
- const localPath = urlToLocalPath(url, extension);
21
- return { url, localPath };
22
- }
23
- // Regular pathname without MIME encoding
24
- const pathname = encodedPath;
25
- const url = new URL(pathname, baseUrl).href;
26
- const localPath = urlToLocalPath(url, '');
27
- return { url, localPath };
28
- }
29
6
  /**
30
7
  * Download and save resources to disk
31
8
  * @param encodedPaths - Array of encoded pathnames
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d-zero/replicator",
3
- "version": "0.7.0",
3
+ "version": "0.7.2",
4
4
  "description": "Replicate web pages with all their resources to local directories",
5
5
  "author": "D-ZERO",
6
6
  "license": "MIT",
@@ -24,12 +24,12 @@
24
24
  "clean": "tsc --build --clean"
25
25
  },
26
26
  "dependencies": {
27
- "@d-zero/cli-core": "1.2.0",
28
- "@d-zero/dealer": "1.4.0",
29
- "@d-zero/puppeteer-dealer": "0.5.10",
30
- "@d-zero/puppeteer-page-scan": "4.2.6",
31
- "@d-zero/puppeteer-scroll": "3.1.0",
32
- "@d-zero/shared": "0.13.0",
27
+ "@d-zero/cli-core": "1.2.2",
28
+ "@d-zero/dealer": "1.4.2",
29
+ "@d-zero/puppeteer-dealer": "0.5.12",
30
+ "@d-zero/puppeteer-page-scan": "4.2.8",
31
+ "@d-zero/puppeteer-scroll": "3.1.2",
32
+ "@d-zero/shared": "0.15.0",
33
33
  "ansi-colors": "4.1.3",
34
34
  "minimist": "1.2.8",
35
35
  "puppeteer": "24.27.0"
@@ -37,5 +37,5 @@
37
37
  "devDependencies": {
38
38
  "@types/minimist": "1.2.5"
39
39
  },
40
- "gitHead": "85abd39686d2ce02c7b8db071437464d212dd982"
40
+ "gitHead": "858864f8e57494a5a872431e1d772e18e4cc843a"
41
41
  }