@d-zero/replicator 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/child-process.d.ts +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/index.d.ts +26 -0
- package/dist/resource-downloader.d.ts +10 -0
- package/dist/types.d.ts +27 -0
- package/package.json +2 -2
package/dist/child-process.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/cli.d.ts
ADDED
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { ReplicateOptions } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Replicate web pages with all their resources to local directories
|
|
4
|
+
*
|
|
5
|
+
* ## Architecture
|
|
6
|
+
*
|
|
7
|
+
* This implementation uses a two-phase architecture for memory efficiency:
|
|
8
|
+
*
|
|
9
|
+
* ### Phase 1: Metadata Collection
|
|
10
|
+
* - Each URL is processed in a separate child process using puppeteer-dealer
|
|
11
|
+
* - Child processes scan pages with Puppeteer and collect resource URLs
|
|
12
|
+
* - For URLs ending with '/' (e.g., https://example.com/), MIME type is captured
|
|
13
|
+
* and encoded as "url:::MIME/type" format
|
|
14
|
+
* - Only metadata (URLs + MIME types) is returned to parent - no buffer data
|
|
15
|
+
*
|
|
16
|
+
* ### Phase 2: Resource Download
|
|
17
|
+
* - Parent process aggregates all metadata and removes duplicates
|
|
18
|
+
* - Parses encoded URLs to determine correct local paths
|
|
19
|
+
* - Downloads resources via fetch() and immediately writes to disk
|
|
20
|
+
* - No resource content is kept in memory
|
|
21
|
+
*
|
|
22
|
+
* This approach minimizes memory usage by avoiding duplicate I/O operations
|
|
23
|
+
* and keeping buffer data out of inter-process communication.
|
|
24
|
+
* @param options - Replication options
|
|
25
|
+
*/
|
|
26
|
+
export declare function replicate(options: ReplicateOptions): Promise<void>;
|
package/dist/resource-downloader.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Download and save resources to disk
|
|
3
|
+
* @param encodedPaths - Array of encoded pathnames
|
|
4
|
+
* @param baseUrl - Base URL to construct full URLs
|
|
5
|
+
* @param outputDir - Output directory
|
|
6
|
+
* @param logger - Logger function
|
|
7
|
+
* @param verbose - Enable verbose output
|
|
8
|
+
* @param only - Download only specified type: page or resource
|
|
9
|
+
*/
|
|
10
|
+
export declare function downloadResources(encodedPaths: string[], baseUrl: string, outputDir: string, logger: (message: string) => void, verbose?: boolean, only?: 'page' | 'resource'): Promise<void>;
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
export interface ReplicateOptions {
|
|
2
|
+
urls: string[];
|
|
3
|
+
outputDir: string;
|
|
4
|
+
verbose?: boolean;
|
|
5
|
+
timeout?: number;
|
|
6
|
+
devices?: Record<string, {
|
|
7
|
+
width: number;
|
|
8
|
+
resolution?: number;
|
|
9
|
+
}>;
|
|
10
|
+
limit?: number;
|
|
11
|
+
only?: 'page' | 'resource';
|
|
12
|
+
}
|
|
13
|
+
export interface Resource {
|
|
14
|
+
url: string;
|
|
15
|
+
localPath: string;
|
|
16
|
+
}
|
|
17
|
+
export interface ChildProcessInput {
|
|
18
|
+
devices?: Record<string, {
|
|
19
|
+
width: number;
|
|
20
|
+
resolution?: number;
|
|
21
|
+
}>;
|
|
22
|
+
timeout?: number;
|
|
23
|
+
}
|
|
24
|
+
export interface ChildProcessResult {
|
|
25
|
+
url: string;
|
|
26
|
+
encodedUrls: string[];
|
|
27
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/replicator",
|
|
3
|
-
"version": "0.6.0",
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"description": "Replicate web pages with all their resources to local directories",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -37,5 +37,5 @@
|
|
|
37
37
|
"devDependencies": {
|
|
38
38
|
"@types/minimist": "1.2.5"
|
|
39
39
|
},
|
|
40
|
-
"gitHead": "
|
|
40
|
+
"gitHead": "bf9cfd1370947212367041214f68c8e8e6317b86"
|
|
41
41
|
}
|