@d-zero/replicator 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ export {};
package/dist/cli.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
@@ -0,0 +1,26 @@
1
+ import type { ReplicateOptions } from './types.js';
2
+ /**
3
+ * Replicate web pages with all their resources to local directories
4
+ *
5
+ * ## Architecture
6
+ *
7
+ * This implementation uses a two-phase architecture for memory efficiency:
8
+ *
9
+ * ### Phase 1: Metadata Collection
10
+ * - Each URL is processed in a separate child process using puppeteer-dealer
11
+ * - Child processes scan pages with Puppeteer and collect resource URLs
12
+ * - For URLs ending with '/' (e.g., https://example.com/), the MIME type is captured
13
+ * and encoded in "url:::MIME/type" format
14
+ * - Only metadata (URLs + MIME types) is returned to the parent process - no buffer data
15
+ *
16
+ * ### Phase 2: Resource Download
17
+ * - Parent process aggregates all metadata and removes duplicates
18
+ * - Parses encoded URLs to determine correct local paths
19
+ * - Downloads resources via fetch() and immediately writes to disk
20
+ * - No resource content is kept in memory
21
+ *
22
+ * This approach minimizes memory usage by avoiding duplicate I/O operations
23
+ * and keeping buffer data out of inter-process communication.
24
+ * @param options - Replication options
25
+ */
26
+ export declare function replicate(options: ReplicateOptions): Promise<void>;
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Download and save resources to disk
3
+ * @param encodedPaths - Array of encoded pathnames
4
+ * @param baseUrl - Base URL to construct full URLs
5
+ * @param outputDir - Output directory
6
+ * @param logger - Logger function
7
+ * @param verbose - Enable verbose output
8
+ * @param only - Download only the specified type: 'page' or 'resource'
9
+ */
10
+ export declare function downloadResources(encodedPaths: string[], baseUrl: string, outputDir: string, logger: (message: string) => void, verbose?: boolean, only?: 'page' | 'resource'): Promise<void>;
@@ -0,0 +1,27 @@
1
+ export interface ReplicateOptions {
2
+ urls: string[];
3
+ outputDir: string;
4
+ verbose?: boolean;
5
+ timeout?: number;
6
+ devices?: Record<string, {
7
+ width: number;
8
+ resolution?: number;
9
+ }>;
10
+ limit?: number;
11
+ only?: 'page' | 'resource';
12
+ }
13
+ export interface Resource {
14
+ url: string;
15
+ localPath: string;
16
+ }
17
+ export interface ChildProcessInput {
18
+ devices?: Record<string, {
19
+ width: number;
20
+ resolution?: number;
21
+ }>;
22
+ timeout?: number;
23
+ }
24
+ export interface ChildProcessResult {
25
+ url: string;
26
+ encodedUrls: string[];
27
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d-zero/replicator",
3
- "version": "0.6.0",
3
+ "version": "0.6.1",
4
4
  "description": "Replicate web pages with all their resources to local directories",
5
5
  "author": "D-ZERO",
6
6
  "license": "MIT",
@@ -37,5 +37,5 @@
37
37
  "devDependencies": {
38
38
  "@types/minimist": "1.2.5"
39
39
  },
40
- "gitHead": "fe6d98ee0108b0e53848f28a74e4e08875e31a78"
40
+ "gitHead": "bf9cfd1370947212367041214f68c8e8e6317b86"
41
41
  }