@pagepocket/write-down-unit 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/filename.d.ts +1 -0
- package/dist/filename.js +10 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +1 -0
- package/dist/plugin.d.ts +61 -0
- package/dist/plugin.js +230 -0
- package/dist/utils/bytes.d.ts +2 -0
- package/dist/utils/bytes.js +22 -0
- package/dist/utils/posix-path.d.ts +10 -0
- package/dist/utils/posix-path.js +36 -0
- package/dist/utils/zip.d.ts +5 -0
- package/dist/utils/zip.js +11 -0
- package/dist/write-down-plugin.d.ts +25 -0
- package/dist/write-down-plugin.js +196 -0
- package/dist/write-down-unit.d.ts +20 -0
- package/dist/write-down-unit.js +35 -0
- package/dist/write-down.d.ts +18 -0
- package/dist/write-down.js +127 -0
- package/package.json +25 -0
package/dist/filename.d.ts
ADDED
@@ -0,0 +1 @@
+export declare const safeFilename: (input: string) => string;
package/dist/filename.js
ADDED
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
@@ -0,0 +1 @@
+export { WriteDownUnit } from "./write-down-unit.js";
package/dist/plugin.d.ts
ADDED
@@ -0,0 +1,61 @@
+import type { ContentRef, PagePocketContext, PagePocketPlugin } from "@pagepocket/lib";
+export type WriteDownPluginOptions = {
+    enabled?: boolean;
+    type: "raw" | "zip";
+    /**
+     * For raw: output directory path.
+     * For zip: output file path.
+     */
+    outputPath: string;
+    overwrite?: boolean;
+};
+export type WriteDownFileSource = {
+    kind: "bytes";
+    data: Uint8Array;
+} | {
+    kind: "text";
+    text: string;
+} | {
+    kind: "content-ref";
+    ref: ContentRef;
+};
+export type WriteDownFile = {
+    kind: "file";
+    /** Path relative to the root of this write-down (posix-ish, may include '/'). */
+    path: string;
+    source: WriteDownFileSource;
+};
+export type WriteDownDirectory = {
+    kind: "directory";
+    /** Path relative to the root of this write-down (posix-ish, may include '/'). */
+    path: string;
+    entries: WriteDownEntry[];
+};
+export type WriteDownEntry = WriteDownFile | WriteDownDirectory;
+/**
+ * Generic, plugin-agnostic filesystem tree to be written by WriteDownPlugin.
+ *
+ * Other plugins should populate `ctx.files` using this shape.
+ */
+export type WriteDownFiles = {
+    /** Root directory (virtual). Its own `path` should be "". */
+    root: WriteDownDirectory;
+    /**
+     * Optional content store handle used when entries reference `{ kind: "content-ref" }`.
+     *
+     * Typically this comes from a PageSnapshot's `content`.
+     */
+    content?: {
+        open(ref: ContentRef): Promise<ReadableStream<Uint8Array>>;
+        dispose?(): Promise<void>;
+    };
+};
+export declare class WriteDownPlugin implements PagePocketPlugin {
+    readonly name = "plugin:write-down";
+    enabled?: boolean;
+    private type;
+    private outputPath;
+    private overwrite;
+    constructor(options: WriteDownPluginOptions);
+    apply(ctx: PagePocketContext): void;
+}
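For orientation, a minimal sketch of the tree shape that `ctx.files` is expected to carry and of constructing the plugin, assuming the types declared above; the deep dist import path and the host `ctx` wiring are assumptions for illustration, not part of this diff:

import { WriteDownPlugin, type WriteDownFiles } from "@pagepocket/write-down-unit/dist/plugin.js";

// Hypothetical snapshot: one HTML document plus one asset, all paths relative to the virtual root ("").
const files: WriteDownFiles = {
    root: {
        kind: "directory",
        path: "",
        entries: [
            { kind: "file", path: "index.html", source: { kind: "text", text: "<!doctype html><title>Example</title>" } },
            { kind: "file", path: "assets/logo.png", source: { kind: "bytes", data: new Uint8Array([0x89, 0x50, 0x4e, 0x47]) } }
        ]
    }
};

const plugin = new WriteDownPlugin({ type: "zip", outputPath: "./out/snapshot.zip" });
// A host would expose `files` as ctx.files and invoke plugin.apply(ctx) during capture.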
package/dist/plugin.js
ADDED
@@ -0,0 +1,230 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.WriteDownPlugin = void 0;
+const uni_fs_1 = require("@pagepocket/uni-fs");
+const normalizePath = (value) => value.replace(/\\/g, "/");
+const trimLeadingSlash = (value) => normalizePath(value).replace(/^\/+/, "");
+const joinPosix = (base, relative) => {
+    const cleanBase = normalizePath(base).replace(/\/+$/, "");
+    const cleanRel = normalizePath(relative).replace(/^\/+/, "");
+    if (!cleanBase) {
+        return cleanRel;
+    }
+    if (!cleanRel) {
+        return cleanBase;
+    }
+    return `${cleanBase}/${cleanRel}`;
+};
+const streamToUint8Array = async (stream) => {
+    const reader = stream.getReader();
+    const chunks = [];
+    let total = 0;
+    while (true) {
+        const result = await reader.read();
+        if (result.done)
+            break;
+        if (result.value) {
+            chunks.push(result.value);
+            total += result.value.byteLength;
+        }
+    }
+    const output = new Uint8Array(total);
+    let offset = 0;
+    for (const chunk of chunks) {
+        output.set(chunk, offset);
+        offset += chunk.byteLength;
+    }
+    return output;
+};
+const encodeText = (text) => new TextEncoder().encode(text);
+const splitPathExtension = (value) => {
+    const clean = normalizePath(value);
+    const lastSlash = clean.lastIndexOf("/");
+    const lastDot = clean.lastIndexOf(".");
+    if (lastDot > lastSlash) {
+        return {
+            filename: clean.slice(0, lastDot),
+            extension: clean.slice(lastDot + 1)
+        };
+    }
+    return { filename: clean, extension: "" };
+};
+const flattenEntries = (dir, prefix) => {
+    const out = [];
+    const dirPrefix = joinPosix(prefix, dir.path);
+    for (const entry of dir.entries) {
+        if (entry.kind === "file") {
+            const filePath = joinPosix(dirPrefix, entry.path);
+            out.push({ path: filePath, file: entry });
+            continue;
+        }
+        out.push(...flattenEntries(entry, dirPrefix));
+    }
+    return out;
+};
+const buildZipBytes = async (files) => {
+    // Minimal ZIP writer (store/no compression). Mirrors @pagepocket/lib toZip semantics.
+    const writeUint16 = (value) => {
+        const buffer = new Uint8Array(2);
+        const view = new DataView(buffer.buffer);
+        view.setUint16(0, value, true);
+        return buffer;
+    };
+    const writeUint32 = (value) => {
+        const buffer = new Uint8Array(4);
+        const view = new DataView(buffer.buffer);
+        view.setUint32(0, value, true);
+        return buffer;
+    };
+    const concatBytes = (chunks) => {
+        const total = chunks.reduce((sum, chunk) => sum + chunk.byteLength, 0);
+        const output = new Uint8Array(total);
+        let offset = 0;
+        for (const chunk of chunks) {
+            output.set(chunk, offset);
+            offset += chunk.byteLength;
+        }
+        return output;
+    };
+    const crc32Table = (() => {
+        const table = new Uint32Array(256);
+        for (let i = 0; i < 256; i += 1) {
+            let c = i;
+            for (let k = 0; k < 8; k += 1) {
+                c = c & 1 ? 0xedb88320 ^ (c >>> 1) : c >>> 1;
+            }
+            table[i] = c >>> 0;
+        }
+        return table;
+    })();
+    const crc32 = (data) => {
+        let crc = 0 ^ -1;
+        for (let i = 0; i < data.length; i += 1) {
+            crc = (crc >>> 8) ^ crc32Table[(crc ^ data[i]) & 0xff];
+        }
+        return (crc ^ -1) >>> 0;
+    };
+    const localChunks = [];
+    const centralChunks = [];
+    let offset = 0;
+    for (const file of files) {
+        const name = trimLeadingSlash(file.path);
+        const nameBytes = new TextEncoder().encode(name);
+        const data = file.data;
+        const crc = crc32(data);
+        const localHeader = concatBytes([
+            writeUint32(0x04034b50),
+            writeUint16(20),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint32(crc),
+            writeUint32(data.byteLength),
+            writeUint32(data.byteLength),
+            writeUint16(nameBytes.byteLength),
+            writeUint16(0),
+            nameBytes
+        ]);
+        localChunks.push(localHeader, data);
+        const centralHeader = concatBytes([
+            writeUint32(0x02014b50),
+            writeUint16(20),
+            writeUint16(20),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint32(crc),
+            writeUint32(data.byteLength),
+            writeUint32(data.byteLength),
+            writeUint16(nameBytes.byteLength),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint16(0),
+            writeUint32(0),
+            writeUint32(offset),
+            nameBytes
+        ]);
+        centralChunks.push(centralHeader);
+        offset += localHeader.byteLength + data.byteLength;
+    }
+    const centralDirectory = concatBytes(centralChunks);
+    const endRecord = concatBytes([
+        writeUint32(0x06054b50),
+        writeUint16(0),
+        writeUint16(0),
+        writeUint16(files.length),
+        writeUint16(files.length),
+        writeUint32(centralDirectory.byteLength),
+        writeUint32(offset),
+        writeUint16(0)
+    ]);
+    return concatBytes([...localChunks, centralDirectory, endRecord]);
+};
+class WriteDownPlugin {
+    constructor(options) {
+        this.name = "plugin:write-down";
+        this.enabled = options.enabled;
+        this.type = options.type;
+        this.outputPath = options.outputPath;
+        this.overwrite = options.overwrite ?? false;
+    }
+    apply(ctx) {
+        ctx.onFinalize(async () => {
+            const files = ctx.files;
+            if (!files) {
+                throw new Error("WriteDownPlugin requires ctx.files");
+            }
+            if (!files.root || files.root.kind !== "directory") {
+                throw new Error("WriteDownPlugin requires ctx.files.root directory");
+            }
+            const flattened = flattenEntries(files.root, "");
+            const outputs = [];
+            for (const item of flattened) {
+                const src = item.file.source;
+                if (src.kind === "bytes") {
+                    outputs.push({ path: item.path, data: src.data });
+                    continue;
+                }
+                if (src.kind === "text") {
+                    outputs.push({ path: item.path, data: encodeText(src.text) });
+                    continue;
+                }
+                if (src.kind === "content-ref") {
+                    if (!files.content) {
+                        throw new Error('WriteDownPlugin cannot resolve { kind: "content-ref" } without ctx.files.content');
+                    }
+                    const stream = await files.content.open(src.ref);
+                    const data = await streamToUint8Array(stream);
+                    outputs.push({ path: item.path, data });
+                    continue;
+                }
+                // Exhaustiveness guard.
+                const _never = src;
+                throw new Error(`Unhandled file source kind: ${String(_never)}`);
+            }
+            if (this.type === "raw") {
+                // Note: @pagepocket/uni-fs write() creates parent directories.
+                for (const file of outputs) {
+                    const outputPath = joinPosix(this.outputPath, file.path);
+                    const { filename, extension } = splitPathExtension(outputPath);
+                    await (0, uni_fs_1.write)(filename, extension, file.data);
+                }
+                if (files.content?.dispose) {
+                    await files.content.dispose();
+                }
+                return { kind: "raw", outputDir: this.outputPath };
+            }
+            const zipBytes = await buildZipBytes(outputs);
+            const { filename, extension } = splitPathExtension(this.outputPath);
+            await (0, uni_fs_1.write)(filename, extension, zipBytes);
+            if (files.content?.dispose) {
+                await files.content.dispose();
+            }
+            return { kind: "zip", zip: { data: zipBytes, outputPath: this.outputPath } };
+        });
+    }
+}
+exports.WriteDownPlugin = WriteDownPlugin;
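Because the writer above stores entries uncompressed (method 0), its archives can be read back with any standard ZIP reader; a small verification sketch using fflate's unzipSync (fflate is a declared dependency of this package), where zipBytes stands in for the Uint8Array returned by buildZipBytes:

import { unzipSync } from "fflate";

// zipBytes: Uint8Array produced by buildZipBytes([...]) above (placeholder here).
declare const zipBytes: Uint8Array;

const entries = unzipSync(zipBytes);
for (const [name, data] of Object.entries(entries)) {
    // Each entry should round-trip byte-for-byte, since no compression was applied.
    console.log(name, data.byteLength);
}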
package/dist/utils/bytes.js
ADDED
@@ -0,0 +1,22 @@
+export const streamToUint8Array = async (stream) => {
+    const reader = stream.getReader();
+    const chunks = [];
+    let total = 0;
+    while (true) {
+        const result = await reader.read();
+        if (result.done)
+            break;
+        if (result.value) {
+            chunks.push(result.value);
+            total += result.value.byteLength;
+        }
+    }
+    const output = new Uint8Array(total);
+    let offset = 0;
+    for (const chunk of chunks) {
+        output.set(chunk, offset);
+        offset += chunk.byteLength;
+    }
+    return output;
+};
+export const textToUint8Array = (text) => new TextEncoder().encode(text);
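A short usage sketch for the two helpers above; the in-memory stream is illustrative and the deep dist import path is an assumption:

import { streamToUint8Array, textToUint8Array } from "@pagepocket/write-down-unit/dist/utils/bytes.js";

const stream = new ReadableStream<Uint8Array>({
    start(controller) {
        controller.enqueue(textToUint8Array("hello "));
        controller.enqueue(textToUint8Array("world"));
        controller.close();
    }
});
const bytes = await streamToUint8Array(stream);
// bytes.byteLength === 11 ("hello world" as UTF-8)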
package/dist/utils/posix-path.d.ts
ADDED
@@ -0,0 +1,10 @@
+type PosixPath = string;
+export declare const normalizePath: (value: string) => PosixPath;
+export declare const trimLeadingSlash: (value: string) => string;
+export declare const joinPosix: (base: string, relative: string) => string;
+export declare const dirnamePosix: (value: string) => PosixPath;
+export declare const splitPathExtension: (value: string) => {
+    filename: string;
+    extension: string;
+};
+export {};
package/dist/utils/posix-path.js
ADDED
@@ -0,0 +1,36 @@
+export const normalizePath = (value) => value.replace(/\\/g, "/");
+export const trimLeadingSlash = (value) => normalizePath(value).replace(/^\/+/, "");
+export const joinPosix = (base, relative) => {
+    const cleanBase = normalizePath(base).replace(/\/+$/, "");
+    const cleanRel = normalizePath(relative).replace(/^\/+/, "");
+    if (!cleanBase) {
+        return cleanRel;
+    }
+    if (!cleanRel) {
+        return cleanBase;
+    }
+    return `${cleanBase}/${cleanRel}`;
+};
+export const dirnamePosix = (value) => {
+    const clean = normalizePath(value).replace(/\/+$/, "");
+    const lastSlash = clean.lastIndexOf("/");
+    if (lastSlash < 0) {
+        return ".";
+    }
+    if (lastSlash === 0) {
+        return "/";
+    }
+    return clean.slice(0, lastSlash);
+};
+export const splitPathExtension = (value) => {
+    const clean = normalizePath(value);
+    const lastSlash = clean.lastIndexOf("/");
+    const lastDot = clean.lastIndexOf(".");
+    if (lastDot > lastSlash) {
+        return {
+            filename: clean.slice(0, lastDot),
+            extension: clean.slice(lastDot + 1)
+        };
+    }
+    return { filename: clean, extension: "" };
+};
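These helpers are plain string manipulation (no node:path), so they behave identically in Node and browser contexts; a few expected results, inferred from the implementation above (the deep dist import is shown only for illustration):

import { joinPosix, dirnamePosix, splitPathExtension } from "@pagepocket/write-down-unit/dist/utils/posix-path.js";

joinPosix("out/site", "/assets/app.css");  // "out/site/assets/app.css"
joinPosix("", "index.html");               // "index.html"
dirnamePosix("assets/img/logo.png");       // "assets/img"
dirnamePosix("index.html");                // "."
splitPathExtension("assets/app.min.css");  // { filename: "assets/app.min", extension: "css" }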
package/dist/utils/zip.js
ADDED
@@ -0,0 +1,11 @@
+import { zipSync } from "fflate";
+export const buildZipBytes = (files) => {
+    const entries = {};
+    for (const item of files) {
+        // ZIP expects paths without a leading slash.
+        const key = item.path.startsWith("/") ? item.path.slice(1) : item.path;
+        entries[key] = item.data;
+    }
+    // level: 0 => store/no compression (matches previous behavior intent).
+    return zipSync(entries, { level: 0 });
+};
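Usage sketch for the fflate-backed helper above; the entry contents and import path are placeholders:

import { buildZipBytes } from "@pagepocket/write-down-unit/dist/utils/zip.js";

const zip = buildZipBytes([
    { path: "/index.html", data: new TextEncoder().encode("<!doctype html>") },
    { path: "assets/app.css", data: new TextEncoder().encode("body{}") }
]);
// zip is a Uint8Array; the leading "/" on the first path is stripped before zipping,
// and level 0 means the entries are stored without compression.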
package/dist/write-down-plugin.d.ts
ADDED
@@ -0,0 +1,25 @@
+import type { FileTree, FileTreeDirectory, FileTreeEntry, FileTreeFile, FileTreeSource, PagePocketContext, PagePocketPlugin } from "@pagepocket/lib";
+export type WriteDownPluginOptions = {
+    type: "raw" | "zip";
+    /**
+     * Base output directory.
+     *
+     * - For raw emit: snapshot is written under <outputPath>/<title>/...
+     * - For zip emit: snapshot is written to <outputPath>/<title>.zip
+     */
+    outputPath: string;
+    overwrite?: boolean;
+};
+export type WriteDownFileSource = FileTreeSource;
+export type WriteDownFile = FileTreeFile;
+export type WriteDownDirectory = FileTreeDirectory;
+export type WriteDownEntry = FileTreeEntry;
+export type WriteDownFiles = FileTree;
+export declare class WriteDownPlugin implements PagePocketPlugin {
+    readonly name = "plugin:write-down";
+    private type;
+    private outputPath;
+    private overwrite;
+    constructor(options: WriteDownPluginOptions);
+    apply(ctx: PagePocketContext): void;
+}
package/dist/write-down-plugin.js
ADDED
@@ -0,0 +1,196 @@
+import { write } from "@pagepocket/uni-fs";
+import { safeFilename } from "./filename.js";
+import { buildZipBytes } from "./utils/zip.js";
+const normalizePath = (value) => value.replace(/\\/g, "/");
+const joinPosix = (base, relative) => {
+    const cleanBase = normalizePath(base).replace(/\/+$/, "");
+    const cleanRel = normalizePath(relative).replace(/^\/+/, "");
+    if (!cleanBase) {
+        return cleanRel;
+    }
+    if (!cleanRel) {
+        return cleanBase;
+    }
+    return `${cleanBase}/${cleanRel}`;
+};
+const extractHtmlTitle = (html) => {
+    const match = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
+    if (!match) {
+        return undefined;
+    }
+    const raw = match[1] ?? "";
+    const normalized = raw.replace(/\s+/g, " ").trim();
+    return normalized || undefined;
+};
+const tryHostname = (url) => {
+    if (!url) {
+        return undefined;
+    }
+    try {
+        return new URL(url).hostname;
+    }
+    catch {
+        return undefined;
+    }
+};
+const pathExists = async (p) => {
+    const { stat } = await import("node:fs/promises");
+    try {
+        await stat(p);
+        return true;
+    }
+    catch {
+        return false;
+    }
+};
+const removeExistingPath = async (p) => {
+    const { rm } = await import("node:fs/promises");
+    await rm(p, { force: true, recursive: true });
+};
+const withUniquePath = async (desiredPath, options) => {
+    if (!(await pathExists(desiredPath))) {
+        return desiredPath;
+    }
+    if (options.overwrite) {
+        await removeExistingPath(desiredPath);
+        return desiredPath;
+    }
+    const pathMod = await import("node:path");
+    const parsed = pathMod.parse(desiredPath);
+    const base = pathMod.join(parsed.dir, parsed.name);
+    const ext = parsed.ext;
+    for (let i = 2; i <= 9999; i += 1) {
+        const candidate = `${base}-${i}${ext}`;
+        if (!(await pathExists(candidate))) {
+            return candidate;
+        }
+    }
+    throw new Error(`Unable to find a unique output path for: ${desiredPath}`);
+};
+const resolveSnapshotBaseName = async (ctx) => {
+    const html = await ctx.whenHtml();
+    const titleFromHtml = extractHtmlTitle(html.htmlString);
+    const fallbackHostname = ctx.entry.kind === "url" ? tryHostname(ctx.entry.url) : tryHostname(html.url ?? html.baseUrl);
+    return safeFilename(titleFromHtml ?? fallbackHostname ?? "snapshot");
+};
+const streamToUint8Array = async (stream) => {
+    const reader = stream.getReader();
+    const chunks = [];
+    let total = 0;
+    while (true) {
+        const result = await reader.read();
+        if (result.done)
+            break;
+        if (result.value) {
+            chunks.push(result.value);
+            total += result.value.byteLength;
+        }
+    }
+    const output = new Uint8Array(total);
+    let offset = 0;
+    for (const chunk of chunks) {
+        output.set(chunk, offset);
+        offset += chunk.byteLength;
+    }
+    return output;
+};
+const encodeText = (text) => new TextEncoder().encode(text);
+const splitPathExtension = (value) => {
+    const clean = normalizePath(value);
+    const lastSlash = clean.lastIndexOf("/");
+    const lastDot = clean.lastIndexOf(".");
+    if (lastDot > lastSlash) {
+        return {
+            filename: clean.slice(0, lastDot),
+            extension: clean.slice(lastDot + 1)
+        };
+    }
+    return { filename: clean, extension: "" };
+};
+const flattenEntries = (dir, prefix) => {
+    const out = [];
+    const dirPrefix = joinPosix(prefix, dir.path);
+    for (const entry of dir.entries) {
+        if (entry.kind === "file") {
+            const filePath = joinPosix(dirPrefix, entry.path);
+            out.push({ path: filePath, file: entry });
+            continue;
+        }
+        out.push(...flattenEntries(entry, dirPrefix));
+    }
+    return out;
+};
+// buildZipBytes moved to ./utils/zip
+export class WriteDownPlugin {
+    constructor(options) {
+        this.name = "plugin:write-down";
+        this.type = options.type;
+        this.outputPath = options.outputPath;
+        this.overwrite = options.overwrite ?? false;
+    }
+    apply(ctx) {
+        ctx.onFinalize(async () => {
+            if (!this.outputPath || !this.outputPath.trim()) {
+                throw new Error("WriteDownPlugin requires a non-empty outputPath");
+            }
+            const baseName = await resolveSnapshotBaseName(ctx);
+            const pathMod = await import("node:path");
+            const outputRoot = pathMod.resolve(this.outputPath);
+            const rawDir = pathMod.join(outputRoot, baseName);
+            const zipPath = pathMod.join(outputRoot, `${baseName}.zip`);
+            const finalRawDir = await withUniquePath(rawDir, { overwrite: this.overwrite });
+            const finalZipPath = await withUniquePath(zipPath, { overwrite: this.overwrite });
+            const files = ctx.files;
+            if (!files) {
+                throw new Error("WriteDownPlugin requires ctx.files");
+            }
+            if (!files.root || files.root.kind !== "directory") {
+                throw new Error("WriteDownPlugin requires ctx.files.root directory");
+            }
+            const flattened = flattenEntries(files.root, "");
+            const outputs = [];
+            for (const item of flattened) {
+                const src = item.file.source;
+                if (src.kind === "bytes") {
+                    outputs.push({ path: item.path, data: src.data });
+                    continue;
+                }
+                if (src.kind === "text") {
+                    outputs.push({ path: item.path, data: encodeText(src.text) });
+                    continue;
+                }
+                if (src.kind === "content-ref") {
+                    if (!files.content) {
+                        throw new Error('WriteDownPlugin cannot resolve { kind: "content-ref" } without ctx.files.content');
+                    }
+                    const stream = await files.content.open(src.ref);
+                    const data = await streamToUint8Array(stream);
+                    outputs.push({ path: item.path, data });
+                    continue;
+                }
+                // Exhaustiveness guard.
+                const _never = src;
+                throw new Error(`Unhandled file source kind: ${String(_never)}`);
+            }
+            if (this.type === "raw") {
+                // Note: @pagepocket/uni-fs write() creates parent directories.
+                for (const file of outputs) {
+                    const outputPath = joinPosix(finalRawDir, file.path);
+                    const { filename, extension } = splitPathExtension(outputPath);
+                    await write(filename, extension, file.data);
+                }
+                if (files.content?.dispose) {
+                    await files.content.dispose();
+                }
+                return { kind: "raw", outputDir: finalRawDir };
+            }
+            const zipBytes = await buildZipBytes(outputs);
+            const { filename, extension } = splitPathExtension(finalZipPath);
+            await write(filename, extension, zipBytes);
+            if (files.content?.dispose) {
+                await files.content.dispose();
+            }
+            return { kind: "zip", zip: { data: zipBytes, outputPath: finalZipPath } };
+        });
+    }
+}
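A hedged sketch of the on-disk layout this ESM plugin produces, inferred from resolveSnapshotBaseName and withUniquePath above; the page title "Example", the outputPath "./saves", and the deep dist import are made-up values, and the exact base name depends on what safeFilename returns:

import { WriteDownPlugin } from "@pagepocket/write-down-unit/dist/write-down-plugin.js";

new WriteDownPlugin({ type: "raw", outputPath: "./saves" });
// raw  -> ./saves/Example/index.html, ./saves/Example/assets/..., etc.
new WriteDownPlugin({ type: "zip", outputPath: "./saves" });
// zip  -> ./saves/Example.zip
// With overwrite left false (the default), an existing target is kept and the next free
// numeric suffix is chosen instead: ./saves/Example-2.zip, ./saves/Example-3.zip, and so on.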
package/dist/write-down-unit.d.ts
ADDED
@@ -0,0 +1,20 @@
+import { Unit, type CaptureResult, type UnitContext } from "@pagepocket/lib";
+export type WriteDownUnitOptions = {
+    type: "raw" | "zip";
+    outputPath: string;
+    overwrite?: boolean;
+};
+/**
+ * v3 unit wrapper around the existing WriteDownPlugin logic.
+ *
+ * Break-change path: this will become the primary implementation.
+ */
+export declare class WriteDownUnit extends Unit {
+    readonly id = "writeDown";
+    readonly kind = "write.down";
+    private options;
+    constructor(options: WriteDownUnitOptions);
+    run(ctx: UnitContext, rt: import("@pagepocket/lib").UnitRuntime): Promise<{
+        __pagepocketResult: CaptureResult;
+    }>;
+}
package/dist/write-down-unit.js
ADDED
@@ -0,0 +1,35 @@
+import { TERMINAL_RESULT_KEY, Unit } from "@pagepocket/lib";
+import { writeDown } from "./write-down.js";
+/**
+ * v3 unit wrapper around the existing WriteDownPlugin logic.
+ *
+ * Break-change path: this will become the primary implementation.
+ */
+export class WriteDownUnit extends Unit {
+    constructor(options) {
+        super();
+        this.id = "writeDown";
+        this.kind = "write.down";
+        this.options = options;
+    }
+    async run(ctx, rt) {
+        const files = ctx.value.files;
+        if (!files) {
+            throw new Error("WriteDownUnit requires ctx.value.files");
+        }
+        const html = ctx.value.html;
+        if (!html) {
+            throw new Error("WriteDownUnit requires ctx.value.html");
+        }
+        // Reuse the library implementation directly.
+        const result = await writeDown({
+            entry: rt.entry,
+            html,
+            files,
+            type: this.options.type,
+            outputPath: this.options.outputPath,
+            overwrite: this.options.overwrite ?? false
+        });
+        return { [TERMINAL_RESULT_KEY]: result };
+    }
+}
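How the unit is typically constructed; the root import follows the package's main/types fields, while the ctx/rt wiring belongs to the host pipeline and is only sketched in the comment:

import { WriteDownUnit } from "@pagepocket/write-down-unit";

const unit = new WriteDownUnit({ type: "zip", outputPath: "./saves", overwrite: false });
// A PagePocket v3 pipeline would call unit.run(ctx, rt) once earlier units have
// populated ctx.value.files and ctx.value.html.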
package/dist/write-down.d.ts
ADDED
@@ -0,0 +1,18 @@
+import type { FileTree } from "@pagepocket/lib";
+export type WriteDownOptions = {
+    type: "raw" | "zip";
+    outputPath: string;
+    overwrite: boolean;
+};
+export declare const writeDown: (input: {
+    entry: unknown;
+    html: {
+        htmlString: string;
+        baseUrl: string;
+        url?: string;
+    };
+    files: FileTree;
+    type: WriteDownOptions["type"];
+    outputPath: string;
+    overwrite: boolean;
+}) => Promise<import("@pagepocket/lib").PagePocketCaptureResult>;
package/dist/write-down.js
ADDED
@@ -0,0 +1,127 @@
+import { rm, stat } from "node:fs/promises";
+import pathMod from "node:path";
+import { write } from "@pagepocket/uni-fs";
+import { safeFilename } from "./filename.js";
+import { streamToUint8Array, textToUint8Array } from "./utils/bytes.js";
+import { joinPosix, normalizePath } from "./utils/posix-path.js";
+import { buildZipBytes } from "./utils/zip.js";
+const extractHtmlTitle = (html) => {
+    const match = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
+    if (!match) {
+        return undefined;
+    }
+    const raw = match[1] ?? "";
+    const normalized = raw.replace(/\s+/g, " ").trim();
+    return normalized || undefined;
+};
+const tryHostname = (url) => {
+    if (!url) {
+        return undefined;
+    }
+    try {
+        return new URL(url).hostname;
+    }
+    catch {
+        return undefined;
+    }
+};
+const pathExists = async (p) => {
+    try {
+        await stat(p);
+        return true;
+    }
+    catch {
+        return false;
+    }
+};
+const removeExistingPath = async (p) => {
+    await rm(p, { force: true, recursive: true });
+};
+const withUniquePath = async (desiredPath, options) => {
+    if (!(await pathExists(desiredPath))) {
+        return desiredPath;
+    }
+    if (options.overwrite) {
+        await removeExistingPath(desiredPath);
+        return desiredPath;
+    }
+    const parsed = pathMod.parse(desiredPath);
+    const base = pathMod.join(parsed.dir, parsed.name);
+    const ext = parsed.ext;
+    for (let i = 2; i <= 9999; i += 1) {
+        const candidate = `${base}-${i}${ext}`;
+        if (!(await pathExists(candidate))) {
+            return candidate;
+        }
+    }
+    throw new Error(`Unable to find a unique output path for: ${desiredPath}`);
+};
+const resolveSnapshotBaseName = (entry, html) => {
+    const titleFromHtml = extractHtmlTitle(html.htmlString);
+    const entryUrl = entry && typeof entry === "object" && "kind" in entry && entry.kind === "url"
+        ? entry.url
+        : undefined;
+    const fallbackHostname = tryHostname(entryUrl) ?? tryHostname(html.url ?? html.baseUrl);
+    return safeFilename(titleFromHtml ?? fallbackHostname ?? "snapshot");
+};
+const flattenEntries = (dir, prefix) => {
+    const out = [];
+    const dirPrefix = joinPosix(prefix, dir.path);
+    for (const entry of dir.entries) {
+        if (entry.kind === "file") {
+            const filePath = joinPosix(dirPrefix, entry.path);
+            out.push({ path: filePath, file: entry });
+            continue;
+        }
+        out.push(...flattenEntries(entry, dirPrefix));
+    }
+    return out;
+};
+export const writeDown = async (input) => {
+    if (!input.outputPath || !input.outputPath.trim()) {
+        throw new Error("WriteDownUnit requires a non-empty outputPath");
+    }
+    if (!input.files.root || input.files.root.kind !== "directory") {
+        throw new Error("WriteDownUnit requires files.root directory");
+    }
+    const baseName = resolveSnapshotBaseName(input.entry, input.html);
+    const outputRoot = pathMod.resolve(input.outputPath);
+    const rawDir = pathMod.join(outputRoot, baseName);
+    const zipPath = pathMod.join(outputRoot, `${baseName}.zip`);
+    const finalRawDir = await withUniquePath(rawDir, { overwrite: input.overwrite });
+    const finalZipPath = await withUniquePath(zipPath, { overwrite: input.overwrite });
+    const flattened = flattenEntries(input.files.root, "");
+    const outputs = [];
+    for (const item of flattened) {
+        const src = item.file.source;
+        if (src.kind === "bytes") {
+            outputs.push({ path: item.path, data: src.data });
+            continue;
+        }
+        if (src.kind === "text") {
+            outputs.push({ path: item.path, data: textToUint8Array(src.text) });
+            continue;
+        }
+        if (src.kind === "content-ref") {
+            if (!input.files.content) {
+                throw new Error('WriteDownUnit cannot resolve { kind: "content-ref" } without files.content');
+            }
+            const stream = await input.files.content.open(src.ref);
+            const data = await streamToUint8Array(stream);
+            outputs.push({ path: item.path, data });
+            continue;
+        }
+        const _never = src;
+        throw new Error(`Unhandled file source kind: ${String(_never)}`);
+    }
+    if (input.type === "raw") {
+        for (const item of outputs) {
+            const outPath = pathMod.join(finalRawDir, normalizePath(item.path).replace(/^\/+/, ""));
+            await write(outPath, "", item.data);
+        }
+        return { kind: "raw", outputDir: finalRawDir };
+    }
+    const zipBytes = buildZipBytes(outputs);
+    await write(finalZipPath, "", zipBytes);
+    return { kind: "zip", zip: { data: zipBytes, outputPath: finalZipPath } };
+};
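A direct call to the library function, with made-up inputs; note that writeDown is exported from dist/write-down.js rather than the package root, and the files value assumes @pagepocket/lib's FileTree matches the WriteDownFiles shape shown earlier:

import { writeDown } from "@pagepocket/write-down-unit/dist/write-down.js";

const result = await writeDown({
    entry: { kind: "url", url: "https://example.com/" },
    html: { htmlString: "<html><title>Example</title></html>", baseUrl: "https://example.com/" },
    files: {
        root: {
            kind: "directory",
            path: "",
            entries: [
                { kind: "file", path: "index.html", source: { kind: "text", text: "<!doctype html>" } }
            ]
        }
    },
    type: "raw",
    outputPath: "./saves",
    overwrite: true
});
// result resembles { kind: "raw", outputDir: "<resolved ./saves>/Example" }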
package/package.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "name": "@pagepocket/write-down-unit",
+  "version": "0.8.0",
+  "description": "PagePocket plugin: write snapshot output to fs (raw or zip)",
+  "type": "module",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "files": [
+    "dist"
+  ],
+  "license": "ISC",
+  "dependencies": {
+    "fflate": "^0.8.2",
+    "@pagepocket/lib": "0.8.0",
+    "@pagepocket/contracts": "0.8.0",
+    "@pagepocket/uni-fs": "0.8.0"
+  },
+  "devDependencies": {
+    "typescript": "^5.4.5"
+  },
+  "scripts": {
+    "build": "tsc -p tsconfig.json",
+    "test": "node -e \"process.exit(0)\""
+  }
+}