@pagepocket/metadata-unit 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-metadata.d.ts +24 -0
- package/dist/build-metadata.js +40 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +1 -0
- package/dist/metadata-unit.d.ts +6 -0
- package/dist/metadata-unit.js +40 -0
- package/package.json +23 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { EntryInfo, FileTree } from "@pagepocket/lib";
|
|
2
|
+
/**
 * Shape of the metadata record produced by `buildMetadata`.
 *
 * - `version`: schema version; the literal `1`.
 * - `url`: URL resolved from the entry, or from the HTML info as a fallback.
 * - `title`: text taken from the document's `<title>` element; empty when no HTML is supplied.
 * - `snapshotType`: caller-supplied snapshot type; `buildMetadata` uses `"unknown"` when omitted.
 * - `capturedAt`: capture time as produced by `Date.prototype.toISOString()`.
 * - `generator`: name of the producing tool; `buildMetadata` writes `"pagepocket"`.
 * - `resources.fileCount`: number of `file` entries counted in the file tree.
 */
export type PagePocketMetadata = {
|
|
3
|
+
version: 1;
|
|
4
|
+
url: string;
|
|
5
|
+
title: string;
|
|
6
|
+
snapshotType: string;
|
|
7
|
+
capturedAt: string;
|
|
8
|
+
generator: string;
|
|
9
|
+
resources: {
|
|
10
|
+
fileCount: number;
|
|
11
|
+
};
|
|
12
|
+
};
|
|
13
|
+
/**
 * Input accepted by `buildMetadata`.
 *
 * - `entry`: the snapshot entry; its `kind` decides where the URL is read from.
 * - `html`: optional HTML info; `htmlString` is searched for a `<title>`, and
 *   `url` / `baseUrl` serve as the URL fallback for entry kinds that carry no URL.
 * - `files`: optional file tree whose `root` is walked to count files.
 * - `snapshotType`: optional label copied into the metadata (`"unknown"` when omitted).
 * - `capturedAt`: optional capture time; the current time is used when omitted.
 */
export type BuildMetadataInput = {
|
|
14
|
+
entry: EntryInfo;
|
|
15
|
+
html?: {
|
|
16
|
+
htmlString: string;
|
|
17
|
+
baseUrl: string;
|
|
18
|
+
url?: string;
|
|
19
|
+
};
|
|
20
|
+
files?: FileTree;
|
|
21
|
+
snapshotType?: string;
|
|
22
|
+
capturedAt?: Date;
|
|
23
|
+
};
|
|
24
|
+
/**
 * Builds the metadata record for a snapshot from its entry, optional HTML
 * info, optional file tree, and optional snapshot type / capture time.
 *
 * @param input - Sources the metadata fields are derived from.
 * @returns The assembled metadata object.
 */
export declare const buildMetadata: (input: BuildMetadataInput) => PagePocketMetadata;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/** Named character references that are decoded inside a title. */
const TITLE_NAMED_ENTITIES = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", "\u00a0"]
]);
/** Matches one named, decimal, or hexadecimal character reference. */
const TITLE_ENTITY_PATTERN = /&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi;
/**
 * Decodes character references in a single pass, so that text such as
 * `&amp;lt;` becomes `&lt;` rather than being decoded twice.
 * Unknown names and invalid code points are left exactly as written.
 *
 * @param text - Raw text taken from between the title tags.
 * @returns The text with recognised character references replaced.
 */
const decodeTitleEntities = (text) => text.replace(TITLE_ENTITY_PATTERN, (whole, body) => {
    if (body[0] !== "#") {
        return TITLE_NAMED_ENTITIES.get(body) ?? whole;
    }
    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (codePoint <= 0 || codePoint > 0x10ffff || isSurrogate) {
        return whole;
    }
    return String.fromCodePoint(codePoint);
});
/**
 * Extracts the document title from an HTML string.
 *
 * The first `<title>` element is located case-insensitively. The tag name
 * must be followed by whitespace or `>`, so longer names such as `<titles>`
 * are not mistaken for a title. Character references in the content are
 * decoded, then runs of whitespace are collapsed to single spaces and the
 * result is trimmed.
 *
 * @param htmlString - HTML source to search.
 * @returns The normalised title, or an empty string when no title is found.
 */
const extractTitle = (htmlString) => {
    const match = /<title(?:\s[^>]*)?>([\s\S]*?)<\/title\s*>/i.exec(htmlString);
    const raw = match?.[1] ?? "";
    // Decode before collapsing so that references to whitespace (e.g. &#10;,
    // &nbsp;) are normalised together with literal whitespace.
    return decodeTitleEntities(raw).replace(/\s+/g, " ").trim();
};
|
|
6
|
+
/**
 * Picks the URL to record for a snapshot.
 *
 * - `"url"` entries supply their own `url`.
 * - `"html-string"` and `"document"` entries supply `url`, falling back to `baseUrl`.
 * - Any other entry kind falls back to the HTML info (`url`, then `baseUrl`),
 *   and finally to an empty string.
 *
 * @param entry - Snapshot entry; its `kind` selects the source of the URL.
 * @param html - Optional HTML info used only for entry kinds without a URL.
 * @returns The resolved URL.
 */
const resolveUrl = (entry, html) => {
    switch (entry.kind) {
        case "url":
            return entry.url;
        case "html-string":
        case "document":
            return entry.url ?? entry.baseUrl;
        default:
            return html?.url ?? html?.baseUrl ?? "";
    }
};
|
|
15
|
+
/**
 * Counts every entry of kind `"file"` beneath a directory node, descending
 * into all non-file entries.
 *
 * @param dir - Directory node whose `entries` are walked.
 * @returns Total number of file entries found at any depth.
 */
const countFiles = (dir) => {
    let total = 0;
    // Explicit work-list of directories still to be visited.
    const pending = [dir];
    while (pending.length > 0) {
        const current = pending.pop();
        for (const child of current.entries) {
            if (child.kind === "file") {
                total += 1;
            }
            else {
                pending.push(child);
            }
        }
    }
    return total;
};
|
|
27
|
+
/**
 * Assembles the metadata record for a snapshot.
 *
 * @param input - Entry, optional HTML info, optional file tree, optional
 *   snapshot type and optional capture time.
 * @returns Metadata with a fixed `version` of 1 and `generator` of
 *   "pagepocket"; `snapshotType` defaults to "unknown" and `capturedAt`
 *   defaults to the current time, serialised with `toISOString()`.
 */
export const buildMetadata = (input) => {
    const { entry, html, files, snapshotType, capturedAt } = input;
    return {
        version: 1,
        url: resolveUrl(entry, html),
        // Without HTML there is nothing to read a title from.
        title: html ? extractTitle(html.htmlString) : "",
        snapshotType: snapshotType ?? "unknown",
        capturedAt: (capturedAt ?? new Date()).toISOString(),
        generator: "pagepocket",
        // A missing tree or missing root counts as zero files.
        resources: { fileCount: files?.root ? countFiles(files.root) : 0 }
    };
};
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Package entry point: exposes MetadataUnit as both a named export and the default export.
export { MetadataUnit, MetadataUnit as default } from "./metadata-unit.js";
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { FileTreeUnit, type UnitContext, type UnitPatch, type UnitRuntime } from "@pagepocket/lib";
|
|
2
|
+
/**
 * File-tree unit with the id "metadata".
 *
 * `run` receives the unit context and runtime and resolves to a patch; the
 * implementation builds the snapshot metadata and adds it to the file tree
 * as a JSON file.
 */
export declare class MetadataUnit extends FileTreeUnit {
|
|
3
|
+
readonly id = "metadata";
|
|
4
|
+
readonly description = "Extracting metadata";
|
|
5
|
+
run(ctx: UnitContext, rt: UnitRuntime): Promise<UnitPatch>;
|
|
6
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { FileTreeUnit } from "@pagepocket/lib";
|
|
2
|
+
import { buildMetadata } from "./build-metadata.js";
|
|
3
|
+
// Path given to the metadata file entry that is added to the snapshot file tree.
const METADATA_PATH = "/.pagepocket.json";
|
|
4
|
+
/**
 * Returns a copy of `root` that contains a bytes-backed file entry at `path`.
 *
 * Any existing file entry with the same `path` among the root's direct
 * entries is replaced rather than duplicated, so injecting the same path
 * repeatedly never yields two entries for one path. The input `root` and its
 * `entries` array are not mutated.
 *
 * @param root - Directory node to copy; all of its own properties are preserved.
 * @param path - Path of the file entry to add.
 * @param data - Byte content stored as the entry's source.
 * @returns A new directory node whose last entry is the injected file.
 */
const injectFile = (root, path, data) => ({
    ...root,
    entries: [
        // Drop a stale file at the same path; everything else is kept in order.
        ...root.entries.filter((entry) => !(entry.kind === "file" && entry.path === path)),
        { kind: "file", path, source: { kind: "bytes", data } }
    ]
});
|
|
11
|
+
/**
 * Unit that serialises snapshot metadata to JSON and places it in the
 * snapshot file tree at `METADATA_PATH`.
 */
export class MetadataUnit extends FileTreeUnit {
    constructor(...args) {
        super(...args);
        this.id = "metadata";
        this.description = "Extracting metadata";
    }
    /**
     * Builds the metadata from the context value and runtime entry, encodes
     * it as pretty-printed JSON followed by a newline, and returns the
     * context value with the metadata file added to `files`.
     *
     * @param ctx - Unit context; `ctx.value` supplies `files`, `html` and `snapshotType`.
     * @param rt - Unit runtime; `rt.entry` identifies the snapshot entry.
     * @returns The context value with an updated `files` tree.
     */
    async run(ctx, rt) {
        const { files, html, snapshotType } = ctx.value;
        const metadata = buildMetadata({ entry: rt.entry, html, files, snapshotType });
        const serialized = `${JSON.stringify(metadata, undefined, 2)}\n`;
        const bytes = new TextEncoder().encode(serialized);
        if (files?.root) {
            // An existing tree keeps all of its properties; only the root is swapped.
            const updatedRoot = injectFile(files.root, METADATA_PATH, bytes);
            return { ...ctx.value, files: { ...files, root: updatedRoot } };
        }
        // No tree yet: create a root directory holding only the metadata file.
        const metadataEntry = { kind: "file", path: METADATA_PATH, source: { kind: "bytes", data: bytes } };
        const freshRoot = { kind: "directory", path: "", entries: [metadataEntry] };
        return { ...ctx.value, files: { root: freshRoot } };
    }
}
|
package/package.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@pagepocket/metadata-unit",
|
|
3
|
+
"version": "0.13.0",
|
|
4
|
+
"description": "PagePocket unit: emit .pagepocket.json metadata into the snapshot FileTree",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"files": [
|
|
9
|
+
"dist"
|
|
10
|
+
],
|
|
11
|
+
"license": "ISC",
|
|
12
|
+
"dependencies": {
|
|
13
|
+
"@pagepocket/lib": "0.13.0"
|
|
14
|
+
},
|
|
15
|
+
"devDependencies": {
|
|
16
|
+
"tsx": "^4.19.3",
|
|
17
|
+
"typescript": "^5.4.5"
|
|
18
|
+
},
|
|
19
|
+
"scripts": {
|
|
20
|
+
"build": "tsc -p tsconfig.json",
|
|
21
|
+
"test": "tsx --test specs/*.spec.ts"
|
|
22
|
+
}
|
|
23
|
+
}
|