bentopdf-sh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ import * as fs from "node:fs";
2
+ import * as path from "node:path";
3
+ import { createRequire } from "node:module";
4
+ import { Engine } from "./base.js";
5
+
// File extensions (lower-case, no leading dot) this engine accepts.
const SUPPORTED = ["docx", "doc", "pptx", "ppt", "xlsx", "xls", "odt", "ods", "odp", "rtf"];

/**
 * Engine that converts office documents to PDF through
 * `@matbee/libreoffice-converter`.
 *
 * The converter is created lazily on first use; see `ensureLoaded()`.
 */
export class LibreOfficeEngine extends Engine {
  constructor() {
    super("libreoffice");
    // Initialized converter instance, or null until ensureLoaded() succeeds.
    this._converter = null;
    // In-flight initialization promise, shared by concurrent ensureLoaded() calls.
    this._loading = null;
  }

  /**
   * @returns {string[]} A fresh copy of the accepted extensions, so callers
   *   may sort or mutate the result without affecting this module.
   */
  supportedExtensions() {
    return [...SUPPORTED];
  }

  /**
   * Load and initialize the converter once.
   *
   * Concurrent callers share a single initialization instead of each creating
   * their own converter. If initialization fails, the error propagates to
   * every waiting caller and a later call will retry.
   *
   * @returns {Promise<void>}
   */
  async ensureLoaded() {
    // NOTE(review): `_loaded` is not assigned in this constructor; presumably
    // the Engine base class initializes it — confirm.
    if (this._loaded) return;

    if (!this._loading) {
      this._loading = this._load().finally(() => {
        // Clear the slot so a failed load can be retried.
        this._loading = null;
      });
    }
    await this._loading;
  }

  /**
   * Import the converter, initialize it and publish it on the instance.
   * The instance is only published after `initialize()` resolves, so a failed
   * initialization never leaves a half-initialized converter behind.
   *
   * @returns {Promise<void>}
   */
  async _load() {
    const { LibreOfficeConverter } = await import("@matbee/libreoffice-converter/server");
    // The WASM loader is pulled in through require(), which has to be created
    // explicitly inside an ES module.
    const require = createRequire(import.meta.url);
    const wasmLoader = require("@matbee/libreoffice-converter/wasm/loader");

    const converter = new LibreOfficeConverter({ wasmLoader });
    await converter.initialize();

    this._converter = converter;
    this._loaded = true;
  }

  /**
   * Convert the document at `inputPath` to PDF and write it to `outputPath`.
   *
   * @param {string} inputPath - Source file; its extension selects the input format.
   * @param {string} outputPath - Destination path for the PDF.
   * @param {object} [options] - Currently unused.
   * @returns {Promise<void>}
   * @throws {Error} If the extension is not supported or the converter
   *   returns no data.
   */
  async convert(inputPath, outputPath, options = {}) {
    const ext = path.extname(inputPath).slice(1).toLowerCase();
    // Reject unknown formats up front, before paying for the converter load.
    if (!SUPPORTED.includes(ext)) {
      throw new Error(`LibreOffice: unsupported format .${ext}`);
    }

    await this.ensureLoaded();

    const inputData = fs.readFileSync(inputPath);
    const result = await this._converter.convert(inputData, {
      outputFormat: "pdf",
      inputFormat: ext,
    });

    // Fail with a clear message rather than an opaque TypeError from Buffer.from().
    if (result?.data == null) {
      throw new Error(`LibreOffice: conversion of ${inputPath} produced no output`);
    }

    fs.writeFileSync(outputPath, Buffer.from(result.data));
  }
}
@@ -0,0 +1,73 @@
1
+ import * as fs from "node:fs";
2
+ import * as path from "node:path";
3
+ import { Engine } from "./base.js";
4
+
// File extensions (lower-case, no leading dot) advertised by this engine.
const SUPPORTED = ["jpg", "jpeg", "png", "svg", "tiff", "tif", "webp"];

// Extension -> MIME type handed to MuPDF when opening a document.
// html/htm are not advertised by supportedExtensions() but are still
// accepted by convert(), because its check is driven by this map.
const MIME_MAP = {
  jpg: "image/jpeg",
  jpeg: "image/jpeg",
  png: "image/png",
  svg: "image/svg+xml",
  tiff: "image/tiff",
  tif: "image/tiff",
  webp: "image/webp",
  html: "text/html",
  htm: "text/html",
};

/**
 * Engine that renders images (and HTML buffers) to PDF with the `mupdf`
 * package, which is imported lazily on first use.
 */
export class MuPdfEngine extends Engine {
  constructor() {
    super("mupdf");
    // Namespace object of the dynamically imported `mupdf` module.
    this._mupdf = null;
  }

  /**
   * @returns {string[]} A fresh copy of the advertised extensions, so callers
   *   may sort or mutate the result without affecting this module.
   */
  supportedExtensions() {
    return [...SUPPORTED];
  }

  /**
   * Import the `mupdf` module once.
   *
   * @returns {Promise<void>}
   */
  async ensureLoaded() {
    if (this._loaded) return;
    this._mupdf = await import("mupdf");
    this._loaded = true;
  }

  /**
   * Render the file at `inputPath` to PDF and write it to `outputPath`.
   *
   * @param {string} inputPath - Source file; its extension selects the MIME type.
   * @param {string} outputPath - Destination path for the PDF.
   * @param {object} [options] - Currently unused.
   * @returns {Promise<void>}
   * @throws {Error} If the extension has no entry in MIME_MAP.
   */
  async convert(inputPath, outputPath, options = {}) {
    const ext = path.extname(inputPath).slice(1).toLowerCase();
    const mime = MIME_MAP[ext];
    // Reject unknown formats before loading the module.
    if (!mime) {
      throw new Error(`MuPDF: unsupported format .${ext}`);
    }

    await this.ensureLoaded();

    const inputData = fs.readFileSync(inputPath);
    const pdfBytes = this._renderToPdf(inputData, mime);
    fs.writeFileSync(outputPath, pdfBytes);
  }

  /**
   * Public method for pipeline use (e.g., pandoc engine passing HTML buffer).
   *
   * @param {Uint8Array} data - Raw document bytes.
   * @param {string} mime - MIME type passed to MuPDF as-is (not validated here).
   * @param {string} outputPath - Destination path for the PDF.
   * @returns {Promise<void>}
   */
  async convertBuffer(data, mime, outputPath) {
    await this.ensureLoaded();
    const pdfBytes = this._renderToPdf(data, mime);
    fs.writeFileSync(outputPath, pdfBytes);
  }

  /**
   * Open `inputData` as a document and replay every page into a PDF writer.
   * Requires ensureLoaded() to have completed.
   *
   * @param {Uint8Array} inputData - Raw document bytes.
   * @param {string} mime - MIME type used to open the document.
   * @returns {Uint8Array} The PDF bytes, as an independent copy.
   */
  _renderToPdf(inputData, mime) {
    const mupdf = this._mupdf;
    const doc = mupdf.Document.openDocument(inputData, mime);
    const buf = new mupdf.Buffer();
    const writer = new mupdf.DocumentWriter(buf, "pdf", "");

    // The page count does not change while rendering; read it once.
    const pageCount = doc.countPages();
    for (let i = 0; i < pageCount; i++) {
      const page = doc.loadPage(i);
      // Each output page takes the size of its source page.
      const bounds = page.getBounds();
      const dev = writer.beginPage(bounds);
      page.run(dev, mupdf.Matrix.identity);
      writer.endPage();
    }

    writer.close();

    // NOTE(review): asUint8Array() may return a view onto memory owned by
    // `buf` rather than an independent array — confirm against the mupdf.js
    // docs. Copying keeps the result valid regardless of what later happens
    // to `buf`.
    return buf.asUint8Array().slice();
  }
}
@@ -0,0 +1,66 @@
1
+ import * as fs from "node:fs";
2
+ import * as path from "node:path";
3
+ import { Engine } from "./base.js";
4
+ import { MuPdfEngine } from "./mupdf.js";
5
+
// File extensions (lower-case, no leading dot) this engine accepts.
const SUPPORTED = ["md", "markdown", "html", "htm"];

// Extension -> pandoc input format name.
const PANDOC_FORMAT_MAP = {
  md: "markdown",
  markdown: "markdown",
  html: "html",
  htm: "html",
};

/**
 * Two-stage engine: Markdown is converted to standalone HTML with
 * `pandoc-wasm`, then the HTML is rendered to PDF by a MuPdfEngine.
 * HTML input skips the first stage.
 */
export class PandocEngine extends Engine {
  constructor() {
    super("pandoc");
    // Namespace object of the dynamically imported `pandoc-wasm` module.
    this._pandoc = null;
    // Renderer used for the HTML -> PDF stage.
    this._mupdf = new MuPdfEngine();
  }

  /**
   * @returns {string[]} A fresh copy of the accepted extensions, so callers
   *   may sort or mutate the result without affecting this module.
   */
  supportedExtensions() {
    return [...SUPPORTED];
  }

  /**
   * Import `pandoc-wasm` and make sure the MuPDF renderer is loaded.
   *
   * @returns {Promise<void>}
   */
  async ensureLoaded() {
    if (this._loaded) return;
    this._pandoc = await import("pandoc-wasm");
    await this._mupdf.ensureLoaded();
    this._loaded = true;
  }

  /**
   * Convert the Markdown or HTML file at `inputPath` to PDF at `outputPath`.
   *
   * @param {string} inputPath - Source file; its extension selects the input format.
   * @param {string} outputPath - Destination path for the PDF.
   * @param {object} [options] - Currently unused.
   * @returns {Promise<void>}
   * @throws {Error} If the extension is not supported, or pandoc returns no output.
   */
  async convert(inputPath, outputPath, options = {}) {
    const ext = path.extname(inputPath).slice(1).toLowerCase();
    const fromFormat = PANDOC_FORMAT_MAP[ext];
    // Reject unknown formats before loading the modules.
    if (!fromFormat) {
      throw new Error(`Pandoc: unsupported format .${ext}`);
    }

    await this.ensureLoaded();

    const inputText = fs.readFileSync(inputPath, "utf-8");

    // Stage 1: Convert to standalone HTML via pandoc-wasm
    let htmlContent;
    if (fromFormat === "html") {
      // Already HTML — use directly
      htmlContent = inputText;
    } else {
      const { stdout, stderr } = await this._pandoc.convert(
        { from: fromFormat, to: "html", standalone: true },
        inputText,
        {}
      );
      // pandoc-wasm has no exitCode. A standalone HTML document is never
      // empty, so missing stdout is treated as a failed conversion instead
      // of being rendered into an empty PDF.
      if (!stdout) {
        const detail = stderr ? `: ${stderr}` : "";
        throw new Error(`Pandoc: conversion of ${inputPath} produced no output${detail}`);
      }
      if (stderr) {
        console.error(`Pandoc warnings: ${stderr}`);
      }
      htmlContent = stdout;
    }

    // Stage 2: Render HTML to PDF via MuPDF's public convertBuffer API
    const htmlData = new TextEncoder().encode(htmlContent);
    await this._mupdf.convertBuffer(htmlData, "text/html", outputPath);
  }
}
@@ -0,0 +1,39 @@
// Extension (lower-case, no leading dot) -> name of the engine that handles it.
const FORMAT_MAP = {
  // LibreOffice-WASM
  docx: "libreoffice",
  doc: "libreoffice",
  pptx: "libreoffice",
  ppt: "libreoffice",
  xlsx: "libreoffice",
  xls: "libreoffice",
  odt: "libreoffice",
  ods: "libreoffice",
  odp: "libreoffice",
  rtf: "libreoffice",

  // MuPDF
  jpg: "mupdf",
  jpeg: "mupdf",
  png: "mupdf",
  svg: "mupdf",
  tiff: "mupdf",
  tif: "mupdf",
  webp: "mupdf",

  // Pandoc pipeline
  md: "pandoc",
  markdown: "pandoc",
  html: "pandoc",
  htm: "pandoc",
};

/**
 * Maps file extensions to the name of the engine responsible for them.
 */
export class Registry {
  /**
   * Look up the engine for an extension.
   *
   * @param {string} ext - Extension with or without a leading dot, any case.
   * @returns {string|null} The engine name, or null when the extension is unknown.
   */
  resolve(ext) {
    const normalized = ext.startsWith(".") ? ext.slice(1) : ext;
    const key = normalized.toLowerCase();
    // Only own entries count: a plain lookup would also find inherited
    // members such as "constructor" or "__proto__" and return a non-null,
    // non-string value for them.
    return Object.prototype.hasOwnProperty.call(FORMAT_MAP, key) ? FORMAT_MAP[key] : null;
  }

  /**
   * @returns {string[]} Every known extension, in declaration order.
   */
  supportedExtensions() {
    return Object.keys(FORMAT_MAP);
  }
}