@docverse-pdf/server 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ export interface GotenbergOptions {
2
+ /** Base URL of the Gotenberg service, e.g. `http://localhost:3000`. */
3
+ url?: string;
4
+ /** Per-request timeout in ms (default: 60_000). */
5
+ timeout?: number;
6
+ /** Optional Basic Auth credentials if Gotenberg is protected. */
7
+ basicAuth?: {
8
+ username: string;
9
+ password: string;
10
+ };
11
+ /** Extra headers to attach to every request (e.g. tracing headers). */
12
+ headers?: Record<string, string>;
13
+ }
14
+ export interface GotenbergConvertOptions {
15
+ /**
16
+ * Override the filename sent to Gotenberg. Gotenberg uses the file
17
+ * extension to decide which LibreOffice filter to apply, so make sure
18
+ * it matches the actual format (e.g. `input.docx`, `input.pptx`).
19
+ */
20
+ filename?: string;
21
+ /** Produce a PDF/A compliant document. */
22
+ pdfa?: 'PDF/A-1a' | 'PDF/A-2b' | 'PDF/A-3b';
23
+ /** Produce a PDF/UA compliant document (accessibility). */
24
+ pdfua?: boolean;
25
+ /** Force landscape orientation. */
26
+ landscape?: boolean;
27
+ /** LibreOffice native page range, e.g. "1-3,5". */
28
+ nativePageRanges?: string;
29
+ /** PDF metadata (title, author, subject, keywords, creator, ...). */
30
+ metadata?: Record<string, string>;
31
+ /** Per-call timeout override (ms). Falls back to client-level timeout. */
32
+ timeout?: number;
33
+ }
34
+ export interface GotenbergFile {
35
+ buffer: Buffer | Uint8Array;
36
+ filename: string;
37
+ }
38
+ /**
39
+ * Thin HTTP client for the Gotenberg document conversion service.
40
+ *
41
+ * The client is stateless — there is no `start()` / `stop()`. Instances
42
+ * hold only configuration and are cheap to create; connection reuse is
43
+ * handled by the runtime's global `fetch`, not by the instance.
44
+ */
45
+ export declare class GotenbergClient {
46
+ private readonly url;
47
+ private readonly timeout;
48
+ private readonly basicAuth?;
49
+ private readonly extraHeaders;
50
+ constructor(options?: GotenbergOptions);
51
+ /** Returns the normalized base URL (trailing slash stripped). */
52
+ get baseUrl(): string;
53
+ /**
54
+ * Health check against Gotenberg's `/health` endpoint.
55
+ * Returns `true` on any 2xx, `false` otherwise (including network errors).
56
+ */
57
+ ping(timeoutMs?: number): Promise<boolean>;
58
+ /**
59
+ * Convert a single document (DOCX, ODT, XLSX, PPTX, RTF, ...) to PDF via
60
+ * Gotenberg's LibreOffice route. Returns the PDF bytes.
61
+ */
62
+ convert(input: Buffer | Uint8Array, filenameOrOptions?: string | GotenbergConvertOptions): Promise<Buffer>;
63
+ /**
64
+ * Convert multiple documents in a single request. When more than one
65
+ * file is supplied, the `merge=true` form field is sent so Gotenberg
66
+ * returns one combined PDF; there is no option to disable merging yet.
67
+ * Throws if `files` is empty.
68
+ */
69
+ convertMany(files: GotenbergFile[], options?: GotenbergConvertOptions): Promise<Buffer>;
70
+ private buildHeaders;
71
+ }
@@ -0,0 +1,142 @@
1
+ // ═══════════════════════════════════════════════
2
+ // GOTENBERG HTTP CLIENT
3
+ // ═══════════════════════════════════════════════
4
+ //
5
+ // Gotenberg (https://gotenberg.dev) is a stateless HTTP API that wraps
6
+ // LibreOffice and Chromium for document→PDF conversion. Running it as a
7
+ // sidecar container lets the server SDK stay a thin HTTP client — no
8
+ // local `soffice` process, no warm-pool management, no port juggling.
9
+ //
10
+ // Minimal docker-compose example:
11
+ // services:
12
+ // gotenberg:
13
+ // image: gotenberg/gotenberg:8
14
+ // ports: ["3000:3000"]
15
+ //
16
+ // Then on the Node side:
17
+ // const gotenberg = new GotenbergClient({ url: 'http://gotenberg:3000' });
18
+ // const pdf = await gotenberg.convert(docxBuffer, 'report.docx');
19
+ /**
20
+ * Thin HTTP client for the Gotenberg document conversion service.
21
+ *
22
+ * The client is stateless — there is no `start()` / `stop()`. Instances
23
+ * hold only configuration and are cheap to create; connection reuse is
24
+ * handled by the runtime's global `fetch`, not by the instance.
25
+ */
26
+ export class GotenbergClient {
27
+ url;
28
+ timeout;
29
+ basicAuth;
30
+ extraHeaders;
31
+ constructor(options = {}) {
32
+ const url = (options.url ?? 'http://localhost:3000').replace(/\/+$/, '');
33
+ this.url = url;
34
+ this.timeout = options.timeout ?? 60_000;
35
+ this.basicAuth = options.basicAuth;
36
+ this.extraHeaders = { ...(options.headers ?? {}) };
37
+ }
38
+ /** Returns the normalized base URL (trailing slash stripped). */
39
+ get baseUrl() {
40
+ return this.url;
41
+ }
42
+ /**
43
+ * Health check against Gotenberg's `/health` endpoint.
44
+ * Returns `true` on any 2xx, `false` otherwise (including network errors).
45
+ */
46
+ async ping(timeoutMs = 2_000) {
47
+ try {
48
+ const controller = new AbortController();
49
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
50
+ try {
51
+ const res = await fetch(`${this.url}/health`, {
52
+ method: 'GET',
53
+ headers: this.buildHeaders(),
54
+ signal: controller.signal,
55
+ });
56
+ return res.ok;
57
+ }
58
+ finally {
59
+ clearTimeout(timer);
60
+ }
61
+ }
62
+ catch {
63
+ return false;
64
+ }
65
+ }
66
+ /**
67
+ * Convert a single document (DOCX, ODT, XLSX, PPTX, RTF, ...) to PDF via
68
+ * Gotenberg's LibreOffice route. Returns the PDF bytes.
69
+ */
70
+ async convert(input, filenameOrOptions = 'input.docx') {
71
+ const opts = typeof filenameOrOptions === 'string'
72
+ ? { filename: filenameOrOptions }
73
+ : filenameOrOptions;
74
+ return this.convertMany([{ buffer: input, filename: opts.filename ?? 'input.docx' }], opts);
75
+ }
76
+ /**
77
+ * Convert multiple documents in a single request. When more than one
78
+ * file is supplied, the `merge=true` form field is sent so Gotenberg
79
+ * returns one combined PDF; there is no option to disable merging yet.
80
+ * Throws if `files` is empty.
81
+ */
82
+ async convertMany(files, options = {}) {
83
+ if (files.length === 0) {
84
+ throw new Error('GotenbergClient.convertMany: at least one file is required');
85
+ }
86
+ const form = new FormData();
87
+ for (const f of files) {
88
+ const bytes = Buffer.from(f.buffer);
89
+ // Wrap in a Blob so FormData streams the raw bytes with no extra
90
+ // text encoding. Gotenberg inspects the filename's extension to pick
91
+ // the LibreOffice filter, so it must match the real format.
92
+ form.append('files', new Blob([bytes]), f.filename);
93
+ }
94
+ if (options.pdfa)
95
+ form.append('pdfa', options.pdfa);
96
+ if (options.pdfua)
97
+ form.append('pdfua', 'true');
98
+ if (options.landscape)
99
+ form.append('landscape', 'true');
100
+ if (options.nativePageRanges)
101
+ form.append('nativePageRanges', options.nativePageRanges);
102
+ if (files.length > 1)
103
+ form.append('merge', 'true');
104
+ if (options.metadata && Object.keys(options.metadata).length > 0) {
105
+ form.append('metadata', JSON.stringify(options.metadata));
106
+ }
107
+ const timeout = options.timeout ?? this.timeout;
108
+ const controller = new AbortController();
109
+ const timer = setTimeout(() => controller.abort(), timeout);
110
+ try {
111
+ const res = await fetch(`${this.url}/forms/libreoffice/convert`, {
112
+ method: 'POST',
113
+ body: form,
114
+ headers: this.buildHeaders(),
115
+ signal: controller.signal,
116
+ });
117
+ if (!res.ok) {
118
+ const bodyText = await res.text().catch(() => '');
119
+ throw new Error(`Gotenberg returned ${res.status} ${res.statusText}: ${bodyText || '(empty body)'}`);
120
+ }
121
+ const arrayBuf = await res.arrayBuffer();
122
+ return Buffer.from(arrayBuf);
123
+ }
124
+ catch (err) {
125
+ if (err.name === 'AbortError') {
126
+ throw new Error(`Gotenberg request timed out after ${timeout}ms`);
127
+ }
128
+ throw err;
129
+ }
130
+ finally {
131
+ clearTimeout(timer);
132
+ }
133
+ }
134
+ buildHeaders() {
135
+ const headers = { ...this.extraHeaders };
136
+ if (this.basicAuth) {
137
+ const token = Buffer.from(`${this.basicAuth.username}:${this.basicAuth.password}`).toString('base64');
138
+ headers['Authorization'] = `Basic ${token}`;
139
+ }
140
+ return headers;
141
+ }
142
+ }
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  export { PDFDocument } from './PDFDocument.js';
2
2
  export type { PageSize, TextBlock, SearchResult, AnnotationInfo, PageObjectInfo, FormField, Bookmark, RenderOptions, ImageInfo, TextObjectStyle, } from './PDFDocument.js';
3
3
  export { flattenXFDF, mergeXFDFIntoPDF } from './flattenXFDF.js';
4
- export { wordToPDF, LibreOfficePool, stopSharedUnoServer } from './wordToPDF.js';
4
+ export { wordToPDF, LibreOfficePool, resetSharedGotenbergClient } from './wordToPDF.js';
5
5
  export type { ConvertOptions, PoolOptions } from './wordToPDF.js';
6
- export { UnoServer, UnoServerPool } from './unoServer.js';
7
- export type { UnoServerOptions, UnoServerPoolOptions, UnoServerStats, } from './unoServer.js';
6
+ export { GotenbergClient } from './gotenberg.js';
7
+ export type { GotenbergOptions, GotenbergConvertOptions, GotenbergFile, } from './gotenberg.js';
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
1
  export { PDFDocument } from './PDFDocument.js';
2
2
  export { flattenXFDF, mergeXFDFIntoPDF } from './flattenXFDF.js';
3
- export { wordToPDF, LibreOfficePool, stopSharedUnoServer } from './wordToPDF.js';
4
- export { UnoServer, UnoServerPool } from './unoServer.js';
3
+ export { wordToPDF, LibreOfficePool, resetSharedGotenbergClient } from './wordToPDF.js';
4
+ export { GotenbergClient } from './gotenberg.js';
@@ -1,21 +1,27 @@
1
- import { UnoServer, UnoServerPool } from './unoServer.js';
2
- import type { UnoServerOptions } from './unoServer.js';
1
+ import { GotenbergClient } from './gotenberg.js';
2
+ import type { GotenbergOptions, GotenbergConvertOptions } from './gotenberg.js';
3
3
  export interface ConvertOptions {
4
4
  format?: string;
5
- mode?: 'process' | 'socket' | 'pool' | 'unoserver';
5
+ mode?: 'process' | 'socket' | 'pool' | 'gotenberg';
6
6
  socketUrl?: string;
7
7
  timeout?: number;
8
8
  /**
9
- * When `mode: 'unoserver'`, reuse this instance instead of the module-level
10
- * shared singleton. Pass a `UnoServer` for a single warm daemon, or a
11
- * `UnoServerPool` for concurrent workers.
9
+ * When `mode: 'gotenberg'`, reuse this client instead of the module-level
10
+ * shared singleton. Create one `GotenbergClient` at app boot and pass it
11
+ * here to control the URL, auth and headers used for conversions.
12
12
  */
13
- unoServer?: UnoServer | UnoServerPool;
13
+ gotenbergClient?: GotenbergClient;
14
14
  /**
15
- * When `mode: 'unoserver'` and no `unoServer` is supplied, these options
16
- * are used to lazily construct the shared singleton on the first call.
15
+ * When `mode: 'gotenberg'` and no `gotenbergClient` is supplied, these
16
+ * options are used to lazily construct the shared singleton on the first
17
+ * call (e.g. `{ url: 'http://gotenberg:3000' }`).
17
18
  */
18
- unoServerOptions?: UnoServerOptions;
19
+ gotenbergOptions?: GotenbergOptions;
20
+ /**
21
+ * Extra per-conversion options forwarded to Gotenberg (pdfa, landscape,
22
+ * nativePageRanges, metadata, filename, ...).
23
+ */
24
+ gotenbergConvertOptions?: GotenbergConvertOptions;
19
25
  }
20
26
  export interface PoolOptions {
21
27
  workers?: number;
@@ -26,11 +32,13 @@ export interface PoolOptions {
26
32
  }
27
33
  export declare function wordToPDF(wordBuffer: Buffer | Uint8Array, optionsOrFormat?: string | ConvertOptions): Promise<Buffer>;
28
34
  /**
29
- * Stops and clears the shared unoserver singleton created lazily by
30
- * `wordToPDF(..., { mode: 'unoserver' })`. Safe to call even if nothing
31
- * was started. Useful in graceful-shutdown hooks.
35
+ * Clears the module-level Gotenberg client created lazily by
36
+ * `wordToPDF(..., { mode: 'gotenberg' })`. The client holds no OS
37
+ * resources (only Node's HTTP keep-alive sockets, which undici cleans up
38
+ * automatically), so this is only needed if you want the next call to
39
+ * pick up fresh `gotenbergOptions`.
32
40
  */
33
- export declare function stopSharedUnoServer(): Promise<void>;
41
+ export declare function resetSharedGotenbergClient(): void;
34
42
  export declare class LibreOfficePool {
35
43
  private workers;
36
44
  private queue;
package/dist/wordToPDF.js CHANGED
@@ -4,7 +4,7 @@ import { writeFileSync, readFileSync, unlinkSync, mkdirSync, existsSync } from '
4
4
  import { join } from 'path';
5
5
  import { tmpdir } from 'os';
6
6
  import { randomBytes } from 'crypto';
7
- import { UnoServer } from './unoServer.js';
7
+ import { GotenbergClient } from './gotenberg.js';
8
8
  // ═══════════════════════════════════════════════
9
9
  // SIMPLE CONVERT (backward compatible)
10
10
  // ═══════════════════════════════════════════════
@@ -15,8 +15,8 @@ export async function wordToPDF(wordBuffer, optionsOrFormat = 'pdf') {
15
15
  const format = opts.format ?? 'pdf';
16
16
  const mode = opts.mode ?? 'process';
17
17
  const timeout = opts.timeout ?? 30000;
18
- if (mode === 'unoserver') {
19
- return convertViaUnoServer(wordBuffer, format, opts);
18
+ if (mode === 'gotenberg') {
19
+ return convertViaGotenberg(wordBuffer, opts);
20
20
  }
21
21
  if (mode === 'socket' && opts.socketUrl) {
22
22
  return convertViaSocket(wordBuffer, format, opts.socketUrl, timeout);
@@ -24,39 +24,34 @@ export async function wordToPDF(wordBuffer, optionsOrFormat = 'pdf') {
24
24
  return convertViaProcess(wordBuffer, format, timeout);
25
25
  }
26
26
  // ═══════════════════════════════════════════════
27
- // UNOSERVER MODE (warm LibreOffice, 200–500ms)
27
+ // GOTENBERG MODE (HTTP sidecar service, recommended)
28
28
  // ═══════════════════════════════════════════════
29
- let sharedUnoServer = null;
30
- let sharedUnoServerStarting = null;
31
- async function getSharedUnoServer(options) {
32
- if (sharedUnoServer && sharedUnoServer.isReady())
33
- return sharedUnoServer;
34
- if (sharedUnoServerStarting)
35
- return sharedUnoServerStarting;
36
- sharedUnoServerStarting = (async () => {
37
- const server = new UnoServer(options);
38
- await server.start();
39
- sharedUnoServer = server;
40
- sharedUnoServerStarting = null;
41
- return server;
42
- })();
43
- return sharedUnoServerStarting;
29
+ let sharedGotenbergClient = null;
30
+ function getSharedGotenbergClient(options) {
31
+ if (sharedGotenbergClient)
32
+ return sharedGotenbergClient;
33
+ sharedGotenbergClient = new GotenbergClient(options);
34
+ return sharedGotenbergClient;
44
35
  }
45
36
  /**
46
- * Stops and clears the shared unoserver singleton created lazily by
47
- * `wordToPDF(..., { mode: 'unoserver' })`. Safe to call even if nothing
48
- * was started. Useful in graceful-shutdown hooks.
37
+ * Clears the module-level Gotenberg client created lazily by
38
+ * `wordToPDF(..., { mode: 'gotenberg' })`. The client holds no OS
39
+ * resources (only Node's HTTP keep-alive sockets, which undici cleans up
40
+ * automatically), so this is only needed if you want the next call to
41
+ * pick up fresh `gotenbergOptions`.
49
42
  */
50
- export async function stopSharedUnoServer() {
51
- const server = sharedUnoServer;
52
- sharedUnoServer = null;
53
- sharedUnoServerStarting = null;
54
- if (server)
55
- await server.stop();
43
+ export function resetSharedGotenbergClient() {
44
+ sharedGotenbergClient = null;
56
45
  }
57
- async function convertViaUnoServer(wordBuffer, format, opts) {
58
- const instance = opts.unoServer ?? (await getSharedUnoServer(opts.unoServerOptions));
59
- return instance.convert(wordBuffer, format);
46
+ async function convertViaGotenberg(wordBuffer, opts) {
47
+ const client = opts.gotenbergClient ?? getSharedGotenbergClient(opts.gotenbergOptions);
48
+ const convertOpts = {
49
+ ...(opts.gotenbergConvertOptions ?? {}),
50
+ };
51
+ if (opts.timeout !== undefined && convertOpts.timeout === undefined) {
52
+ convertOpts.timeout = opts.timeout;
53
+ }
54
+ return client.convert(wordBuffer, convertOpts);
60
55
  }
61
56
  // ═══════════════════════════════════════════════
62
57
  // PROCESS MODE (original, 3-5s)
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@docverse-pdf/server",
3
- "version": "1.1.0",
4
- "description": "DocVerse Server SDK — server-side PDF processing, XFDF merge, Word to PDF (unoserver + LibreOffice), image extraction",
3
+ "version": "1.1.2",
4
+ "description": "DocVerse Server SDK — server-side PDF processing, XFDF merge, Word to PDF (Gotenberg + LibreOffice), image extraction",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",
@@ -1,71 +0,0 @@
1
- export interface UnoServerOptions {
2
- /** TCP port the unoserver daemon listens on (default: 2003). */
3
- port?: number;
4
- /** Host/interface the daemon binds to (default: 127.0.0.1). */
5
- host?: string;
6
- /** Path to the `unoserver` executable (default: auto-detect). */
7
- unoserverPath?: string;
8
- /** Path to the `unoconvert` client executable (default: auto-detect). */
9
- unoconvertPath?: string;
10
- /** Path to the LibreOffice `soffice` binary (passed to unoserver --executable). */
11
- libreOfficePath?: string;
12
- /** Dedicated UNO user-profile dir for this daemon (passed to unoserver --user-installation). */
13
- userInstallation?: string;
14
- /** Max time (ms) to wait for the daemon to become ready (default: 15000). */
15
- startTimeout?: number;
16
- /** Max time (ms) to wait for a single conversion (default: 30000). */
17
- convertTimeout?: number;
18
- }
19
- export interface UnoServerPoolOptions extends Omit<UnoServerOptions, 'port' | 'userInstallation'> {
20
- /** Number of parallel unoserver daemons (default: 2). */
21
- workers?: number;
22
- /** First TCP port; each worker gets basePort+i (default: 2003). */
23
- basePort?: number;
24
- /** Base dir for per-worker user-profile dirs (default: os.tmpdir()/docverse_unoserver). */
25
- tempDir?: string;
26
- }
27
- export interface UnoServerStats {
28
- workers: number;
29
- ready: number;
30
- busy: number;
31
- queued: number;
32
- }
33
- /**
34
- * Wraps a single `unoserver` daemon process and a matching `unoconvert` client.
35
- *
36
- * Requires the `unoserver` Python package on the host:
37
- * pip install unoserver
38
- *
39
- * A single running daemon holds a warm LibreOffice instance, so conversions
40
- * typically complete in 200–500 ms — roughly 10× faster than cold `soffice
41
- * --convert-to` invocations.
42
- */
43
- export declare class UnoServer {
44
- private proc;
45
- private readonly opts;
46
- private ready;
47
- constructor(options?: UnoServerOptions);
48
- get port(): number;
49
- get host(): string;
50
- isReady(): boolean;
51
- start(): Promise<void>;
52
- convert(input: Buffer | Uint8Array, format?: string): Promise<Buffer>;
53
- stop(): Promise<void>;
54
- }
55
- /**
56
- * A pool of warm `unoserver` daemons, each listening on its own port.
57
- * Use this when you need concurrent conversions — jobs are dispatched
58
- * to the first idle worker, otherwise queued FIFO.
59
- */
60
- export declare class UnoServerPool {
61
- private workers;
62
- private queue;
63
- private readonly opts;
64
- private running;
65
- constructor(options?: UnoServerPoolOptions);
66
- start(): Promise<void>;
67
- convert(input: Buffer | Uint8Array, format?: string): Promise<Buffer>;
68
- private dispatch;
69
- stop(): Promise<void>;
70
- getStats(): UnoServerStats;
71
- }
package/dist/unoServer.js DELETED
@@ -1,339 +0,0 @@
1
- import { spawn, execSync } from 'child_process';
2
- import { createConnection } from 'net';
3
- // ═══════════════════════════════════════════════
4
- // HELPERS
5
- // ═══════════════════════════════════════════════
6
- function which(cmd) {
7
- try {
8
- const out = execSync(`command -v ${cmd}`, { stdio: ['ignore', 'pipe', 'ignore'] })
9
- .toString()
10
- .trim();
11
- return out || null;
12
- }
13
- catch {
14
- return null;
15
- }
16
- }
17
- function findUnoserver() {
18
- const candidates = [
19
- 'unoserver',
20
- '/usr/local/bin/unoserver',
21
- '/usr/bin/unoserver',
22
- '/opt/homebrew/bin/unoserver',
23
- ];
24
- for (const c of candidates) {
25
- const resolved = c.startsWith('/') ? c : which(c);
26
- if (resolved)
27
- return resolved;
28
- }
29
- return 'unoserver';
30
- }
31
- function findUnoconvert() {
32
- const candidates = [
33
- 'unoconvert',
34
- '/usr/local/bin/unoconvert',
35
- '/usr/bin/unoconvert',
36
- '/opt/homebrew/bin/unoconvert',
37
- ];
38
- for (const c of candidates) {
39
- const resolved = c.startsWith('/') ? c : which(c);
40
- if (resolved)
41
- return resolved;
42
- }
43
- return 'unoconvert';
44
- }
45
- function findSoffice() {
46
- const candidates = [
47
- '/usr/bin/soffice',
48
- '/usr/bin/libreoffice',
49
- '/usr/local/bin/soffice',
50
- '/Applications/LibreOffice.app/Contents/MacOS/soffice',
51
- ];
52
- for (const c of candidates) {
53
- try {
54
- execSync(`${c} --version`, { stdio: 'ignore' });
55
- return c;
56
- }
57
- catch { }
58
- }
59
- return undefined;
60
- }
61
- async function probePort(host, port, timeoutMs = 500) {
62
- return new Promise((resolve) => {
63
- const sock = createConnection({ host, port });
64
- const timer = setTimeout(() => {
65
- sock.destroy();
66
- resolve(false);
67
- }, timeoutMs);
68
- sock.once('connect', () => {
69
- clearTimeout(timer);
70
- sock.end();
71
- resolve(true);
72
- });
73
- sock.once('error', () => {
74
- clearTimeout(timer);
75
- resolve(false);
76
- });
77
- });
78
- }
79
- async function waitForPort(host, port, deadline) {
80
- while (Date.now() < deadline) {
81
- if (await probePort(host, port))
82
- return;
83
- await new Promise((r) => setTimeout(r, 250));
84
- }
85
- throw new Error(`unoserver did not become ready on ${host}:${port} within the timeout`);
86
- }
87
- // ═══════════════════════════════════════════════
88
- // SINGLE UNOSERVER DAEMON
89
- // ═══════════════════════════════════════════════
90
- /**
91
- * Wraps a single `unoserver` daemon process and a matching `unoconvert` client.
92
- *
93
- * Requires the `unoserver` Python package on the host:
94
- * pip install unoserver
95
- *
96
- * A single running daemon holds a warm LibreOffice instance, so conversions
97
- * typically complete in 200–500 ms — roughly 10× faster than cold `soffice
98
- * --convert-to` invocations.
99
- */
100
- export class UnoServer {
101
- proc = null;
102
- opts;
103
- ready = false;
104
- constructor(options = {}) {
105
- this.opts = {
106
- port: options.port ?? 2003,
107
- host: options.host ?? '127.0.0.1',
108
- unoserverPath: options.unoserverPath ?? findUnoserver(),
109
- unoconvertPath: options.unoconvertPath ?? findUnoconvert(),
110
- libreOfficePath: options.libreOfficePath ?? findSoffice(),
111
- userInstallation: options.userInstallation,
112
- startTimeout: options.startTimeout ?? 15000,
113
- convertTimeout: options.convertTimeout ?? 30000,
114
- };
115
- }
116
- get port() {
117
- return this.opts.port;
118
- }
119
- get host() {
120
- return this.opts.host;
121
- }
122
- isReady() {
123
- return this.ready;
124
- }
125
- async start() {
126
- if (this.ready)
127
- return;
128
- const args = ['--interface', this.opts.host, '--port', String(this.opts.port)];
129
- if (this.opts.libreOfficePath) {
130
- args.push('--executable', this.opts.libreOfficePath);
131
- }
132
- if (this.opts.userInstallation) {
133
- args.push('--user-installation', this.opts.userInstallation);
134
- }
135
- const proc = spawn(this.opts.unoserverPath, args, {
136
- stdio: ['ignore', 'pipe', 'pipe'],
137
- detached: false,
138
- });
139
- proc.on('error', (err) => {
140
- this.ready = false;
141
- // Let the caller see the launch error via waitForPort timeout.
142
- // Emit to stderr so it's debuggable without swallowing silently.
143
- process.stderr.write(`[unoserver] spawn error: ${err.message}\n`);
144
- });
145
- proc.on('exit', () => {
146
- this.ready = false;
147
- this.proc = null;
148
- });
149
- this.proc = proc;
150
- const deadline = Date.now() + this.opts.startTimeout;
151
- try {
152
- await waitForPort(this.opts.host, this.opts.port, deadline);
153
- }
154
- catch (err) {
155
- await this.stop();
156
- throw err;
157
- }
158
- this.ready = true;
159
- }
160
- async convert(input, format = 'pdf') {
161
- if (!this.ready) {
162
- throw new Error('UnoServer not started. Call .start() first.');
163
- }
164
- return new Promise((resolve, reject) => {
165
- const args = [
166
- '--host', this.opts.host,
167
- '--port', String(this.opts.port),
168
- '--convert-to', format,
169
- '-', // input: stdin
170
- '-', // output: stdout
171
- ];
172
- const client = spawn(this.opts.unoconvertPath, args, {
173
- stdio: ['pipe', 'pipe', 'pipe'],
174
- });
175
- const chunks = [];
176
- const errChunks = [];
177
- let settled = false;
178
- const timer = setTimeout(() => {
179
- if (settled)
180
- return;
181
- settled = true;
182
- try {
183
- client.kill('SIGKILL');
184
- }
185
- catch { }
186
- reject(new Error(`unoconvert timed out after ${this.opts.convertTimeout}ms`));
187
- }, this.opts.convertTimeout);
188
- client.stdout.on('data', (c) => chunks.push(c));
189
- client.stderr.on('data', (c) => errChunks.push(c));
190
- client.on('error', (err) => {
191
- if (settled)
192
- return;
193
- settled = true;
194
- clearTimeout(timer);
195
- reject(err);
196
- });
197
- client.on('close', (code) => {
198
- if (settled)
199
- return;
200
- settled = true;
201
- clearTimeout(timer);
202
- if (code === 0) {
203
- resolve(Buffer.concat(chunks));
204
- }
205
- else {
206
- const stderr = Buffer.concat(errChunks).toString('utf8').trim();
207
- reject(new Error(`unoconvert exited with code ${code}: ${stderr}`));
208
- }
209
- });
210
- client.stdin.on('error', (err) => {
211
- if (settled)
212
- return;
213
- settled = true;
214
- clearTimeout(timer);
215
- reject(err);
216
- });
217
- client.stdin.end(Buffer.from(input));
218
- });
219
- }
220
- async stop() {
221
- this.ready = false;
222
- const proc = this.proc;
223
- this.proc = null;
224
- if (!proc)
225
- return;
226
- try {
227
- proc.kill('SIGTERM');
228
- }
229
- catch { }
230
- // Give it a moment to exit cleanly, then force.
231
- await new Promise((resolve) => {
232
- const killTimer = setTimeout(() => {
233
- try {
234
- proc.kill('SIGKILL');
235
- }
236
- catch { }
237
- resolve();
238
- }, 2000);
239
- proc.once('exit', () => {
240
- clearTimeout(killTimer);
241
- resolve();
242
- });
243
- });
244
- }
245
- }
246
- /**
247
- * A pool of warm `unoserver` daemons, each listening on its own port.
248
- * Use this when you need concurrent conversions — jobs are dispatched
249
- * to the first idle worker, otherwise queued FIFO.
250
- */
251
- export class UnoServerPool {
252
- workers = [];
253
- queue = [];
254
- opts;
255
- running = false;
256
- constructor(options = {}) {
257
- this.opts = {
258
- workers: options.workers ?? 2,
259
- basePort: options.basePort ?? 2003,
260
- tempDir: options.tempDir ?? `${process.env.TMPDIR ?? '/tmp'}/docverse_unoserver`,
261
- startTimeout: options.startTimeout ?? 15000,
262
- convertTimeout: options.convertTimeout ?? 30000,
263
- host: options.host,
264
- unoserverPath: options.unoserverPath,
265
- unoconvertPath: options.unoconvertPath,
266
- libreOfficePath: options.libreOfficePath,
267
- };
268
- }
269
- async start() {
270
- if (this.running)
271
- return;
272
- const { mkdirSync, existsSync } = await import('fs');
273
- if (!existsSync(this.opts.tempDir)) {
274
- mkdirSync(this.opts.tempDir, { recursive: true });
275
- }
276
- const servers = [];
277
- for (let i = 0; i < this.opts.workers; i++) {
278
- const userInstallation = `file://${this.opts.tempDir}/worker_${i}`;
279
- servers.push(new UnoServer({
280
- port: this.opts.basePort + i,
281
- host: this.opts.host,
282
- unoserverPath: this.opts.unoserverPath,
283
- unoconvertPath: this.opts.unoconvertPath,
284
- libreOfficePath: this.opts.libreOfficePath,
285
- userInstallation,
286
- startTimeout: this.opts.startTimeout,
287
- convertTimeout: this.opts.convertTimeout,
288
- }));
289
- }
290
- await Promise.all(servers.map((s) => s.start()));
291
- this.workers = servers.map((server) => ({ server, busy: false }));
292
- this.running = true;
293
- }
294
- async convert(input, format = 'pdf') {
295
- if (!this.running) {
296
- throw new Error('UnoServerPool not started. Call .start() first.');
297
- }
298
- const buf = Buffer.from(input);
299
- return new Promise((resolve, reject) => {
300
- const idle = this.workers.find((w) => !w.busy);
301
- if (idle) {
302
- this.dispatch(idle, buf, format, resolve, reject);
303
- }
304
- else {
305
- this.queue.push({ input: buf, format, resolve, reject });
306
- }
307
- });
308
- }
309
- dispatch(worker, buf, format, resolve, reject) {
310
- worker.busy = true;
311
- worker.server
312
- .convert(buf, format)
313
- .then(resolve, reject)
314
- .finally(() => {
315
- worker.busy = false;
316
- const next = this.queue.shift();
317
- if (next) {
318
- this.dispatch(worker, next.input, next.format, next.resolve, next.reject);
319
- }
320
- });
321
- }
322
- async stop() {
323
- this.running = false;
324
- const pending = this.queue.splice(0);
325
- for (const job of pending) {
326
- job.reject(new Error('UnoServerPool stopped before job completed'));
327
- }
328
- await Promise.all(this.workers.map((w) => w.server.stop()));
329
- this.workers = [];
330
- }
331
- getStats() {
332
- return {
333
- workers: this.workers.length,
334
- ready: this.workers.filter((w) => w.server.isReady()).length,
335
- busy: this.workers.filter((w) => w.busy).length,
336
- queued: this.queue.length,
337
- };
338
- }
339
- }